drivers/gpu/drm/i915/gt/selftest_lrc.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
25
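/*
 * Allocate a page of internal memory, mark it cacheable and pin it into the
 * global GTT so batches in the tests below can write values into it and we
 * can read them back from the CPU.
 */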
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
27 {
28         struct drm_i915_gem_object *obj;
29         struct i915_vma *vma;
30         int err;
31
32         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
33         if (IS_ERR(obj))
34                 return ERR_CAST(obj);
35
36         i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
37
38         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
39         if (IS_ERR(vma)) {
40                 i915_gem_object_put(obj);
41                 return vma;
42         }
43
44         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
45         if (err) {
46                 i915_gem_object_put(obj);
47                 return ERR_PTR(err);
48         }
49
50         return vma;
51 }
52
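/*
 * The heartbeat submits its own requests behind our back and may declare the
 * engine hung while we deliberately keep it busy with spinners, so park it
 * (holding an engine-pm wakeref) for the duration of a test and restore the
 * saved interval afterwards.
 */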
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
54                                      unsigned long *saved)
55 {
56         *saved = engine->props.heartbeat_interval_ms;
57         engine->props.heartbeat_interval_ms = 0;
58
59         intel_engine_pm_get(engine);
60         intel_engine_park_heartbeat(engine);
61 }
62
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
64                                     unsigned long saved)
65 {
66         intel_engine_pm_put(engine);
67
68         engine->props.heartbeat_interval_ms = saved;
69 }
70
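/*
 * Smoke test: on every engine, run a simple spinning batch in a fresh
 * context, check that it starts executing, then terminate it cleanly.
 */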
71 static int live_sanitycheck(void *arg)
72 {
73         struct intel_gt *gt = arg;
74         struct intel_engine_cs *engine;
75         enum intel_engine_id id;
76         struct igt_spinner spin;
77         int err = 0;
78
79         if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
80                 return 0;
81
82         if (igt_spinner_init(&spin, gt))
83                 return -ENOMEM;
84
85         for_each_engine(engine, gt, id) {
86                 struct intel_context *ce;
87                 struct i915_request *rq;
88
89                 ce = intel_context_create(engine);
90                 if (IS_ERR(ce)) {
91                         err = PTR_ERR(ce);
92                         break;
93                 }
94
95                 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
96                 if (IS_ERR(rq)) {
97                         err = PTR_ERR(rq);
98                         goto out_ctx;
99                 }
100
101                 i915_request_add(rq);
102                 if (!igt_wait_for_spinner(&spin, rq)) {
103                         GEM_TRACE("spinner failed to start\n");
104                         GEM_TRACE_DUMP();
105                         intel_gt_set_wedged(gt);
106                         err = -EIO;
107                         goto out_ctx;
108                 }
109
110                 igt_spinner_end(&spin);
111                 if (igt_flush_test(gt->i915)) {
112                         err = -EIO;
113                         goto out_ctx;
114                 }
115
116 out_ctx:
117                 intel_context_put(ce);
118                 if (err)
119                         break;
120         }
121
122         igt_spinner_fini(&spin);
123         return err;
124 }
125
126 static int live_unlite_restore(struct intel_gt *gt, int prio)
127 {
128         struct intel_engine_cs *engine;
129         enum intel_engine_id id;
130         struct igt_spinner spin;
131         int err = -ENOMEM;
132
133         /*
134          * Check that we can correctly context switch between 2 instances
135          * on the same engine from the same parent context.
136          */
137
138         if (igt_spinner_init(&spin, gt))
139                 return err;
140
141         err = 0;
142         for_each_engine(engine, gt, id) {
143                 struct intel_context *ce[2] = {};
144                 struct i915_request *rq[2];
145                 struct igt_live_test t;
146                 unsigned long saved;
147                 int n;
148
149                 if (prio && !intel_engine_has_preemption(engine))
150                         continue;
151
152                 if (!intel_engine_can_store_dword(engine))
153                         continue;
154
155                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
156                         err = -EIO;
157                         break;
158                 }
159                 engine_heartbeat_disable(engine, &saved);
160
161                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
162                         struct intel_context *tmp;
163
164                         tmp = intel_context_create(engine);
165                         if (IS_ERR(tmp)) {
166                                 err = PTR_ERR(tmp);
167                                 goto err_ce;
168                         }
169
170                         err = intel_context_pin(tmp);
171                         if (err) {
172                                 intel_context_put(tmp);
173                                 goto err_ce;
174                         }
175
176                         /*
177                          * Setup the pair of contexts such that if we
178                          * lite-restore using the RING_TAIL from ce[1] it
179                          * will execute garbage from ce[0]->ring.
180                          */
181                         memset(tmp->ring->vaddr,
182                                POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
183                                tmp->ring->vma->size);
184
185                         ce[n] = tmp;
186                 }
187                 GEM_BUG_ON(!ce[1]->ring->size);
188                 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
189                 __execlists_update_reg_state(ce[1], engine);
190
191                 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
192                 if (IS_ERR(rq[0])) {
193                         err = PTR_ERR(rq[0]);
194                         goto err_ce;
195                 }
196
197                 i915_request_get(rq[0]);
198                 i915_request_add(rq[0]);
199                 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
200
201                 if (!igt_wait_for_spinner(&spin, rq[0])) {
202                         i915_request_put(rq[0]);
203                         goto err_ce;
204                 }
205
206                 rq[1] = i915_request_create(ce[1]);
207                 if (IS_ERR(rq[1])) {
208                         err = PTR_ERR(rq[1]);
209                         i915_request_put(rq[0]);
210                         goto err_ce;
211                 }
212
213                 if (!prio) {
214                         /*
215                          * Ensure we do the switch to ce[1] on completion.
216                          *
217                          * rq[0] is already submitted, so this should reduce
218                          * to a no-op (a wait on a request on the same engine
219                          * uses the submit fence, not the completion fence),
220                          * but it will install a dependency of rq[1] upon rq[0]
221                          * that will prevent the pair being reordered by
222                          * timeslicing.
223                          */
224                         i915_request_await_dma_fence(rq[1], &rq[0]->fence);
225                 }
226
227                 i915_request_get(rq[1]);
228                 i915_request_add(rq[1]);
229                 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
230                 i915_request_put(rq[0]);
231
232                 if (prio) {
233                         struct i915_sched_attr attr = {
234                                 .priority = prio,
235                         };
236
237                         /* Alternatively preempt the spinner with ce[1] */
238                         engine->schedule(rq[1], &attr);
239                 }
240
241                 /* And switch back to ce[0] for good measure */
242                 rq[0] = i915_request_create(ce[0]);
243                 if (IS_ERR(rq[0])) {
244                         err = PTR_ERR(rq[0]);
245                         i915_request_put(rq[1]);
246                         goto err_ce;
247                 }
248
249                 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
250                 i915_request_get(rq[0]);
251                 i915_request_add(rq[0]);
252                 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
253                 i915_request_put(rq[1]);
254                 i915_request_put(rq[0]);
255
256 err_ce:
257                 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
258                 igt_spinner_end(&spin);
259                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
260                         if (IS_ERR_OR_NULL(ce[n]))
261                                 break;
262
263                         intel_context_unpin(ce[n]);
264                         intel_context_put(ce[n]);
265                 }
266
267                 engine_heartbeat_enable(engine, saved);
268                 if (igt_live_test_end(&t))
269                         err = -EIO;
270                 if (err)
271                         break;
272         }
273
274         igt_spinner_fini(&spin);
275         return err;
276 }
277
278 static int live_unlite_switch(void *arg)
279 {
280         return live_unlite_restore(arg, 0);
281 }
282
283 static int live_unlite_preempt(void *arg)
284 {
285         return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
286 }
287
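/*
 * Emit a busy-wait on dword[idx] of the scratch page (poll until it becomes
 * non-zero), with arbitration enabled across the wait so the request can be
 * preempted, followed by a store of 1 into dword[idx - 1]. Each request,
 * once released, therefore releases the request one slot below it.
 */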
288 static int
289 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
290 {
291         u32 *cs;
292
293         cs = intel_ring_begin(rq, 10);
294         if (IS_ERR(cs))
295                 return PTR_ERR(cs);
296
297         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
298
299         *cs++ = MI_SEMAPHORE_WAIT |
300                 MI_SEMAPHORE_GLOBAL_GTT |
301                 MI_SEMAPHORE_POLL |
302                 MI_SEMAPHORE_SAD_NEQ_SDD;
303         *cs++ = 0;
304         *cs++ = i915_ggtt_offset(vma) + 4 * idx;
305         *cs++ = 0;
306
307         if (idx > 0) {
308                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
309                 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
310                 *cs++ = 0;
311                 *cs++ = 1;
312         } else {
313                 *cs++ = MI_NOOP;
314                 *cs++ = MI_NOOP;
315                 *cs++ = MI_NOOP;
316                 *cs++ = MI_NOOP;
317         }
318
319         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
320
321         intel_ring_advance(rq, cs);
322         return 0;
323 }
324
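/*
 * Create a fresh context on @engine and submit a request running
 * emit_semaphore_chain() at slot @idx; on success a reference to the
 * request is returned to the caller.
 */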
325 static struct i915_request *
326 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
327 {
328         struct intel_context *ce;
329         struct i915_request *rq;
330         int err;
331
332         ce = intel_context_create(engine);
333         if (IS_ERR(ce))
334                 return ERR_CAST(ce);
335
336         rq = intel_context_create_request(ce);
337         if (IS_ERR(rq))
338                 goto out_ce;
339
340         err = 0;
341         if (rq->engine->emit_init_breadcrumb)
342                 err = rq->engine->emit_init_breadcrumb(rq);
343         if (err == 0)
344                 err = emit_semaphore_chain(rq, vma, idx);
345         if (err == 0)
346                 i915_request_get(rq);
347         i915_request_add(rq);
348         if (err)
349                 rq = ERR_PTR(err);
350
351 out_ce:
352         intel_context_put(ce);
353         return rq;
354 }
355
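/*
 * From the kernel context, write 1 into dword[idx - 1] to start unwinding
 * the semaphore chain, scheduling the write at @prio and kicking the
 * submission tasklet so it jumps ahead of the blocked waiters.
 */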
356 static int
357 release_queue(struct intel_engine_cs *engine,
358               struct i915_vma *vma,
359               int idx, int prio)
360 {
361         struct i915_sched_attr attr = {
362                 .priority = prio,
363         };
364         struct i915_request *rq;
365         u32 *cs;
366
367         rq = intel_engine_create_kernel_request(engine);
368         if (IS_ERR(rq))
369                 return PTR_ERR(rq);
370
371         cs = intel_ring_begin(rq, 4);
372         if (IS_ERR(cs)) {
373                 i915_request_add(rq);
374                 return PTR_ERR(cs);
375         }
376
377         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
378         *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
379         *cs++ = 0;
380         *cs++ = 1;
381
382         intel_ring_advance(rq, cs);
383
384         i915_request_get(rq);
385         i915_request_add(rq);
386
387         local_bh_disable();
388         engine->schedule(rq, &attr);
389         local_bh_enable(); /* kick tasklet */
390
391         i915_request_put(rq);
392
393         return 0;
394 }
395
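/*
 * Build a long chain of semaphore waiters: a head request on @outer plus
 * @count requests on every engine, each blocked until its successor runs.
 * Release the tail at maximum priority and check the head completes within
 * a generous timeout scaled by engine count and chain length; that relies
 * on timeslicing rotating through the blocked requests so the chain can
 * unwind.
 */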
396 static int
397 slice_semaphore_queue(struct intel_engine_cs *outer,
398                       struct i915_vma *vma,
399                       int count)
400 {
401         struct intel_engine_cs *engine;
402         struct i915_request *head;
403         enum intel_engine_id id;
404         int err, i, n = 0;
405
406         head = semaphore_queue(outer, vma, n++);
407         if (IS_ERR(head))
408                 return PTR_ERR(head);
409
410         for_each_engine(engine, outer->gt, id) {
411                 for (i = 0; i < count; i++) {
412                         struct i915_request *rq;
413
414                         rq = semaphore_queue(engine, vma, n++);
415                         if (IS_ERR(rq)) {
416                                 err = PTR_ERR(rq);
417                                 goto out;
418                         }
419
420                         i915_request_put(rq);
421                 }
422         }
423
424         err = release_queue(outer, vma, n, INT_MAX);
425         if (err)
426                 goto out;
427
428         if (i915_request_wait(head, 0,
429                               2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
430                 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
431                        count, n);
432                 GEM_TRACE_DUMP();
433                 intel_gt_set_wedged(outer->gt);
434                 err = -EIO;
435         }
436
437 out:
438         i915_request_put(head);
439         return err;
440 }
441
442 static int live_timeslice_preempt(void *arg)
443 {
444         struct intel_gt *gt = arg;
445         struct drm_i915_gem_object *obj;
446         struct i915_vma *vma;
447         void *vaddr;
448         int err = 0;
449         int count;
450
451         /*
452          * If a request takes too long, we would like to give other users
453          * a fair go on the GPU. In particular, users may create batches
454          * that wait upon external input, where that input may even be
455          * supplied by another GPU job. To avoid blocking forever, we
456          * need to preempt the current task and replace it with another
457          * ready task.
458          */
459         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
460                 return 0;
461
462         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
463         if (IS_ERR(obj))
464                 return PTR_ERR(obj);
465
466         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
467         if (IS_ERR(vma)) {
468                 err = PTR_ERR(vma);
469                 goto err_obj;
470         }
471
472         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
473         if (IS_ERR(vaddr)) {
474                 err = PTR_ERR(vaddr);
475                 goto err_obj;
476         }
477
478         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
479         if (err)
480                 goto err_map;
481
482         for_each_prime_number_from(count, 1, 16) {
483                 struct intel_engine_cs *engine;
484                 enum intel_engine_id id;
485
486                 for_each_engine(engine, gt, id) {
487                         unsigned long saved;
488
489                         if (!intel_engine_has_preemption(engine))
490                                 continue;
491
492                         memset(vaddr, 0, PAGE_SIZE);
493
494                         engine_heartbeat_disable(engine, &saved);
495                         err = slice_semaphore_queue(engine, vma, count);
496                         engine_heartbeat_enable(engine, saved);
497                         if (err)
498                                 goto err_pin;
499
500                         if (igt_flush_test(gt->i915)) {
501                                 err = -EIO;
502                                 goto err_pin;
503                         }
504                 }
505         }
506
507 err_pin:
508         i915_vma_unpin(vma);
509 err_map:
510         i915_gem_object_unpin_map(obj);
511 err_obj:
512         i915_gem_object_put(obj);
513         return err;
514 }
515
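/* Submit an empty request on the kernel context and return a reference. */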
516 static struct i915_request *nop_request(struct intel_engine_cs *engine)
517 {
518         struct i915_request *rq;
519
520         rq = intel_engine_create_kernel_request(engine);
521         if (IS_ERR(rq))
522                 return rq;
523
524         i915_request_get(rq);
525         i915_request_add(rq);
526
527         return rq;
528 }
529
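/*
 * Poll, flushing the submission tasklet each time, until @rq has actually
 * been submitted to the HW or the timeout expires.
 */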
530 static int wait_for_submit(struct intel_engine_cs *engine,
531                            struct i915_request *rq,
532                            unsigned long timeout)
533 {
534         timeout += jiffies;
535         do {
536                 cond_resched();
537                 intel_engine_flush_submission(engine);
538                 if (i915_request_is_active(rq))
539                         return 0;
540         } while (time_before(jiffies, timeout));
541
542         return -ETIME;
543 }
544
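/*
 * Two full timeslice intervals (converted to jiffies), plus one, which
 * should be ample time for the expected timeslice rotation to occur.
 */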
545 static long timeslice_threshold(const struct intel_engine_cs *engine)
546 {
547         return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
548 }
549
550 static int live_timeslice_queue(void *arg)
551 {
552         struct intel_gt *gt = arg;
553         struct drm_i915_gem_object *obj;
554         struct intel_engine_cs *engine;
555         enum intel_engine_id id;
556         struct i915_vma *vma;
557         void *vaddr;
558         int err = 0;
559
560         /*
561          * Make sure that even if ELSP[0] and ELSP[1] are filled, with
562          * timeslicing between them disabled, we *do* enable timeslicing
563          * if the queue demands it. (Normally, we do not submit if
564          * ELSP[1] is already occupied, so must rely on timeslicing to
565          * eject ELSP[0] in favour of the queue.)
566          */
567         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
568                 return 0;
569
570         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
571         if (IS_ERR(obj))
572                 return PTR_ERR(obj);
573
574         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
575         if (IS_ERR(vma)) {
576                 err = PTR_ERR(vma);
577                 goto err_obj;
578         }
579
580         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
581         if (IS_ERR(vaddr)) {
582                 err = PTR_ERR(vaddr);
583                 goto err_obj;
584         }
585
586         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
587         if (err)
588                 goto err_map;
589
590         for_each_engine(engine, gt, id) {
591                 struct i915_sched_attr attr = {
592                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
593                 };
594                 struct i915_request *rq, *nop;
595                 unsigned long saved;
596
597                 if (!intel_engine_has_preemption(engine))
598                         continue;
599
600                 engine_heartbeat_disable(engine, &saved);
601                 memset(vaddr, 0, PAGE_SIZE);
602
603                 /* ELSP[0]: semaphore wait */
604                 rq = semaphore_queue(engine, vma, 0);
605                 if (IS_ERR(rq)) {
606                         err = PTR_ERR(rq);
607                         goto err_heartbeat;
608                 }
609                 engine->schedule(rq, &attr);
610                 err = wait_for_submit(engine, rq, HZ / 2);
611                 if (err) {
612                         pr_err("%s: Timed out trying to submit semaphores\n",
613                                engine->name);
614                         goto err_rq;
615                 }
616
617                 /* ELSP[1]: nop request */
618                 nop = nop_request(engine);
619                 if (IS_ERR(nop)) {
620                         err = PTR_ERR(nop);
621                         goto err_rq;
622                 }
623                 err = wait_for_submit(engine, nop, HZ / 2);
624                 i915_request_put(nop);
625                 if (err) {
626                         pr_err("%s: Timed out trying to submit nop\n",
627                                engine->name);
628                         goto err_rq;
629                 }
630
631                 GEM_BUG_ON(i915_request_completed(rq));
632                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
633
634                 /* Queue: semaphore signal, at the same priority as the semaphore */
635                 err = release_queue(engine, vma, 1, effective_prio(rq));
636                 if (err)
637                         goto err_rq;
638
639                 intel_engine_flush_submission(engine);
640                 if (!READ_ONCE(engine->execlists.timer.expires) &&
641                     !i915_request_completed(rq)) {
642                         struct drm_printer p =
643                                 drm_info_printer(gt->i915->drm.dev);
644
645                         GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
646                                       engine->name);
647                         intel_engine_dump(engine, &p,
648                                           "%s\n", engine->name);
649                         GEM_TRACE_DUMP();
650
651                         memset(vaddr, 0xff, PAGE_SIZE);
652                         err = -EINVAL;
653                 }
654
655                 /* Timeslice every jiffy, so within 2 we should signal */
656                 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
657                         struct drm_printer p =
658                                 drm_info_printer(gt->i915->drm.dev);
659
660                         pr_err("%s: Failed to timeslice into queue\n",
661                                engine->name);
662                         intel_engine_dump(engine, &p,
663                                           "%s\n", engine->name);
664
665                         memset(vaddr, 0xff, PAGE_SIZE);
666                         err = -EIO;
667                 }
668 err_rq:
669                 i915_request_put(rq);
670 err_heartbeat:
671                 engine_heartbeat_enable(engine, saved);
672                 if (err)
673                         break;
674         }
675
676         i915_vma_unpin(vma);
677 err_map:
678         i915_gem_object_unpin_map(obj);
679 err_obj:
680         i915_gem_object_put(obj);
681         return err;
682 }
683
684 static int live_busywait_preempt(void *arg)
685 {
686         struct intel_gt *gt = arg;
687         struct i915_gem_context *ctx_hi, *ctx_lo;
688         struct intel_engine_cs *engine;
689         struct drm_i915_gem_object *obj;
690         struct i915_vma *vma;
691         enum intel_engine_id id;
692         int err = -ENOMEM;
693         u32 *map;
694
695         /*
696          * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
697          * preempt the busywaits used to synchronise between rings.
698          */
699
700         ctx_hi = kernel_context(gt->i915);
701         if (!ctx_hi)
702                 return -ENOMEM;
703         ctx_hi->sched.priority =
704                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
705
706         ctx_lo = kernel_context(gt->i915);
707         if (!ctx_lo)
708                 goto err_ctx_hi;
709         ctx_lo->sched.priority =
710                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
711
712         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
713         if (IS_ERR(obj)) {
714                 err = PTR_ERR(obj);
715                 goto err_ctx_lo;
716         }
717
718         map = i915_gem_object_pin_map(obj, I915_MAP_WC);
719         if (IS_ERR(map)) {
720                 err = PTR_ERR(map);
721                 goto err_obj;
722         }
723
724         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
725         if (IS_ERR(vma)) {
726                 err = PTR_ERR(vma);
727                 goto err_map;
728         }
729
730         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
731         if (err)
732                 goto err_map;
733
734         for_each_engine(engine, gt, id) {
735                 struct i915_request *lo, *hi;
736                 struct igt_live_test t;
737                 u32 *cs;
738
739                 if (!intel_engine_has_preemption(engine))
740                         continue;
741
742                 if (!intel_engine_can_store_dword(engine))
743                         continue;
744
745                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
746                         err = -EIO;
747                         goto err_vma;
748                 }
749
750                 /*
751                  * We create two requests. The low priority request
752                  * busywaits on a semaphore (inside the ringbuffer where
753                  * it should be preemptible) and the high priority request
754                  * uses a MI_STORE_DWORD_IMM to update the semaphore value
755                  * allowing the first request to complete. If preemption
756                  * fails, we hang instead.
757                  */
758
759                 lo = igt_request_alloc(ctx_lo, engine);
760                 if (IS_ERR(lo)) {
761                         err = PTR_ERR(lo);
762                         goto err_vma;
763                 }
764
765                 cs = intel_ring_begin(lo, 8);
766                 if (IS_ERR(cs)) {
767                         err = PTR_ERR(cs);
768                         i915_request_add(lo);
769                         goto err_vma;
770                 }
771
772                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
773                 *cs++ = i915_ggtt_offset(vma);
774                 *cs++ = 0;
775                 *cs++ = 1;
776
777                 /* XXX Do we need a flush + invalidate here? */
778
779                 *cs++ = MI_SEMAPHORE_WAIT |
780                         MI_SEMAPHORE_GLOBAL_GTT |
781                         MI_SEMAPHORE_POLL |
782                         MI_SEMAPHORE_SAD_EQ_SDD;
783                 *cs++ = 0;
784                 *cs++ = i915_ggtt_offset(vma);
785                 *cs++ = 0;
786
787                 intel_ring_advance(lo, cs);
788
789                 i915_request_get(lo);
790                 i915_request_add(lo);
791
792                 if (wait_for(READ_ONCE(*map), 10)) {
793                         i915_request_put(lo);
794                         err = -ETIMEDOUT;
795                         goto err_vma;
796                 }
797
798                 /* Low priority request should be busywaiting now */
799                 if (i915_request_wait(lo, 0, 1) != -ETIME) {
800                         i915_request_put(lo);
801                         pr_err("%s: Busywaiting request did not busywait!\n",
802                                engine->name);
803                         err = -EIO;
804                         goto err_vma;
805                 }
806
807                 hi = igt_request_alloc(ctx_hi, engine);
808                 if (IS_ERR(hi)) {
809                         err = PTR_ERR(hi);
810                         i915_request_put(lo);
811                         goto err_vma;
812                 }
813
814                 cs = intel_ring_begin(hi, 4);
815                 if (IS_ERR(cs)) {
816                         err = PTR_ERR(cs);
817                         i915_request_add(hi);
818                         i915_request_put(lo);
819                         goto err_vma;
820                 }
821
822                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
823                 *cs++ = i915_ggtt_offset(vma);
824                 *cs++ = 0;
825                 *cs++ = 0;
826
827                 intel_ring_advance(hi, cs);
828                 i915_request_add(hi);
829
830                 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
831                         struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
832
833                         pr_err("%s: Failed to preempt semaphore busywait!\n",
834                                engine->name);
835
836                         intel_engine_dump(engine, &p, "%s\n", engine->name);
837                         GEM_TRACE_DUMP();
838
839                         i915_request_put(lo);
840                         intel_gt_set_wedged(gt);
841                         err = -EIO;
842                         goto err_vma;
843                 }
844                 GEM_BUG_ON(READ_ONCE(*map));
845                 i915_request_put(lo);
846
847                 if (igt_live_test_end(&t)) {
848                         err = -EIO;
849                         goto err_vma;
850                 }
851         }
852
853         err = 0;
854 err_vma:
855         i915_vma_unpin(vma);
856 err_map:
857         i915_gem_object_unpin_map(obj);
858 err_obj:
859         i915_gem_object_put(obj);
860 err_ctx_lo:
861         kernel_context_close(ctx_lo);
862 err_ctx_hi:
863         kernel_context_close(ctx_hi);
864         return err;
865 }
866
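/*
 * Convenience wrapper: look up the legacy engine instance within @ctx,
 * build a spinner request on it, and drop the intel_context reference
 * before returning the request.
 */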
867 static struct i915_request *
868 spinner_create_request(struct igt_spinner *spin,
869                        struct i915_gem_context *ctx,
870                        struct intel_engine_cs *engine,
871                        u32 arb)
872 {
873         struct intel_context *ce;
874         struct i915_request *rq;
875
876         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
877         if (IS_ERR(ce))
878                 return ERR_CAST(ce);
879
880         rq = igt_spinner_create_request(spin, ce, arb);
881         intel_context_put(ce);
882         return rq;
883 }
884
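/*
 * Start a low priority spinner and then submit a high priority spinner on
 * the same engine; the second should preempt the first and begin executing
 * while the low priority spinner is still running.
 */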
885 static int live_preempt(void *arg)
886 {
887         struct intel_gt *gt = arg;
888         struct i915_gem_context *ctx_hi, *ctx_lo;
889         struct igt_spinner spin_hi, spin_lo;
890         struct intel_engine_cs *engine;
891         enum intel_engine_id id;
892         int err = -ENOMEM;
893
894         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
895                 return 0;
896
897         if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
898                 pr_err("Logical preemption supported, but not exposed\n");
899
900         if (igt_spinner_init(&spin_hi, gt))
901                 return -ENOMEM;
902
903         if (igt_spinner_init(&spin_lo, gt))
904                 goto err_spin_hi;
905
906         ctx_hi = kernel_context(gt->i915);
907         if (!ctx_hi)
908                 goto err_spin_lo;
909         ctx_hi->sched.priority =
910                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
911
912         ctx_lo = kernel_context(gt->i915);
913         if (!ctx_lo)
914                 goto err_ctx_hi;
915         ctx_lo->sched.priority =
916                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
917
918         for_each_engine(engine, gt, id) {
919                 struct igt_live_test t;
920                 struct i915_request *rq;
921
922                 if (!intel_engine_has_preemption(engine))
923                         continue;
924
925                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
926                         err = -EIO;
927                         goto err_ctx_lo;
928                 }
929
930                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
931                                             MI_ARB_CHECK);
932                 if (IS_ERR(rq)) {
933                         err = PTR_ERR(rq);
934                         goto err_ctx_lo;
935                 }
936
937                 i915_request_add(rq);
938                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
939                         GEM_TRACE("lo spinner failed to start\n");
940                         GEM_TRACE_DUMP();
941                         intel_gt_set_wedged(gt);
942                         err = -EIO;
943                         goto err_ctx_lo;
944                 }
945
946                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
947                                             MI_ARB_CHECK);
948                 if (IS_ERR(rq)) {
949                         igt_spinner_end(&spin_lo);
950                         err = PTR_ERR(rq);
951                         goto err_ctx_lo;
952                 }
953
954                 i915_request_add(rq);
955                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
956                         GEM_TRACE("hi spinner failed to start\n");
957                         GEM_TRACE_DUMP();
958                         intel_gt_set_wedged(gt);
959                         err = -EIO;
960                         goto err_ctx_lo;
961                 }
962
963                 igt_spinner_end(&spin_hi);
964                 igt_spinner_end(&spin_lo);
965
966                 if (igt_live_test_end(&t)) {
967                         err = -EIO;
968                         goto err_ctx_lo;
969                 }
970         }
971
972         err = 0;
973 err_ctx_lo:
974         kernel_context_close(ctx_lo);
975 err_ctx_hi:
976         kernel_context_close(ctx_hi);
977 err_spin_lo:
978         igt_spinner_fini(&spin_lo);
979 err_spin_hi:
980         igt_spinner_fini(&spin_hi);
981         return err;
982 }
983
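/*
 * As live_preempt, but submit the second spinner without any priority boost
 * first, check it does not overtake, and only then raise its priority after
 * submission; the late boost should still trigger preemption of the low
 * priority spinner.
 */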
984 static int live_late_preempt(void *arg)
985 {
986         struct intel_gt *gt = arg;
987         struct i915_gem_context *ctx_hi, *ctx_lo;
988         struct igt_spinner spin_hi, spin_lo;
989         struct intel_engine_cs *engine;
990         struct i915_sched_attr attr = {};
991         enum intel_engine_id id;
992         int err = -ENOMEM;
993
994         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
995                 return 0;
996
997         if (igt_spinner_init(&spin_hi, gt))
998                 return -ENOMEM;
999
1000         if (igt_spinner_init(&spin_lo, gt))
1001                 goto err_spin_hi;
1002
1003         ctx_hi = kernel_context(gt->i915);
1004         if (!ctx_hi)
1005                 goto err_spin_lo;
1006
1007         ctx_lo = kernel_context(gt->i915);
1008         if (!ctx_lo)
1009                 goto err_ctx_hi;
1010
1011         /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1012         ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1013
1014         for_each_engine(engine, gt, id) {
1015                 struct igt_live_test t;
1016                 struct i915_request *rq;
1017
1018                 if (!intel_engine_has_preemption(engine))
1019                         continue;
1020
1021                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1022                         err = -EIO;
1023                         goto err_ctx_lo;
1024                 }
1025
1026                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1027                                             MI_ARB_CHECK);
1028                 if (IS_ERR(rq)) {
1029                         err = PTR_ERR(rq);
1030                         goto err_ctx_lo;
1031                 }
1032
1033                 i915_request_add(rq);
1034                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1035                         pr_err("First context failed to start\n");
1036                         goto err_wedged;
1037                 }
1038
1039                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1040                                             MI_NOOP);
1041                 if (IS_ERR(rq)) {
1042                         igt_spinner_end(&spin_lo);
1043                         err = PTR_ERR(rq);
1044                         goto err_ctx_lo;
1045                 }
1046
1047                 i915_request_add(rq);
1048                 if (igt_wait_for_spinner(&spin_hi, rq)) {
1049                         pr_err("Second context overtook first?\n");
1050                         goto err_wedged;
1051                 }
1052
1053                 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1054                 engine->schedule(rq, &attr);
1055
1056                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1057                         pr_err("High priority context failed to preempt the low priority context\n");
1058                         GEM_TRACE_DUMP();
1059                         goto err_wedged;
1060                 }
1061
1062                 igt_spinner_end(&spin_hi);
1063                 igt_spinner_end(&spin_lo);
1064
1065                 if (igt_live_test_end(&t)) {
1066                         err = -EIO;
1067                         goto err_ctx_lo;
1068                 }
1069         }
1070
1071         err = 0;
1072 err_ctx_lo:
1073         kernel_context_close(ctx_lo);
1074 err_ctx_hi:
1075         kernel_context_close(ctx_hi);
1076 err_spin_lo:
1077         igt_spinner_fini(&spin_lo);
1078 err_spin_hi:
1079         igt_spinner_fini(&spin_hi);
1080         return err;
1081
1082 err_wedged:
1083         igt_spinner_end(&spin_hi);
1084         igt_spinner_end(&spin_lo);
1085         intel_gt_set_wedged(gt);
1086         err = -EIO;
1087         goto err_ctx_lo;
1088 }
1089
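/*
 * A preempt_client pairs a spinner with its own context so that several
 * independent clients can be pitted against each other in the preemption
 * tests below; preempt_client_init()/fini() manage their lifetime.
 */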
1090 struct preempt_client {
1091         struct igt_spinner spin;
1092         struct i915_gem_context *ctx;
1093 };
1094
1095 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1096 {
1097         c->ctx = kernel_context(gt->i915);
1098         if (!c->ctx)
1099                 return -ENOMEM;
1100
1101         if (igt_spinner_init(&c->spin, gt))
1102                 goto err_ctx;
1103
1104         return 0;
1105
1106 err_ctx:
1107         kernel_context_close(c->ctx);
1108         return -ENOMEM;
1109 }
1110
1111 static void preempt_client_fini(struct preempt_client *c)
1112 {
1113         igt_spinner_fini(&c->spin);
1114         kernel_context_close(c->ctx);
1115 }
1116
1117 static int live_nopreempt(void *arg)
1118 {
1119         struct intel_gt *gt = arg;
1120         struct intel_engine_cs *engine;
1121         struct preempt_client a, b;
1122         enum intel_engine_id id;
1123         int err = -ENOMEM;
1124
1125         /*
1126          * Verify that we can disable preemption for an individual request
1127          * that may be being observed and does not want to be interrupted.
1128          */
1129
1130         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1131                 return 0;
1132
1133         if (preempt_client_init(gt, &a))
1134                 return -ENOMEM;
1135         if (preempt_client_init(gt, &b))
1136                 goto err_client_a;
1137         b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1138
1139         for_each_engine(engine, gt, id) {
1140                 struct i915_request *rq_a, *rq_b;
1141
1142                 if (!intel_engine_has_preemption(engine))
1143                         continue;
1144
1145                 engine->execlists.preempt_hang.count = 0;
1146
1147                 rq_a = spinner_create_request(&a.spin,
1148                                               a.ctx, engine,
1149                                               MI_ARB_CHECK);
1150                 if (IS_ERR(rq_a)) {
1151                         err = PTR_ERR(rq_a);
1152                         goto err_client_b;
1153                 }
1154
1155                 /* Low priority client, but unpreemptable! */
1156                 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1157
1158                 i915_request_add(rq_a);
1159                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1160                         pr_err("First client failed to start\n");
1161                         goto err_wedged;
1162                 }
1163
1164                 rq_b = spinner_create_request(&b.spin,
1165                                               b.ctx, engine,
1166                                               MI_ARB_CHECK);
1167                 if (IS_ERR(rq_b)) {
1168                         err = PTR_ERR(rq_b);
1169                         goto err_client_b;
1170                 }
1171
1172                 i915_request_add(rq_b);
1173
1174                 /* B is much more important than A! (But A is unpreemptable.) */
1175                 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1176
1177                 /* Wait long enough for preemption and timeslicing */
1178                 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1179                         pr_err("Second client started too early!\n");
1180                         goto err_wedged;
1181                 }
1182
1183                 igt_spinner_end(&a.spin);
1184
1185                 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1186                         pr_err("Second client failed to start\n");
1187                         goto err_wedged;
1188                 }
1189
1190                 igt_spinner_end(&b.spin);
1191
1192                 if (engine->execlists.preempt_hang.count) {
1193                         pr_err("Preemption recorded x%d; should have been suppressed!\n",
1194                                engine->execlists.preempt_hang.count);
1195                         err = -EINVAL;
1196                         goto err_wedged;
1197                 }
1198
1199                 if (igt_flush_test(gt->i915))
1200                         goto err_wedged;
1201         }
1202
1203         err = 0;
1204 err_client_b:
1205         preempt_client_fini(&b);
1206 err_client_a:
1207         preempt_client_fini(&a);
1208         return err;
1209
1210 err_wedged:
1211         igt_spinner_end(&b.spin);
1212         igt_spinner_end(&a.spin);
1213         intel_gt_set_wedged(gt);
1214         err = -EIO;
1215         goto err_client_b;
1216 }
1217
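/*
 * Shared state for the __cancel_*() subtests: the engine under test plus
 * two preempt clients whose contexts are banned mid-flight.
 */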
1218 struct live_preempt_cancel {
1219         struct intel_engine_cs *engine;
1220         struct preempt_client a, b;
1221 };
1222
1223 static int __cancel_active0(struct live_preempt_cancel *arg)
1224 {
1225         struct i915_request *rq;
1226         struct igt_live_test t;
1227         int err;
1228
1229         /* Preempt cancel of ELSP0 */
1230         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1231         if (igt_live_test_begin(&t, arg->engine->i915,
1232                                 __func__, arg->engine->name))
1233                 return -EIO;
1234
1235         rq = spinner_create_request(&arg->a.spin,
1236                                     arg->a.ctx, arg->engine,
1237                                     MI_ARB_CHECK);
1238         if (IS_ERR(rq))
1239                 return PTR_ERR(rq);
1240
1241         clear_bit(CONTEXT_BANNED, &rq->context->flags);
1242         i915_request_get(rq);
1243         i915_request_add(rq);
1244         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1245                 err = -EIO;
1246                 goto out;
1247         }
1248
1249         intel_context_set_banned(rq->context);
1250         err = intel_engine_pulse(arg->engine);
1251         if (err)
1252                 goto out;
1253
1254         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1255                 err = -EIO;
1256                 goto out;
1257         }
1258
1259         if (rq->fence.error != -EIO) {
1260                 pr_err("Cancelled inflight0 request did not report -EIO\n");
1261                 err = -EINVAL;
1262                 goto out;
1263         }
1264
1265 out:
1266         i915_request_put(rq);
1267         if (igt_live_test_end(&t))
1268                 err = -EIO;
1269         return err;
1270 }
1271
1272 static int __cancel_active1(struct live_preempt_cancel *arg)
1273 {
1274         struct i915_request *rq[2] = {};
1275         struct igt_live_test t;
1276         int err;
1277
1278         /* Preempt cancel of ELSP1 */
1279         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1280         if (igt_live_test_begin(&t, arg->engine->i915,
1281                                 __func__, arg->engine->name))
1282                 return -EIO;
1283
1284         rq[0] = spinner_create_request(&arg->a.spin,
1285                                        arg->a.ctx, arg->engine,
1286                                        MI_NOOP); /* no preemption */
1287         if (IS_ERR(rq[0]))
1288                 return PTR_ERR(rq[0]);
1289
1290         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1291         i915_request_get(rq[0]);
1292         i915_request_add(rq[0]);
1293         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1294                 err = -EIO;
1295                 goto out;
1296         }
1297
1298         rq[1] = spinner_create_request(&arg->b.spin,
1299                                        arg->b.ctx, arg->engine,
1300                                        MI_ARB_CHECK);
1301         if (IS_ERR(rq[1])) {
1302                 err = PTR_ERR(rq[1]);
1303                 goto out;
1304         }
1305
1306         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1307         i915_request_get(rq[1]);
1308         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1309         i915_request_add(rq[1]);
1310         if (err)
1311                 goto out;
1312
1313         intel_context_set_banned(rq[1]->context);
1314         err = intel_engine_pulse(arg->engine);
1315         if (err)
1316                 goto out;
1317
1318         igt_spinner_end(&arg->a.spin);
1319         if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1320                 err = -EIO;
1321                 goto out;
1322         }
1323
1324         if (rq[0]->fence.error != 0) {
1325                 pr_err("Normal inflight0 request did not complete\n");
1326                 err = -EINVAL;
1327                 goto out;
1328         }
1329
1330         if (rq[1]->fence.error != -EIO) {
1331                 pr_err("Cancelled inflight1 request did not report -EIO\n");
1332                 err = -EINVAL;
1333                 goto out;
1334         }
1335
1336 out:
1337         i915_request_put(rq[1]);
1338         i915_request_put(rq[0]);
1339         if (igt_live_test_end(&t))
1340                 err = -EIO;
1341         return err;
1342 }
1343
1344 static int __cancel_queued(struct live_preempt_cancel *arg)
1345 {
1346         struct i915_request *rq[3] = {};
1347         struct igt_live_test t;
1348         int err;
1349
1350         /* Full ELSP and one in the wings */
1351         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1352         if (igt_live_test_begin(&t, arg->engine->i915,
1353                                 __func__, arg->engine->name))
1354                 return -EIO;
1355
1356         rq[0] = spinner_create_request(&arg->a.spin,
1357                                        arg->a.ctx, arg->engine,
1358                                        MI_ARB_CHECK);
1359         if (IS_ERR(rq[0]))
1360                 return PTR_ERR(rq[0]);
1361
1362         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1363         i915_request_get(rq[0]);
1364         i915_request_add(rq[0]);
1365         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1366                 err = -EIO;
1367                 goto out;
1368         }
1369
1370         rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1371         if (IS_ERR(rq[1])) {
1372                 err = PTR_ERR(rq[1]);
1373                 goto out;
1374         }
1375
1376         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1377         i915_request_get(rq[1]);
1378         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1379         i915_request_add(rq[1]);
1380         if (err)
1381                 goto out;
1382
1383         rq[2] = spinner_create_request(&arg->b.spin,
1384                                        arg->a.ctx, arg->engine,
1385                                        MI_ARB_CHECK);
1386         if (IS_ERR(rq[2])) {
1387                 err = PTR_ERR(rq[2]);
1388                 goto out;
1389         }
1390
1391         i915_request_get(rq[2]);
1392         err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1393         i915_request_add(rq[2]);
1394         if (err)
1395                 goto out;
1396
1397         intel_context_set_banned(rq[2]->context);
1398         err = intel_engine_pulse(arg->engine);
1399         if (err)
1400                 goto out;
1401
1402         if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1403                 err = -EIO;
1404                 goto out;
1405         }
1406
1407         if (rq[0]->fence.error != -EIO) {
1408                 pr_err("Cancelled inflight0 request did not report -EIO\n");
1409                 err = -EINVAL;
1410                 goto out;
1411         }
1412
1413         if (rq[1]->fence.error != 0) {
1414                 pr_err("Normal inflight1 request did not complete\n");
1415                 err = -EINVAL;
1416                 goto out;
1417         }
1418
1419         if (rq[2]->fence.error != -EIO) {
1420                 pr_err("Cancelled queued request did not report -EIO\n");
1421                 err = -EINVAL;
1422                 goto out;
1423         }
1424
1425 out:
1426         i915_request_put(rq[2]);
1427         i915_request_put(rq[1]);
1428         i915_request_put(rq[0]);
1429         if (igt_live_test_end(&t))
1430                 err = -EIO;
1431         return err;
1432 }
1433
1434 static int __cancel_hostile(struct live_preempt_cancel *arg)
1435 {
1436         struct i915_request *rq;
1437         int err;
1438
1439         /* Preempt cancel non-preemptible spinner in ELSP0 */
1440         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1441                 return 0;
1442
1443         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1444         rq = spinner_create_request(&arg->a.spin,
1445                                     arg->a.ctx, arg->engine,
1446                                     MI_NOOP); /* preemption disabled */
1447         if (IS_ERR(rq))
1448                 return PTR_ERR(rq);
1449
1450         clear_bit(CONTEXT_BANNED, &rq->context->flags);
1451         i915_request_get(rq);
1452         i915_request_add(rq);
1453         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1454                 err = -EIO;
1455                 goto out;
1456         }
1457
1458         intel_context_set_banned(rq->context);
1459         err = intel_engine_pulse(arg->engine); /* force reset */
1460         if (err)
1461                 goto out;
1462
1463         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1464                 err = -EIO;
1465                 goto out;
1466         }
1467
1468         if (rq->fence.error != -EIO) {
1469                 pr_err("Cancelled inflight0 request did not report -EIO\n");
1470                 err = -EINVAL;
1471                 goto out;
1472         }
1473
1474 out:
1475         i915_request_put(rq);
1476         if (igt_flush_test(arg->engine->i915))
1477                 err = -EIO;
1478         return err;
1479 }
1480
1481 static int live_preempt_cancel(void *arg)
1482 {
1483         struct intel_gt *gt = arg;
1484         struct live_preempt_cancel data;
1485         enum intel_engine_id id;
1486         int err = -ENOMEM;
1487
1488         /*
1489          * To cancel an inflight context, we need to first remove it from the
1490          * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1491          */
1492
1493         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1494                 return 0;
1495
1496         if (preempt_client_init(gt, &data.a))
1497                 return -ENOMEM;
1498         if (preempt_client_init(gt, &data.b))
1499                 goto err_client_a;
1500
1501         for_each_engine(data.engine, gt, id) {
1502                 if (!intel_engine_has_preemption(data.engine))
1503                         continue;
1504
1505                 err = __cancel_active0(&data);
1506                 if (err)
1507                         goto err_wedged;
1508
1509                 err = __cancel_active1(&data);
1510                 if (err)
1511                         goto err_wedged;
1512
1513                 err = __cancel_queued(&data);
1514                 if (err)
1515                         goto err_wedged;
1516
1517                 err = __cancel_hostile(&data);
1518                 if (err)
1519                         goto err_wedged;
1520         }
1521
1522         err = 0;
1523 err_client_b:
1524         preempt_client_fini(&data.b);
1525 err_client_a:
1526         preempt_client_fini(&data.a);
1527         return err;
1528
1529 err_wedged:
1530         GEM_TRACE_DUMP();
1531         igt_spinner_end(&data.b.spin);
1532         igt_spinner_end(&data.a.spin);
1533         intel_gt_set_wedged(gt);
1534         goto err_client_b;
1535 }
1536
1537 static int live_suppress_self_preempt(void *arg)
1538 {
1539         struct intel_gt *gt = arg;
1540         struct intel_engine_cs *engine;
1541         struct i915_sched_attr attr = {
1542                 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1543         };
1544         struct preempt_client a, b;
1545         enum intel_engine_id id;
1546         int err = -ENOMEM;
1547
1548         /*
1549          * Verify that if a preemption request does not cause a change in
1550          * the current execution order, the preempt-to-idle injection is
1551          * skipped and that we do not accidentally apply it after the CS
1552          * completion event.
1553          */
1554
1555         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1556                 return 0;
1557
1558         if (USES_GUC_SUBMISSION(gt->i915))
1559                 return 0; /* presume black box */
1560
1561         if (intel_vgpu_active(gt->i915))
1562                 return 0; /* GVT forces single port & request submission */
1563
1564         if (preempt_client_init(gt, &a))
1565                 return -ENOMEM;
1566         if (preempt_client_init(gt, &b))
1567                 goto err_client_a;
1568
1569         for_each_engine(engine, gt, id) {
1570                 struct i915_request *rq_a, *rq_b;
1571                 int depth;
1572
1573                 if (!intel_engine_has_preemption(engine))
1574                         continue;
1575
1576                 if (igt_flush_test(gt->i915))
1577                         goto err_wedged;
1578
1579                 intel_engine_pm_get(engine);
1580                 engine->execlists.preempt_hang.count = 0;
1581
1582                 rq_a = spinner_create_request(&a.spin,
1583                                               a.ctx, engine,
1584                                               MI_NOOP);
1585                 if (IS_ERR(rq_a)) {
1586                         err = PTR_ERR(rq_a);
1587                         intel_engine_pm_put(engine);
1588                         goto err_client_b;
1589                 }
1590
1591                 i915_request_add(rq_a);
1592                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1593                         pr_err("First client failed to start\n");
1594                         intel_engine_pm_put(engine);
1595                         goto err_wedged;
1596                 }
1597
1598                 /* Keep postponing the timer to avoid premature slicing */
1599                 mod_timer(&engine->execlists.timer, jiffies + HZ);
1600                 for (depth = 0; depth < 8; depth++) {
1601                         rq_b = spinner_create_request(&b.spin,
1602                                                       b.ctx, engine,
1603                                                       MI_NOOP);
1604                         if (IS_ERR(rq_b)) {
1605                                 err = PTR_ERR(rq_b);
1606                                 intel_engine_pm_put(engine);
1607                                 goto err_client_b;
1608                         }
1609                         i915_request_add(rq_b);
1610
1611                         GEM_BUG_ON(i915_request_completed(rq_a));
1612                         engine->schedule(rq_a, &attr);
1613                         igt_spinner_end(&a.spin);
1614
1615                         if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1616                                 pr_err("Second client failed to start\n");
1617                                 intel_engine_pm_put(engine);
1618                                 goto err_wedged;
1619                         }
1620
1621                         swap(a, b);
1622                         rq_a = rq_b;
1623                 }
1624                 igt_spinner_end(&a.spin);
1625
1626                 if (engine->execlists.preempt_hang.count) {
1627                         pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1628                                engine->name,
1629                                engine->execlists.preempt_hang.count,
1630                                depth);
1631                         intel_engine_pm_put(engine);
1632                         err = -EINVAL;
1633                         goto err_client_b;
1634                 }
1635
1636                 intel_engine_pm_put(engine);
1637                 if (igt_flush_test(gt->i915))
1638                         goto err_wedged;
1639         }
1640
1641         err = 0;
1642 err_client_b:
1643         preempt_client_fini(&b);
1644 err_client_a:
1645         preempt_client_fini(&a);
1646         return err;
1647
1648 err_wedged:
1649         igt_spinner_end(&b.spin);
1650         igt_spinner_end(&a.spin);
1651         intel_gt_set_wedged(gt);
1652         err = -EIO;
1653         goto err_client_b;
1654 }
1655
1656 static int __i915_sw_fence_call
1657 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1658 {
1659         return NOTIFY_DONE;
1660 }
1661
1662 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1663 {
1664         struct i915_request *rq;
1665
1666         rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1667         if (!rq)
1668                 return NULL;
1669
1670         rq->engine = engine;
1671
1672         spin_lock_init(&rq->lock);
1673         INIT_LIST_HEAD(&rq->fence.cb_list);
1674         rq->fence.lock = &rq->lock;
1675         rq->fence.ops = &i915_fence_ops;
1676
1677         i915_sched_node_init(&rq->sched);
1678
1679         /* mark this request as permanently incomplete */
1680         rq->fence.seqno = 1;
1681         BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1682         rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
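             /*
              * The breadcrumb pointer now targets the upper 32 bits of the
              * 64 bit fence.seqno, which stay zero, so the hwsp value can
              * never pass seqno 1 and i915_request_completed() remains false.
              */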
1683         GEM_BUG_ON(i915_request_completed(rq));
1684
1685         i915_sw_fence_init(&rq->submit, dummy_notify);
1686         set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1691
1692         return rq;
1693 }
1694
1695 static void dummy_request_free(struct i915_request *dummy)
1696 {
1697         /* We have to fake the CS interrupt to kick the next request */
1698         i915_sw_fence_commit(&dummy->submit);
1699
1700         i915_request_mark_complete(dummy);
1701         dma_fence_signal(&dummy->fence);
1702
1703         i915_sched_node_fini(&dummy->sched);
1704         i915_sw_fence_fini(&dummy->submit);
1705
1706         dma_fence_free(&dummy->fence);
1707 }
1708
1709 static int live_suppress_wait_preempt(void *arg)
1710 {
1711         struct intel_gt *gt = arg;
1712         struct preempt_client client[4];
1713         struct i915_request *rq[ARRAY_SIZE(client)] = {};
1714         struct intel_engine_cs *engine;
1715         enum intel_engine_id id;
1716         int err = -ENOMEM;
1717         int i;
1718
1719         /*
1720          * Waiters are given a little priority nudge, but not enough
1721          * to actually cause any preemption. Double check that we do
1722          * not needlessly generate preempt-to-idle cycles.
1723          */
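             /*
              * Outline of each iteration below, for reference: a dummy fence
              * is installed as each timeline's last request (defeating
              * NEWCLIENT promotion), four spinners are submitted, and then
              *
              *      i915_request_wait(rq[depth], I915_WAIT_PRIORITY, 1);
              *
              * is expected to time out. The small priority nudge given to the
              * waited-upon request must not surface as a preempt-to-idle
              * cycle in execlists.preempt_hang.count.
              */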
1724
1725         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1726                 return 0;
1727
1728         if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1729                 return -ENOMEM;
1730         if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1731                 goto err_client_0;
1732         if (preempt_client_init(gt, &client[2])) /* head of queue */
1733                 goto err_client_1;
1734         if (preempt_client_init(gt, &client[3])) /* bystander */
1735                 goto err_client_2;
1736
1737         for_each_engine(engine, gt, id) {
1738                 int depth;
1739
1740                 if (!intel_engine_has_preemption(engine))
1741                         continue;
1742
1743                 if (!engine->emit_init_breadcrumb)
1744                         continue;
1745
1746                 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1747                         struct i915_request *dummy;
1748
1749                         engine->execlists.preempt_hang.count = 0;
1750
1751                         dummy = dummy_request(engine);
1752                         if (!dummy)
1753                                 goto err_client_3;
1754
1755                         for (i = 0; i < ARRAY_SIZE(client); i++) {
1756                                 struct i915_request *this;
1757
1758                                 this = spinner_create_request(&client[i].spin,
1759                                                               client[i].ctx, engine,
1760                                                               MI_NOOP);
1761                                 if (IS_ERR(this)) {
1762                                         err = PTR_ERR(this);
1763                                         goto err_wedged;
1764                                 }
1765
1766                                 /* Disable NEWCLIENT promotion */
1767                                 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
1768                                                         &dummy->fence);
1769
1770                                 rq[i] = i915_request_get(this);
1771                                 i915_request_add(this);
1772                         }
1773
1774                         dummy_request_free(dummy);
1775
1776                         GEM_BUG_ON(i915_request_completed(rq[0]));
1777                         if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1778                                 pr_err("%s: First client failed to start\n",
1779                                        engine->name);
1780                                 goto err_wedged;
1781                         }
1782                         GEM_BUG_ON(!i915_request_started(rq[0]));
1783
1784                         if (i915_request_wait(rq[depth],
1785                                               I915_WAIT_PRIORITY,
1786                                               1) != -ETIME) {
1787                                 pr_err("%s: Waiter depth:%d completed!\n",
1788                                        engine->name, depth);
1789                                 goto err_wedged;
1790                         }
1791
1792                         for (i = 0; i < ARRAY_SIZE(client); i++) {
1793                                 igt_spinner_end(&client[i].spin);
1794                                 i915_request_put(rq[i]);
1795                                 rq[i] = NULL;
1796                         }
1797
1798                         if (igt_flush_test(gt->i915))
1799                                 goto err_wedged;
1800
1801                         if (engine->execlists.preempt_hang.count) {
1802                                 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1803                                        engine->name,
1804                                        engine->execlists.preempt_hang.count,
1805                                        depth);
1806                                 err = -EINVAL;
1807                                 goto err_client_3;
1808                         }
1809                 }
1810         }
1811
1812         err = 0;
1813 err_client_3:
1814         preempt_client_fini(&client[3]);
1815 err_client_2:
1816         preempt_client_fini(&client[2]);
1817 err_client_1:
1818         preempt_client_fini(&client[1]);
1819 err_client_0:
1820         preempt_client_fini(&client[0]);
1821         return err;
1822
1823 err_wedged:
1824         for (i = 0; i < ARRAY_SIZE(client); i++) {
1825                 igt_spinner_end(&client[i].spin);
1826                 i915_request_put(rq[i]);
1827         }
1828         intel_gt_set_wedged(gt);
1829         err = -EIO;
1830         goto err_client_3;
1831 }
1832
1833 static int live_chain_preempt(void *arg)
1834 {
1835         struct intel_gt *gt = arg;
1836         struct intel_engine_cs *engine;
1837         struct preempt_client hi, lo;
1838         enum intel_engine_id id;
1839         int err = -ENOMEM;
1840
1841         /*
1842          * Build a chain AB...BA between two contexts (A, B) and request
1843          * preemption of the last request. It should then complete before
1844          * the previously submitted spinner in B.
1845          */
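             /*
              * Rough shape of each iteration below, for illustration:
              *
              *      hi: spinner (running)
              *      lo: spinner + <count> nop requests (queued behind it)
              *      hi: one nop request, rescheduled to I915_PRIORITY_MAX
              *
              * Once the hi spinner is released, the boosted hi request must
              * preempt over the queued lo chain and complete within HZ / 5;
              * a final lo request then flushes the remainder.
              */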
1846
1847         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1848                 return 0;
1849
1850         if (preempt_client_init(gt, &hi))
1851                 return -ENOMEM;
1852
1853         if (preempt_client_init(gt, &lo))
1854                 goto err_client_hi;
1855
1856         for_each_engine(engine, gt, id) {
1857                 struct i915_sched_attr attr = {
1858                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1859                 };
1860                 struct igt_live_test t;
1861                 struct i915_request *rq;
1862                 int ring_size, count, i;
1863
1864                 if (!intel_engine_has_preemption(engine))
1865                         continue;
1866
1867                 rq = spinner_create_request(&lo.spin,
1868                                             lo.ctx, engine,
1869                                             MI_ARB_CHECK);
1870                 if (IS_ERR(rq))
1871                         goto err_wedged;
1872
1873                 i915_request_get(rq);
1874                 i915_request_add(rq);
1875
1876                 ring_size = rq->wa_tail - rq->head;
1877                 if (ring_size < 0)
1878                         ring_size += rq->ring->size;
1879                 ring_size = rq->ring->size / ring_size;
1880                 pr_debug("%s(%s): Using maximum of %d requests\n",
1881                          __func__, engine->name, ring_size);
1882
1883                 igt_spinner_end(&lo.spin);
1884                 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1885                         pr_err("Timed out waiting to flush %s\n", engine->name);
1886                         i915_request_put(rq);
1887                         goto err_wedged;
1888                 }
1889                 i915_request_put(rq);
1890
1891                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1892                         err = -EIO;
1893                         goto err_wedged;
1894                 }
1895
1896                 for_each_prime_number_from(count, 1, ring_size) {
1897                         rq = spinner_create_request(&hi.spin,
1898                                                     hi.ctx, engine,
1899                                                     MI_ARB_CHECK);
1900                         if (IS_ERR(rq))
1901                                 goto err_wedged;
1902                         i915_request_add(rq);
1903                         if (!igt_wait_for_spinner(&hi.spin, rq))
1904                                 goto err_wedged;
1905
1906                         rq = spinner_create_request(&lo.spin,
1907                                                     lo.ctx, engine,
1908                                                     MI_ARB_CHECK);
1909                         if (IS_ERR(rq))
1910                                 goto err_wedged;
1911                         i915_request_add(rq);
1912
1913                         for (i = 0; i < count; i++) {
1914                                 rq = igt_request_alloc(lo.ctx, engine);
1915                                 if (IS_ERR(rq))
1916                                         goto err_wedged;
1917                                 i915_request_add(rq);
1918                         }
1919
1920                         rq = igt_request_alloc(hi.ctx, engine);
1921                         if (IS_ERR(rq))
1922                                 goto err_wedged;
1923
1924                         i915_request_get(rq);
1925                         i915_request_add(rq);
1926                         engine->schedule(rq, &attr);
1927
1928                         igt_spinner_end(&hi.spin);
1929                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1930                                 struct drm_printer p =
1931                                         drm_info_printer(gt->i915->drm.dev);
1932
1933                                 pr_err("Failed to preempt over chain of %d\n",
1934                                        count);
1935                                 intel_engine_dump(engine, &p,
1936                                                   "%s\n", engine->name);
1937                                 i915_request_put(rq);
1938                                 goto err_wedged;
1939                         }
1940                         igt_spinner_end(&lo.spin);
1941                         i915_request_put(rq);
1942
1943                         rq = igt_request_alloc(lo.ctx, engine);
1944                         if (IS_ERR(rq))
1945                                 goto err_wedged;
1946
1947                         i915_request_get(rq);
1948                         i915_request_add(rq);
1949
1950                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1951                                 struct drm_printer p =
1952                                         drm_info_printer(gt->i915->drm.dev);
1953
1954                                 pr_err("Failed to flush low priority chain of %d requests\n",
1955                                        count);
1956                                 intel_engine_dump(engine, &p,
1957                                                   "%s\n", engine->name);
1958
1959                                 i915_request_put(rq);
1960                                 goto err_wedged;
1961                         }
1962                         i915_request_put(rq);
1963                 }
1964
1965                 if (igt_live_test_end(&t)) {
1966                         err = -EIO;
1967                         goto err_wedged;
1968                 }
1969         }
1970
1971         err = 0;
1972 err_client_lo:
1973         preempt_client_fini(&lo);
1974 err_client_hi:
1975         preempt_client_fini(&hi);
1976         return err;
1977
1978 err_wedged:
1979         igt_spinner_end(&hi.spin);
1980         igt_spinner_end(&lo.spin);
1981         intel_gt_set_wedged(gt);
1982         err = -EIO;
1983         goto err_client_lo;
1984 }
1985
1986 static int create_gang(struct intel_engine_cs *engine,
1987                        struct i915_request **prev)
1988 {
1989         struct drm_i915_gem_object *obj;
1990         struct intel_context *ce;
1991         struct i915_request *rq;
1992         struct i915_vma *vma;
1993         u32 *cs;
1994         int err;
1995
1996         ce = intel_context_create(engine);
1997         if (IS_ERR(ce))
1998                 return PTR_ERR(ce);
1999
2000         obj = i915_gem_object_create_internal(engine->i915, 4096);
2001         if (IS_ERR(obj)) {
2002                 err = PTR_ERR(obj);
2003                 goto err_ce;
2004         }
2005
2006         vma = i915_vma_instance(obj, ce->vm, NULL);
2007         if (IS_ERR(vma)) {
2008                 err = PTR_ERR(vma);
2009                 goto err_obj;
2010         }
2011
2012         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2013         if (err)
2014                 goto err_obj;
2015
2016         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2017         if (IS_ERR(cs)) {
2018                 err = PTR_ERR(cs);
                     goto err_obj;
             }
2019
2020         /* Semaphore target: spin until zero */
2021         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2022
2023         *cs++ = MI_SEMAPHORE_WAIT |
2024                 MI_SEMAPHORE_POLL |
2025                 MI_SEMAPHORE_SAD_EQ_SDD;
2026         *cs++ = 0;
2027         *cs++ = lower_32_bits(vma->node.start);
2028         *cs++ = upper_32_bits(vma->node.start);
2029
2030         if (*prev) {
2031                 u64 offset = (*prev)->batch->node.start;
2032
2033                 /* Terminate the spinner in the next lower priority batch. */
2034                 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2035                 *cs++ = lower_32_bits(offset);
2036                 *cs++ = upper_32_bits(offset);
2037                 *cs++ = 0;
2038         }
2039
2040         *cs++ = MI_BATCH_BUFFER_END;
2041         i915_gem_object_flush_map(obj);
2042         i915_gem_object_unpin_map(obj);
2043
2044         rq = intel_context_create_request(ce);
2045         if (IS_ERR(rq)) {
2046                 err = PTR_ERR(rq);
                     goto err_obj;
             }
2047
2048         rq->batch = vma;
2049         i915_request_get(rq);
2050
2051         i915_vma_lock(vma);
2052         err = i915_request_await_object(rq, vma->obj, false);
2053         if (!err)
2054                 err = i915_vma_move_to_active(vma, rq, 0);
2055         if (!err)
2056                 err = rq->engine->emit_bb_start(rq,
2057                                                 vma->node.start,
2058                                                 PAGE_SIZE, 0);
2059         i915_vma_unlock(vma);
2060         i915_request_add(rq);
2061         if (err)
2062                 goto err_rq;
2063
2064         i915_gem_object_put(obj);
2065         intel_context_put(ce);
2066
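             /*
              * Link the requests together via client_link so the caller can
              * walk the gang from the last (highest priority) submission back
              * to the first; with a NULL *prev the computed link terminates
              * the walk in live_preempt_gang().
              */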
2067         rq->client_link.next = &(*prev)->client_link;
2068         *prev = rq;
2069         return 0;
2070
2071 err_rq:
2072         i915_request_put(rq);
2073 err_obj:
2074         i915_gem_object_put(obj);
2075 err_ce:
2076         intel_context_put(ce);
2077         return err;
2078 }
2079
2080 static int live_preempt_gang(void *arg)
2081 {
2082         struct intel_gt *gt = arg;
2083         struct intel_engine_cs *engine;
2084         enum intel_engine_id id;
2085
2086         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2087                 return 0;
2088
2089         /*
2090          * Build as long a chain of preempters as we can, with each
2091          * request higher priority than the last. Once we are ready, we release
2092          * the last batch which then percolates down the chain, each releasing
2093          * the next oldest in turn. The intent is to simply push as hard as we
2094          * can with the number of preemptions, trying to exceed narrow HW
2095          * limits. At a minimum, we insist that we can sort all the user
2096          * high priority levels into execution order.
2097          */
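             /*
              * Illustrative outline of the loop below (schematic only):
              *
              *      while (create_gang(engine, &rq) == 0)
              *              engine->schedule(rq, &attr);
              *
              * with attr.priority incremented on every pass. Each batch spins
              * on its own first dword; the CPU releases the last (highest
              * priority) batch, and each completing batch writes zero into
              * its lower priority predecessor, unwinding the gang in priority
              * order.
              */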
2098
2099         for_each_engine(engine, gt, id) {
2100                 struct i915_request *rq = NULL;
2101                 struct igt_live_test t;
2102                 IGT_TIMEOUT(end_time);
2103                 int prio = 0;
2104                 int err = 0;
2105                 u32 *cs;
2106
2107                 if (!intel_engine_has_preemption(engine))
2108                         continue;
2109
2110                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2111                         return -EIO;
2112
2113                 do {
2114                         struct i915_sched_attr attr = {
2115                                 .priority = I915_USER_PRIORITY(prio++),
2116                         };
2117
2118                         err = create_gang(engine, &rq);
2119                         if (err)
2120                                 break;
2121
2122                         /* Submit each spinner at increasing priority */
2123                         engine->schedule(rq, &attr);
2124
2125                         if (prio <= I915_PRIORITY_MAX)
2126                                 continue;
2127
2128                         if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2129                                 break;
2130
2131                         if (__igt_timeout(end_time, NULL))
2132                                 break;
2133                 } while (1);
2134                 pr_debug("%s: Preempt chain of %d requests\n",
2135                          engine->name, prio);
2136
2137                 /*
2138                  * The last spinner submitted is the highest priority and
2139                  * should execute first. When it completes, it terminates
2140                  * the next lowest priority spinner, and so on until there
2141                  * are no more spinners and the gang is complete.
2142                  */
2143                 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2144                 if (!IS_ERR(cs)) {
2145                         *cs = 0;
2146                         i915_gem_object_unpin_map(rq->batch->obj);
2147                 } else {
2148                         err = PTR_ERR(cs);
2149                         intel_gt_set_wedged(gt);
2150                 }
2151
2152                 while (rq) { /* wait for each rq from highest to lowest prio */
2153                         struct i915_request *n =
2154                                 list_next_entry(rq, client_link);
2155
2156                         if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2157                                 struct drm_printer p =
2158                                         drm_info_printer(engine->i915->drm.dev);
2159
2160                                 pr_err("Failed to flush chain of %d requests, at %d\n",
2161                                        prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2162                                 intel_engine_dump(engine, &p,
2163                                                   "%s\n", engine->name);
2164
2165                                 err = -ETIME;
2166                         }
2167
2168                         i915_request_put(rq);
2169                         rq = n;
2170                 }
2171
2172                 if (igt_live_test_end(&t))
2173                         err = -EIO;
2174                 if (err)
2175                         return err;
2176         }
2177
2178         return 0;
2179 }
2180
2181 static int live_preempt_hang(void *arg)
2182 {
2183         struct intel_gt *gt = arg;
2184         struct i915_gem_context *ctx_hi, *ctx_lo;
2185         struct igt_spinner spin_hi, spin_lo;
2186         struct intel_engine_cs *engine;
2187         enum intel_engine_id id;
2188         int err = -ENOMEM;
2189
2190         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2191                 return 0;
2192
2193         if (!intel_has_reset_engine(gt))
2194                 return 0;
2195
2196         if (igt_spinner_init(&spin_hi, gt))
2197                 return -ENOMEM;
2198
2199         if (igt_spinner_init(&spin_lo, gt))
2200                 goto err_spin_hi;
2201
2202         ctx_hi = kernel_context(gt->i915);
2203         if (!ctx_hi)
2204                 goto err_spin_lo;
2205         ctx_hi->sched.priority =
2206                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2207
2208         ctx_lo = kernel_context(gt->i915);
2209         if (!ctx_lo)
2210                 goto err_ctx_hi;
2211         ctx_lo->sched.priority =
2212                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2213
2214         for_each_engine(engine, gt, id) {
2215                 struct i915_request *rq;
2216
2217                 if (!intel_engine_has_preemption(engine))
2218                         continue;
2219
2220                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2221                                             MI_ARB_CHECK);
2222                 if (IS_ERR(rq)) {
2223                         err = PTR_ERR(rq);
2224                         goto err_ctx_lo;
2225                 }
2226
2227                 i915_request_add(rq);
2228                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2229                         GEM_TRACE("lo spinner failed to start\n");
2230                         GEM_TRACE_DUMP();
2231                         intel_gt_set_wedged(gt);
2232                         err = -EIO;
2233                         goto err_ctx_lo;
2234                 }
2235
2236                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2237                                             MI_ARB_CHECK);
2238                 if (IS_ERR(rq)) {
2239                         igt_spinner_end(&spin_lo);
2240                         err = PTR_ERR(rq);
2241                         goto err_ctx_lo;
2242                 }
2243
2244                 init_completion(&engine->execlists.preempt_hang.completion);
2245                 engine->execlists.preempt_hang.inject_hang = true;
2246
2247                 i915_request_add(rq);
2248
2249                 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2250                                                  HZ / 10)) {
2251                         pr_err("Preemption did not occur within timeout!\n");
2252                         GEM_TRACE_DUMP();
2253                         intel_gt_set_wedged(gt);
2254                         err = -EIO;
2255                         goto err_ctx_lo;
2256                 }
2257
2258                 set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2259                 intel_engine_reset(engine, NULL);
2260                 clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2261
2262                 engine->execlists.preempt_hang.inject_hang = false;
2263
2264                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
2265                         GEM_TRACE("hi spinner failed to start\n");
2266                         GEM_TRACE_DUMP();
2267                         intel_gt_set_wedged(gt);
2268                         err = -EIO;
2269                         goto err_ctx_lo;
2270                 }
2271
2272                 igt_spinner_end(&spin_hi);
2273                 igt_spinner_end(&spin_lo);
2274                 if (igt_flush_test(gt->i915)) {
2275                         err = -EIO;
2276                         goto err_ctx_lo;
2277                 }
2278         }
2279
2280         err = 0;
2281 err_ctx_lo:
2282         kernel_context_close(ctx_lo);
2283 err_ctx_hi:
2284         kernel_context_close(ctx_hi);
2285 err_spin_lo:
2286         igt_spinner_fini(&spin_lo);
2287 err_spin_hi:
2288         igt_spinner_fini(&spin_hi);
2289         return err;
2290 }
2291
2292 static int live_preempt_timeout(void *arg)
2293 {
2294         struct intel_gt *gt = arg;
2295         struct i915_gem_context *ctx_hi, *ctx_lo;
2296         struct igt_spinner spin_lo;
2297         struct intel_engine_cs *engine;
2298         enum intel_engine_id id;
2299         int err = -ENOMEM;
2300
2301         /*
2302          * Check that we force preemption to occur by cancelling the previous
2303          * context if it refuses to yield the GPU.
2304          */
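             /*
              * Sketch of the per-engine flow below: ctx_lo starts a spinner
              * built with MI_NOOP (no arbitration point, so it cannot yield),
              * then a ctx_hi request is submitted with
              *
              *      engine->props.preempt_timeout_ms = 1;
              *
              * temporarily in force. The stubborn spinner should be cancelled
              * by the forced preemption and the hi request must complete
              * within HZ / 10.
              */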
2305         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2306                 return 0;
2307
2308         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2309                 return 0;
2310
2311         if (!intel_has_reset_engine(gt))
2312                 return 0;
2313
2314         if (igt_spinner_init(&spin_lo, gt))
2315                 return -ENOMEM;
2316
2317         ctx_hi = kernel_context(gt->i915);
2318         if (!ctx_hi)
2319                 goto err_spin_lo;
2320         ctx_hi->sched.priority =
2321                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2322
2323         ctx_lo = kernel_context(gt->i915);
2324         if (!ctx_lo)
2325                 goto err_ctx_hi;
2326         ctx_lo->sched.priority =
2327                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2328
2329         for_each_engine(engine, gt, id) {
2330                 unsigned long saved_timeout;
2331                 struct i915_request *rq;
2332
2333                 if (!intel_engine_has_preemption(engine))
2334                         continue;
2335
2336                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2337                                             MI_NOOP); /* preemption disabled */
2338                 if (IS_ERR(rq)) {
2339                         err = PTR_ERR(rq);
2340                         goto err_ctx_lo;
2341                 }
2342
2343                 i915_request_add(rq);
2344                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2345                         intel_gt_set_wedged(gt);
2346                         err = -EIO;
2347                         goto err_ctx_lo;
2348                 }
2349
2350                 rq = igt_request_alloc(ctx_hi, engine);
2351                 if (IS_ERR(rq)) {
2352                         igt_spinner_end(&spin_lo);
2353                         err = PTR_ERR(rq);
2354                         goto err_ctx_lo;
2355                 }
2356
2357                 /* Flush the previous CS ack before changing timeouts */
2358                 while (READ_ONCE(engine->execlists.pending[0]))
2359                         cpu_relax();
2360
2361                 saved_timeout = engine->props.preempt_timeout_ms;
2362                 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
2363
2364                 i915_request_get(rq);
2365                 i915_request_add(rq);
2366
2367                 intel_engine_flush_submission(engine);
2368                 engine->props.preempt_timeout_ms = saved_timeout;
2369
2370                 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2371                         intel_gt_set_wedged(gt);
2372                         i915_request_put(rq);
2373                         err = -ETIME;
2374                         goto err_ctx_lo;
2375                 }
2376
2377                 igt_spinner_end(&spin_lo);
2378                 i915_request_put(rq);
2379         }
2380
2381         err = 0;
2382 err_ctx_lo:
2383         kernel_context_close(ctx_lo);
2384 err_ctx_hi:
2385         kernel_context_close(ctx_hi);
2386 err_spin_lo:
2387         igt_spinner_fini(&spin_lo);
2388         return err;
2389 }
2390
2391 static int random_range(struct rnd_state *rnd, int min, int max)
2392 {
2393         return i915_prandom_u32_max_state(max - min, rnd) + min;
2394 }
2395
2396 static int random_priority(struct rnd_state *rnd)
2397 {
2398         return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2399 }
2400
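/*
 * Shared state for the preemption smoke tests: a pool of kernel contexts
 * whose priorities are reassigned on every submission, an optional batch of
 * MI_ARB_CHECKs to give each request a preemptible payload, and a per-thread
 * request count for the final report.
 */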
2401 struct preempt_smoke {
2402         struct intel_gt *gt;
2403         struct i915_gem_context **contexts;
2404         struct intel_engine_cs *engine;
2405         struct drm_i915_gem_object *batch;
2406         unsigned int ncontext;
2407         struct rnd_state prng;
2408         unsigned long count;
2409 };
2410
2411 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2412 {
2413         return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2414                                                           &smoke->prng)];
2415 }
2416
2417 static int smoke_submit(struct preempt_smoke *smoke,
2418                         struct i915_gem_context *ctx, int prio,
2419                         struct drm_i915_gem_object *batch)
2420 {
2421         struct i915_request *rq;
2422         struct i915_vma *vma = NULL;
2423         int err = 0;
2424
2425         if (batch) {
2426                 struct i915_address_space *vm;
2427
2428                 vm = i915_gem_context_get_vm_rcu(ctx);
2429                 vma = i915_vma_instance(batch, vm, NULL);
2430                 i915_vm_put(vm);
2431                 if (IS_ERR(vma))
2432                         return PTR_ERR(vma);
2433
2434                 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2435                 if (err)
2436                         return err;
2437         }
2438
2439         ctx->sched.priority = prio;
2440
2441         rq = igt_request_alloc(ctx, smoke->engine);
2442         if (IS_ERR(rq)) {
2443                 err = PTR_ERR(rq);
2444                 goto unpin;
2445         }
2446
2447         if (vma) {
2448                 i915_vma_lock(vma);
2449                 err = i915_request_await_object(rq, vma->obj, false);
2450                 if (!err)
2451                         err = i915_vma_move_to_active(vma, rq, 0);
2452                 if (!err)
2453                         err = rq->engine->emit_bb_start(rq,
2454                                                         vma->node.start,
2455                                                         PAGE_SIZE, 0);
2456                 i915_vma_unlock(vma);
2457         }
2458
2459         i915_request_add(rq);
2460
2461 unpin:
2462         if (vma)
2463                 i915_vma_unpin(vma);
2464
2465         return err;
2466 }
2467
2468 static int smoke_crescendo_thread(void *arg)
2469 {
2470         struct preempt_smoke *smoke = arg;
2471         IGT_TIMEOUT(end_time);
2472         unsigned long count;
2473
2474         count = 0;
2475         do {
2476                 struct i915_gem_context *ctx = smoke_context(smoke);
2477                 int err;
2478
2479                 err = smoke_submit(smoke,
2480                                    ctx, count % I915_PRIORITY_MAX,
2481                                    smoke->batch);
2482                 if (err)
2483                         return err;
2484
2485                 count++;
2486         } while (!__igt_timeout(end_time, NULL));
2487
2488         smoke->count = count;
2489         return 0;
2490 }
2491
2492 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2493 #define BATCH BIT(0)
2494 {
2495         struct task_struct *tsk[I915_NUM_ENGINES] = {};
2496         struct preempt_smoke arg[I915_NUM_ENGINES];
2497         struct intel_engine_cs *engine;
2498         enum intel_engine_id id;
2499         unsigned long count;
2500         int err = 0;
2501
2502         for_each_engine(engine, smoke->gt, id) {
2503                 arg[id] = *smoke;
2504                 arg[id].engine = engine;
2505                 if (!(flags & BATCH))
2506                         arg[id].batch = NULL;
2507                 arg[id].count = 0;
2508
2509                 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2510                                       "igt/smoke:%d", id);
2511                 if (IS_ERR(tsk[id])) {
2512                         err = PTR_ERR(tsk[id]);
2513                         break;
2514                 }
2515                 get_task_struct(tsk[id]);
2516         }
2517
2518         yield(); /* start all threads before we kthread_stop() */
2519
2520         count = 0;
2521         for_each_engine(engine, smoke->gt, id) {
2522                 int status;
2523
2524                 if (IS_ERR_OR_NULL(tsk[id]))
2525                         continue;
2526
2527                 status = kthread_stop(tsk[id]);
2528                 if (status && !err)
2529                         err = status;
2530
2531                 count += arg[id].count;
2532
2533                 put_task_struct(tsk[id]);
2534         }
2535
2536         pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2537                 count, flags,
2538                 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2539         return err;
2540 }
2541
2542 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2543 {
2544         enum intel_engine_id id;
2545         IGT_TIMEOUT(end_time);
2546         unsigned long count;
2547
2548         count = 0;
2549         do {
2550                 for_each_engine(smoke->engine, smoke->gt, id) {
2551                         struct i915_gem_context *ctx = smoke_context(smoke);
2552                         int err;
2553
2554                         err = smoke_submit(smoke,
2555                                            ctx, random_priority(&smoke->prng),
2556                                            flags & BATCH ? smoke->batch : NULL);
2557                         if (err)
2558                                 return err;
2559
2560                         count++;
2561                 }
2562         } while (!__igt_timeout(end_time, NULL));
2563
2564         pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2565                 count, flags,
2566                 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2567         return 0;
2568 }
2569
2570 static int live_preempt_smoke(void *arg)
2571 {
2572         struct preempt_smoke smoke = {
2573                 .gt = arg,
2574                 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2575                 .ncontext = 1024,
2576         };
2577         const unsigned int phase[] = { 0, BATCH };
2578         struct igt_live_test t;
2579         int err = -ENOMEM;
2580         u32 *cs;
2581         int n;
2582
2583         if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2584                 return 0;
2585
2586         smoke.contexts = kmalloc_array(smoke.ncontext,
2587                                        sizeof(*smoke.contexts),
2588                                        GFP_KERNEL);
2589         if (!smoke.contexts)
2590                 return -ENOMEM;
2591
2592         smoke.batch =
2593                 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2594         if (IS_ERR(smoke.batch)) {
2595                 err = PTR_ERR(smoke.batch);
2596                 goto err_free;
2597         }
2598
2599         cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2600         if (IS_ERR(cs)) {
2601                 err = PTR_ERR(cs);
2602                 goto err_batch;
2603         }
2604         for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2605                 cs[n] = MI_ARB_CHECK;
2606         cs[n] = MI_BATCH_BUFFER_END;
2607         i915_gem_object_flush_map(smoke.batch);
2608         i915_gem_object_unpin_map(smoke.batch);
2609
2610         if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2611                 err = -EIO;
2612                 goto err_batch;
2613         }
2614
2615         for (n = 0; n < smoke.ncontext; n++) {
2616                 smoke.contexts[n] = kernel_context(smoke.gt->i915);
2617                 if (!smoke.contexts[n])
2618                         goto err_ctx;
2619         }
2620
2621         for (n = 0; n < ARRAY_SIZE(phase); n++) {
2622                 err = smoke_crescendo(&smoke, phase[n]);
2623                 if (err)
2624                         goto err_ctx;
2625
2626                 err = smoke_random(&smoke, phase[n]);
2627                 if (err)
2628                         goto err_ctx;
2629         }
2630
2631 err_ctx:
2632         if (igt_live_test_end(&t))
2633                 err = -EIO;
2634
2635         for (n = 0; n < smoke.ncontext; n++) {
2636                 if (!smoke.contexts[n])
2637                         break;
2638                 kernel_context_close(smoke.contexts[n]);
2639         }
2640
2641 err_batch:
2642         i915_gem_object_put(smoke.batch);
2643 err_free:
2644         kfree(smoke.contexts);
2645
2646         return err;
2647 }
2648
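/*
 * Submit runs of empty requests to nctx virtual engines built over the same
 * set of siblings and measure completion latency. With CHAIN set, each
 * context queues its whole run back to back; otherwise submissions are
 * interleaved across the contexts.
 */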
2649 static int nop_virtual_engine(struct intel_gt *gt,
2650                               struct intel_engine_cs **siblings,
2651                               unsigned int nsibling,
2652                               unsigned int nctx,
2653                               unsigned int flags)
2654 #define CHAIN BIT(0)
2655 {
2656         IGT_TIMEOUT(end_time);
2657         struct i915_request *request[16] = {};
2658         struct intel_context *ve[16];
2659         unsigned long n, prime, nc;
2660         struct igt_live_test t;
2661         ktime_t times[2] = {};
2662         int err;
2663
2664         GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2665
2666         for (n = 0; n < nctx; n++) {
2667                 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2668                 if (IS_ERR(ve[n])) {
2669                         err = PTR_ERR(ve[n]);
2670                         nctx = n;
2671                         goto out;
2672                 }
2673
2674                 err = intel_context_pin(ve[n]);
2675                 if (err) {
2676                         intel_context_put(ve[n]);
2677                         nctx = n;
2678                         goto out;
2679                 }
2680         }
2681
2682         err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2683         if (err)
2684                 goto out;
2685
2686         for_each_prime_number_from(prime, 1, 8192) {
2687                 times[1] = ktime_get_raw();
2688
2689                 if (flags & CHAIN) {
2690                         for (nc = 0; nc < nctx; nc++) {
2691                                 for (n = 0; n < prime; n++) {
2692                                         struct i915_request *rq;
2693
2694                                         rq = i915_request_create(ve[nc]);
2695                                         if (IS_ERR(rq)) {
2696                                                 err = PTR_ERR(rq);
2697                                                 goto out;
2698                                         }
2699
2700                                         if (request[nc])
2701                                                 i915_request_put(request[nc]);
2702                                         request[nc] = i915_request_get(rq);
2703                                         i915_request_add(rq);
2704                                 }
2705                         }
2706                 } else {
2707                         for (n = 0; n < prime; n++) {
2708                                 for (nc = 0; nc < nctx; nc++) {
2709                                         struct i915_request *rq;
2710
2711                                         rq = i915_request_create(ve[nc]);
2712                                         if (IS_ERR(rq)) {
2713                                                 err = PTR_ERR(rq);
2714                                                 goto out;
2715                                         }
2716
2717                                         if (request[nc])
2718                                                 i915_request_put(request[nc]);
2719                                         request[nc] = i915_request_get(rq);
2720                                         i915_request_add(rq);
2721                                 }
2722                         }
2723                 }
2724
2725                 for (nc = 0; nc < nctx; nc++) {
2726                         if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2727                                 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2728                                        __func__, ve[0]->engine->name,
2729                                        request[nc]->fence.context,
2730                                        request[nc]->fence.seqno);
2731
2732                                 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2733                                           __func__, ve[0]->engine->name,
2734                                           request[nc]->fence.context,
2735                                           request[nc]->fence.seqno);
2736                                 GEM_TRACE_DUMP();
2737                                 intel_gt_set_wedged(gt);
2738                                 break;
2739                         }
2740                 }
2741
2742                 times[1] = ktime_sub(ktime_get_raw(), times[1]);
2743                 if (prime == 1)
2744                         times[0] = times[1];
2745
2746                 for (nc = 0; nc < nctx; nc++) {
2747                         i915_request_put(request[nc]);
2748                         request[nc] = NULL;
2749                 }
2750
2751                 if (__igt_timeout(end_time, NULL))
2752                         break;
2753         }
2754
2755         err = igt_live_test_end(&t);
2756         if (err)
2757                 goto out;
2758
2759         pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2760                 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2761                 prime, div64_u64(ktime_to_ns(times[1]), prime));
2762
2763 out:
2764         if (igt_flush_test(gt->i915))
2765                 err = -EIO;
2766
2767         for (nc = 0; nc < nctx; nc++) {
2768                 i915_request_put(request[nc]);
2769                 intel_context_unpin(ve[nc]);
2770                 intel_context_put(ve[nc]);
2771         }
2772         return err;
2773 }
2774
2775 static int live_virtual_engine(void *arg)
2776 {
2777         struct intel_gt *gt = arg;
2778         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2779         struct intel_engine_cs *engine;
2780         enum intel_engine_id id;
2781         unsigned int class, inst;
2782         int err;
2783
2784         if (USES_GUC_SUBMISSION(gt->i915))
2785                 return 0;
2786
2787         for_each_engine(engine, gt, id) {
2788                 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2789                 if (err) {
2790                         pr_err("Failed to wrap engine %s: err=%d\n",
2791                                engine->name, err);
2792                         return err;
2793                 }
2794         }
2795
2796         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2797                 int nsibling, n;
2798
2799                 nsibling = 0;
2800                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2801                         if (!gt->engine_class[class][inst])
2802                                 continue;
2803
2804                         siblings[nsibling++] = gt->engine_class[class][inst];
2805                 }
2806                 if (nsibling < 2)
2807                         continue;
2808
2809                 for (n = 1; n <= nsibling + 1; n++) {
2810                         err = nop_virtual_engine(gt, siblings, nsibling,
2811                                                  n, 0);
2812                         if (err)
2813                                 return err;
2814                 }
2815
2816                 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2817                 if (err)
2818                         return err;
2819         }
2820
2821         return 0;
2822 }
2823
2824 static int mask_virtual_engine(struct intel_gt *gt,
2825                                struct intel_engine_cs **siblings,
2826                                unsigned int nsibling)
2827 {
2828         struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2829         struct intel_context *ve;
2830         struct igt_live_test t;
2831         unsigned int n;
2832         int err;
2833
2834         /*
2835          * Check that by setting the execution mask on a request, we can
2836          * restrict it to our desired engine within the virtual engine.
2837          */
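             /*
              * The key step in the loop below is simply
              *
              *      request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
              *
              * which should pin each request of the virtual engine to a single
              * physical sibling; request[n]->engine is checked afterwards.
              */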
2838
2839         ve = intel_execlists_create_virtual(siblings, nsibling);
2840         if (IS_ERR(ve)) {
2841                 err = PTR_ERR(ve);
2842                 goto out_close;
2843         }
2844
2845         err = intel_context_pin(ve);
2846         if (err)
2847                 goto out_put;
2848
2849         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2850         if (err)
2851                 goto out_unpin;
2852
2853         for (n = 0; n < nsibling; n++) {
2854                 request[n] = i915_request_create(ve);
2855                 if (IS_ERR(request[n])) {
2856                         err = PTR_ERR(request[n]);
2857                         nsibling = n;
2858                         goto out;
2859                 }
2860
2861                 /* Reverse order as it's more likely to be unnatural */
2862                 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2863
2864                 i915_request_get(request[n]);
2865                 i915_request_add(request[n]);
2866         }
2867
2868         for (n = 0; n < nsibling; n++) {
2869                 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2870                         pr_err("%s(%s): wait for %llx:%lld timed out\n",
2871                                __func__, ve->engine->name,
2872                                request[n]->fence.context,
2873                                request[n]->fence.seqno);
2874
2875                         GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2876                                   __func__, ve->engine->name,
2877                                   request[n]->fence.context,
2878                                   request[n]->fence.seqno);
2879                         GEM_TRACE_DUMP();
2880                         intel_gt_set_wedged(gt);
2881                         err = -EIO;
2882                         goto out;
2883                 }
2884
2885                 if (request[n]->engine != siblings[nsibling - n - 1]) {
2886                         pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2887                                request[n]->engine->name,
2888                                siblings[nsibling - n - 1]->name);
2889                         err = -EINVAL;
2890                         goto out;
2891                 }
2892         }
2893
2894         err = igt_live_test_end(&t);
2895 out:
2896         if (igt_flush_test(gt->i915))
2897                 err = -EIO;
2898
2899         for (n = 0; n < nsibling; n++)
2900                 i915_request_put(request[n]);
2901
2902 out_unpin:
2903         intel_context_unpin(ve);
2904 out_put:
2905         intel_context_put(ve);
2906 out_close:
2907         return err;
2908 }
2909
2910 static int live_virtual_mask(void *arg)
2911 {
2912         struct intel_gt *gt = arg;
2913         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2914         unsigned int class, inst;
2915         int err;
2916
2917         if (USES_GUC_SUBMISSION(gt->i915))
2918                 return 0;
2919
2920         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2921                 unsigned int nsibling;
2922
2923                 nsibling = 0;
2924                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2925                         if (!gt->engine_class[class][inst])
2926                                 break;
2927
2928                         siblings[nsibling++] = gt->engine_class[class][inst];
2929                 }
2930                 if (nsibling < 2)
2931                         continue;
2932
2933                 err = mask_virtual_engine(gt, siblings, nsibling);
2934                 if (err)
2935                         return err;
2936         }
2937
2938         return 0;
2939 }
2940
2941 static int preserved_virtual_engine(struct intel_gt *gt,
2942                                     struct intel_engine_cs **siblings,
2943                                     unsigned int nsibling)
2944 {
2945         struct i915_request *last = NULL;
2946         struct intel_context *ve;
2947         struct i915_vma *scratch;
2948         struct igt_live_test t;
2949         unsigned int n;
2950         int err = 0;
2951         u32 *cs;
2952
2953         scratch = create_scratch(siblings[0]->gt);
2954         if (IS_ERR(scratch))
2955                 return PTR_ERR(scratch);
2956
2957         ve = intel_execlists_create_virtual(siblings, nsibling);
2958         if (IS_ERR(ve)) {
2959                 err = PTR_ERR(ve);
2960                 goto out_scratch;
2961         }
2962
2963         err = intel_context_pin(ve);
2964         if (err)
2965                 goto out_put;
2966
2967         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2968         if (err)
2969                 goto out_unpin;
2970
2971         for (n = 0; n < NUM_GPR_DW; n++) {
2972                 struct intel_engine_cs *engine = siblings[n % nsibling];
2973                 struct i915_request *rq;
2974
2975                 rq = i915_request_create(ve);
2976                 if (IS_ERR(rq)) {
2977                         err = PTR_ERR(rq);
2978                         goto out_end;
2979                 }
2980
2981                 i915_request_put(last);
2982                 last = i915_request_get(rq);
2983
2984                 cs = intel_ring_begin(rq, 8);
2985                 if (IS_ERR(cs)) {
2986                         i915_request_add(rq);
2987                         err = PTR_ERR(cs);
2988                         goto out_end;
2989                 }
2990
2991                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
2992                 *cs++ = CS_GPR(engine, n);
2993                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
2994                 *cs++ = 0;
2995
2996                 *cs++ = MI_LOAD_REGISTER_IMM(1);
2997                 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
2998                 *cs++ = n + 1;
2999
3000                 *cs++ = MI_NOOP;
3001                 intel_ring_advance(rq, cs);
3002
3003                 /* Restrict this request to run on a particular engine */
3004                 rq->execution_mask = engine->mask;
3005                 i915_request_add(rq);
3006         }
3007
3008         if (i915_request_wait(last, 0, HZ / 5) < 0) {
3009                 err = -ETIME;
3010                 goto out_end;
3011         }
3012
3013         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3014         if (IS_ERR(cs)) {
3015                 err = PTR_ERR(cs);
3016                 goto out_end;
3017         }
3018
3019         for (n = 0; n < NUM_GPR_DW; n++) {
3020                 if (cs[n] != n) {
3021                         pr_err("Incorrect value[%d] found for GPR[%d]\n",
3022                                cs[n], n);
3023                         err = -EINVAL;
3024                         break;
3025                 }
3026         }
3027
3028         i915_gem_object_unpin_map(scratch->obj);
3029
3030 out_end:
3031         if (igt_live_test_end(&t))
3032                 err = -EIO;
3033         i915_request_put(last);
3034 out_unpin:
3035         intel_context_unpin(ve);
3036 out_put:
3037         intel_context_put(ve);
3038 out_scratch:
3039         i915_vma_unpin_and_release(&scratch, 0);
3040         return err;
3041 }
3042
3043 static int live_virtual_preserved(void *arg)
3044 {
3045         struct intel_gt *gt = arg;
3046         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3047         unsigned int class, inst;
3048
3049         /*
3050          * Check that the context image retains non-privileged (user) registers
3051          * from one engine to the next. For this we check that the CS_GPR
3052          * are preserved.
3053          */
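             /*
              * Each request built in preserved_virtual_engine() above emits,
              * roughly:
              *
              *      SRM(CS_GPR[n]) to scratch[n]
              *      LRI(CS_GPR[(n + 1) % NUM_GPR_DW]) = n + 1
              *
              * while pinned to one sibling via rq->execution_mask, so a stale
              * value in the scratch readback indicates a GPR lost as the
              * virtual context hopped between engines.
              */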
3054
3055         if (USES_GUC_SUBMISSION(gt->i915))
3056                 return 0;
3057
3058         /* As we use CS_GPR we cannot run before they existed on all engines. */
3059         if (INTEL_GEN(gt->i915) < 9)
3060                 return 0;
3061
3062         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3063                 int nsibling, err;
3064
3065                 nsibling = 0;
3066                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3067                         if (!gt->engine_class[class][inst])
3068                                 continue;
3069
3070                         siblings[nsibling++] = gt->engine_class[class][inst];
3071                 }
3072                 if (nsibling < 2)
3073                         continue;
3074
3075                 err = preserved_virtual_engine(gt, siblings, nsibling);
3076                 if (err)
3077                         return err;
3078         }
3079
3080         return 0;
3081 }
3082
3083 static int bond_virtual_engine(struct intel_gt *gt,
3084                                unsigned int class,
3085                                struct intel_engine_cs **siblings,
3086                                unsigned int nsibling,
3087                                unsigned int flags)
3088 #define BOND_SCHEDULE BIT(0)
3089 {
3090         struct intel_engine_cs *master;
3091         struct i915_request *rq[16];
3092         enum intel_engine_id id;
3093         struct igt_spinner spin;
3094         unsigned long n;
3095         int err;
3096
3097         /*
3098          * A set of bonded requests is intended to be run concurrently
3099          * across a number of engines. We use one request per engine
3100          * and a magic fence to schedule each of the bonded requests
3101          * at the same time. A consequence of our current scheduler is that
3102          * we only move requests to the HW ready queue when the request
3103          * becomes ready, that is when all of its prerequisite fences have
3104          * been signaled. As one of those fences is the master submit fence,
3105          * there is a delay on all secondary fences as the HW may be
3106          * currently busy. Equally, as all the requests are independent,
3107          * they may have other fences that delay individual request
3108          * submission to HW. Ergo, we do not guarantee that all requests are
3109          * immediately submitted to HW at the same time, just that if the
3110          * rules are abided by, they are ready at the same time as the
3111          * first is submitted. Userspace can embed semaphores in its batch
3112          * to ensure parallel execution of its phases as it requires.
3113          * Naturally, it has been suggested that the scheduler should take
3114          * care of parallel execution, even across preemption events on
3115          * different HW. (The proper answer is of course "lalalala".)
3116          *
3117          * With the submit-fence, we have identified three possible phases
3118          * of synchronisation depending on the master fence: queued (not
3119          * ready), executing, and signaled. The first two are quite simple
3120          * and checked below. However, the signaled master fence handling is
3121          * contentious. Currently we do not distinguish between a signaled
3122          * fence and an expired fence, as once signaled it does not convey
3123          * any information about the previous execution. It may even be freed,
3124          * and by the time we check it may not exist at all. Ergo we currently
3125          * do not apply the bonding constraint for an already signaled fence,
3126          * as our expectation is that it should not constrain the secondaries
3127          * and is outside of the scope of the bonded request API (i.e. all
3128          * userspace requests are meant to be running in parallel). As
3129          * it imposes no constraint, and is effectively a no-op, we do not
3130          * check below as normal execution flows are checked extensively above.
3131          *
3132          * XXX Is the degenerate handling of signaled submit fences the
3133          * expected behaviour for userspace?
3134          */
3135
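              /*
               * rq[0] is the master request and rq[1..nsibling] the bonded
               * requests; keep one spare slot as an ERR_PTR terminator for
               * the release loops below.
               */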
3136         GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3137
3138         if (igt_spinner_init(&spin, gt))
3139                 return -ENOMEM;
3140
3141         err = 0;
3142         rq[0] = ERR_PTR(-ENOMEM);
3143         for_each_engine(master, gt, id) {
3144                 struct i915_sw_fence fence = {};
3145
3146                 if (master->class == class)
3147                         continue;
3148
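                      /*
                       * Reset the tracking array; the first ERR_PTR slot
                       * terminates the release loops on completion and on error.
                       */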
3149                 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3150
3151                 rq[0] = igt_spinner_create_request(&spin,
3152                                                    master->kernel_context,
3153                                                    MI_NOOP);
3154                 if (IS_ERR(rq[0])) {
3155                         err = PTR_ERR(rq[0]);
3156                         goto out;
3157                 }
3158                 i915_request_get(rq[0]);
3159
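                      /*
                       * For the "schedule" phase, gate the master's submission
                       * on an onstack fence so the bonded requests are queued
                       * while the master is not yet ready.
                       */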
3160                 if (flags & BOND_SCHEDULE) {
3161                         onstack_fence_init(&fence);
3162                         err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3163                                                                &fence,
3164                                                                GFP_KERNEL);
3165                 }
3166
3167                 i915_request_add(rq[0]);
3168                 if (err < 0)
3169                         goto out;
3170
3171                 if (!(flags & BOND_SCHEDULE) &&
3172                     !igt_wait_for_spinner(&spin, rq[0])) {
3173                         err = -EIO;
3174                         goto out;
3175                 }
3176
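                      /*
                       * For each sibling, create a virtual engine, bond it to
                       * the master and submit a request whose execution awaits
                       * the master via the bonded execution hook.
                       */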
3177                 for (n = 0; n < nsibling; n++) {
3178                         struct intel_context *ve;
3179
3180                         ve = intel_execlists_create_virtual(siblings, nsibling);
3181                         if (IS_ERR(ve)) {
3182                                 err = PTR_ERR(ve);
3183                                 onstack_fence_fini(&fence);
3184                                 goto out;
3185                         }
3186
3187                         err = intel_virtual_engine_attach_bond(ve->engine,
3188                                                                master,
3189                                                                siblings[n]);
3190                         if (err) {
3191                                 intel_context_put(ve);
3192                                 onstack_fence_fini(&fence);
3193                                 goto out;
3194                         }
3195
3196                         err = intel_context_pin(ve);
3197                         intel_context_put(ve);
3198                         if (err) {
3199                                 onstack_fence_fini(&fence);
3200                                 goto out;
3201                         }
3202
3203                         rq[n + 1] = i915_request_create(ve);
3204                         intel_context_unpin(ve);
3205                         if (IS_ERR(rq[n + 1])) {
3206                                 err = PTR_ERR(rq[n + 1]);
3207                                 onstack_fence_fini(&fence);
3208                                 goto out;
3209                         }
3210                         i915_request_get(rq[n + 1]);
3211
3212                         err = i915_request_await_execution(rq[n + 1],
3213                                                            &rq[0]->fence,
3214                                                            ve->engine->bond_execute);
3215                         i915_request_add(rq[n + 1]);
3216                         if (err < 0) {
3217                                 onstack_fence_fini(&fence);
3218                                 goto out;
3219                         }
3220                 }
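                      /*
                       * Release the master (drop the submit-fence hold, stop
                       * the spinner), then verify each bonded request ran on
                       * its designated sibling.
                       */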
3221                 onstack_fence_fini(&fence);
3222                 intel_engine_flush_submission(master);
3223                 igt_spinner_end(&spin);
3224
3225                 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3226                         pr_err("Master request did not execute (on %s)!\n",
3227                                rq[0]->engine->name);
3228                         err = -EIO;
3229                         goto out;
3230                 }
3231
3232                 for (n = 0; n < nsibling; n++) {
3233                         if (i915_request_wait(rq[n + 1], 0,
3234                                               MAX_SCHEDULE_TIMEOUT) < 0) {
3235                                 err = -EIO;
3236                                 goto out;
3237                         }
3238
3239                         if (rq[n + 1]->engine != siblings[n]) {
3240                                 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3241                                        siblings[n]->name,
3242                                        rq[n + 1]->engine->name,
3243                                        rq[0]->engine->name);
3244                                 err = -EINVAL;
3245                                 goto out;
3246                         }
3247                 }
3248
3249                 for (n = 0; !IS_ERR(rq[n]); n++)
3250                         i915_request_put(rq[n]);
3251                 rq[0] = ERR_PTR(-ENOMEM);
3252         }
3253
3254 out:
3255         for (n = 0; !IS_ERR(rq[n]); n++)
3256                 i915_request_put(rq[n]);
3257         if (igt_flush_test(gt->i915))
3258                 err = -EIO;
3259
3260         igt_spinner_fini(&spin);
3261         return err;
3262 }
3263
3264 static int live_virtual_bond(void *arg)
3265 {
3266         static const struct phase {
3267                 const char *name;
3268                 unsigned int flags;
3269         } phases[] = {
3270                 { "", 0 },
3271                 { "schedule", BOND_SCHEDULE },
3272                 { },
3273         };
3274         struct intel_gt *gt = arg;
3275         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3276         unsigned int class, inst;
3277         int err;
3278
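              /*
               * For each engine class with at least two instances, bond
               * virtual engines to a master from a different class and check
               * that the bonded requests execute on their designated siblings,
               * both with and without delaying the master's submission.
               */
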
3279         if (USES_GUC_SUBMISSION(gt->i915))
3280                 return 0;
3281
3282         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3283                 const struct phase *p;
3284                 int nsibling;
3285
3286                 nsibling = 0;
3287                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3288                         if (!gt->engine_class[class][inst])
3289                                 break;
3290
3291                         GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3292                         siblings[nsibling++] = gt->engine_class[class][inst];
3293                 }
3294                 if (nsibling < 2)
3295                         continue;
3296
3297                 for (p = phases; p->name; p++) {
3298                         err = bond_virtual_engine(gt,
3299                                                   class, siblings, nsibling,
3300                                                   p->flags);
3301                         if (err) {
3302                                 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3303                                        __func__, p->name, class, nsibling, err);
3304                                 return err;
3305                         }
3306                 }
3307         }
3308
3309         return 0;
3310 }
3311
3312 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3313 {
3314         static const struct i915_subtest tests[] = {
3315                 SUBTEST(live_sanitycheck),
3316                 SUBTEST(live_unlite_switch),
3317                 SUBTEST(live_unlite_preempt),
3318                 SUBTEST(live_timeslice_preempt),
3319                 SUBTEST(live_timeslice_queue),
3320                 SUBTEST(live_busywait_preempt),
3321                 SUBTEST(live_preempt),
3322                 SUBTEST(live_late_preempt),
3323                 SUBTEST(live_nopreempt),
3324                 SUBTEST(live_preempt_cancel),
3325                 SUBTEST(live_suppress_self_preempt),
3326                 SUBTEST(live_suppress_wait_preempt),
3327                 SUBTEST(live_chain_preempt),
3328                 SUBTEST(live_preempt_gang),
3329                 SUBTEST(live_preempt_hang),
3330                 SUBTEST(live_preempt_timeout),
3331                 SUBTEST(live_preempt_smoke),
3332                 SUBTEST(live_virtual_engine),
3333                 SUBTEST(live_virtual_mask),
3334                 SUBTEST(live_virtual_preserved),
3335                 SUBTEST(live_virtual_bond),
3336         };
3337
3338         if (!HAS_EXECLISTS(i915))
3339                 return 0;
3340
3341         if (intel_gt_is_wedged(&i915->gt))
3342                 return 0;
3343
3344         return intel_gt_live_subtests(tests, &i915->gt);
3345 }
3346
3347 static void hexdump(const void *buf, size_t len)
3348 {
3349         const size_t rowsize = 8 * sizeof(u32);
3350         const void *prev = NULL;
3351         bool skip = false;
3352         size_t pos;
3353
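              /*
               * Print the buffer as rows of dwords, collapsing consecutive
               * identical rows into a single '*' line (as hexdump(1) does).
               */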
3354         for (pos = 0; pos < len; pos += rowsize) {
3355                 char line[128];
3356
3357                 if (prev && !memcmp(prev, buf + pos, rowsize)) {
3358                         if (!skip) {
3359                                 pr_info("*\n");
3360                                 skip = true;
3361                         }
3362                         continue;
3363                 }
3364
3365                 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3366                                                 rowsize, sizeof(u32),
3367                                                 line, sizeof(line),
3368                                                 false) >= sizeof(line));
3369                 pr_info("[%04zx] %s\n", pos, line);
3370
3371                 prev = buf + pos;
3372                 skip = false;
3373         }
3374 }
3375
3376 static int live_lrc_layout(void *arg)
3377 {
3378         struct intel_gt *gt = arg;
3379         struct intel_engine_cs *engine;
3380         enum intel_engine_id id;
3381         u32 *lrc;
3382         int err;
3383
3384         /*
3385          * Check that the register offsets we use to create the initial reg state
3386          * match the layout saved by HW.
3387          */
3388
3389         lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3390         if (!lrc)
3391                 return -ENOMEM;
3392
3393         err = 0;
3394         for_each_engine(engine, gt, id) {
3395                 u32 *hw;
3396                 int dw;
3397
3398                 if (!engine->default_state)
3399                         continue;
3400
3401                 hw = i915_gem_object_pin_map(engine->default_state,
3402                                              I915_MAP_WB);
3403                 if (IS_ERR(hw)) {
3404                         err = PTR_ERR(hw);
3405                         break;
3406                 }
3407                 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3408
3409                 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3410                                          engine->kernel_context,
3411                                          engine,
3412                                          engine->kernel_context->ring,
3413                                          true);
3414
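                      /*
                       * Walk the LRI (MI_LOAD_REGISTER_IMM) packets in the HW
                       * default image and compare the register offsets against
                       * our SW image; the register values themselves may differ
                       * and are skipped.
                       */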
3415                 dw = 0;
3416                 do {
3417                         u32 lri = hw[dw];
3418
3419                         if (lri == 0) {
3420                                 dw++;
3421                                 continue;
3422                         }
3423
3424                         if (lrc[dw] == 0) {
3425                                 pr_debug("%s: skipped instruction %x at dword %d\n",
3426                                          engine->name, lri, dw);
3427                                 dw++;
3428                                 continue;
3429                         }
3430
3431                         if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3432                                 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3433                                        engine->name, dw, lri);
3434                                 err = -EINVAL;
3435                                 break;
3436                         }
3437
3438                         if (lrc[dw] != lri) {
3439                                 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3440                                        engine->name, dw, lri, lrc[dw]);
3441                                 err = -EINVAL;
3442                                 break;
3443                         }
3444
3445                         lri &= 0x7f;
3446                         lri++;
3447                         dw++;
3448
3449                         while (lri) {
3450                                 if (hw[dw] != lrc[dw]) {
3451                                         pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3452                                                engine->name, dw, hw[dw], lrc[dw]);
3453                                         err = -EINVAL;
3454                                         break;
3455                                 }
3456
3457                                 /*
3458                                  * Skip over the actual register value as we
3459                                  * expect that to differ.
3460                                  */
3461                                 dw += 2;
3462                                 lri -= 2;
3463                         }
3464                 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3465
3466                 if (err) {
3467                         pr_info("%s: HW register image:\n", engine->name);
3468                         hexdump(hw, PAGE_SIZE);
3469
3470                         pr_info("%s: SW register image:\n", engine->name);
3471                         hexdump(lrc, PAGE_SIZE);
3472                 }
3473
3474                 i915_gem_object_unpin_map(engine->default_state);
3475                 if (err)
3476                         break;
3477         }
3478
3479         kfree(lrc);
3480         return err;
3481 }
3482
3483 static int find_offset(const u32 *lri, u32 offset)
3484 {
3485         int i;
3486
3487         for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3488                 if (lri[i] == offset)
3489                         return i;
3490
3491         return -1;
3492 }
3493
3494 static int live_lrc_fixed(void *arg)
3495 {
3496         struct intel_gt *gt = arg;
3497         struct intel_engine_cs *engine;
3498         enum intel_engine_id id;
3499         int err = 0;
3500
3501         /*
3502          * Check the assumed register offsets match the actual locations in
3503          * the context image.
3504          */
3505
3506         for_each_engine(engine, gt, id) {
3507                 const struct {
3508                         u32 reg;
3509                         u32 offset;
3510                         const char *name;
3511                 } tbl[] = {
3512                         {
3513                                 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3514                                 CTX_RING_START - 1,
3515                                 "RING_START"
3516                         },
3517                         {
3518                                 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3519                                 CTX_RING_CTL - 1,
3520                                 "RING_CTL"
3521                         },
3522                         {
3523                                 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3524                                 CTX_RING_HEAD - 1,
3525                                 "RING_HEAD"
3526                         },
3527                         {
3528                                 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3529                                 CTX_RING_TAIL - 1,
3530                                 "RING_TAIL"
3531                         },
3532                         {
3533                                 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3534                                 lrc_ring_mi_mode(engine),
3535                                 "RING_MI_MODE"
3536                         },
3537                         {
3538                                 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3539                                 CTX_BB_STATE - 1,
3540                                 "BB_STATE"
3541                         },
3542                         { },
3543                 }, *t;
3544                 u32 *hw;
3545
3546                 if (!engine->default_state)
3547                         continue;
3548
3549                 hw = i915_gem_object_pin_map(engine->default_state,
3550                                              I915_MAP_WB);
3551                 if (IS_ERR(hw)) {
3552                         err = PTR_ERR(hw);
3553                         break;
3554                 }
3555                 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3556
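                      /*
                       * The CTX_* offsets index the register value within the
                       * context image; the register offset itself sits one
                       * dword earlier, hence the -1 in the table above.
                       */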
3557                 for (t = tbl; t->name; t++) {
3558                         int dw = find_offset(hw, t->reg);
3559
3560                         if (dw != t->offset) {
3561                                 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3562                                        engine->name,
3563                                        t->name,
3564                                        t->reg,
3565                                        dw,
3566                                        t->offset);
3567                                 err = -EINVAL;
3568                         }
3569                 }
3570
3571                 i915_gem_object_unpin_map(engine->default_state);
3572         }
3573
3574         return err;
3575 }
3576
3577 static int __live_lrc_state(struct intel_engine_cs *engine,
3578                             struct i915_vma *scratch)
3579 {
3580         struct intel_context *ce;
3581         struct i915_request *rq;
3582         enum {
3583                 RING_START_IDX = 0,
3584                 RING_TAIL_IDX,
3585                 MAX_IDX
3586         };
3587         u32 expected[MAX_IDX];
3588         u32 *cs;
3589         int err;
3590         int n;
3591
3592         ce = intel_context_create(engine);
3593         if (IS_ERR(ce))
3594                 return PTR_ERR(ce);
3595
3596         err = intel_context_pin(ce);
3597         if (err)
3598                 goto err_put;
3599
3600         rq = i915_request_create(ce);
3601         if (IS_ERR(rq)) {
3602                 err = PTR_ERR(rq);
3603                 goto err_unpin;
3604         }
3605
3606         cs = intel_ring_begin(rq, 4 * MAX_IDX);
3607         if (IS_ERR(cs)) {
3608                 err = PTR_ERR(cs);
3609                 i915_request_add(rq);
3610                 goto err_unpin;
3611         }
3612
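              /*
               * Use SRM (store register to memory) so the CS writes the live
               * RING_START and RING_TAIL values into the scratch page for
               * comparison against the values we expect.
               */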
3613         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3614         *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3615         *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3616         *cs++ = 0;
3617
3618         expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3619
3620         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3621         *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3622         *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3623         *cs++ = 0;
3624
3625         i915_request_get(rq);
3626         i915_request_add(rq);
3627
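              /* Flush the request to the backend before sampling the expected tail. */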
3628         intel_engine_flush_submission(engine);
3629         expected[RING_TAIL_IDX] = ce->ring->tail;
3630
3631         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3632                 err = -ETIME;
3633                 goto err_rq;
3634         }
3635
3636         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3637         if (IS_ERR(cs)) {
3638                 err = PTR_ERR(cs);
3639                 goto err_rq;
3640         }
3641
3642         for (n = 0; n < MAX_IDX; n++) {
3643                 if (cs[n] != expected[n]) {
3644                         pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3645                                engine->name, n, cs[n], expected[n]);
3646                         err = -EINVAL;
3647                         break;
3648                 }
3649         }
3650
3651         i915_gem_object_unpin_map(scratch->obj);
3652
3653 err_rq:
3654         i915_request_put(rq);
3655 err_unpin:
3656         intel_context_unpin(ce);
3657 err_put:
3658         intel_context_put(ce);
3659         return err;
3660 }
3661
3662 static int live_lrc_state(void *arg)
3663 {
3664         struct intel_gt *gt = arg;
3665         struct intel_engine_cs *engine;
3666         struct i915_vma *scratch;
3667         enum intel_engine_id id;
3668         int err = 0;
3669
3670         /*
3671          * Check the live register state matches what we expect for this
3672          * intel_context.
3673          */
3674
3675         scratch = create_scratch(gt);
3676         if (IS_ERR(scratch))
3677                 return PTR_ERR(scratch);
3678
3679         for_each_engine(engine, gt, id) {
3680                 err = __live_lrc_state(engine, scratch);
3681                 if (err)
3682                         break;
3683         }
3684
3685         if (igt_flush_test(gt->i915))
3686                 err = -EIO;
3687
3688         i915_vma_unpin_and_release(&scratch, 0);
3689         return err;
3690 }
3691
3692 static int gpr_make_dirty(struct intel_engine_cs *engine)
3693 {
3694         struct i915_request *rq;
3695         u32 *cs;
3696         int n;
3697
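              /*
               * Write STACK_MAGIC into every CS_GPR from the kernel context so
               * that stale values would be visible to a later context unless
               * the GPR are cleared during context setup.
               */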
3698         rq = intel_engine_create_kernel_request(engine);
3699         if (IS_ERR(rq))
3700                 return PTR_ERR(rq);
3701
3702         cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3703         if (IS_ERR(cs)) {
3704                 i915_request_add(rq);
3705                 return PTR_ERR(cs);
3706         }
3707
3708         *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3709         for (n = 0; n < NUM_GPR_DW; n++) {
3710                 *cs++ = CS_GPR(engine, n);
3711                 *cs++ = STACK_MAGIC;
3712         }
3713         *cs++ = MI_NOOP;
3714
3715         intel_ring_advance(rq, cs);
3716         i915_request_add(rq);
3717
3718         return 0;
3719 }
3720
3721 static int __live_gpr_clear(struct intel_engine_cs *engine,
3722                             struct i915_vma *scratch)
3723 {
3724         struct intel_context *ce;
3725         struct i915_request *rq;
3726         u32 *cs;
3727         int err;
3728         int n;
3729
3730         if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3731                 return 0; /* GPR only on rcs0 for gen8 */
3732
3733         err = gpr_make_dirty(engine);
3734         if (err)
3735                 return err;
3736
3737         ce = intel_context_create(engine);
3738         if (IS_ERR(ce))
3739                 return PTR_ERR(ce);
3740
3741         rq = intel_context_create_request(ce);
3742         if (IS_ERR(rq)) {
3743                 err = PTR_ERR(rq);
3744                 goto err_put;
3745         }
3746
3747         cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
3748         if (IS_ERR(cs)) {
3749                 err = PTR_ERR(cs);
3750                 i915_request_add(rq);
3751                 goto err_put;
3752         }
3753
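              /*
               * Store every GPR of the new context into the scratch page;
               * despite the dirtying above, each dword should read back as
               * zero.
               */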
3754         for (n = 0; n < NUM_GPR_DW; n++) {
3755                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3756                 *cs++ = CS_GPR(engine, n);
3757                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3758                 *cs++ = 0;
3759         }
3760
3761         i915_request_get(rq);
3762         i915_request_add(rq);
3763
3764         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3765                 err = -ETIME;
3766                 goto err_rq;
3767         }
3768
3769         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3770         if (IS_ERR(cs)) {
3771                 err = PTR_ERR(cs);
3772                 goto err_rq;
3773         }
3774
3775         for (n = 0; n < NUM_GPR_DW; n++) {
3776                 if (cs[n]) {
3777                         pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
3778                                engine->name,
3779                                n / 2, n & 1 ? "udw" : "ldw",
3780                                cs[n]);
3781                         err = -EINVAL;
3782                         break;
3783                 }
3784         }
3785
3786         i915_gem_object_unpin_map(scratch->obj);
3787
3788 err_rq:
3789         i915_request_put(rq);
3790 err_put:
3791         intel_context_put(ce);
3792         return err;
3793 }
3794
3795 static int live_gpr_clear(void *arg)
3796 {
3797         struct intel_gt *gt = arg;
3798         struct intel_engine_cs *engine;
3799         struct i915_vma *scratch;
3800         enum intel_engine_id id;
3801         int err = 0;
3802
3803         /*
3804          * Check that GPR registers are cleared in new contexts as we need
3805          * to avoid leaking any information from previous contexts.
3806          */
3807
3808         scratch = create_scratch(gt);
3809         if (IS_ERR(scratch))
3810                 return PTR_ERR(scratch);
3811
3812         for_each_engine(engine, gt, id) {
3813                 err = __live_gpr_clear(engine, scratch);
3814                 if (err)
3815                         break;
3816         }
3817
3818         if (igt_flush_test(gt->i915))
3819                 err = -EIO;
3820
3821         i915_vma_unpin_and_release(&scratch, 0);
3822         return err;
3823 }
3824
3825 int intel_lrc_live_selftests(struct drm_i915_private *i915)
3826 {
3827         static const struct i915_subtest tests[] = {
3828                 SUBTEST(live_lrc_layout),
3829                 SUBTEST(live_lrc_fixed),
3830                 SUBTEST(live_lrc_state),
3831                 SUBTEST(live_gpr_clear),
3832         };
3833
3834         if (!HAS_LOGICAL_RING_CONTEXTS(i915))
3835                 return 0;
3836
3837         return intel_gt_live_subtests(tests, &i915->gt);
3838 }