drivers/gpu/drm/i915/gt/selftest_execlists.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2018 Intel Corporation
   4  */
   5
   6 #include <linux/prime_numbers.h>
   7
   8 #include "gem/i915_gem_pm.h"
   9 #include "gt/intel_engine_heartbeat.h"
  10 #include "gt/intel_reset.h"
  11 #include "gt/selftest_engine_heartbeat.h"
  12
  13 #include "i915_selftest.h"
  14 #include "selftests/i915_random.h"
  15 #include "selftests/igt_flush_test.h"
  16 #include "selftests/igt_live_test.h"
  17 #include "selftests/igt_spinner.h"
  18 #include "selftests/lib_sw_fence.h"
  19
  20 #include "gem/selftests/igt_gem_utils.h"
  21 #include "gem/selftests/mock_context.h"
  22
/*
 * MMIO offset of dword @n in @engine's GPR block, which this file places
 * 0x600 bytes above the engine's mmio_base.
 */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
/* Number of GPRs. NOTE(review): presumably per engine -- confirm. */
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
  26
  27 static bool is_active(struct i915_request *rq)
  28 {
  29         if (i915_request_is_active(rq))
  30                 return true;
  31
  32         if (i915_request_on_hold(rq))
  33                 return true;
  34
  35         if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
  36                 return true;
  37
  38         return false;
  39 }
  40
  41 static int wait_for_submit(struct intel_engine_cs *engine,
  42                            struct i915_request *rq,
  43                            unsigned long timeout)
  44 {
  45         /* Ignore our own attempts to suppress excess tasklets */
  46         tasklet_hi_schedule(&engine->sched_engine->tasklet);
  47
  48         timeout += jiffies;
  49         do {
  50                 bool done = time_after(jiffies, timeout);
  51
  52                 if (i915_request_completed(rq)) /* that was quick! */
  53                         return 0;
  54
  55                 /* Wait until the HW has acknowleged the submission (or err) */
  56                 intel_engine_flush_submission(engine);
  57                 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
  58                         return 0;
  59
  60                 if (done)
  61                         return -ETIME;
  62
  63                 cond_resched();
  64         } while (1);
  65 }
  66
  67 static int wait_for_reset(struct intel_engine_cs *engine,
  68                           struct i915_request *rq,
  69                           unsigned long timeout)
  70 {
  71         timeout += jiffies;
  72
  73         do {
  74                 cond_resched();
  75                 intel_engine_flush_submission(engine);
  76
  77                 if (READ_ONCE(engine->execlists.pending[0]))
  78                         continue;
  79
  80                 if (i915_request_completed(rq))
  81                         break;
  82
  83                 if (READ_ONCE(rq->fence.error))
  84                         break;
  85         } while (time_before(jiffies, timeout));
  86
  87         flush_scheduled_work();
  88
  89         if (rq->fence.error != -EIO) {
  90                 pr_err("%s: hanging request %llx:%lld not reset\n",
  91                        engine->name,
  92                        rq->fence.context,
  93                        rq->fence.seqno);
  94                 return -EINVAL;
  95         }
  96
  97         /* Give the request a jiffie to complete after flushing the worker */
  98         if (i915_request_wait(rq, 0,
  99                               max(0l, (long)(timeout - jiffies)) + 1) < 0) {
 100                 pr_err("%s: hanging request %llx:%lld did not complete\n",
 101                        engine->name,
 102                        rq->fence.context,
 103                        rq->fence.seqno);
 104                 return -ETIME;
 105         }
 106
 107         return 0;
 108 }
 109
 110 static int live_sanitycheck(void *arg)
 111 {
 112         struct intel_gt *gt = arg;
 113         struct intel_engine_cs *engine;
 114         enum intel_engine_id id;
 115         struct igt_spinner spin;
 116         int err = 0;
 117
 118         if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 119                 return 0;
 120
 121         if (igt_spinner_init(&spin, gt))
 122                 return -ENOMEM;
 123
 124         for_each_engine(engine, gt, id) {
 125                 struct intel_context *ce;
 126                 struct i915_request *rq;
 127
 128                 ce = intel_context_create(engine);
 129                 if (IS_ERR(ce)) {
 130                         err = PTR_ERR(ce);
 131                         break;
 132                 }
 133
 134                 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 135                 if (IS_ERR(rq)) {
 136                         err = PTR_ERR(rq);
 137                         goto out_ctx;
 138                 }
 139
 140                 i915_request_add(rq);
 141                 if (!igt_wait_for_spinner(&spin, rq)) {
 142                         GEM_TRACE("spinner failed to start\n");
 143                         GEM_TRACE_DUMP();
 144                         intel_gt_set_wedged(gt);
 145                         err = -EIO;
 146                         goto out_ctx;
 147                 }
 148
 149                 igt_spinner_end(&spin);
 150                 if (igt_flush_test(gt->i915)) {
 151                         err = -EIO;
 152                         goto out_ctx;
 153                 }
 154
 155 out_ctx:
 156                 intel_context_put(ce);
 157                 if (err)
 158                         break;
 159         }
 160
 161         igt_spinner_fini(&spin);
 162         return err;
 163 }
 164
 165 static int live_unlite_restore(struct intel_gt *gt, int prio)
 166 {
 167         struct intel_engine_cs *engine;
 168         enum intel_engine_id id;
 169         struct igt_spinner spin;
 170         int err = -ENOMEM;
 171
 172         /*
 173          * Check that we can correctly context switch between 2 instances
 174          * on the same engine from the same parent context.
 175          */
 176
 177         if (igt_spinner_init(&spin, gt))
 178                 return err;
 179
 180         err = 0;
 181         for_each_engine(engine, gt, id) {
 182                 struct intel_context *ce[2] = {};
 183                 struct i915_request *rq[2];
 184                 struct igt_live_test t;
 185                 int n;
 186
 187                 if (prio && !intel_engine_has_preemption(engine))
 188                         continue;
 189
 190                 if (!intel_engine_can_store_dword(engine))
 191                         continue;
 192
 193                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 194                         err = -EIO;
 195                         break;
 196                 }
 197                 st_engine_heartbeat_disable(engine);
 198
 199                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
 200                         struct intel_context *tmp;
 201
 202                         tmp = intel_context_create(engine);
 203                         if (IS_ERR(tmp)) {
 204                                 err = PTR_ERR(tmp);
 205                                 goto err_ce;
 206                         }
 207
 208                         err = intel_context_pin(tmp);
 209                         if (err) {
 210                                 intel_context_put(tmp);
 211                                 goto err_ce;
 212                         }
 213
 214                         /*
 215                          * Setup the pair of contexts such that if we
 216                          * lite-restore using the RING_TAIL from ce[1] it
 217                          * will execute garbage from ce[0]->ring.
 218                          */
 219                         memset(tmp->ring->vaddr,
 220                                POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
 221                                tmp->ring->vma->size);
 222
 223                         ce[n] = tmp;
 224                 }
 225                 GEM_BUG_ON(!ce[1]->ring->size);
 226                 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
 227                 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
 228
 229                 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 230                 if (IS_ERR(rq[0])) {
 231                         err = PTR_ERR(rq[0]);
 232                         goto err_ce;
 233                 }
 234
 235                 i915_request_get(rq[0]);
 236                 i915_request_add(rq[0]);
 237                 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
 238
 239                 if (!igt_wait_for_spinner(&spin, rq[0])) {
 240                         i915_request_put(rq[0]);
 241                         goto err_ce;
 242                 }
 243
 244                 rq[1] = i915_request_create(ce[1]);
 245                 if (IS_ERR(rq[1])) {
 246                         err = PTR_ERR(rq[1]);
 247                         i915_request_put(rq[0]);
 248                         goto err_ce;
 249                 }
 250
 251                 if (!prio) {
 252                         /*
 253                          * Ensure we do the switch to ce[1] on completion.
 254                          *
 255                          * rq[0] is already submitted, so this should reduce
 256                          * to a no-op (a wait on a request on the same engine
 257                          * uses the submit fence, not the completion fence),
 258                          * but it will install a dependency on rq[1] for rq[0]
 259                          * that will prevent the pair being reordered by
 260                          * timeslicing.
 261                          */
 262                         i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 263                 }
 264
 265                 i915_request_get(rq[1]);
 266                 i915_request_add(rq[1]);
 267                 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
 268                 i915_request_put(rq[0]);
 269
 270                 if (prio) {
 271                         struct i915_sched_attr attr = {
 272                                 .priority = prio,
 273                         };
 274
 275                         /* Alternatively preempt the spinner with ce[1] */
 276                         engine->sched_engine->schedule(rq[1], &attr);
 277                 }
 278
 279                 /* And switch back to ce[0] for good measure */
 280                 rq[0] = i915_request_create(ce[0]);
 281                 if (IS_ERR(rq[0])) {
 282                         err = PTR_ERR(rq[0]);
 283                         i915_request_put(rq[1]);
 284                         goto err_ce;
 285                 }
 286
 287                 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
 288                 i915_request_get(rq[0]);
 289                 i915_request_add(rq[0]);
 290                 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
 291                 i915_request_put(rq[1]);
 292                 i915_request_put(rq[0]);
 293
 294 err_ce:
 295                 intel_engine_flush_submission(engine);
 296                 igt_spinner_end(&spin);
 297                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
 298                         if (IS_ERR_OR_NULL(ce[n]))
 299                                 break;
 300
 301                         intel_context_unpin(ce[n]);
 302                         intel_context_put(ce[n]);
 303                 }
 304
 305                 st_engine_heartbeat_enable(engine);
 306                 if (igt_live_test_end(&t))
 307                         err = -EIO;
 308                 if (err)
 309                         break;
 310         }
 311
 312         igt_spinner_fini(&spin);
 313         return err;
 314 }
 315
 316 static int live_unlite_switch(void *arg)
 317 {
 318         return live_unlite_restore(arg, 0);
 319 }
 320
 321 static int live_unlite_preempt(void *arg)
 322 {
 323         return live_unlite_restore(arg, I915_PRIORITY_MAX);
 324 }
 325
 326 static int live_unlite_ring(void *arg)
 327 {
 328         struct intel_gt *gt = arg;
 329         struct intel_engine_cs *engine;
 330         struct igt_spinner spin;
 331         enum intel_engine_id id;
 332         int err = 0;
 333
 334         /*
 335          * Setup a preemption event that will cause almost the entire ring
 336          * to be unwound, potentially fooling our intel_ring_direction()
 337          * into emitting a forward lite-restore instead of the rollback.
 338          */
 339
 340         if (igt_spinner_init(&spin, gt))
 341                 return -ENOMEM;
 342
 343         for_each_engine(engine, gt, id) {
 344                 struct intel_context *ce[2] = {};
 345                 struct i915_request *rq;
 346                 struct igt_live_test t;
 347                 int n;
 348
 349                 if (!intel_engine_has_preemption(engine))
 350                         continue;
 351
 352                 if (!intel_engine_can_store_dword(engine))
 353                         continue;
 354
 355                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 356                         err = -EIO;
 357                         break;
 358                 }
 359                 st_engine_heartbeat_disable(engine);
 360
 361                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
 362                         struct intel_context *tmp;
 363
 364                         tmp = intel_context_create(engine);
 365                         if (IS_ERR(tmp)) {
 366                                 err = PTR_ERR(tmp);
 367                                 goto err_ce;
 368                         }
 369
 370                         err = intel_context_pin(tmp);
 371                         if (err) {
 372                                 intel_context_put(tmp);
 373                                 goto err_ce;
 374                         }
 375
 376                         memset32(tmp->ring->vaddr,
 377                                  0xdeadbeef, /* trigger a hang if executed */
 378                                  tmp->ring->vma->size / sizeof(u32));
 379
 380                         ce[n] = tmp;
 381                 }
 382
 383                 /* Create max prio spinner, followed by N low prio nops */
 384                 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 385                 if (IS_ERR(rq)) {
 386                         err = PTR_ERR(rq);
 387                         goto err_ce;
 388                 }
 389
 390                 i915_request_get(rq);
 391                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 392                 i915_request_add(rq);
 393
 394                 if (!igt_wait_for_spinner(&spin, rq)) {
 395                         intel_gt_set_wedged(gt);
 396                         i915_request_put(rq);
 397                         err = -ETIME;
 398                         goto err_ce;
 399                 }
 400
 401                 /* Fill the ring, until we will cause a wrap */
 402                 n = 0;
 403                 while (intel_ring_direction(ce[0]->ring,
 404                                             rq->wa_tail,
 405                                             ce[0]->ring->tail) <= 0) {
 406                         struct i915_request *tmp;
 407
 408                         tmp = intel_context_create_request(ce[0]);
 409                         if (IS_ERR(tmp)) {
 410                                 err = PTR_ERR(tmp);
 411                                 i915_request_put(rq);
 412                                 goto err_ce;
 413                         }
 414
 415                         i915_request_add(tmp);
 416                         intel_engine_flush_submission(engine);
 417                         n++;
 418                 }
 419                 intel_engine_flush_submission(engine);
 420                 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
 421                          engine->name, n,
 422                          ce[0]->ring->size,
 423                          ce[0]->ring->tail,
 424                          ce[0]->ring->emit,
 425                          rq->tail);
 426                 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
 427                                                 rq->tail,
 428                                                 ce[0]->ring->tail) <= 0);
 429                 i915_request_put(rq);
 430
 431                 /* Create a second ring to preempt the first ring after rq[0] */
 432                 rq = intel_context_create_request(ce[1]);
 433                 if (IS_ERR(rq)) {
 434                         err = PTR_ERR(rq);
 435                         goto err_ce;
 436                 }
 437
 438                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 439                 i915_request_get(rq);
 440                 i915_request_add(rq);
 441
 442                 err = wait_for_submit(engine, rq, HZ / 2);
 443                 i915_request_put(rq);
 444                 if (err) {
 445                         pr_err("%s: preemption request was not submitted\n",
 446                                engine->name);
 447                         err = -ETIME;
 448                 }
 449
 450                 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
 451                          engine->name,
 452                          ce[0]->ring->tail, ce[0]->ring->emit,
 453                          ce[1]->ring->tail, ce[1]->ring->emit);
 454
 455 err_ce:
 456                 intel_engine_flush_submission(engine);
 457                 igt_spinner_end(&spin);
 458                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
 459                         if (IS_ERR_OR_NULL(ce[n]))
 460                                 break;
 461
 462                         intel_context_unpin(ce[n]);
 463                         intel_context_put(ce[n]);
 464                 }
 465                 st_engine_heartbeat_enable(engine);
 466                 if (igt_live_test_end(&t))
 467                         err = -EIO;
 468                 if (err)
 469                         break;
 470         }
 471
 472         igt_spinner_fini(&spin);
 473         return err;
 474 }
 475
 476 static int live_pin_rewind(void *arg)
 477 {
 478         struct intel_gt *gt = arg;
 479         struct intel_engine_cs *engine;
 480         enum intel_engine_id id;
 481         int err = 0;
 482
 483         /*
 484          * We have to be careful not to trust intel_ring too much, for example
 485          * ring->head is updated upon retire which is out of sync with pinning
 486          * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
 487          * or else we risk writing an older, stale value.
 488          *
 489          * To simulate this, let's apply a bit of deliberate sabotague.
 490          */
 491
 492         for_each_engine(engine, gt, id) {
 493                 struct intel_context *ce;
 494                 struct i915_request *rq;
 495                 struct intel_ring *ring;
 496                 struct igt_live_test t;
 497
 498                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 499                         err = -EIO;
 500                         break;
 501                 }
 502
 503                 ce = intel_context_create(engine);
 504                 if (IS_ERR(ce)) {
 505                         err = PTR_ERR(ce);
 506                         break;
 507                 }
 508
 509                 err = intel_context_pin(ce);
 510                 if (err) {
 511                         intel_context_put(ce);
 512                         break;
 513                 }
 514
 515                 /* Keep the context awake while we play games */
 516                 err = i915_active_acquire(&ce->active);
 517                 if (err) {
 518                         intel_context_unpin(ce);
 519                         intel_context_put(ce);
 520                         break;
 521                 }
 522                 ring = ce->ring;
 523
 524                 /* Poison the ring, and offset the next request from HEAD */
 525                 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
 526                 ring->emit = ring->size / 2;
 527                 ring->tail = ring->emit;
 528                 GEM_BUG_ON(ring->head);
 529
 530                 intel_context_unpin(ce);
 531
 532                 /* Submit a simple nop request */
 533                 GEM_BUG_ON(intel_context_is_pinned(ce));
 534                 rq = intel_context_create_request(ce);
 535                 i915_active_release(&ce->active); /* e.g. async retire */
 536                 intel_context_put(ce);
 537                 if (IS_ERR(rq)) {
 538                         err = PTR_ERR(rq);
 539                         break;
 540                 }
 541                 GEM_BUG_ON(!rq->head);
 542                 i915_request_add(rq);
 543
 544                 /* Expect not to hang! */
 545                 if (igt_live_test_end(&t)) {
 546                         err = -EIO;
 547                         break;
 548                 }
 549         }
 550
 551         return err;
 552 }
 553
 554 static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
 555 {
 556         tasklet_disable(&engine->sched_engine->tasklet);
 557         local_bh_disable();
 558
 559         if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 560                              &engine->gt->reset.flags)) {
 561                 local_bh_enable();
 562                 tasklet_enable(&engine->sched_engine->tasklet);
 563
 564                 intel_gt_set_wedged(engine->gt);
 565                 return -EBUSY;
 566         }
 567
 568         return 0;
 569 }
 570
 571 static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
 572 {
 573         clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
 574                               &engine->gt->reset.flags);
 575
 576         local_bh_enable();
 577         tasklet_enable(&engine->sched_engine->tasklet);
 578 }
 579
 580 static int live_hold_reset(void *arg)
 581 {
 582         struct intel_gt *gt = arg;
 583         struct intel_engine_cs *engine;
 584         enum intel_engine_id id;
 585         struct igt_spinner spin;
 586         int err = 0;
 587
 588         /*
 589          * In order to support offline error capture for fast preempt reset,
 590          * we need to decouple the guilty request and ensure that it and its
 591          * descendents are not executed while the capture is in progress.
 592          */
 593
 594         if (!intel_has_reset_engine(gt))
 595                 return 0;
 596
 597         if (igt_spinner_init(&spin, gt))
 598                 return -ENOMEM;
 599
 600         for_each_engine(engine, gt, id) {
 601                 struct intel_context *ce;
 602                 struct i915_request *rq;
 603
 604                 ce = intel_context_create(engine);
 605                 if (IS_ERR(ce)) {
 606                         err = PTR_ERR(ce);
 607                         break;
 608                 }
 609
 610                 st_engine_heartbeat_disable(engine);
 611
 612                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 613                 if (IS_ERR(rq)) {
 614                         err = PTR_ERR(rq);
 615                         goto out;
 616                 }
 617                 i915_request_add(rq);
 618
 619                 if (!igt_wait_for_spinner(&spin, rq)) {
 620                         intel_gt_set_wedged(gt);
 621                         err = -ETIME;
 622                         goto out;
 623                 }
 624
 625                 /* We have our request executing, now remove it and reset */
 626
 627                 err = engine_lock_reset_tasklet(engine);
 628                 if (err)
 629                         goto out;
 630
 631                 engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
 632                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 633
 634                 i915_request_get(rq);
 635                 execlists_hold(engine, rq);
 636                 GEM_BUG_ON(!i915_request_on_hold(rq));
 637
 638                 __intel_engine_reset_bh(engine, NULL);
 639                 GEM_BUG_ON(rq->fence.error != -EIO);
 640
 641                 engine_unlock_reset_tasklet(engine);
 642
 643                 /* Check that we do not resubmit the held request */
 644                 if (!i915_request_wait(rq, 0, HZ / 5)) {
 645                         pr_err("%s: on hold request completed!\n",
 646                                engine->name);
 647                         i915_request_put(rq);
 648                         err = -EIO;
 649                         goto out;
 650                 }
 651                 GEM_BUG_ON(!i915_request_on_hold(rq));
 652
 653                 /* But is resubmitted on release */
 654                 execlists_unhold(engine, rq);
 655                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 656                         pr_err("%s: held request did not complete!\n",
 657                                engine->name);
 658                         intel_gt_set_wedged(gt);
 659                         err = -ETIME;
 660                 }
 661                 i915_request_put(rq);
 662
 663 out:
 664                 st_engine_heartbeat_enable(engine);
 665                 intel_context_put(ce);
 666                 if (err)
 667                         break;
 668         }
 669
 670         igt_spinner_fini(&spin);
 671         return err;
 672 }
 673
 674 static const char *error_repr(int err)
 675 {
 676         return err ? "bad" : "good";
 677 }
 678
 679 static int live_error_interrupt(void *arg)
 680 {
 681         static const struct error_phase {
 682                 enum { GOOD = 0, BAD = -EIO } error[2];
 683         } phases[] = {
 684                 { { BAD,  GOOD } },
 685                 { { BAD,  BAD  } },
 686                 { { BAD,  GOOD } },
 687                 { { GOOD, GOOD } }, /* sentinel */
 688         };
 689         struct intel_gt *gt = arg;
 690         struct intel_engine_cs *engine;
 691         enum intel_engine_id id;
 692
 693         /*
 694          * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
 695          * of invalid commands in user batches that will cause a GPU hang.
 696          * This is a faster mechanism than using hangcheck/heartbeats, but
 697          * only detects problems the HW knows about -- it will not warn when
 698          * we kill the HW!
 699          *
 700          * To verify our detection and reset, we throw some invalid commands
 701          * at the HW and wait for the interrupt.
 702          */
 703
 704         if (!intel_has_reset_engine(gt))
 705                 return 0;
 706
 707         for_each_engine(engine, gt, id) {
 708                 const struct error_phase *p;
 709                 int err = 0;
 710
 711                 st_engine_heartbeat_disable(engine);
 712
 713                 for (p = phases; p->error[0] != GOOD; p++) {
 714                         struct i915_request *client[ARRAY_SIZE(phases->error)];
 715                         u32 *cs;
 716                         int i;
 717
 718                         memset(client, 0, sizeof(*client));
 719                         for (i = 0; i < ARRAY_SIZE(client); i++) {
 720                                 struct intel_context *ce;
 721                                 struct i915_request *rq;
 722
 723                                 ce = intel_context_create(engine);
 724                                 if (IS_ERR(ce)) {
 725                                         err = PTR_ERR(ce);
 726                                         goto out;
 727                                 }
 728
 729                                 rq = intel_context_create_request(ce);
 730                                 intel_context_put(ce);
 731                                 if (IS_ERR(rq)) {
 732                                         err = PTR_ERR(rq);
 733                                         goto out;
 734                                 }
 735
 736                                 if (rq->engine->emit_init_breadcrumb) {
 737                                         err = rq->engine->emit_init_breadcrumb(rq);
 738                                         if (err) {
 739                                                 i915_request_add(rq);
 740                                                 goto out;
 741                                         }
 742                                 }
 743
 744                                 cs = intel_ring_begin(rq, 2);
 745                                 if (IS_ERR(cs)) {
 746                                         i915_request_add(rq);
 747                                         err = PTR_ERR(cs);
 748                                         goto out;
 749                                 }
 750
 751                                 if (p->error[i]) {
 752                                         *cs++ = 0xdeadbeef;
 753                                         *cs++ = 0xdeadbeef;
 754                                 } else {
 755                                         *cs++ = MI_NOOP;
 756                                         *cs++ = MI_NOOP;
 757                                 }
 758
 759                                 client[i] = i915_request_get(rq);
 760                                 i915_request_add(rq);
 761                         }
 762
 763                         err = wait_for_submit(engine, client[0], HZ / 2);
 764                         if (err) {
 765                                 pr_err("%s: first request did not start within time!\n",
 766                                        engine->name);
 767                                 err = -ETIME;
 768                                 goto out;
 769                         }
 770
 771                         for (i = 0; i < ARRAY_SIZE(client); i++) {
 772                                 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
 773                                         pr_debug("%s: %s request incomplete!\n",
 774                                                  engine->name,
 775                                                  error_repr(p->error[i]));
 776
 777                                 if (!i915_request_started(client[i])) {
 778                                         pr_err("%s: %s request not started!\n",
 779                                                engine->name,
 780                                                error_repr(p->error[i]));
 781                                         err = -ETIME;
 782                                         goto out;
 783                                 }
 784
 785                                 /* Kick the tasklet to process the error */
 786                                 intel_engine_flush_submission(engine);
 787                                 if (client[i]->fence.error != p->error[i]) {
 788                                         pr_err("%s: %s request (%s) with wrong error code: %d\n",
 789                                                engine->name,
 790                                                error_repr(p->error[i]),
 791                                                i915_request_completed(client[i]) ? "completed" : "running",
 792                                                client[i]->fence.error);
 793                                         err = -EINVAL;
 794                                         goto out;
 795                                 }
 796                         }
 797
 798 out:
 799                         for (i = 0; i < ARRAY_SIZE(client); i++)
 800                                 if (client[i])
 801                                         i915_request_put(client[i]);
 802                         if (err) {
 803                                 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
 804                                        engine->name, p - phases,
 805                                        p->error[0], p->error[1]);
 806                                 break;
 807                         }
 808                 }
 809
 810                 st_engine_heartbeat_enable(engine);
 811                 if (err) {
 812                         intel_gt_set_wedged(gt);
 813                         return err;
 814                 }
 815         }
 816
 817         return 0;
 818 }
 819
 820 static int
 821 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 822 {
 823         u32 *cs;
 824
 825         cs = intel_ring_begin(rq, 10);
 826         if (IS_ERR(cs))
 827                 return PTR_ERR(cs);
 828
 829         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 830
 831         *cs++ = MI_SEMAPHORE_WAIT |
 832                 MI_SEMAPHORE_GLOBAL_GTT |
 833                 MI_SEMAPHORE_POLL |
 834                 MI_SEMAPHORE_SAD_NEQ_SDD;
 835         *cs++ = 0;
 836         *cs++ = i915_ggtt_offset(vma) + 4 * idx;
 837         *cs++ = 0;
 838
 839         if (idx > 0) {
 840                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 841                 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 842                 *cs++ = 0;
 843                 *cs++ = 1;
 844         } else {
 845                 *cs++ = MI_NOOP;
 846                 *cs++ = MI_NOOP;
 847                 *cs++ = MI_NOOP;
 848                 *cs++ = MI_NOOP;
 849         }
 850
 851         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 852
 853         intel_ring_advance(rq, cs);
 854         return 0;
 855 }
 856
 857 static struct i915_request *
 858 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 859 {
 860         struct intel_context *ce;
 861         struct i915_request *rq;
 862         int err;
 863
 864         ce = intel_context_create(engine);
 865         if (IS_ERR(ce))
 866                 return ERR_CAST(ce);
 867
 868         rq = intel_context_create_request(ce);
 869         if (IS_ERR(rq))
 870                 goto out_ce;
 871
 872         err = 0;
 873         if (rq->engine->emit_init_breadcrumb)
 874                 err = rq->engine->emit_init_breadcrumb(rq);
 875         if (err == 0)
 876                 err = emit_semaphore_chain(rq, vma, idx);
 877         if (err == 0)
 878                 i915_request_get(rq);
 879         i915_request_add(rq);
 880         if (err)
 881                 rq = ERR_PTR(err);
 882
 883 out_ce:
 884         intel_context_put(ce);
 885         return rq;
 886 }
 887
 888 static int
 889 release_queue(struct intel_engine_cs *engine,
 890               struct i915_vma *vma,
 891               int idx, int prio)
 892 {
 893         struct i915_sched_attr attr = {
 894                 .priority = prio,
 895         };
 896         struct i915_request *rq;
 897         u32 *cs;
 898
 899         rq = intel_engine_create_kernel_request(engine);
 900         if (IS_ERR(rq))
 901                 return PTR_ERR(rq);
 902
 903         cs = intel_ring_begin(rq, 4);
 904         if (IS_ERR(cs)) {
 905                 i915_request_add(rq);
 906                 return PTR_ERR(cs);
 907         }
 908
 909         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 910         *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 911         *cs++ = 0;
 912         *cs++ = 1;
 913
 914         intel_ring_advance(rq, cs);
 915
 916         i915_request_get(rq);
 917         i915_request_add(rq);
 918
 919         local_bh_disable();
 920         engine->sched_engine->schedule(rq, &attr);
 921         local_bh_enable(); /* kick tasklet */
 922
 923         i915_request_put(rq);
 924
 925         return 0;
 926 }
 927
 928 static int
 929 slice_semaphore_queue(struct intel_engine_cs *outer,
 930                       struct i915_vma *vma,
 931                       int count)
 932 {
 933         struct intel_engine_cs *engine;
 934         struct i915_request *head;
 935         enum intel_engine_id id;
 936         int err, i, n = 0;
 937
 938         head = semaphore_queue(outer, vma, n++);
 939         if (IS_ERR(head))
 940                 return PTR_ERR(head);
 941
 942         for_each_engine(engine, outer->gt, id) {
 943                 if (!intel_engine_has_preemption(engine))
 944                         continue;
 945
 946                 for (i = 0; i < count; i++) {
 947                         struct i915_request *rq;
 948
 949                         rq = semaphore_queue(engine, vma, n++);
 950                         if (IS_ERR(rq)) {
 951                                 err = PTR_ERR(rq);
 952                                 goto out;
 953                         }
 954
 955                         i915_request_put(rq);
 956                 }
 957         }
 958
 959         err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
 960         if (err)
 961                 goto out;
 962
 963         if (i915_request_wait(head, 0,
 964                               2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
 965                 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
 966                        outer->name, count, n);
 967                 GEM_TRACE_DUMP();
 968                 intel_gt_set_wedged(outer->gt);
 969                 err = -EIO;
 970         }
 971
 972 out:
 973         i915_request_put(head);
 974         return err;
 975 }
 976
 977 static int live_timeslice_preempt(void *arg)
 978 {
 979         struct intel_gt *gt = arg;
 980         struct drm_i915_gem_object *obj;
 981         struct intel_engine_cs *engine;
 982         enum intel_engine_id id;
 983         struct i915_vma *vma;
 984         void *vaddr;
 985         int err = 0;
 986
 987         /*
 988          * If a request takes too long, we would like to give other users
 989          * a fair go on the GPU. In particular, users may create batches
 990          * that wait upon external input, where that input may even be
 991          * supplied by another GPU job. To avoid blocking forever, we
 992          * need to preempt the current task and replace it with another
 993          * ready task.
 994          */
 995         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 996                 return 0;
 997
 998         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
 999         if (IS_ERR(obj))
1000                 return PTR_ERR(obj);
1001
1002         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1003         if (IS_ERR(vma)) {
1004                 err = PTR_ERR(vma);
1005                 goto err_obj;
1006         }
1007
1008         vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1009         if (IS_ERR(vaddr)) {
1010                 err = PTR_ERR(vaddr);
1011                 goto err_obj;
1012         }
1013
1014         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1015         if (err)
1016                 goto err_map;
1017
1018         err = i915_vma_sync(vma);
1019         if (err)
1020                 goto err_pin;
1021
1022         for_each_engine(engine, gt, id) {
1023                 if (!intel_engine_has_preemption(engine))
1024                         continue;
1025
1026                 memset(vaddr, 0, PAGE_SIZE);
1027
1028                 st_engine_heartbeat_disable(engine);
1029                 err = slice_semaphore_queue(engine, vma, 5);
1030                 st_engine_heartbeat_enable(engine);
1031                 if (err)
1032                         goto err_pin;
1033
1034                 if (igt_flush_test(gt->i915)) {
1035                         err = -EIO;
1036                         goto err_pin;
1037                 }
1038         }
1039
1040 err_pin:
1041         i915_vma_unpin(vma);
1042 err_map:
1043         i915_gem_object_unpin_map(obj);
1044 err_obj:
1045         i915_gem_object_put(obj);
1046         return err;
1047 }
1048
1049 static struct i915_request *
1050 create_rewinder(struct intel_context *ce,
1051                 struct i915_request *wait,
1052                 void *slot, int idx)
1053 {
1054         const u32 offset =
1055                 i915_ggtt_offset(ce->engine->status_page.vma) +
1056                 offset_in_page(slot);
1057         struct i915_request *rq;
1058         u32 *cs;
1059         int err;
1060
1061         rq = intel_context_create_request(ce);
1062         if (IS_ERR(rq))
1063                 return rq;
1064
1065         if (wait) {
1066                 err = i915_request_await_dma_fence(rq, &wait->fence);
1067                 if (err)
1068                         goto err;
1069         }
1070
1071         cs = intel_ring_begin(rq, 14);
1072         if (IS_ERR(cs)) {
1073                 err = PTR_ERR(cs);
1074                 goto err;
1075         }
1076
1077         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1078         *cs++ = MI_NOOP;
1079
1080         *cs++ = MI_SEMAPHORE_WAIT |
1081                 MI_SEMAPHORE_GLOBAL_GTT |
1082                 MI_SEMAPHORE_POLL |
1083                 MI_SEMAPHORE_SAD_GTE_SDD;
1084         *cs++ = idx;
1085         *cs++ = offset;
1086         *cs++ = 0;
1087
1088         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1089         *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1090         *cs++ = offset + idx * sizeof(u32);
1091         *cs++ = 0;
1092
1093         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1094         *cs++ = offset;
1095         *cs++ = 0;
1096         *cs++ = idx + 1;
1097
1098         intel_ring_advance(rq, cs);
1099
1100         err = 0;
1101 err:
1102         i915_request_get(rq);
1103         i915_request_add(rq);
1104         if (err) {
1105                 i915_request_put(rq);
1106                 return ERR_PTR(err);
1107         }
1108
1109         return rq;
1110 }
1111
1112 static int live_timeslice_rewind(void *arg)
1113 {
1114         struct intel_gt *gt = arg;
1115         struct intel_engine_cs *engine;
1116         enum intel_engine_id id;
1117
1118         /*
1119          * The usual presumption on timeslice expiration is that we replace
1120          * the active context with another. However, given a chain of
1121          * dependencies we may end up with replacing the context with itself,
1122          * but only a few of those requests, forcing us to rewind the
1123          * RING_TAIL of the original request.
1124          */
1125         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1126                 return 0;
1127
1128         for_each_engine(engine, gt, id) {
1129                 enum { A1, A2, B1 };
1130                 enum { X = 1, Z, Y };
1131                 struct i915_request *rq[3] = {};
1132                 struct intel_context *ce;
1133                 unsigned long timeslice;
1134                 int i, err = 0;
1135                 u32 *slot;
1136
1137                 if (!intel_engine_has_timeslices(engine))
1138                         continue;
1139
1140                 /*
1141                  * A:rq1 -- semaphore wait, timestamp X
1142                  * A:rq2 -- write timestamp Y
1143                  *
1144                  * B:rq1 [await A:rq1] -- write timestamp Z
1145                  *
1146                  * Force timeslice, release semaphore.
1147                  *
1148                  * Expect execution/evaluation order XZY
1149                  */
1150
1151                 st_engine_heartbeat_disable(engine);
1152                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1153
1154                 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1155
1156                 ce = intel_context_create(engine);
1157                 if (IS_ERR(ce)) {
1158                         err = PTR_ERR(ce);
1159                         goto err;
1160                 }
1161
1162                 rq[A1] = create_rewinder(ce, NULL, slot, X);
1163                 if (IS_ERR(rq[A1])) {
1164                         intel_context_put(ce);
1165                         goto err;
1166                 }
1167
1168                 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1169                 intel_context_put(ce);
1170                 if (IS_ERR(rq[A2]))
1171                         goto err;
1172
1173                 err = wait_for_submit(engine, rq[A2], HZ / 2);
1174                 if (err) {
1175                         pr_err("%s: failed to submit first context\n",
1176                                engine->name);
1177                         goto err;
1178                 }
1179
1180                 ce = intel_context_create(engine);
1181                 if (IS_ERR(ce)) {
1182                         err = PTR_ERR(ce);
1183                         goto err;
1184                 }
1185
1186                 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1187                 intel_context_put(ce);
1188                 if (IS_ERR(rq[2]))
1189                         goto err;
1190
1191                 err = wait_for_submit(engine, rq[B1], HZ / 2);
1192                 if (err) {
1193                         pr_err("%s: failed to submit second context\n",
1194                                engine->name);
1195                         goto err;
1196                 }
1197
1198                 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1199                 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1200                 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1201                         /* Wait for the timeslice to kick in */
1202                         del_timer(&engine->execlists.timer);
1203                         tasklet_hi_schedule(&engine->sched_engine->tasklet);
1204                         intel_engine_flush_submission(engine);
1205                 }
1206                 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1207                 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1208                 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1209                 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1210
1211                 /* Release the hounds! */
1212                 slot[0] = 1;
1213                 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1214
1215                 for (i = 1; i <= 3; i++) {
1216                         unsigned long timeout = jiffies + HZ / 2;
1217
1218                         while (!READ_ONCE(slot[i]) &&
1219                                time_before(jiffies, timeout))
1220                                 ;
1221
1222                         if (!time_before(jiffies, timeout)) {
1223                                 pr_err("%s: rq[%d] timed out\n",
1224                                        engine->name, i - 1);
1225                                 err = -ETIME;
1226                                 goto err;
1227                         }
1228
1229                         pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1230                 }
1231
1232                 /* XZY: XZ < XY */
1233                 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1234                         pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1235                                engine->name,
1236                                slot[Z] - slot[X],
1237                                slot[Y] - slot[X]);
1238                         err = -EINVAL;
1239                 }
1240
1241 err:
1242                 memset32(&slot[0], -1, 4);
1243                 wmb();
1244
1245                 engine->props.timeslice_duration_ms = timeslice;
1246                 st_engine_heartbeat_enable(engine);
1247                 for (i = 0; i < 3; i++)
1248                         i915_request_put(rq[i]);
1249                 if (igt_flush_test(gt->i915))
1250                         err = -EIO;
1251                 if (err)
1252                         return err;
1253         }
1254
1255         return 0;
1256 }
1257
/*
 * Submit an empty kernel request on @engine.
 *
 * Returns the request with an extra reference held for the caller, or the
 * ERR_PTR from intel_engine_create_kernel_request() on failure.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *nop = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(nop)) {
		i915_request_get(nop);
		i915_request_add(nop);
	}

	return nop;
}
1271
1272 static long slice_timeout(struct intel_engine_cs *engine)
1273 {
1274         long timeout;
1275
1276         /* Enough time for a timeslice to kick in, and kick out */
1277         timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1278
1279         /* Enough time for the nop request to complete */
1280         timeout += HZ / 5;
1281
1282         return timeout + 1;
1283 }
1284
1285 static int live_timeslice_queue(void *arg)
1286 {
1287         struct intel_gt *gt = arg;
1288         struct drm_i915_gem_object *obj;
1289         struct intel_engine_cs *engine;
1290         enum intel_engine_id id;
1291         struct i915_vma *vma;
1292         void *vaddr;
1293         int err = 0;
1294
1295         /*
1296          * Make sure that even if ELSP[0] and ELSP[1] are filled with
1297          * timeslicing between them disabled, we *do* enable timeslicing
1298          * if the queue demands it. (Normally, we do not submit if
1299          * ELSP[1] is already occupied, so must rely on timeslicing to
1300          * eject ELSP[0] in favour of the queue.)
1301          */
1302         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1303                 return 0;
1304
1305         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306         if (IS_ERR(obj))
1307                 return PTR_ERR(obj);
1308
1309         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1310         if (IS_ERR(vma)) {
1311                 err = PTR_ERR(vma);
1312                 goto err_obj;
1313         }
1314
1315         vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1316         if (IS_ERR(vaddr)) {
1317                 err = PTR_ERR(vaddr);
1318                 goto err_obj;
1319         }
1320
1321         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1322         if (err)
1323                 goto err_map;
1324
1325         err = i915_vma_sync(vma);
1326         if (err)
1327                 goto err_pin;
1328
1329         for_each_engine(engine, gt, id) {
1330                 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1331                 struct i915_request *rq, *nop;
1332
1333                 if (!intel_engine_has_preemption(engine))
1334                         continue;
1335
1336                 st_engine_heartbeat_disable(engine);
1337                 memset(vaddr, 0, PAGE_SIZE);
1338
1339                 /* ELSP[0]: semaphore wait */
1340                 rq = semaphore_queue(engine, vma, 0);
1341                 if (IS_ERR(rq)) {
1342                         err = PTR_ERR(rq);
1343                         goto err_heartbeat;
1344                 }
1345                 engine->sched_engine->schedule(rq, &attr);
1346                 err = wait_for_submit(engine, rq, HZ / 2);
1347                 if (err) {
1348                         pr_err("%s: Timed out trying to submit semaphores\n",
1349                                engine->name);
1350                         goto err_rq;
1351                 }
1352
1353                 /* ELSP[1]: nop request */
1354                 nop = nop_request(engine);
1355                 if (IS_ERR(nop)) {
1356                         err = PTR_ERR(nop);
1357                         goto err_rq;
1358                 }
1359                 err = wait_for_submit(engine, nop, HZ / 2);
1360                 i915_request_put(nop);
1361                 if (err) {
1362                         pr_err("%s: Timed out trying to submit nop\n",
1363                                engine->name);
1364                         goto err_rq;
1365                 }
1366
1367                 GEM_BUG_ON(i915_request_completed(rq));
1368                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1369
1370                 /* Queue: semaphore signal, matching priority as semaphore */
1371                 err = release_queue(engine, vma, 1, effective_prio(rq));
1372                 if (err)
1373                         goto err_rq;
1374
1375                 /* Wait until we ack the release_queue and start timeslicing */
1376                 do {
1377                         cond_resched();
1378                         intel_engine_flush_submission(engine);
1379                 } while (READ_ONCE(engine->execlists.pending[0]));
1380
1381                 /* Timeslice every jiffy, so within 2 we should signal */
1382                 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1383                         struct drm_printer p =
1384                                 drm_info_printer(gt->i915->drm.dev);
1385
1386                         pr_err("%s: Failed to timeslice into queue\n",
1387                                engine->name);
1388                         intel_engine_dump(engine, &p,
1389                                           "%s\n", engine->name);
1390
1391                         memset(vaddr, 0xff, PAGE_SIZE);
1392                         err = -EIO;
1393                 }
1394 err_rq:
1395                 i915_request_put(rq);
1396 err_heartbeat:
1397                 st_engine_heartbeat_enable(engine);
1398                 if (err)
1399                         break;
1400         }
1401
1402 err_pin:
1403         i915_vma_unpin(vma);
1404 err_map:
1405         i915_gem_object_unpin_map(obj);
1406 err_obj:
1407         i915_gem_object_put(obj);
1408         return err;
1409 }
1410
1411 static int live_timeslice_nopreempt(void *arg)
1412 {
1413         struct intel_gt *gt = arg;
1414         struct intel_engine_cs *engine;
1415         enum intel_engine_id id;
1416         struct igt_spinner spin;
1417         int err = 0;
1418
1419         /*
1420          * We should not timeslice into a request that is marked with
1421          * I915_REQUEST_NOPREEMPT.
1422          */
1423         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1424                 return 0;
1425
1426         if (igt_spinner_init(&spin, gt))
1427                 return -ENOMEM;
1428
1429         for_each_engine(engine, gt, id) {
1430                 struct intel_context *ce;
1431                 struct i915_request *rq;
1432                 unsigned long timeslice;
1433
1434                 if (!intel_engine_has_preemption(engine))
1435                         continue;
1436
1437                 ce = intel_context_create(engine);
1438                 if (IS_ERR(ce)) {
1439                         err = PTR_ERR(ce);
1440                         break;
1441                 }
1442
1443                 st_engine_heartbeat_disable(engine);
1444                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1445
1446                 /* Create an unpreemptible spinner */
1447
1448                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1449                 intel_context_put(ce);
1450                 if (IS_ERR(rq)) {
1451                         err = PTR_ERR(rq);
1452                         goto out_heartbeat;
1453                 }
1454
1455                 i915_request_get(rq);
1456                 i915_request_add(rq);
1457
1458                 if (!igt_wait_for_spinner(&spin, rq)) {
1459                         i915_request_put(rq);
1460                         err = -ETIME;
1461                         goto out_spin;
1462                 }
1463
1464                 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1465                 i915_request_put(rq);
1466
1467                 /* Followed by a maximum priority barrier (heartbeat) */
1468
1469                 ce = intel_context_create(engine);
1470                 if (IS_ERR(ce)) {
1471                         err = PTR_ERR(ce);
1472                         goto out_spin;
1473                 }
1474
1475                 rq = intel_context_create_request(ce);
1476                 intel_context_put(ce);
1477                 if (IS_ERR(rq)) {
1478                         err = PTR_ERR(rq);
1479                         goto out_spin;
1480                 }
1481
1482                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1483                 i915_request_get(rq);
1484                 i915_request_add(rq);
1485
1486                 /*
1487                  * Wait until the barrier is in ELSP, and we know timeslicing
1488                  * will have been activated.
1489                  */
1490                 if (wait_for_submit(engine, rq, HZ / 2)) {
1491                         i915_request_put(rq);
1492                         err = -ETIME;
1493                         goto out_spin;
1494                 }
1495
1496                 /*
1497                  * Since the ELSP[0] request is unpreemptible, it should not
1498                  * allow the maximum priority barrier through. Wait long
1499                  * enough to see if it is timesliced in by mistake.
1500                  */
1501                 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1502                         pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1503                                engine->name);
1504                         err = -EINVAL;
1505                 }
1506                 i915_request_put(rq);
1507
1508 out_spin:
1509                 igt_spinner_end(&spin);
1510 out_heartbeat:
1511                 xchg(&engine->props.timeslice_duration_ms, timeslice);
1512                 st_engine_heartbeat_enable(engine);
1513                 if (err)
1514                         break;
1515
1516                 if (igt_flush_test(gt->i915)) {
1517                         err = -EIO;
1518                         break;
1519                 }
1520         }
1521
1522         igt_spinner_fini(&spin);
1523         return err;
1524 }
1525
1526 static int live_busywait_preempt(void *arg)
1527 {
1528         struct intel_gt *gt = arg;
1529         struct i915_gem_context *ctx_hi, *ctx_lo;
1530         struct intel_engine_cs *engine;
1531         struct drm_i915_gem_object *obj;
1532         struct i915_vma *vma;
1533         enum intel_engine_id id;
1534         int err = -ENOMEM;
1535         u32 *map;
1536
1537         /*
1538          * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1539          * preempt the busywaits used to synchronise between rings.
1540          */
1541
1542         ctx_hi = kernel_context(gt->i915);
1543         if (!ctx_hi)
1544                 return -ENOMEM;
1545         ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1546
1547         ctx_lo = kernel_context(gt->i915);
1548         if (!ctx_lo)
1549                 goto err_ctx_hi;
1550         ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1551
1552         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1553         if (IS_ERR(obj)) {
1554                 err = PTR_ERR(obj);
1555                 goto err_ctx_lo;
1556         }
1557
1558         map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1559         if (IS_ERR(map)) {
1560                 err = PTR_ERR(map);
1561                 goto err_obj;
1562         }
1563
1564         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1565         if (IS_ERR(vma)) {
1566                 err = PTR_ERR(vma);
1567                 goto err_map;
1568         }
1569
1570         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1571         if (err)
1572                 goto err_map;
1573
1574         err = i915_vma_sync(vma);
1575         if (err)
1576                 goto err_vma;
1577
1578         for_each_engine(engine, gt, id) {
1579                 struct i915_request *lo, *hi;
1580                 struct igt_live_test t;
1581                 u32 *cs;
1582
1583                 if (!intel_engine_has_preemption(engine))
1584                         continue;
1585
1586                 if (!intel_engine_can_store_dword(engine))
1587                         continue;
1588
1589                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1590                         err = -EIO;
1591                         goto err_vma;
1592                 }
1593
1594                 /*
1595                  * We create two requests. The low priority request
1596                  * busywaits on a semaphore (inside the ringbuffer where
1597                  * is should be preemptible) and the high priority requests
1598                  * uses a MI_STORE_DWORD_IMM to update the semaphore value
1599                  * allowing the first request to complete. If preemption
1600                  * fails, we hang instead.
1601                  */
1602
1603                 lo = igt_request_alloc(ctx_lo, engine);
1604                 if (IS_ERR(lo)) {
1605                         err = PTR_ERR(lo);
1606                         goto err_vma;
1607                 }
1608
1609                 cs = intel_ring_begin(lo, 8);
1610                 if (IS_ERR(cs)) {
1611                         err = PTR_ERR(cs);
1612                         i915_request_add(lo);
1613                         goto err_vma;
1614                 }
1615
1616                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1617                 *cs++ = i915_ggtt_offset(vma);
1618                 *cs++ = 0;
1619                 *cs++ = 1;
1620
1621                 /* XXX Do we need a flush + invalidate here? */
1622
1623                 *cs++ = MI_SEMAPHORE_WAIT |
1624                         MI_SEMAPHORE_GLOBAL_GTT |
1625                         MI_SEMAPHORE_POLL |
1626                         MI_SEMAPHORE_SAD_EQ_SDD;
1627                 *cs++ = 0;
1628                 *cs++ = i915_ggtt_offset(vma);
1629                 *cs++ = 0;
1630
1631                 intel_ring_advance(lo, cs);
1632
1633                 i915_request_get(lo);
1634                 i915_request_add(lo);
1635
1636                 if (wait_for(READ_ONCE(*map), 10)) {
1637                         i915_request_put(lo);
1638                         err = -ETIMEDOUT;
1639                         goto err_vma;
1640                 }
1641
1642                 /* Low priority request should be busywaiting now */
1643                 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1644                         i915_request_put(lo);
1645                         pr_err("%s: Busywaiting request did not!\n",
1646                                engine->name);
1647                         err = -EIO;
1648                         goto err_vma;
1649                 }
1650
1651                 hi = igt_request_alloc(ctx_hi, engine);
1652                 if (IS_ERR(hi)) {
1653                         err = PTR_ERR(hi);
1654                         i915_request_put(lo);
1655                         goto err_vma;
1656                 }
1657
1658                 cs = intel_ring_begin(hi, 4);
1659                 if (IS_ERR(cs)) {
1660                         err = PTR_ERR(cs);
1661                         i915_request_add(hi);
1662                         i915_request_put(lo);
1663                         goto err_vma;
1664                 }
1665
1666                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1667                 *cs++ = i915_ggtt_offset(vma);
1668                 *cs++ = 0;
1669                 *cs++ = 0;
1670
1671                 intel_ring_advance(hi, cs);
1672                 i915_request_add(hi);
1673
1674                 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1675                         struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1676
1677                         pr_err("%s: Failed to preempt semaphore busywait!\n",
1678                                engine->name);
1679
1680                         intel_engine_dump(engine, &p, "%s\n", engine->name);
1681                         GEM_TRACE_DUMP();
1682
1683                         i915_request_put(lo);
1684                         intel_gt_set_wedged(gt);
1685                         err = -EIO;
1686                         goto err_vma;
1687                 }
1688                 GEM_BUG_ON(READ_ONCE(*map));
1689                 i915_request_put(lo);
1690
1691                 if (igt_live_test_end(&t)) {
1692                         err = -EIO;
1693                         goto err_vma;
1694                 }
1695         }
1696
1697         err = 0;
1698 err_vma:
1699         i915_vma_unpin(vma);
1700 err_map:
1701         i915_gem_object_unpin_map(obj);
1702 err_obj:
1703         i915_gem_object_put(obj);
1704 err_ctx_lo:
1705         kernel_context_close(ctx_lo);
1706 err_ctx_hi:
1707         kernel_context_close(ctx_hi);
1708         return err;
1709 }
1710
1711 static struct i915_request *
1712 spinner_create_request(struct igt_spinner *spin,
1713                        struct i915_gem_context *ctx,
1714                        struct intel_engine_cs *engine,
1715                        u32 arb)
1716 {
1717         struct intel_context *ce;
1718         struct i915_request *rq;
1719
1720         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1721         if (IS_ERR(ce))
1722                 return ERR_CAST(ce);
1723
1724         rq = igt_spinner_create_request(spin, ce, arb);
1725         intel_context_put(ce);
1726         return rq;
1727 }
1728
1729 static int live_preempt(void *arg)
1730 {
1731         struct intel_gt *gt = arg;
1732         struct i915_gem_context *ctx_hi, *ctx_lo;
1733         struct igt_spinner spin_hi, spin_lo;
1734         struct intel_engine_cs *engine;
1735         enum intel_engine_id id;
1736         int err = -ENOMEM;
1737
1738         if (igt_spinner_init(&spin_hi, gt))
1739                 return -ENOMEM;
1740
1741         if (igt_spinner_init(&spin_lo, gt))
1742                 goto err_spin_hi;
1743
1744         ctx_hi = kernel_context(gt->i915);
1745         if (!ctx_hi)
1746                 goto err_spin_lo;
1747         ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1748
1749         ctx_lo = kernel_context(gt->i915);
1750         if (!ctx_lo)
1751                 goto err_ctx_hi;
1752         ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1753
1754         for_each_engine(engine, gt, id) {
1755                 struct igt_live_test t;
1756                 struct i915_request *rq;
1757
1758                 if (!intel_engine_has_preemption(engine))
1759                         continue;
1760
1761                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1762                         err = -EIO;
1763                         goto err_ctx_lo;
1764                 }
1765
1766                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1767                                             MI_ARB_CHECK);
1768                 if (IS_ERR(rq)) {
1769                         err = PTR_ERR(rq);
1770                         goto err_ctx_lo;
1771                 }
1772
1773                 i915_request_add(rq);
1774                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1775                         GEM_TRACE("lo spinner failed to start\n");
1776                         GEM_TRACE_DUMP();
1777                         intel_gt_set_wedged(gt);
1778                         err = -EIO;
1779                         goto err_ctx_lo;
1780                 }
1781
1782                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1783                                             MI_ARB_CHECK);
1784                 if (IS_ERR(rq)) {
1785                         igt_spinner_end(&spin_lo);
1786                         err = PTR_ERR(rq);
1787                         goto err_ctx_lo;
1788                 }
1789
1790                 i915_request_add(rq);
1791                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1792                         GEM_TRACE("hi spinner failed to start\n");
1793                         GEM_TRACE_DUMP();
1794                         intel_gt_set_wedged(gt);
1795                         err = -EIO;
1796                         goto err_ctx_lo;
1797                 }
1798
1799                 igt_spinner_end(&spin_hi);
1800                 igt_spinner_end(&spin_lo);
1801
1802                 if (igt_live_test_end(&t)) {
1803                         err = -EIO;
1804                         goto err_ctx_lo;
1805                 }
1806         }
1807
1808         err = 0;
1809 err_ctx_lo:
1810         kernel_context_close(ctx_lo);
1811 err_ctx_hi:
1812         kernel_context_close(ctx_hi);
1813 err_spin_lo:
1814         igt_spinner_fini(&spin_lo);
1815 err_spin_hi:
1816         igt_spinner_fini(&spin_hi);
1817         return err;
1818 }
1819
1820 static int live_late_preempt(void *arg)
1821 {
1822         struct intel_gt *gt = arg;
1823         struct i915_gem_context *ctx_hi, *ctx_lo;
1824         struct igt_spinner spin_hi, spin_lo;
1825         struct intel_engine_cs *engine;
1826         struct i915_sched_attr attr = {};
1827         enum intel_engine_id id;
1828         int err = -ENOMEM;
1829
1830         if (igt_spinner_init(&spin_hi, gt))
1831                 return -ENOMEM;
1832
1833         if (igt_spinner_init(&spin_lo, gt))
1834                 goto err_spin_hi;
1835
1836         ctx_hi = kernel_context(gt->i915);
1837         if (!ctx_hi)
1838                 goto err_spin_lo;
1839
1840         ctx_lo = kernel_context(gt->i915);
1841         if (!ctx_lo)
1842                 goto err_ctx_hi;
1843
1844         /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1845         ctx_lo->sched.priority = 1;
1846
1847         for_each_engine(engine, gt, id) {
1848                 struct igt_live_test t;
1849                 struct i915_request *rq;
1850
1851                 if (!intel_engine_has_preemption(engine))
1852                         continue;
1853
1854                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1855                         err = -EIO;
1856                         goto err_ctx_lo;
1857                 }
1858
1859                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1860                                             MI_ARB_CHECK);
1861                 if (IS_ERR(rq)) {
1862                         err = PTR_ERR(rq);
1863                         goto err_ctx_lo;
1864                 }
1865
1866                 i915_request_add(rq);
1867                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1868                         pr_err("First context failed to start\n");
1869                         goto err_wedged;
1870                 }
1871
1872                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1873                                             MI_NOOP);
1874                 if (IS_ERR(rq)) {
1875                         igt_spinner_end(&spin_lo);
1876                         err = PTR_ERR(rq);
1877                         goto err_ctx_lo;
1878                 }
1879
1880                 i915_request_add(rq);
1881                 if (igt_wait_for_spinner(&spin_hi, rq)) {
1882                         pr_err("Second context overtook first?\n");
1883                         goto err_wedged;
1884                 }
1885
1886                 attr.priority = I915_PRIORITY_MAX;
1887                 engine->sched_engine->schedule(rq, &attr);
1888
1889                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1890                         pr_err("High priority context failed to preempt the low priority context\n");
1891                         GEM_TRACE_DUMP();
1892                         goto err_wedged;
1893                 }
1894
1895                 igt_spinner_end(&spin_hi);
1896                 igt_spinner_end(&spin_lo);
1897
1898                 if (igt_live_test_end(&t)) {
1899                         err = -EIO;
1900                         goto err_ctx_lo;
1901                 }
1902         }
1903
1904         err = 0;
1905 err_ctx_lo:
1906         kernel_context_close(ctx_lo);
1907 err_ctx_hi:
1908         kernel_context_close(ctx_hi);
1909 err_spin_lo:
1910         igt_spinner_fini(&spin_lo);
1911 err_spin_hi:
1912         igt_spinner_fini(&spin_hi);
1913         return err;
1914
1915 err_wedged:
1916         igt_spinner_end(&spin_hi);
1917         igt_spinner_end(&spin_lo);
1918         intel_gt_set_wedged(gt);
1919         err = -EIO;
1920         goto err_ctx_lo;
1921 }
1922
1923 struct preempt_client {
1924         struct igt_spinner spin;
1925         struct i915_gem_context *ctx;
1926 };
1927
1928 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1929 {
1930         c->ctx = kernel_context(gt->i915);
1931         if (!c->ctx)
1932                 return -ENOMEM;
1933
1934         if (igt_spinner_init(&c->spin, gt))
1935                 goto err_ctx;
1936
1937         return 0;
1938
1939 err_ctx:
1940         kernel_context_close(c->ctx);
1941         return -ENOMEM;
1942 }
1943
1944 static void preempt_client_fini(struct preempt_client *c)
1945 {
1946         igt_spinner_fini(&c->spin);
1947         kernel_context_close(c->ctx);
1948 }
1949
1950 static int live_nopreempt(void *arg)
1951 {
1952         struct intel_gt *gt = arg;
1953         struct intel_engine_cs *engine;
1954         struct preempt_client a, b;
1955         enum intel_engine_id id;
1956         int err = -ENOMEM;
1957
1958         /*
1959          * Verify that we can disable preemption for an individual request
1960          * that may be being observed and not want to be interrupted.
1961          */
1962
1963         if (preempt_client_init(gt, &a))
1964                 return -ENOMEM;
1965         if (preempt_client_init(gt, &b))
1966                 goto err_client_a;
1967         b.ctx->sched.priority = I915_PRIORITY_MAX;
1968
1969         for_each_engine(engine, gt, id) {
1970                 struct i915_request *rq_a, *rq_b;
1971
1972                 if (!intel_engine_has_preemption(engine))
1973                         continue;
1974
1975                 engine->execlists.preempt_hang.count = 0;
1976
1977                 rq_a = spinner_create_request(&a.spin,
1978                                               a.ctx, engine,
1979                                               MI_ARB_CHECK);
1980                 if (IS_ERR(rq_a)) {
1981                         err = PTR_ERR(rq_a);
1982                         goto err_client_b;
1983                 }
1984
1985                 /* Low priority client, but unpreemptable! */
1986                 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1987
1988                 i915_request_add(rq_a);
1989                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1990                         pr_err("First client failed to start\n");
1991                         goto err_wedged;
1992                 }
1993
1994                 rq_b = spinner_create_request(&b.spin,
1995                                               b.ctx, engine,
1996                                               MI_ARB_CHECK);
1997                 if (IS_ERR(rq_b)) {
1998                         err = PTR_ERR(rq_b);
1999                         goto err_client_b;
2000                 }
2001
2002                 i915_request_add(rq_b);
2003
2004                 /* B is much more important than A! (But A is unpreemptable.) */
2005                 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2006
2007                 /* Wait long enough for preemption and timeslicing */
2008                 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2009                         pr_err("Second client started too early!\n");
2010                         goto err_wedged;
2011                 }
2012
2013                 igt_spinner_end(&a.spin);
2014
2015                 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2016                         pr_err("Second client failed to start\n");
2017                         goto err_wedged;
2018                 }
2019
2020                 igt_spinner_end(&b.spin);
2021
2022                 if (engine->execlists.preempt_hang.count) {
2023                         pr_err("Preemption recorded x%d; should have been suppressed!\n",
2024                                engine->execlists.preempt_hang.count);
2025                         err = -EINVAL;
2026                         goto err_wedged;
2027                 }
2028
2029                 if (igt_flush_test(gt->i915))
2030                         goto err_wedged;
2031         }
2032
2033         err = 0;
2034 err_client_b:
2035         preempt_client_fini(&b);
2036 err_client_a:
2037         preempt_client_fini(&a);
2038         return err;
2039
2040 err_wedged:
2041         igt_spinner_end(&b.spin);
2042         igt_spinner_end(&a.spin);
2043         intel_gt_set_wedged(gt);
2044         err = -EIO;
2045         goto err_client_b;
2046 }
2047
2048 struct live_preempt_cancel {
2049         struct intel_engine_cs *engine;
2050         struct preempt_client a, b;
2051 };
2052
2053 static int __cancel_active0(struct live_preempt_cancel *arg)
2054 {
2055         struct i915_request *rq;
2056         struct igt_live_test t;
2057         int err;
2058
2059         /* Preempt cancel of ELSP0 */
2060         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2061         if (igt_live_test_begin(&t, arg->engine->i915,
2062                                 __func__, arg->engine->name))
2063                 return -EIO;
2064
2065         rq = spinner_create_request(&arg->a.spin,
2066                                     arg->a.ctx, arg->engine,
2067                                     MI_ARB_CHECK);
2068         if (IS_ERR(rq))
2069                 return PTR_ERR(rq);
2070
2071         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2072         i915_request_get(rq);
2073         i915_request_add(rq);
2074         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2075                 err = -EIO;
2076                 goto out;
2077         }
2078
2079         intel_context_set_banned(rq->context);
2080         err = intel_engine_pulse(arg->engine);
2081         if (err)
2082                 goto out;
2083
2084         err = wait_for_reset(arg->engine, rq, HZ / 2);
2085         if (err) {
2086                 pr_err("Cancelled inflight0 request did not reset\n");
2087                 goto out;
2088         }
2089
2090 out:
2091         i915_request_put(rq);
2092         if (igt_live_test_end(&t))
2093                 err = -EIO;
2094         return err;
2095 }
2096
2097 static int __cancel_active1(struct live_preempt_cancel *arg)
2098 {
2099         struct i915_request *rq[2] = {};
2100         struct igt_live_test t;
2101         int err;
2102
2103         /* Preempt cancel of ELSP1 */
2104         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2105         if (igt_live_test_begin(&t, arg->engine->i915,
2106                                 __func__, arg->engine->name))
2107                 return -EIO;
2108
2109         rq[0] = spinner_create_request(&arg->a.spin,
2110                                        arg->a.ctx, arg->engine,
2111                                        MI_NOOP); /* no preemption */
2112         if (IS_ERR(rq[0]))
2113                 return PTR_ERR(rq[0]);
2114
2115         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2116         i915_request_get(rq[0]);
2117         i915_request_add(rq[0]);
2118         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2119                 err = -EIO;
2120                 goto out;
2121         }
2122
2123         rq[1] = spinner_create_request(&arg->b.spin,
2124                                        arg->b.ctx, arg->engine,
2125                                        MI_ARB_CHECK);
2126         if (IS_ERR(rq[1])) {
2127                 err = PTR_ERR(rq[1]);
2128                 goto out;
2129         }
2130
2131         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2132         i915_request_get(rq[1]);
2133         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2134         i915_request_add(rq[1]);
2135         if (err)
2136                 goto out;
2137
2138         intel_context_set_banned(rq[1]->context);
2139         err = intel_engine_pulse(arg->engine);
2140         if (err)
2141                 goto out;
2142
2143         igt_spinner_end(&arg->a.spin);
2144         err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2145         if (err)
2146                 goto out;
2147
2148         if (rq[0]->fence.error != 0) {
2149                 pr_err("Normal inflight0 request did not complete\n");
2150                 err = -EINVAL;
2151                 goto out;
2152         }
2153
2154         if (rq[1]->fence.error != -EIO) {
2155                 pr_err("Cancelled inflight1 request did not report -EIO\n");
2156                 err = -EINVAL;
2157                 goto out;
2158         }
2159
2160 out:
2161         i915_request_put(rq[1]);
2162         i915_request_put(rq[0]);
2163         if (igt_live_test_end(&t))
2164                 err = -EIO;
2165         return err;
2166 }
2167
2168 static int __cancel_queued(struct live_preempt_cancel *arg)
2169 {
2170         struct i915_request *rq[3] = {};
2171         struct igt_live_test t;
2172         int err;
2173
2174         /* Full ELSP and one in the wings */
2175         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2176         if (igt_live_test_begin(&t, arg->engine->i915,
2177                                 __func__, arg->engine->name))
2178                 return -EIO;
2179
2180         rq[0] = spinner_create_request(&arg->a.spin,
2181                                        arg->a.ctx, arg->engine,
2182                                        MI_ARB_CHECK);
2183         if (IS_ERR(rq[0]))
2184                 return PTR_ERR(rq[0]);
2185
2186         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2187         i915_request_get(rq[0]);
2188         i915_request_add(rq[0]);
2189         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2190                 err = -EIO;
2191                 goto out;
2192         }
2193
2194         rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2195         if (IS_ERR(rq[1])) {
2196                 err = PTR_ERR(rq[1]);
2197                 goto out;
2198         }
2199
2200         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2201         i915_request_get(rq[1]);
2202         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2203         i915_request_add(rq[1]);
2204         if (err)
2205                 goto out;
2206
2207         rq[2] = spinner_create_request(&arg->b.spin,
2208                                        arg->a.ctx, arg->engine,
2209                                        MI_ARB_CHECK);
2210         if (IS_ERR(rq[2])) {
2211                 err = PTR_ERR(rq[2]);
2212                 goto out;
2213         }
2214
2215         i915_request_get(rq[2]);
2216         err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2217         i915_request_add(rq[2]);
2218         if (err)
2219                 goto out;
2220
2221         intel_context_set_banned(rq[2]->context);
2222         err = intel_engine_pulse(arg->engine);
2223         if (err)
2224                 goto out;
2225
2226         err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2227         if (err)
2228                 goto out;
2229
2230         if (rq[0]->fence.error != -EIO) {
2231                 pr_err("Cancelled inflight0 request did not report -EIO\n");
2232                 err = -EINVAL;
2233                 goto out;
2234         }
2235
2236         if (rq[1]->fence.error != 0) {
2237                 pr_err("Normal inflight1 request did not complete\n");
2238                 err = -EINVAL;
2239                 goto out;
2240         }
2241
2242         if (rq[2]->fence.error != -EIO) {
2243                 pr_err("Cancelled queued request did not report -EIO\n");
2244                 err = -EINVAL;
2245                 goto out;
2246         }
2247
2248 out:
2249         i915_request_put(rq[2]);
2250         i915_request_put(rq[1]);
2251         i915_request_put(rq[0]);
2252         if (igt_live_test_end(&t))
2253                 err = -EIO;
2254         return err;
2255 }
2256
2257 static int __cancel_hostile(struct live_preempt_cancel *arg)
2258 {
2259         struct i915_request *rq;
2260         int err;
2261
2262         /* Preempt cancel non-preemptible spinner in ELSP0 */
2263         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2264                 return 0;
2265
2266         if (!intel_has_reset_engine(arg->engine->gt))
2267                 return 0;
2268
2269         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2270         rq = spinner_create_request(&arg->a.spin,
2271                                     arg->a.ctx, arg->engine,
2272                                     MI_NOOP); /* preemption disabled */
2273         if (IS_ERR(rq))
2274                 return PTR_ERR(rq);
2275
2276         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2277         i915_request_get(rq);
2278         i915_request_add(rq);
2279         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2280                 err = -EIO;
2281                 goto out;
2282         }
2283
2284         intel_context_set_banned(rq->context);
2285         err = intel_engine_pulse(arg->engine); /* force reset */
2286         if (err)
2287                 goto out;
2288
2289         err = wait_for_reset(arg->engine, rq, HZ / 2);
2290         if (err) {
2291                 pr_err("Cancelled inflight0 request did not reset\n");
2292                 goto out;
2293         }
2294
2295 out:
2296         i915_request_put(rq);
2297         if (igt_flush_test(arg->engine->i915))
2298                 err = -EIO;
2299         return err;
2300 }
2301
2302 static void force_reset_timeout(struct intel_engine_cs *engine)
2303 {
2304         engine->reset_timeout.probability = 999;
2305         atomic_set(&engine->reset_timeout.times, -1);
2306 }
2307
2308 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2309 {
2310         memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2311 }
2312
2313 static int __cancel_fail(struct live_preempt_cancel *arg)
2314 {
2315         struct intel_engine_cs *engine = arg->engine;
2316         struct i915_request *rq;
2317         int err;
2318
2319         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2320                 return 0;
2321
2322         if (!intel_has_reset_engine(engine->gt))
2323                 return 0;
2324
2325         GEM_TRACE("%s(%s)\n", __func__, engine->name);
2326         rq = spinner_create_request(&arg->a.spin,
2327                                     arg->a.ctx, engine,
2328                                     MI_NOOP); /* preemption disabled */
2329         if (IS_ERR(rq))
2330                 return PTR_ERR(rq);
2331
2332         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2333         i915_request_get(rq);
2334         i915_request_add(rq);
2335         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2336                 err = -EIO;
2337                 goto out;
2338         }
2339
2340         intel_context_set_banned(rq->context);
2341
2342         err = intel_engine_pulse(engine);
2343         if (err)
2344                 goto out;
2345
2346         force_reset_timeout(engine);
2347
2348         /* force preempt reset [failure] */
2349         while (!engine->execlists.pending[0])
2350                 intel_engine_flush_submission(engine);
2351         del_timer_sync(&engine->execlists.preempt);
2352         intel_engine_flush_submission(engine);
2353
2354         cancel_reset_timeout(engine);
2355
2356         /* after failure, require heartbeats to reset device */
2357         intel_engine_set_heartbeat(engine, 1);
2358         err = wait_for_reset(engine, rq, HZ / 2);
2359         intel_engine_set_heartbeat(engine,
2360                                    engine->defaults.heartbeat_interval_ms);
2361         if (err) {
2362                 pr_err("Cancelled inflight0 request did not reset\n");
2363                 goto out;
2364         }
2365
2366 out:
2367         i915_request_put(rq);
2368         if (igt_flush_test(engine->i915))
2369                 err = -EIO;
2370         return err;
2371 }
2372
2373 static int live_preempt_cancel(void *arg)
2374 {
2375         struct intel_gt *gt = arg;
2376         struct live_preempt_cancel data;
2377         enum intel_engine_id id;
2378         int err = -ENOMEM;
2379
2380         /*
2381          * To cancel an inflight context, we need to first remove it from the
2382          * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2383          */
2384
2385         if (preempt_client_init(gt, &data.a))
2386                 return -ENOMEM;
2387         if (preempt_client_init(gt, &data.b))
2388                 goto err_client_a;
2389
2390         for_each_engine(data.engine, gt, id) {
2391                 if (!intel_engine_has_preemption(data.engine))
2392                         continue;
2393
2394                 err = __cancel_active0(&data);
2395                 if (err)
2396                         goto err_wedged;
2397
2398                 err = __cancel_active1(&data);
2399                 if (err)
2400                         goto err_wedged;
2401
2402                 err = __cancel_queued(&data);
2403                 if (err)
2404                         goto err_wedged;
2405
2406                 err = __cancel_hostile(&data);
2407                 if (err)
2408                         goto err_wedged;
2409
2410                 err = __cancel_fail(&data);
2411                 if (err)
2412                         goto err_wedged;
2413         }
2414
2415         err = 0;
2416 err_client_b:
2417         preempt_client_fini(&data.b);
2418 err_client_a:
2419         preempt_client_fini(&data.a);
2420         return err;
2421
2422 err_wedged:
2423         GEM_TRACE_DUMP();
2424         igt_spinner_end(&data.b.spin);
2425         igt_spinner_end(&data.a.spin);
2426         intel_gt_set_wedged(gt);
2427         goto err_client_b;
2428 }
2429
2430 static int live_suppress_self_preempt(void *arg)
2431 {
2432         struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2433         struct intel_gt *gt = arg;
2434         struct intel_engine_cs *engine;
2435         struct preempt_client a, b;
2436         enum intel_engine_id id;
2437         int err = -ENOMEM;
2438
2439         /*
2440          * Verify that if a preemption request does not cause a change in
2441          * the current execution order, the preempt-to-idle injection is
2442          * skipped and that we do not accidentally apply it after the CS
2443          * completion event.
2444          */
2445
2446         if (intel_uc_uses_guc_submission(&gt->uc))
2447                 return 0; /* presume black blox */
2448
2449         if (intel_vgpu_active(gt->i915))
2450                 return 0; /* GVT forces single port & request submission */
2451
2452         if (preempt_client_init(gt, &a))
2453                 return -ENOMEM;
2454         if (preempt_client_init(gt, &b))
2455                 goto err_client_a;
2456
2457         for_each_engine(engine, gt, id) {
2458                 struct i915_request *rq_a, *rq_b;
2459                 int depth;
2460
2461                 if (!intel_engine_has_preemption(engine))
2462                         continue;
2463
2464                 if (igt_flush_test(gt->i915))
2465                         goto err_wedged;
2466
2467                 st_engine_heartbeat_disable(engine);
2468                 engine->execlists.preempt_hang.count = 0;
2469
2470                 rq_a = spinner_create_request(&a.spin,
2471                                               a.ctx, engine,
2472                                               MI_NOOP);
2473                 if (IS_ERR(rq_a)) {
2474                         err = PTR_ERR(rq_a);
2475                         st_engine_heartbeat_enable(engine);
2476                         goto err_client_b;
2477                 }
2478
2479                 i915_request_add(rq_a);
2480                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2481                         pr_err("First client failed to start\n");
2482                         st_engine_heartbeat_enable(engine);
2483                         goto err_wedged;
2484                 }
2485
2486                 /* Keep postponing the timer to avoid premature slicing */
2487                 mod_timer(&engine->execlists.timer, jiffies + HZ);
2488                 for (depth = 0; depth < 8; depth++) {
2489                         rq_b = spinner_create_request(&b.spin,
2490                                                       b.ctx, engine,
2491                                                       MI_NOOP);
2492                         if (IS_ERR(rq_b)) {
2493                                 err = PTR_ERR(rq_b);
2494                                 st_engine_heartbeat_enable(engine);
2495                                 goto err_client_b;
2496                         }
2497                         i915_request_add(rq_b);
2498
2499                         GEM_BUG_ON(i915_request_completed(rq_a));
2500                         engine->sched_engine->schedule(rq_a, &attr);
2501                         igt_spinner_end(&a.spin);
2502
2503                         if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2504                                 pr_err("Second client failed to start\n");
2505                                 st_engine_heartbeat_enable(engine);
2506                                 goto err_wedged;
2507                         }
2508
2509                         swap(a, b);
2510                         rq_a = rq_b;
2511                 }
2512                 igt_spinner_end(&a.spin);
2513
2514                 if (engine->execlists.preempt_hang.count) {
2515                         pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2516                                engine->name,
2517                                engine->execlists.preempt_hang.count,
2518                                depth);
2519                         st_engine_heartbeat_enable(engine);
2520                         err = -EINVAL;
2521                         goto err_client_b;
2522                 }
2523
2524                 st_engine_heartbeat_enable(engine);
2525                 if (igt_flush_test(gt->i915))
2526                         goto err_wedged;
2527         }
2528
2529         err = 0;
2530 err_client_b:
2531         preempt_client_fini(&b);
2532 err_client_a:
2533         preempt_client_fini(&a);
2534         return err;
2535
2536 err_wedged:
2537         igt_spinner_end(&b.spin);
2538         igt_spinner_end(&a.spin);
2539         intel_gt_set_wedged(gt);
2540         err = -EIO;
2541         goto err_client_b;
2542 }
2543
/*
 * live_chain_preempt - check that a request can preempt past a long chain
 * of queued work.
 * @arg: the struct intel_gt to exercise
 *
 * Runs on every engine that reports preemption support; engines without it
 * are skipped.
 *
 * Return: 0 on success; -ENOMEM if either preempt client cannot be
 * initialised; -EIO for any failure inside the engine loop (the GT is also
 * marked wedged on that path).
 */
static int live_chain_preempt(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct preempt_client hi, lo;
        enum intel_engine_id id;
        int err = -ENOMEM;

        /*
         * Build a chain AB...BA between two contexts (A, B) and request
         * preemption of the last request. It should then complete before
         * the previously submitted spinner in B.
         */

        if (preempt_client_init(gt, &hi))
                return -ENOMEM;

        if (preempt_client_init(gt, &lo))
                goto err_client_hi;

        for_each_engine(engine, gt, id) {
                struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
                struct igt_live_test t;
                struct i915_request *rq;
                int ring_size, count, i;

                if (!intel_engine_has_preemption(engine))
                        continue;

                /*
                 * Submit one probe spinner on the lo context so that we can
                 * measure how much ring space a single request consumes.
                 */
                rq = spinner_create_request(&lo.spin,
                                            lo.ctx, engine,
                                            MI_ARB_CHECK);
                if (IS_ERR(rq))
                        goto err_wedged;

                i915_request_get(rq);
                i915_request_add(rq);

                /*
                 * Bytes used by the probe request (wa_tail - head, corrected
                 * for wrap-around), then converted into the number of such
                 * requests that fit in the whole ring. This bounds the chain
                 * lengths tried below.
                 */
                ring_size = rq->wa_tail - rq->head;
                if (ring_size < 0)
                        ring_size += rq->ring->size;
                ring_size = rq->ring->size / ring_size;
                pr_debug("%s(%s): Using maximum of %d requests\n",
                         __func__, engine->name, ring_size);

                /* Release the probe spinner and wait for it to finish */
                igt_spinner_end(&lo.spin);
                if (i915_request_wait(rq, 0, HZ / 2) < 0) {
                        pr_err("Timed out waiting to flush %s\n", engine->name);
                        i915_request_put(rq);
                        goto err_wedged;
                }
                i915_request_put(rq);

                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
                        err = -EIO;
                        goto err_wedged;
                }

                /* Try chain lengths taken from the primes up to ring_size */
                for_each_prime_number_from(count, 1, ring_size) {
                        /* A: spinner on the hi context, wait until it starts */
                        rq = spinner_create_request(&hi.spin,
                                                    hi.ctx, engine,
                                                    MI_ARB_CHECK);
                        if (IS_ERR(rq))
                                goto err_wedged;
                        i915_request_add(rq);
                        if (!igt_wait_for_spinner(&hi.spin, rq))
                                goto err_wedged;

                        /* B: spinner on the lo context, queued behind A */
                        rq = spinner_create_request(&lo.spin,
                                                    lo.ctx, engine,
                                                    MI_ARB_CHECK);
                        if (IS_ERR(rq))
                                goto err_wedged;
                        i915_request_add(rq);

                        /* B...B: @count further requests on the lo context */
                        for (i = 0; i < count; i++) {
                                rq = igt_request_alloc(lo.ctx, engine);
                                if (IS_ERR(rq))
                                        goto err_wedged;
                                i915_request_add(rq);
                        }

                        /*
                         * Final A: a request on the hi context, bumped to
                         * I915_PRIORITY_MAX after submission.
                         */
                        rq = igt_request_alloc(hi.ctx, engine);
                        if (IS_ERR(rq))
                                goto err_wedged;

                        i915_request_get(rq);
                        i915_request_add(rq);
                        engine->sched_engine->schedule(rq, &attr);

                        /*
                         * Only the hi spinner is released here; the lo spinner
                         * is still spinning, so the final hi request can only
                         * complete in time if it overtakes the lo chain.
                         */
                        igt_spinner_end(&hi.spin);
                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                struct drm_printer p =
                                        drm_info_printer(gt->i915->drm.dev);

                                pr_err("Failed to preempt over chain of %d\n",
                                       count);
                                intel_engine_dump(engine, &p,
                                                  "%s\n", engine->name);
                                i915_request_put(rq);
                                goto err_wedged;
                        }
                        igt_spinner_end(&lo.spin);
                        i915_request_put(rq);

                        /*
                         * With the lo spinner released, queue one more lo
                         * request and wait for it so that the whole lo chain
                         * has drained before the next chain length is tried.
                         */
                        rq = igt_request_alloc(lo.ctx, engine);
                        if (IS_ERR(rq))
                                goto err_wedged;

                        i915_request_get(rq);
                        i915_request_add(rq);

                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                struct drm_printer p =
                                        drm_info_printer(gt->i915->drm.dev);

                                pr_err("Failed to flush low priority chain of %d requests\n",
                                       count);
                                intel_engine_dump(engine, &p,
                                                  "%s\n", engine->name);

                                i915_request_put(rq);
                                goto err_wedged;
                        }
                        i915_request_put(rq);
                }

                if (igt_live_test_end(&t)) {
                        err = -EIO;
                        goto err_wedged;
                }
        }

        err = 0;
err_client_lo:
        preempt_client_fini(&lo);
err_client_hi:
        preempt_client_fini(&hi);
        return err;

err_wedged:
        /* Stop both spinners, wedge the GT and report -EIO */
        igt_spinner_end(&hi.spin);
        igt_spinner_end(&lo.spin);
        intel_gt_set_wedged(gt);
        err = -EIO;
        goto err_client_lo;
}
2691
/*
 * create_gang - add one more semaphore-spinning request to a gang.
 * @engine: engine on which to create the context and request
 * @prev: in/out head of the gang; *prev is the most recently created
 *        request (or NULL for the first), and is replaced by the new request
 *        on success
 *
 * A fresh context and a 4096 byte internal object are created for the batch.
 * The batch waits on a semaphore located at its own start address and, if
 * there is a previous request, then writes zero to the start of that
 * previous request's batch.
 *
 * On success the new request holds an extra request reference and a
 * reference on its batch vma (stored in rq->batch); the caller must drop
 * both.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int create_gang(struct intel_engine_cs *engine,
                       struct i915_request **prev)
{
        struct drm_i915_gem_object *obj;
        struct intel_context *ce;
        struct i915_request *rq;
        struct i915_vma *vma;
        u32 *cs;
        int err;

        ce = intel_context_create(engine);
        if (IS_ERR(ce))
                return PTR_ERR(ce);

        obj = i915_gem_object_create_internal(engine->i915, 4096);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto err_ce;
        }

        /* Bind the batch into the new context's address space */
        vma = i915_vma_instance(obj, ce->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_obj;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto err_obj;

        cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_obj;
        }

        /* Semaphore target: spin until zero */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /*
         * The semaphore polls the address vma->node.start, i.e. the first
         * dword of this very batch (the command written just above),
         * comparing against the data value 0.
         */
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = lower_32_bits(vma->node.start);
        *cs++ = upper_32_bits(vma->node.start);

        if (*prev) {
                u64 offset = (*prev)->batch->node.start;

                /* Terminate the spinner in the next lower priority batch. */
                *cs++ = MI_STORE_DWORD_IMM_GEN4;
                *cs++ = lower_32_bits(offset);
                *cs++ = upper_32_bits(offset);
                *cs++ = 0;
        }

        *cs++ = MI_BATCH_BUFFER_END;
        i915_gem_object_flush_map(obj);
        i915_gem_object_unpin_map(obj);

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_obj;
        }

        /* References handed to the caller on success, dropped at err_rq */
        rq->batch = i915_vma_get(vma);
        i915_request_get(rq);

        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, false);
        if (!err)
                err = i915_vma_move_to_active(vma, rq, 0);
        if (!err)
                err = rq->engine->emit_bb_start(rq,
                                                vma->node.start,
                                                PAGE_SIZE, 0);
        i915_vma_unlock(vma);
        /* The request is added even if one of the steps above failed */
        i915_request_add(rq);
        if (err)
                goto err_rq;

        /* Drop our local references to the object and the context */
        i915_gem_object_put(obj);
        intel_context_put(ce);

        /*
         * Chain the new request in front of the previous one by reusing
         * mock.link.next as a singly linked list pointer.
         * NOTE(review): when *prev is NULL this stores the address of
         * mock.link inside a NULL request; presumably the consumer's
         * list_next_entry() converts that back into a NULL request pointer
         * - confirm.
         */
        rq->mock.link.next = &(*prev)->mock.link;
        *prev = rq;
        return 0;

err_rq:
        i915_vma_put(rq->batch);
        i915_request_put(rq);
err_obj:
        i915_gem_object_put(obj);
err_ce:
        intel_context_put(ce);
        return err;
}
2790
/*
 * __live_preempt_ring - one pass of the ring-rollback preemption test.
 * @engine: engine under test
 * @spin: spinner used to keep the first context busy
 * @queue_sz: how far (in bytes of ring tail) to fill the first context's
 *            ring past the spinner before preempting
 * @ring_sz: ring size to request for both contexts
 *
 * Two contexts are created and pinned with @ring_sz rings. A spinner is
 * started on the first, its ring is filled with further empty requests, and
 * then a request on the second context is submitted which must be accepted
 * by the HW within HZ / 2.
 *
 * Return: 0 on success; -EIO if the live test bracket fails; -ETIME if the
 * spinner does not start or the preempting request is not submitted in
 * time; otherwise the error from context or request creation.
 */
static int __live_preempt_ring(struct intel_engine_cs *engine,
                               struct igt_spinner *spin,
                               int queue_sz, int ring_sz)
{
        struct intel_context *ce[2] = {};
        struct i915_request *rq;
        struct igt_live_test t;
        int err = 0;
        int n;

        if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
                return -EIO;

        for (n = 0; n < ARRAY_SIZE(ce); n++) {
                struct intel_context *tmp;

                tmp = intel_context_create(engine);
                if (IS_ERR(tmp)) {
                        err = PTR_ERR(tmp);
                        goto err_ce;
                }

                /* Set the requested ring size before the context is pinned */
                tmp->ring_size = ring_sz;

                err = intel_context_pin(tmp);
                if (err) {
                        intel_context_put(tmp);
                        goto err_ce;
                }

                memset32(tmp->ring->vaddr,
                         0xdeadbeef, /* trigger a hang if executed */
                         tmp->ring->vma->size / sizeof(u32));

                /* Only fully set up contexts are recorded for cleanup */
                ce[n] = tmp;
        }

        rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_ce;
        }

        i915_request_get(rq);
        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
        i915_request_add(rq);

        if (!igt_wait_for_spinner(spin, rq)) {
                intel_gt_set_wedged(engine->gt);
                i915_request_put(rq);
                err = -ETIME;
                goto err_ce;
        }

        /* Fill the ring, until we will cause a wrap */
        n = 0;
        /*
         * Keep adding empty requests to ce[0] until its ring tail has moved
         * more than queue_sz beyond the end of the spinner request.
         */
        while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
                struct i915_request *tmp;

                tmp = intel_context_create_request(ce[0]);
                if (IS_ERR(tmp)) {
                        err = PTR_ERR(tmp);
                        i915_request_put(rq);
                        goto err_ce;
                }

                i915_request_add(tmp);
                intel_engine_flush_submission(engine);
                n++;
        }
        intel_engine_flush_submission(engine);
        pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
                 engine->name, queue_sz, n,
                 ce[0]->ring->size,
                 ce[0]->ring->tail,
                 ce[0]->ring->emit,
                 rq->tail);
        i915_request_put(rq);

        /* Create a second request to preempt the first ring */
        rq = intel_context_create_request(ce[1]);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_ce;
        }

        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
        i915_request_get(rq);
        i915_request_add(rq);

        /* Any failure from wait_for_submit() is reported as -ETIME */
        err = wait_for_submit(engine, rq, HZ / 2);
        i915_request_put(rq);
        if (err) {
                pr_err("%s: preemption request was not submitted\n",
                       engine->name);
                err = -ETIME;
        }

        pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
                 engine->name,
                 ce[0]->ring->tail, ce[0]->ring->emit,
                 ce[1]->ring->tail, ce[1]->ring->emit);

        /* Shared exit path for both success and failure */
err_ce:
        intel_engine_flush_submission(engine);
        igt_spinner_end(spin);
        for (n = 0; n < ARRAY_SIZE(ce); n++) {
                /* ce[] is filled in order, so stop at the first empty slot */
                if (IS_ERR_OR_NULL(ce[n]))
                        break;

                intel_context_unpin(ce[n]);
                intel_context_put(ce[n]);
        }
        /* A failing live test check overrides any earlier error code */
        if (igt_live_test_end(&t))
                err = -EIO;
        return err;
}
2908
2909 static int live_preempt_ring(void *arg)
2910 {
2911         struct intel_gt *gt = arg;
2912         struct intel_engine_cs *engine;
2913         struct igt_spinner spin;
2914         enum intel_engine_id id;
2915         int err = 0;
2916
2917         /*
2918          * Check that we rollback large chunks of a ring in order to do a
2919          * preemption event. Similar to live_unlite_ring, but looking at
2920          * ring size rather than the impact of intel_ring_direction().
2921          */
2922
2923         if (igt_spinner_init(&spin, gt))
2924                 return -ENOMEM;
2925
2926         for_each_engine(engine, gt, id) {
2927                 int n;
2928
2929                 if (!intel_engine_has_preemption(engine))
2930                         continue;
2931
2932                 if (!intel_engine_can_store_dword(engine))
2933                         continue;
2934
2935                 st_engine_heartbeat_disable(engine);
2936
2937                 for (n = 0; n <= 3; n++) {
2938                         err = __live_preempt_ring(engine, &spin,
2939                                                   n * SZ_4K / 4, SZ_4K);
2940                         if (err)
2941                                 break;
2942                 }
2943
2944                 st_engine_heartbeat_enable(engine);
2945                 if (err)
2946                         break;
2947         }
2948
2949         igt_spinner_fini(&spin);
2950         return err;
2951 }
2952
2953 static int live_preempt_gang(void *arg)
2954 {
2955         struct intel_gt *gt = arg;
2956         struct intel_engine_cs *engine;
2957         enum intel_engine_id id;
2958
2959         /*
2960          * Build as long a chain of preempters as we can, with each
2961          * request higher priority than the last. Once we are ready, we release
2962          * the last batch which then precolates down the chain, each releasing
2963          * the next oldest in turn. The intent is to simply push as hard as we
2964          * can with the number of preemptions, trying to exceed narrow HW
2965          * limits. At a minimum, we insist that we can sort all the user
2966          * high priority levels into execution order.
2967          */
2968
2969         for_each_engine(engine, gt, id) {
2970                 struct i915_request *rq = NULL;
2971                 struct igt_live_test t;
2972                 IGT_TIMEOUT(end_time);
2973                 int prio = 0;
2974                 int err = 0;
2975                 u32 *cs;
2976
2977                 if (!intel_engine_has_preemption(engine))
2978                         continue;
2979
2980                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2981                         return -EIO;
2982
2983                 do {
2984                         struct i915_sched_attr attr = { .priority = prio++ };
2985
2986                         err = create_gang(engine, &rq);
2987                         if (err)
2988                                 break;
2989
2990                         /* Submit each spinner at increasing priority */
2991                         engine->sched_engine->schedule(rq, &attr);
2992                 } while (prio <= I915_PRIORITY_MAX &&
2993                          !__igt_timeout(end_time, NULL));
2994                 pr_debug("%s: Preempt chain of %d requests\n",
2995                          engine->name, prio);
2996
2997                 /*
2998                  * Such that the last spinner is the highest priority and
2999                  * should execute first. When that spinner completes,
3000                  * it will terminate the next lowest spinner until there
3001                  * are no more spinners and the gang is complete.
3002                  */
3003                 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3004                 if (!IS_ERR(cs)) {
3005                         *cs = 0;
3006                         i915_gem_object_unpin_map(rq->batch->obj);
3007                 } else {
3008                         err = PTR_ERR(cs);
3009                         intel_gt_set_wedged(gt);
3010                 }
3011
3012                 while (rq) { /* wait for each rq from highest to lowest prio */
3013                         struct i915_request *n = list_next_entry(rq, mock.link);
3014
3015                         if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3016                                 struct drm_printer p =
3017                                         drm_info_printer(engine->i915->drm.dev);
3018
3019                                 pr_err("Failed to flush chain of %d requests, at %d\n",
3020                                        prio, rq_prio(rq));
3021                                 intel_engine_dump(engine, &p,
3022                                                   "%s\n", engine->name);
3023
3024                                 err = -ETIME;
3025                         }
3026
3027                         i915_vma_put(rq->batch);
3028                         i915_request_put(rq);
3029                         rq = n;
3030                 }
3031
3032                 if (igt_live_test_end(&t))
3033                         err = -EIO;
3034                 if (err)
3035                         return err;
3036         }
3037
3038         return 0;
3039 }
3040
3041 static struct i915_vma *
3042 create_gpr_user(struct intel_engine_cs *engine,
3043                 struct i915_vma *result,
3044                 unsigned int offset)
3045 {
3046         struct drm_i915_gem_object *obj;
3047         struct i915_vma *vma;
3048         u32 *cs;
3049         int err;
3050         int i;
3051
3052         obj = i915_gem_object_create_internal(engine->i915, 4096);
3053         if (IS_ERR(obj))
3054                 return ERR_CAST(obj);
3055
3056         vma = i915_vma_instance(obj, result->vm, NULL);
3057         if (IS_ERR(vma)) {
3058                 i915_gem_object_put(obj);
3059                 return vma;
3060         }
3061
3062         err = i915_vma_pin(vma, 0, 0, PIN_USER);
3063         if (err) {
3064                 i915_vma_put(vma);
3065                 return ERR_PTR(err);
3066         }
3067
3068         cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3069         if (IS_ERR(cs)) {
3070                 i915_vma_put(vma);
3071                 return ERR_CAST(cs);
3072         }
3073
3074         /* All GPR are clear for new contexts. We use GPR(0) as a constant */
3075         *cs++ = MI_LOAD_REGISTER_IMM(1);
3076         *cs++ = CS_GPR(engine, 0);
3077         *cs++ = 1;
3078
3079         for (i = 1; i < NUM_GPR; i++) {
3080                 u64 addr;
3081
3082                 /*
3083                  * Perform: GPR[i]++
3084                  *
3085                  * As we read and write into the context saved GPR[i], if
3086                  * we restart this batch buffer from an earlier point, we
3087                  * will repeat the increment and store a value > 1.
3088                  */
3089                 *cs++ = MI_MATH(4);
3090                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3091                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3092                 *cs++ = MI_MATH_ADD;
3093                 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3094
3095                 addr = result->node.start + offset + i * sizeof(*cs);
3096                 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3097                 *cs++ = CS_GPR(engine, 2 * i);
3098                 *cs++ = lower_32_bits(addr);
3099                 *cs++ = upper_32_bits(addr);
3100
3101                 *cs++ = MI_SEMAPHORE_WAIT |
3102                         MI_SEMAPHORE_POLL |
3103                         MI_SEMAPHORE_SAD_GTE_SDD;
3104                 *cs++ = i;
3105                 *cs++ = lower_32_bits(result->node.start);
3106                 *cs++ = upper_32_bits(result->node.start);
3107         }
3108
3109         *cs++ = MI_BATCH_BUFFER_END;
3110         i915_gem_object_flush_map(obj);
3111         i915_gem_object_unpin_map(obj);
3112
3113         return vma;
3114 }
3115
3116 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3117 {
3118         struct drm_i915_gem_object *obj;
3119         struct i915_vma *vma;
3120         int err;
3121
3122         obj = i915_gem_object_create_internal(gt->i915, sz);
3123         if (IS_ERR(obj))
3124                 return ERR_CAST(obj);
3125
3126         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3127         if (IS_ERR(vma)) {
3128                 i915_gem_object_put(obj);
3129                 return vma;
3130         }
3131
3132         err = i915_ggtt_pin(vma, NULL, 0, 0);
3133         if (err) {
3134                 i915_vma_put(vma);
3135                 return ERR_PTR(err);
3136         }
3137
3138         return vma;
3139 }
3140
/*
 * create_gpr_client - submit one GPR-incrementing client on a new context.
 * @engine: engine on which to create the context and request
 * @global: vma whose backing object is shared with the client as its
 *          result/semaphore buffer
 * @offset: byte offset into the result buffer for this client's values
 *
 * The object behind @global is bound into the new context's address space,
 * a batch is built by create_gpr_user() against that binding, and a request
 * executing the batch is submitted.
 *
 * Return: the submitted request with an extra reference held for the caller,
 * or an ERR_PTR() on failure.
 */
static struct i915_request *
create_gpr_client(struct intel_engine_cs *engine,
                  struct i915_vma *global,
                  unsigned int offset)
{
        struct i915_vma *batch, *vma;
        struct intel_context *ce;
        struct i915_request *rq;
        int err;

        ce = intel_context_create(engine);
        if (IS_ERR(ce))
                return ERR_CAST(ce);

        /* Per-context view of the shared result object */
        vma = i915_vma_instance(global->obj, ce->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto out_ce;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto out_ce;

        batch = create_gpr_user(engine, vma, offset);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_vma;
        }

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        /* Track the result buffer with the request */
        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, false);
        if (!err)
                err = i915_vma_move_to_active(vma, rq, 0);
        i915_vma_unlock(vma);

        /* Then the batch itself; each step is skipped once err is set */
        i915_vma_lock(batch);
        if (!err)
                err = i915_request_await_object(rq, batch->obj, false);
        if (!err)
                err = i915_vma_move_to_active(batch, rq, 0);
        if (!err)
                err = rq->engine->emit_bb_start(rq,
                                                batch->node.start,
                                                PAGE_SIZE, 0);
        i915_vma_unlock(batch);
        i915_vma_unpin(batch);

        /*
         * Only take the caller's reference on success; the request is
         * added regardless of err.
         */
        if (!err)
                i915_request_get(rq);
        i915_request_add(rq);

        /* The labels below are also the normal exit path */
out_batch:
        i915_vma_put(batch);
out_vma:
        i915_vma_unpin(vma);
out_ce:
        intel_context_put(ce);
        return err ? ERR_PTR(err) : rq;
}
3207
3208 static int preempt_user(struct intel_engine_cs *engine,
3209                         struct i915_vma *global,
3210                         int id)
3211 {
3212         struct i915_sched_attr attr = {
3213                 .priority = I915_PRIORITY_MAX
3214         };
3215         struct i915_request *rq;
3216         int err = 0;
3217         u32 *cs;
3218
3219         rq = intel_engine_create_kernel_request(engine);
3220         if (IS_ERR(rq))
3221                 return PTR_ERR(rq);
3222
3223         cs = intel_ring_begin(rq, 4);
3224         if (IS_ERR(cs)) {
3225                 i915_request_add(rq);
3226                 return PTR_ERR(cs);
3227         }
3228
3229         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3230         *cs++ = i915_ggtt_offset(global);
3231         *cs++ = 0;
3232         *cs++ = id;
3233
3234         intel_ring_advance(rq, cs);
3235
3236         i915_request_get(rq);
3237         i915_request_add(rq);
3238
3239         engine->sched_engine->schedule(rq, &attr);
3240
3241         if (i915_request_wait(rq, 0, HZ / 2) < 0)
3242                 err = -ETIME;
3243         i915_request_put(rq);
3244
3245         return err;
3246 }
3247
/*
 * live_preempt_user - check that preemption inside a user batch resumes at
 * the interrupted instruction.
 * @arg: the struct intel_gt to exercise
 *
 * Return: 0 on success; -EIO if the live test bracket fails or the final
 * semaphore value is not observed; -ETIME if a client does not complete
 * within HZ / 2; -EINVAL if a client stored a value other than 1; otherwise
 * the error from setting up the shared buffer, a client or a preempting
 * request.
 */
static int live_preempt_user(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct i915_vma *global;
        enum intel_engine_id id;
        u32 *result;
        int err = 0;

        /*
         * In our other tests, we look at preemption in carefully
         * controlled conditions in the ringbuffer. Since most of the
         * time is spent in user batches, most of our preemptions naturally
         * occur there. We want to verify that when we preempt inside a batch
         * we continue on from the current instruction and do not roll back
         * to the start, or another earlier arbitration point.
         *
         * To verify this, we create a batch which is a mixture of
         * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
         * a few preempting contexts thrown into the mix, we look for any
         * repeated instructions (which show up as incorrect values).
         */

        /* One 4096 byte buffer in the GGTT, shared by every client */
        global = create_global(gt, 4096);
        if (IS_ERR(global))
                return PTR_ERR(global);

        /* CPU view of the shared buffer */
        result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
        if (IS_ERR(result)) {
                i915_vma_unpin_and_release(&global, 0);
                return PTR_ERR(result);
        }

        for_each_engine(engine, gt, id) {
                struct i915_request *client[3] = {};
                struct igt_live_test t;
                int i;

                if (!intel_engine_has_preemption(engine))
                        continue;

                if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
                        continue; /* we need per-context GPR */

                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
                        err = -EIO;
                        break;
                }

                /* Start each engine with a zeroed result/semaphore buffer */
                memset(result, 0, 4096);

                /* Each client gets its own NUM_GPR dword slice of the buffer */
                for (i = 0; i < ARRAY_SIZE(client); i++) {
                        struct i915_request *rq;

                        rq = create_gpr_client(engine, global,
                                               NUM_GPR * i * sizeof(u32));
                        if (IS_ERR(rq)) {
                                err = PTR_ERR(rq);
                                goto end_test;
                        }

                        client[i] = rq;
                }

                /* Continuously preempt the set of 3 running contexts */
                for (i = 1; i <= NUM_GPR; i++) {
                        err = preempt_user(engine, global, i);
                        if (err)
                                goto end_test;
                }

                /*
                 * preempt_user() stores its id at the start of the buffer,
                 * so the last value written must be visible in result[0].
                 */
                if (READ_ONCE(result[0]) != NUM_GPR) {
                        pr_err("%s: Failed to release semaphore\n",
                               engine->name);
                        err = -EIO;
                        goto end_test;
                }

                for (i = 0; i < ARRAY_SIZE(client); i++) {
                        int gpr;

                        if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
                                err = -ETIME;
                                goto end_test;
                        }

                        /*
                         * Every stored GPR must read exactly 1; slot 0 of
                         * each slice is not checked.
                         */
                        for (gpr = 1; gpr < NUM_GPR; gpr++) {
                                if (result[NUM_GPR * i + gpr] != 1) {
                                        pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
                                               engine->name,
                                               i, gpr, result[NUM_GPR * i + gpr]);
                                        err = -EINVAL;
                                        goto end_test;
                                }
                        }
                }

end_test:
                /* client[] is filled in order, so stop at the first gap */
                for (i = 0; i < ARRAY_SIZE(client); i++) {
                        if (!client[i])
                                break;

                        i915_request_put(client[i]);
                }

                /* Flush the semaphores on error */
                smp_store_mb(result[0], -1);
                /* A failing live test check overrides any earlier error */
                if (igt_live_test_end(&t))
                        err = -EIO;
                if (err)
                        break;
        }

        i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
        return err;
}
3364
/*
 * live_preempt_timeout - check that the preempt timeout forces preemption.
 * @arg: the struct intel_gt to exercise
 *
 * The test is skipped (returning 0) when CONFIG_DRM_I915_PREEMPT_TIMEOUT is
 * not active or the GT does not support engine reset.
 *
 * Return: 0 on success or skip; -ENOMEM if the spinner or either context
 * cannot be set up; -EIO if the spinner fails to start; -ETIME if the high
 * priority request does not complete within HZ / 10; otherwise the error
 * from creating a request.
 */
static int live_preempt_timeout(void *arg)
{
        struct intel_gt *gt = arg;
        struct i915_gem_context *ctx_hi, *ctx_lo;
        struct igt_spinner spin_lo;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = -ENOMEM;

        /*
         * Check that we force preemption to occur by cancelling the previous
         * context if it refuses to yield the GPU.
         */
        if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
                return 0;

        if (!intel_has_reset_engine(gt))
                return 0;

        if (igt_spinner_init(&spin_lo, gt))
                return -ENOMEM;

        ctx_hi = kernel_context(gt->i915);
        if (!ctx_hi)
                goto err_spin_lo;
        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

        ctx_lo = kernel_context(gt->i915);
        if (!ctx_lo)
                goto err_ctx_hi;
        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

        for_each_engine(engine, gt, id) {
                unsigned long saved_timeout;
                struct i915_request *rq;

                if (!intel_engine_has_preemption(engine))
                        continue;

                /* Low priority spinner without an arbitration point */
                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
                                            MI_NOOP); /* preemption disabled */
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_ctx_lo;
                }

                i915_request_add(rq);
                if (!igt_wait_for_spinner(&spin_lo, rq)) {
                        intel_gt_set_wedged(gt);
                        err = -EIO;
                        goto err_ctx_lo;
                }

                /* High priority request that has to displace the spinner */
                rq = igt_request_alloc(ctx_hi, engine);
                if (IS_ERR(rq)) {
                        igt_spinner_end(&spin_lo);
                        err = PTR_ERR(rq);
                        goto err_ctx_lo;
                }

                /* Flush the previous CS ack before changing timeouts */
                while (READ_ONCE(engine->execlists.pending[0]))
                        cpu_relax();

                /*
                 * Shorten the preempt timeout only for the duration of
                 * this submission; the saved value is restored right after
                 * the submission has been flushed.
                 */
                saved_timeout = engine->props.preempt_timeout_ms;
                engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */

                i915_request_get(rq);
                i915_request_add(rq);

                intel_engine_flush_submission(engine);
                engine->props.preempt_timeout_ms = saved_timeout;

                /* The spinner is still running while we wait here */
                if (i915_request_wait(rq, 0, HZ / 10) < 0) {
                        intel_gt_set_wedged(gt);
                        i915_request_put(rq);
                        err = -ETIME;
                        goto err_ctx_lo;
                }

                igt_spinner_end(&spin_lo);
                i915_request_put(rq);
        }

        err = 0;
        /* The labels below are also the normal exit path */
err_ctx_lo:
        kernel_context_close(ctx_lo);
err_ctx_hi:
        kernel_context_close(ctx_hi);
err_spin_lo:
        igt_spinner_fini(&spin_lo);
        return err;
}
3458
/*
 * Draw a value from @rnd bounded by the width of the [min, max] interval
 * and shift it so that it starts at @min.
 */
static int random_range(struct rnd_state *rnd, int min, int max)
{
        const int span = max - min;

        return min + i915_prandom_u32_max_state(span, rnd);
}
3463
3464 static int random_priority(struct rnd_state *rnd)
3465 {
3466         return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3467 }
3468
/*
 * State shared by the preemption smoke tests (smoke_submit(),
 * smoke_crescendo(), smoke_random() and live_preempt_smoke()).
 */
struct preempt_smoke {
        /* GT whose engines are exercised */
        struct intel_gt *gt;
        /* Array of @ncontext contexts to pick from at random */
        struct i915_gem_context **contexts;
        /* Engine that smoke_submit() allocates the next request on */
        struct intel_engine_cs *engine;
        /* Optional batch object started by each request; NULL for none */
        struct drm_i915_gem_object *batch;
        /* Number of entries in @contexts */
        unsigned int ncontext;
        /* PRNG state used for choosing contexts and priorities */
        struct rnd_state prng;
        /* Number of requests submitted by a crescendo thread */
        unsigned long count;
};
3478
3479 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3480 {
3481         return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3482                                                           &smoke->prng)];
3483 }
3484
3485 static int smoke_submit(struct preempt_smoke *smoke,
3486                         struct i915_gem_context *ctx, int prio,
3487                         struct drm_i915_gem_object *batch)
3488 {
3489         struct i915_request *rq;
3490         struct i915_vma *vma = NULL;
3491         int err = 0;
3492
3493         if (batch) {
3494                 struct i915_address_space *vm;
3495
3496                 vm = i915_gem_context_get_vm_rcu(ctx);
3497                 vma = i915_vma_instance(batch, vm, NULL);
3498                 i915_vm_put(vm);
3499                 if (IS_ERR(vma))
3500                         return PTR_ERR(vma);
3501
3502                 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3503                 if (err)
3504                         return err;
3505         }
3506
3507         ctx->sched.priority = prio;
3508
3509         rq = igt_request_alloc(ctx, smoke->engine);
3510         if (IS_ERR(rq)) {
3511                 err = PTR_ERR(rq);
3512                 goto unpin;
3513         }
3514
3515         if (vma) {
3516                 i915_vma_lock(vma);
3517                 err = i915_request_await_object(rq, vma->obj, false);
3518                 if (!err)
3519                         err = i915_vma_move_to_active(vma, rq, 0);
3520                 if (!err)
3521                         err = rq->engine->emit_bb_start(rq,
3522                                                         vma->node.start,
3523                                                         PAGE_SIZE, 0);
3524                 i915_vma_unlock(vma);
3525         }
3526
3527         i915_request_add(rq);
3528
3529 unpin:
3530         if (vma)
3531                 i915_vma_unpin(vma);
3532
3533         return err;
3534 }
3535
3536 static int smoke_crescendo_thread(void *arg)
3537 {
3538         struct preempt_smoke *smoke = arg;
3539         IGT_TIMEOUT(end_time);
3540         unsigned long count;
3541
3542         count = 0;
3543         do {
3544                 struct i915_gem_context *ctx = smoke_context(smoke);
3545                 int err;
3546
3547                 err = smoke_submit(smoke,
3548                                    ctx, count % I915_PRIORITY_MAX,
3549                                    smoke->batch);
3550                 if (err)
3551                         return err;
3552
3553                 count++;
3554         } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3555
3556         smoke->count = count;
3557         return 0;
3558 }
3559
3560 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3561 #define BATCH BIT(0)
3562 {
3563         struct task_struct *tsk[I915_NUM_ENGINES] = {};
3564         struct preempt_smoke arg[I915_NUM_ENGINES];
3565         struct intel_engine_cs *engine;
3566         enum intel_engine_id id;
3567         unsigned long count;
3568         int err = 0;
3569
3570         for_each_engine(engine, smoke->gt, id) {
3571                 arg[id] = *smoke;
3572                 arg[id].engine = engine;
3573                 if (!(flags & BATCH))
3574                         arg[id].batch = NULL;
3575                 arg[id].count = 0;
3576
3577                 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3578                                       "igt/smoke:%d", id);
3579                 if (IS_ERR(tsk[id])) {
3580                         err = PTR_ERR(tsk[id]);
3581                         break;
3582                 }
3583                 get_task_struct(tsk[id]);
3584         }
3585
3586         yield(); /* start all threads before we kthread_stop() */
3587
3588         count = 0;
3589         for_each_engine(engine, smoke->gt, id) {
3590                 int status;
3591
3592                 if (IS_ERR_OR_NULL(tsk[id]))
3593                         continue;
3594
3595                 status = kthread_stop(tsk[id]);
3596                 if (status && !err)
3597                         err = status;
3598
3599                 count += arg[id].count;
3600
3601                 put_task_struct(tsk[id]);
3602         }
3603
3604         pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605                 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606         return 0;
3607 }
3608
3609 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3610 {
3611         enum intel_engine_id id;
3612         IGT_TIMEOUT(end_time);
3613         unsigned long count;
3614
3615         count = 0;
3616         do {
3617                 for_each_engine(smoke->engine, smoke->gt, id) {
3618                         struct i915_gem_context *ctx = smoke_context(smoke);
3619                         int err;
3620
3621                         err = smoke_submit(smoke,
3622                                            ctx, random_priority(&smoke->prng),
3623                                            flags & BATCH ? smoke->batch : NULL);
3624                         if (err)
3625                                 return err;
3626
3627                         count++;
3628                 }
3629         } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3630
3631         pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3632                 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3633         return 0;
3634 }
3635
3636 static int live_preempt_smoke(void *arg)
3637 {
3638         struct preempt_smoke smoke = {
3639                 .gt = arg,
3640                 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3641                 .ncontext = 256,
3642         };
3643         const unsigned int phase[] = { 0, BATCH };
3644         struct igt_live_test t;
3645         int err = -ENOMEM;
3646         u32 *cs;
3647         int n;
3648
3649         smoke.contexts = kmalloc_array(smoke.ncontext,
3650                                        sizeof(*smoke.contexts),
3651                                        GFP_KERNEL);
3652         if (!smoke.contexts)
3653                 return -ENOMEM;
3654
3655         smoke.batch =
3656                 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3657         if (IS_ERR(smoke.batch)) {
3658                 err = PTR_ERR(smoke.batch);
3659                 goto err_free;
3660         }
3661
3662         cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3663         if (IS_ERR(cs)) {
3664                 err = PTR_ERR(cs);
3665                 goto err_batch;
3666         }
3667         for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3668                 cs[n] = MI_ARB_CHECK;
3669         cs[n] = MI_BATCH_BUFFER_END;
3670         i915_gem_object_flush_map(smoke.batch);
3671         i915_gem_object_unpin_map(smoke.batch);
3672
3673         if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3674                 err = -EIO;
3675                 goto err_batch;
3676         }
3677
3678         for (n = 0; n < smoke.ncontext; n++) {
3679                 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3680                 if (!smoke.contexts[n])
3681                         goto err_ctx;
3682         }
3683
3684         for (n = 0; n < ARRAY_SIZE(phase); n++) {
3685                 err = smoke_crescendo(&smoke, phase[n]);
3686                 if (err)
3687                         goto err_ctx;
3688
3689                 err = smoke_random(&smoke, phase[n]);
3690                 if (err)
3691                         goto err_ctx;
3692         }
3693
3694 err_ctx:
3695         if (igt_live_test_end(&t))
3696                 err = -EIO;
3697
3698         for (n = 0; n < smoke.ncontext; n++) {
3699                 if (!smoke.contexts[n])
3700                         break;
3701                 kernel_context_close(smoke.contexts[n]);
3702         }
3703
3704 err_batch:
3705         i915_gem_object_put(smoke.batch);
3706 err_free:
3707         kfree(smoke.contexts);
3708
3709         return err;
3710 }
3711
/*
 * nop_virtual_engine - measure request latency through virtual engines
 * @gt: GT under test
 * @siblings: physical engines backing each virtual engine
 * @nsibling: number of entries in @siblings
 * @nctx: number of virtual contexts to create (1..16)
 * @flags: CHAIN to submit all requests of one context before moving on to
 *         the next; 0 to interleave the contexts round-robin
 *
 * For a growing prime number of requests per context, requests are created
 * and added without any further payload being emitted here, and the time
 * until the last request of every context completes is recorded.
 *
 * Returns 0 on success or a negative error code.
 */
static int nop_virtual_engine(struct intel_gt *gt,
                              struct intel_engine_cs **siblings,
                              unsigned int nsibling,
                              unsigned int nctx,
                              unsigned int flags)
#define CHAIN BIT(0)
{
        IGT_TIMEOUT(end_time);
        struct i915_request *request[16] = {}; /* last request per context */
        struct intel_context *ve[16];
        unsigned long n, prime, nc;
        struct igt_live_test t;
        ktime_t times[2] = {}; /* [0]: prime == 1 pass, [1]: latest pass */
        int err;

        GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

        for (n = 0; n < nctx; n++) {
                ve[n] = intel_execlists_create_virtual(siblings, nsibling);
                if (IS_ERR(ve[n])) {
                        err = PTR_ERR(ve[n]);
                        /* Only the first n contexts need cleaning up */
                        nctx = n;
                        goto out;
                }

                err = intel_context_pin(ve[n]);
                if (err) {
                        intel_context_put(ve[n]);
                        nctx = n;
                        goto out;
                }
        }

        err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
        if (err)
                goto out;

        for_each_prime_number_from(prime, 1, 8192) {
                times[1] = ktime_get_raw();

                if (flags & CHAIN) {
                        /* All requests for one context, then the next context */
                        for (nc = 0; nc < nctx; nc++) {
                                for (n = 0; n < prime; n++) {
                                        struct i915_request *rq;

                                        rq = i915_request_create(ve[nc]);
                                        if (IS_ERR(rq)) {
                                                err = PTR_ERR(rq);
                                                goto out;
                                        }

                                        /* Keep a reference to the newest only */
                                        if (request[nc])
                                                i915_request_put(request[nc]);
                                        request[nc] = i915_request_get(rq);
                                        i915_request_add(rq);
                                }
                        }
                } else {
                        /* One request per context per round */
                        for (n = 0; n < prime; n++) {
                                for (nc = 0; nc < nctx; nc++) {
                                        struct i915_request *rq;

                                        rq = i915_request_create(ve[nc]);
                                        if (IS_ERR(rq)) {
                                                err = PTR_ERR(rq);
                                                goto out;
                                        }

                                        if (request[nc])
                                                i915_request_put(request[nc]);
                                        request[nc] = i915_request_get(rq);
                                        i915_request_add(rq);
                                }
                        }
                }

                for (nc = 0; nc < nctx; nc++) {
                        if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
                                pr_err("%s(%s): wait for %llx:%lld timed out\n",
                                       __func__, ve[0]->engine->name,
                                       request[nc]->fence.context,
                                       request[nc]->fence.seqno);

                                GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
                                          __func__, ve[0]->engine->name,
                                          request[nc]->fence.context,
                                          request[nc]->fence.seqno);
                                GEM_TRACE_DUMP();
                                intel_gt_set_wedged(gt);
                                /*
                                 * NOTE(review): err is not set on this path;
                                 * presumably the wedged GT is reported by
                                 * igt_live_test_end()/igt_flush_test() below
                                 * -- confirm.
                                 */
                                break;
                        }
                }

                /* Convert the start timestamp into the elapsed time */
                times[1] = ktime_sub(ktime_get_raw(), times[1]);
                if (prime == 1)
                        times[0] = times[1];

                for (nc = 0; nc < nctx; nc++) {
                        i915_request_put(request[nc]);
                        request[nc] = NULL;
                }

                if (__igt_timeout(end_time, NULL))
                        break;
        }

        err = igt_live_test_end(&t);
        if (err)
                goto out;

        pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
                nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
                prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        /* request[nc] may still be held if we jumped out mid-submission */
        for (nc = 0; nc < nctx; nc++) {
                i915_request_put(request[nc]);
                intel_context_unpin(ve[nc]);
                intel_context_put(ve[nc]);
        }
        return err;
}
3837
3838 static unsigned int
3839 __select_siblings(struct intel_gt *gt,
3840                   unsigned int class,
3841                   struct intel_engine_cs **siblings,
3842                   bool (*filter)(const struct intel_engine_cs *))
3843 {
3844         unsigned int n = 0;
3845         unsigned int inst;
3846
3847         for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3848                 if (!gt->engine_class[class][inst])
3849                         continue;
3850
3851                 if (filter && !filter(gt->engine_class[class][inst]))
3852                         continue;
3853
3854                 siblings[n++] = gt->engine_class[class][inst];
3855         }
3856
3857         return n;
3858 }
3859
3860 static unsigned int
3861 select_siblings(struct intel_gt *gt,
3862                 unsigned int class,
3863                 struct intel_engine_cs **siblings)
3864 {
3865         return __select_siblings(gt, class, siblings, NULL);
3866 }
3867
3868 static int live_virtual_engine(void *arg)
3869 {
3870         struct intel_gt *gt = arg;
3871         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3872         struct intel_engine_cs *engine;
3873         enum intel_engine_id id;
3874         unsigned int class;
3875         int err;
3876
3877         if (intel_uc_uses_guc_submission(&gt->uc))
3878                 return 0;
3879
3880         for_each_engine(engine, gt, id) {
3881                 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882                 if (err) {
3883                         pr_err("Failed to wrap engine %s: err=%d\n",
3884                                engine->name, err);
3885                         return err;
3886                 }
3887         }
3888
3889         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3890                 int nsibling, n;
3891
3892                 nsibling = select_siblings(gt, class, siblings);
3893                 if (nsibling < 2)
3894                         continue;
3895
3896                 for (n = 1; n <= nsibling + 1; n++) {
3897                         err = nop_virtual_engine(gt, siblings, nsibling,
3898                                                  n, 0);
3899                         if (err)
3900                                 return err;
3901                 }
3902
3903                 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3904                 if (err)
3905                         return err;
3906         }
3907
3908         return 0;
3909 }
3910
3911 static int mask_virtual_engine(struct intel_gt *gt,
3912                                struct intel_engine_cs **siblings,
3913                                unsigned int nsibling)
3914 {
3915         struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3916         struct intel_context *ve;
3917         struct igt_live_test t;
3918         unsigned int n;
3919         int err;
3920
3921         /*
3922          * Check that by setting the execution mask on a request, we can
3923          * restrict it to our desired engine within the virtual engine.
3924          */
3925
3926         ve = intel_execlists_create_virtual(siblings, nsibling);
3927         if (IS_ERR(ve)) {
3928                 err = PTR_ERR(ve);
3929                 goto out_close;
3930         }
3931
3932         err = intel_context_pin(ve);
3933         if (err)
3934                 goto out_put;
3935
3936         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3937         if (err)
3938                 goto out_unpin;
3939
3940         for (n = 0; n < nsibling; n++) {
3941                 request[n] = i915_request_create(ve);
3942                 if (IS_ERR(request[n])) {
3943                         err = PTR_ERR(request[n]);
3944                         nsibling = n;
3945                         goto out;
3946                 }
3947
3948                 /* Reverse order as it's more likely to be unnatural */
3949                 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950
3951                 i915_request_get(request[n]);
3952                 i915_request_add(request[n]);
3953         }
3954
3955         for (n = 0; n < nsibling; n++) {
3956                 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3957                         pr_err("%s(%s): wait for %llx:%lld timed out\n",
3958                                __func__, ve->engine->name,
3959                                request[n]->fence.context,
3960                                request[n]->fence.seqno);
3961
3962                         GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3963                                   __func__, ve->engine->name,
3964                                   request[n]->fence.context,
3965                                   request[n]->fence.seqno);
3966                         GEM_TRACE_DUMP();
3967                         intel_gt_set_wedged(gt);
3968                         err = -EIO;
3969                         goto out;
3970                 }
3971
3972                 if (request[n]->engine != siblings[nsibling - n - 1]) {
3973                         pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3974                                request[n]->engine->name,
3975                                siblings[nsibling - n - 1]->name);
3976                         err = -EINVAL;
3977                         goto out;
3978                 }
3979         }
3980
3981         err = igt_live_test_end(&t);
3982 out:
3983         if (igt_flush_test(gt->i915))
3984                 err = -EIO;
3985
3986         for (n = 0; n < nsibling; n++)
3987                 i915_request_put(request[n]);
3988
3989 out_unpin:
3990         intel_context_unpin(ve);
3991 out_put:
3992         intel_context_put(ve);
3993 out_close:
3994         return err;
3995 }
3996
3997 static int live_virtual_mask(void *arg)
3998 {
3999         struct intel_gt *gt = arg;
4000         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4001         unsigned int class;
4002         int err;
4003
4004         if (intel_uc_uses_guc_submission(&gt->uc))
4005                 return 0;
4006
4007         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4008                 unsigned int nsibling;
4009
4010                 nsibling = select_siblings(gt, class, siblings);
4011                 if (nsibling < 2)
4012                         continue;
4013
4014                 err = mask_virtual_engine(gt, siblings, nsibling);
4015                 if (err)
4016                         return err;
4017         }
4018
4019         return 0;
4020 }
4021
4022 static int slicein_virtual_engine(struct intel_gt *gt,
4023                                   struct intel_engine_cs **siblings,
4024                                   unsigned int nsibling)
4025 {
4026         const long timeout = slice_timeout(siblings[0]);
4027         struct intel_context *ce;
4028         struct i915_request *rq;
4029         struct igt_spinner spin;
4030         unsigned int n;
4031         int err = 0;
4032
4033         /*
4034          * Virtual requests must take part in timeslicing on the target engines.
4035          */
4036
4037         if (igt_spinner_init(&spin, gt))
4038                 return -ENOMEM;
4039
4040         for (n = 0; n < nsibling; n++) {
4041                 ce = intel_context_create(siblings[n]);
4042                 if (IS_ERR(ce)) {
4043                         err = PTR_ERR(ce);
4044                         goto out;
4045                 }
4046
4047                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4048                 intel_context_put(ce);
4049                 if (IS_ERR(rq)) {
4050                         err = PTR_ERR(rq);
4051                         goto out;
4052                 }
4053
4054                 i915_request_add(rq);
4055         }
4056
4057         ce = intel_execlists_create_virtual(siblings, nsibling);
4058         if (IS_ERR(ce)) {
4059                 err = PTR_ERR(ce);
4060                 goto out;
4061         }
4062
4063         rq = intel_context_create_request(ce);
4064         intel_context_put(ce);
4065         if (IS_ERR(rq)) {
4066                 err = PTR_ERR(rq);
4067                 goto out;
4068         }
4069
4070         i915_request_get(rq);
4071         i915_request_add(rq);
4072         if (i915_request_wait(rq, 0, timeout) < 0) {
4073                 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4074                               __func__, rq->engine->name);
4075                 GEM_TRACE_DUMP();
4076                 intel_gt_set_wedged(gt);
4077                 err = -EIO;
4078         }
4079         i915_request_put(rq);
4080
4081 out:
4082         igt_spinner_end(&spin);
4083         if (igt_flush_test(gt->i915))
4084                 err = -EIO;
4085         igt_spinner_fini(&spin);
4086         return err;
4087 }
4088
4089 static int sliceout_virtual_engine(struct intel_gt *gt,
4090                                    struct intel_engine_cs **siblings,
4091                                    unsigned int nsibling)
4092 {
4093         const long timeout = slice_timeout(siblings[0]);
4094         struct intel_context *ce;
4095         struct i915_request *rq;
4096         struct igt_spinner spin;
4097         unsigned int n;
4098         int err = 0;
4099
4100         /*
4101          * Virtual requests must allow others a fair timeslice.
4102          */
4103
4104         if (igt_spinner_init(&spin, gt))
4105                 return -ENOMEM;
4106
4107         /* XXX We do not handle oversubscription and fairness with normal rq */
4108         for (n = 0; n < nsibling; n++) {
4109                 ce = intel_execlists_create_virtual(siblings, nsibling);
4110                 if (IS_ERR(ce)) {
4111                         err = PTR_ERR(ce);
4112                         goto out;
4113                 }
4114
4115                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4116                 intel_context_put(ce);
4117                 if (IS_ERR(rq)) {
4118                         err = PTR_ERR(rq);
4119                         goto out;
4120                 }
4121
4122                 i915_request_add(rq);
4123         }
4124
4125         for (n = 0; !err && n < nsibling; n++) {
4126                 ce = intel_context_create(siblings[n]);
4127                 if (IS_ERR(ce)) {
4128                         err = PTR_ERR(ce);
4129                         goto out;
4130                 }
4131
4132                 rq = intel_context_create_request(ce);
4133                 intel_context_put(ce);
4134                 if (IS_ERR(rq)) {
4135                         err = PTR_ERR(rq);
4136                         goto out;
4137                 }
4138
4139                 i915_request_get(rq);
4140                 i915_request_add(rq);
4141                 if (i915_request_wait(rq, 0, timeout) < 0) {
4142                         GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4143                                       __func__, siblings[n]->name);
4144                         GEM_TRACE_DUMP();
4145                         intel_gt_set_wedged(gt);
4146                         err = -EIO;
4147                 }
4148                 i915_request_put(rq);
4149         }
4150
4151 out:
4152         igt_spinner_end(&spin);
4153         if (igt_flush_test(gt->i915))
4154                 err = -EIO;
4155         igt_spinner_fini(&spin);
4156         return err;
4157 }
4158
4159 static int live_virtual_slice(void *arg)
4160 {
4161         struct intel_gt *gt = arg;
4162         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4163         unsigned int class;
4164         int err;
4165
4166         if (intel_uc_uses_guc_submission(&gt->uc))
4167                 return 0;
4168
4169         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4170                 unsigned int nsibling;
4171
4172                 nsibling = __select_siblings(gt, class, siblings,
4173                                              intel_engine_has_timeslices);
4174                 if (nsibling < 2)
4175                         continue;
4176
4177                 err = slicein_virtual_engine(gt, siblings, nsibling);
4178                 if (err)
4179                         return err;
4180
4181                 err = sliceout_virtual_engine(gt, siblings, nsibling);
4182                 if (err)
4183                         return err;
4184         }
4185
4186         return 0;
4187 }
4188
/*
 * preserved_virtual_engine - check CS_GPR contents follow a virtual context
 * @gt: GT under test
 * @siblings: physical engines backing the virtual engine
 * @nsibling: number of entries in @siblings
 *
 * NUM_GPR_DW requests are submitted on one virtual context, request n being
 * restricted to siblings[n % nsibling]. Request n stores GPR dword n into
 * scratch[n] and then loads the value n + 1 into GPR dword
 * (n + 1) % NUM_GPR_DW, so each request reads back what the previous one
 * wrote while running on a different sibling.
 *
 * Returns 0 on success, -ETIME if the last request does not complete in
 * HZ / 5, -EINVAL on a value mismatch, or another negative error code.
 */
static int preserved_virtual_engine(struct intel_gt *gt,
                                    struct intel_engine_cs **siblings,
                                    unsigned int nsibling)
{
        struct i915_request *last = NULL;
        struct intel_context *ve;
        struct i915_vma *scratch;
        struct igt_live_test t;
        unsigned int n;
        int err = 0;
        u32 *cs;

        /* One page in the GGTT to collect the register values */
        scratch =
                __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
                                                    PAGE_SIZE);
        if (IS_ERR(scratch))
                return PTR_ERR(scratch);

        err = i915_vma_sync(scratch);
        if (err)
                goto out_scratch;

        ve = intel_execlists_create_virtual(siblings, nsibling);
        if (IS_ERR(ve)) {
                err = PTR_ERR(ve);
                goto out_scratch;
        }

        err = intel_context_pin(ve);
        if (err)
                goto out_put;

        err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
        if (err)
                goto out_unpin;

        for (n = 0; n < NUM_GPR_DW; n++) {
                /* Rotate through the siblings, one request each */
                struct intel_engine_cs *engine = siblings[n % nsibling];
                struct i915_request *rq;

                rq = i915_request_create(ve);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out_end;
                }

                /* Track only the most recent request for the final wait */
                i915_request_put(last);
                last = i915_request_get(rq);

                cs = intel_ring_begin(rq, 8);
                if (IS_ERR(cs)) {
                        /* The request was created, so it must still be added */
                        i915_request_add(rq);
                        err = PTR_ERR(cs);
                        goto out_end;
                }

                /* Save GPR dword n into scratch[n] ... */
                *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
                *cs++ = CS_GPR(engine, n);
                *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
                *cs++ = 0;

                /* ... and seed the next dword for the following request */
                *cs++ = MI_LOAD_REGISTER_IMM(1);
                *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
                *cs++ = n + 1;

                /* Pad to the 8 dwords reserved above */
                *cs++ = MI_NOOP;
                intel_ring_advance(rq, cs);

                /* Restrict this request to run on a particular engine */
                rq->execution_mask = engine->mask;
                i915_request_add(rq);
        }

        if (i915_request_wait(last, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto out_end;
        }

        cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto out_end;
        }

        /*
         * scratch[n] must equal n.
         * NOTE(review): entry 0 is read before any request loaded a value,
         * so this presumably relies on the GPR starting out as zero in a
         * fresh context -- confirm.
         */
        for (n = 0; n < NUM_GPR_DW; n++) {
                if (cs[n] != n) {
                        pr_err("Incorrect value[%d] found for GPR[%d]\n",
                               cs[n], n);
                        err = -EINVAL;
                        break;
                }
        }

        i915_gem_object_unpin_map(scratch->obj);

out_end:
        if (igt_live_test_end(&t))
                err = -EIO;
        i915_request_put(last);
out_unpin:
        intel_context_unpin(ve);
out_put:
        intel_context_put(ve);
out_scratch:
        i915_vma_unpin_and_release(&scratch, 0);
        return err;
}
4296
4297 static int live_virtual_preserved(void *arg)
4298 {
4299         struct intel_gt *gt = arg;
4300         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4301         unsigned int class;
4302
4303         /*
4304          * Check that the context image retains non-privileged (user) registers
4305          * from one engine to the next. For this we check that the CS_GPR
4306          * are preserved.
4307          */
4308
4309         if (intel_uc_uses_guc_submission(&gt->uc))
4310                 return 0;
4311
4312         /* As we use CS_GPR we cannot run before they existed on all engines. */
4313         if (GRAPHICS_VER(gt->i915) < 9)
4314                 return 0;
4315
4316         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4317                 int nsibling, err;
4318
4319                 nsibling = select_siblings(gt, class, siblings);
4320                 if (nsibling < 2)
4321                         continue;
4322
4323                 err = preserved_virtual_engine(gt, siblings, nsibling);
4324                 if (err)
4325                         return err;
4326         }
4327
4328         return 0;
4329 }
4330
4331 static int bond_virtual_engine(struct intel_gt *gt,
4332                                unsigned int class,
4333                                struct intel_engine_cs **siblings,
4334                                unsigned int nsibling,
4335                                unsigned int flags)
4336 #define BOND_SCHEDULE BIT(0)
4337 {
4338         struct intel_engine_cs *master;
4339         struct i915_request *rq[16];
4340         enum intel_engine_id id;
4341         struct igt_spinner spin;
4342         unsigned long n;
4343         int err;
4344
4345         /*
4346          * A set of bonded requests is intended to be run concurrently
4347          * across a number of engines. We use one request per-engine
4348          * and a magic fence to schedule each of the bonded requests
4349          * at the same time. A consequence of our current scheduler is that
4350          * we only move requests to the HW ready queue when the request
4351          * becomes ready, that is when all of its prerequisite fences have
4352          * been signaled. As one of those fences is the master submit fence,
4353          * there is a delay on all secondary fences as the HW may be
4354          * currently busy. Equally, as all the requests are independent,
4355          * they may have other fences that delay individual request
4356          * submission to HW. Ergo, we do not guarantee that all requests are
4357          * immediately submitted to HW at the same time, just that if the
4358          * rules are abided by, they are ready at the same time as the
4359          * first is submitted. Userspace can embed semaphores in its batch
4360          * to ensure parallel execution of its phases as it requires.
4361          * Though naturally it gets requested that perhaps the scheduler should
4362          * take care of parallel execution, even across preemption events on
4363          * different HW. (The proper answer is of course "lalalala".)
4364          *
4365          * With the submit-fence, we have identified three possible phases
4366          * of synchronisation depending on the master fence: queued (not
4367          * ready), executing, and signaled. The first two are quite simple
4368          * and checked below. However, the signaled master fence handling is
4369          * contentious. Currently we do not distinguish between a signaled
4370          * fence and an expired fence, as once signaled it does not convey
4371          * any information about the previous execution. It may even be freed
4372          * and hence checking later it may not exist at all. Ergo we currently
4373          * do not apply the bonding constraint for an already signaled fence,
4374          * as our expectation is that it should not constrain the secondaries
4375          * and is outside of the scope of the bonded request API (i.e. all
4376          * userspace requests are meant to be running in parallel). As
4377          * it imposes no constraint, and is effectively a no-op, we do not
4378          * check below as normal execution flows are checked extensively above.
4379          *
4380          * XXX Is the degenerate handling of signaled submit fences the
4381          * expected behaviour for userpace?
4382          */
4383
4384         GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4385
4386         if (igt_spinner_init(&spin, gt))
4387                 return -ENOMEM;
4388
4389         err = 0;
4390         rq[0] = ERR_PTR(-ENOMEM);
4391         for_each_engine(master, gt, id) {
4392                 struct i915_sw_fence fence = {};
4393                 struct intel_context *ce;
4394
4395                 if (master->class == class)
4396                         continue;
4397
4398                 ce = intel_context_create(master);
4399                 if (IS_ERR(ce)) {
4400                         err = PTR_ERR(ce);
4401                         goto out;
4402                 }
4403
4404                 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4405
4406                 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4407                 intel_context_put(ce);
4408                 if (IS_ERR(rq[0])) {
4409                         err = PTR_ERR(rq[0]);
4410                         goto out;
4411                 }
4412                 i915_request_get(rq[0]);
4413
4414                 if (flags & BOND_SCHEDULE) {
4415                         onstack_fence_init(&fence);
4416                         err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4417                                                                &fence,
4418                                                                GFP_KERNEL);
4419                 }
4420
4421                 i915_request_add(rq[0]);
4422                 if (err < 0)
4423                         goto out;
4424
4425                 if (!(flags & BOND_SCHEDULE) &&
4426                     !igt_wait_for_spinner(&spin, rq[0])) {
4427                         err = -EIO;
4428                         goto out;
4429                 }
4430
4431                 for (n = 0; n < nsibling; n++) {
4432                         struct intel_context *ve;
4433
4434                         ve = intel_execlists_create_virtual(siblings, nsibling);
4435                         if (IS_ERR(ve)) {
4436                                 err = PTR_ERR(ve);
4437                                 onstack_fence_fini(&fence);
4438                                 goto out;
4439                         }
4440
4441                         err = intel_virtual_engine_attach_bond(ve->engine,
4442                                                                master,
4443                                                                siblings[n]);
4444                         if (err) {
4445                                 intel_context_put(ve);
4446                                 onstack_fence_fini(&fence);
4447                                 goto out;
4448                         }
4449
4450                         err = intel_context_pin(ve);
4451                         intel_context_put(ve);
4452                         if (err) {
4453                                 onstack_fence_fini(&fence);
4454                                 goto out;
4455                         }
4456
4457                         rq[n + 1] = i915_request_create(ve);
4458                         intel_context_unpin(ve);
4459                         if (IS_ERR(rq[n + 1])) {
4460                                 err = PTR_ERR(rq[n + 1]);
4461                                 onstack_fence_fini(&fence);
4462                                 goto out;
4463                         }
4464                         i915_request_get(rq[n + 1]);
4465
4466                         err = i915_request_await_execution(rq[n + 1],
4467                                                            &rq[0]->fence,
4468                                                            ve->engine->bond_execute);
4469                         i915_request_add(rq[n + 1]);
4470                         if (err < 0) {
4471                                 onstack_fence_fini(&fence);
4472                                 goto out;
4473                         }
4474                 }
4475                 onstack_fence_fini(&fence);
4476                 intel_engine_flush_submission(master);
4477                 igt_spinner_end(&spin);
4478
4479                 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4480                         pr_err("Master request did not execute (on %s)!\n",
4481                                rq[0]->engine->name);
4482                         err = -EIO;
4483                         goto out;
4484                 }
4485
4486                 for (n = 0; n < nsibling; n++) {
4487                         if (i915_request_wait(rq[n + 1], 0,
4488                                               MAX_SCHEDULE_TIMEOUT) < 0) {
4489                                 err = -EIO;
4490                                 goto out;
4491                         }
4492
4493                         if (rq[n + 1]->engine != siblings[n]) {
4494                                 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4495                                        siblings[n]->name,
4496                                        rq[n + 1]->engine->name,
4497                                        rq[0]->engine->name);
4498                                 err = -EINVAL;
4499                                 goto out;
4500                         }
4501                 }
4502
4503                 for (n = 0; !IS_ERR(rq[n]); n++)
4504                         i915_request_put(rq[n]);
4505                 rq[0] = ERR_PTR(-ENOMEM);
4506         }
4507
4508 out:
4509         for (n = 0; !IS_ERR(rq[n]); n++)
4510                 i915_request_put(rq[n]);
4511         if (igt_flush_test(gt->i915))
4512                 err = -EIO;
4513
4514         igt_spinner_fini(&spin);
4515         return err;
4516 }
4517
4518 static int live_virtual_bond(void *arg)
4519 {
4520         static const struct phase {
4521                 const char *name;
4522                 unsigned int flags;
4523         } phases[] = {
4524                 { "", 0 },
4525                 { "schedule", BOND_SCHEDULE },
4526                 { },
4527         };
4528         struct intel_gt *gt = arg;
4529         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4530         unsigned int class;
4531         int err;
4532
4533         if (intel_uc_uses_guc_submission(&gt->uc))
4534                 return 0;
4535
4536         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4537                 const struct phase *p;
4538                 int nsibling;
4539
4540                 nsibling = select_siblings(gt, class, siblings);
4541                 if (nsibling < 2)
4542                         continue;
4543
4544                 for (p = phases; p->name; p++) {
4545                         err = bond_virtual_engine(gt,
4546                                                   class, siblings, nsibling,
4547                                                   p->flags);
4548                         if (err) {
4549                                 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4550                                        __func__, p->name, class, nsibling, err);
4551                                 return err;
4552                         }
4553                 }
4554         }
4555
4556         return 0;
4557 }
4558
4559 static int reset_virtual_engine(struct intel_gt *gt,
4560                                 struct intel_engine_cs **siblings,
4561                                 unsigned int nsibling)
4562 {
4563         struct intel_engine_cs *engine;
4564         struct intel_context *ve;
4565         struct igt_spinner spin;
4566         struct i915_request *rq;
4567         unsigned int n;
4568         int err = 0;
4569
4570         /*
4571          * In order to support offline error capture for fast preempt reset,
4572          * we need to decouple the guilty request and ensure that it and its
4573          * descendents are not executed while the capture is in progress.
4574          */
4575
4576         if (igt_spinner_init(&spin, gt))
4577                 return -ENOMEM;
4578
4579         ve = intel_execlists_create_virtual(siblings, nsibling);
4580         if (IS_ERR(ve)) {
4581                 err = PTR_ERR(ve);
4582                 goto out_spin;
4583         }
4584
4585         for (n = 0; n < nsibling; n++)
4586                 st_engine_heartbeat_disable(siblings[n]);
4587
4588         rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4589         if (IS_ERR(rq)) {
4590                 err = PTR_ERR(rq);
4591                 goto out_heartbeat;
4592         }
4593         i915_request_add(rq);
4594
4595         if (!igt_wait_for_spinner(&spin, rq)) {
4596                 intel_gt_set_wedged(gt);
4597                 err = -ETIME;
4598                 goto out_heartbeat;
4599         }
4600
4601         engine = rq->engine;
4602         GEM_BUG_ON(engine == ve->engine);
4603
4604         /* Take ownership of the reset and tasklet */
4605         err = engine_lock_reset_tasklet(engine);
4606         if (err)
4607                 goto out_heartbeat;
4608
4609         engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
4610         GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4611
4612         /* Fake a preemption event; failed of course */
4613         spin_lock_irq(&engine->sched_engine->lock);
4614         __unwind_incomplete_requests(engine);
4615         spin_unlock_irq(&engine->sched_engine->lock);
4616         GEM_BUG_ON(rq->engine != engine);
4617
4618         /* Reset the engine while keeping our active request on hold */
4619         execlists_hold(engine, rq);
4620         GEM_BUG_ON(!i915_request_on_hold(rq));
4621
4622         __intel_engine_reset_bh(engine, NULL);
4623         GEM_BUG_ON(rq->fence.error != -EIO);
4624
4625         /* Release our grasp on the engine, letting CS flow again */
4626         engine_unlock_reset_tasklet(engine);
4627
4628         /* Check that we do not resubmit the held request */
4629         i915_request_get(rq);
4630         if (!i915_request_wait(rq, 0, HZ / 5)) {
4631                 pr_err("%s: on hold request completed!\n",
4632                        engine->name);
4633                 intel_gt_set_wedged(gt);
4634                 err = -EIO;
4635                 goto out_rq;
4636         }
4637         GEM_BUG_ON(!i915_request_on_hold(rq));
4638
4639         /* But is resubmitted on release */
4640         execlists_unhold(engine, rq);
4641         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4642                 pr_err("%s: held request did not complete!\n",
4643                        engine->name);
4644                 intel_gt_set_wedged(gt);
4645                 err = -ETIME;
4646         }
4647
4648 out_rq:
4649         i915_request_put(rq);
4650 out_heartbeat:
4651         for (n = 0; n < nsibling; n++)
4652                 st_engine_heartbeat_enable(siblings[n]);
4653
4654         intel_context_put(ve);
4655 out_spin:
4656         igt_spinner_fini(&spin);
4657         return err;
4658 }
4659
4660 static int live_virtual_reset(void *arg)
4661 {
4662         struct intel_gt *gt = arg;
4663         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4664         unsigned int class;
4665
4666         /*
4667          * Check that we handle a reset event within a virtual engine.
4668          * Only the physical engine is reset, but we have to check the flow
4669          * of the virtual requests around the reset, and make sure it is not
4670          * forgotten.
4671          */
4672
4673         if (intel_uc_uses_guc_submission(&gt->uc))
4674                 return 0;
4675
4676         if (!intel_has_reset_engine(gt))
4677                 return 0;
4678
4679         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4680                 int nsibling, err;
4681
4682                 nsibling = select_siblings(gt, class, siblings);
4683                 if (nsibling < 2)
4684                         continue;
4685
4686                 err = reset_virtual_engine(gt, siblings, nsibling);
4687                 if (err)
4688                         return err;
4689         }
4690
4691         return 0;
4692 }
4693
4694 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4695 {
4696         static const struct i915_subtest tests[] = {
4697                 SUBTEST(live_sanitycheck),
4698                 SUBTEST(live_unlite_switch),
4699                 SUBTEST(live_unlite_preempt),
4700                 SUBTEST(live_unlite_ring),
4701                 SUBTEST(live_pin_rewind),
4702                 SUBTEST(live_hold_reset),
4703                 SUBTEST(live_error_interrupt),
4704                 SUBTEST(live_timeslice_preempt),
4705                 SUBTEST(live_timeslice_rewind),
4706                 SUBTEST(live_timeslice_queue),
4707                 SUBTEST(live_timeslice_nopreempt),
4708                 SUBTEST(live_busywait_preempt),
4709                 SUBTEST(live_preempt),
4710                 SUBTEST(live_late_preempt),
4711                 SUBTEST(live_nopreempt),
4712                 SUBTEST(live_preempt_cancel),
4713                 SUBTEST(live_suppress_self_preempt),
4714                 SUBTEST(live_chain_preempt),
4715                 SUBTEST(live_preempt_ring),
4716                 SUBTEST(live_preempt_gang),
4717                 SUBTEST(live_preempt_timeout),
4718                 SUBTEST(live_preempt_user),
4719                 SUBTEST(live_preempt_smoke),
4720                 SUBTEST(live_virtual_engine),
4721                 SUBTEST(live_virtual_mask),
4722                 SUBTEST(live_virtual_preserved),
4723                 SUBTEST(live_virtual_slice),
4724                 SUBTEST(live_virtual_bond),
4725                 SUBTEST(live_virtual_reset),
4726         };
4727
4728         if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
4729                 return 0;
4730
4731         if (intel_gt_is_wedged(&i915->gt))
4732                 return 0;
4733
4734         return intel_gt_live_subtests(tests, &i915->gt);
4735 }