drivers/gpu/drm/i915/gt/selftest_timeline.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

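/* Return the struct page backing the timeline's HWSP GGTT object. */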
static struct page *hwsp_page(struct intel_timeline *tl)
{
        struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
        return sg_page(obj->mm.pages->sgl);
}

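/*
 * Map a timeline's HWSP slot to a global cacheline index, used below as a
 * key to detect two timelines being assigned the same cacheline.
 */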
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
        unsigned long address = (unsigned long)page_address(hwsp_page(tl));

        return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
        struct intel_gt *gt;
        struct radix_tree_root cachelines;
        struct intel_timeline **history;
        unsigned long count, max;
        struct rnd_state prng;
};

enum {
        SHUFFLE = BIT(0),
};

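/*
 * Swap @tl into the history slot at @idx, dropping the cacheline bookkeeping
 * and the reference for whichever timeline previously occupied that slot.
 */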
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
                               unsigned int idx,
                               struct intel_timeline *tl)
{
        tl = xchg(&state->history[idx], tl);
        if (tl) {
                radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
                intel_timeline_put(tl);
        }
}

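/*
 * Allocate @count new timelines, verifying that no two of them share a HWSP
 * cacheline, then (optionally after shuffling the history) release a random
 * number of them so that later allocations can reuse the freed slots.
 */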
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
                                unsigned int count,
                                unsigned int flags)
{
        struct intel_timeline *tl;
        unsigned int idx;

        while (count--) {
                unsigned long cacheline;
                int err;

                tl = intel_timeline_create(state->gt, NULL);
                if (IS_ERR(tl))
                        return PTR_ERR(tl);

                cacheline = hwsp_cacheline(tl);
                err = radix_tree_insert(&state->cachelines, cacheline, tl);
                if (err) {
                        if (err == -EEXIST) {
                                pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
                                       cacheline);
                        }
                        intel_timeline_put(tl);
                        return err;
                }

                idx = state->count++ % state->max;
                __mock_hwsp_record(state, idx, tl);
        }

        if (flags & SHUFFLE)
                i915_prandom_shuffle(state->history,
                                     sizeof(*state->history),
                                     min(state->count, state->max),
                                     &state->prng);

        count = i915_prandom_u32_max_state(min(state->count, state->max),
                                           &state->prng);
        while (count--) {
                idx = --state->count % state->max;
                __mock_hwsp_record(state, idx, NULL);
        }

        return 0;
}

static int mock_hwsp_freelist(void *arg)
{
        struct mock_hwsp_freelist state;
        struct drm_i915_private *i915;
        const struct {
                const char *name;
                unsigned int flags;
        } phases[] = {
                { "linear", 0 },
                { "shuffled", SHUFFLE },
                { },
        }, *p;
        unsigned int na;
        int err = 0;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
        state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

        state.gt = &i915->gt;

        /*
         * Create a bunch of timelines and check that their HWSPs do not
         * overlap. Free some, and try again.
         */

        state.max = PAGE_SIZE / sizeof(*state.history);
        state.count = 0;
        state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
        if (!state.history) {
                err = -ENOMEM;
                goto err_put;
        }

        for (p = phases; p->name; p++) {
                pr_debug("%s(%s)\n", __func__, p->name);
                for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
                        err = __mock_hwsp_timeline(&state, na, p->flags);
                        if (err)
                                goto out;
                }
        }

out:
        for (na = 0; na < state.max; na++)
                __mock_hwsp_record(&state, na, NULL);
        kfree(state.history);
err_put:
        drm_dev_put(&i915->drm);
        return err;
}

struct __igt_sync {
        const char *name;
        u32 seqno;
        bool expected;
        bool set;
};

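/*
 * Apply one step of the igt_sync table: check that the (ctx, seqno) query
 * matches the expected verdict, and optionally record the pair as completed.
 */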
static int __igt_sync(struct intel_timeline *tl,
                      u64 ctx,
                      const struct __igt_sync *p,
                      const char *name)
{
        int ret;

        if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
                pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
                       name, p->name, ctx, p->seqno, yesno(p->expected));
                return -EINVAL;
        }

        if (p->set) {
                ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
                if (ret)
                        return ret;
        }

        return 0;
}

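/*
 * Run the table of seqno transitions (including wraparound) against context
 * ids spread across every bit order, with both loop nestings, to exercise
 * the timeline's sync map.
 */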
static int igt_sync(void *arg)
{
        const struct __igt_sync pass[] = {
                { "unset", 0, false, false },
                { "new", 0, false, true },
                { "0a", 0, true, true },
                { "1a", 1, false, true },
                { "1b", 1, true, true },
                { "0b", 0, true, false },
                { "2a", 2, false, true },
                { "4", 4, false, true },
                { "INT_MAX", INT_MAX, false, true },
                { "INT_MAX-1", INT_MAX-1, true, false },
                { "INT_MAX+1", (u32)INT_MAX+1, false, true },
                { "INT_MAX", INT_MAX, true, false },
                { "UINT_MAX", UINT_MAX, false, true },
                { "wrap", 0, false, true },
                { "unwrap", UINT_MAX, true, false },
                {},
        }, *p;
        struct intel_timeline tl;
        int order, offset;
        int ret = -ENODEV;

        mock_timeline_init(&tl, 0);
        for (p = pass; p->name; p++) {
                for (order = 1; order < 64; order++) {
                        for (offset = -1; offset <= (order > 1); offset++) {
                                u64 ctx = BIT_ULL(order) + offset;

                                ret = __igt_sync(&tl, ctx, p, "1");
                                if (ret)
                                        goto out;
                        }
                }
        }
        mock_timeline_fini(&tl);

        mock_timeline_init(&tl, 0);
        for (order = 1; order < 64; order++) {
                for (offset = -1; offset <= (order > 1); offset++) {
                        u64 ctx = BIT_ULL(order) + offset;

                        for (p = pass; p->name; p++) {
                                ret = __igt_sync(&tl, ctx, p, "2");
                                if (ret)
                                        goto out;
                        }
                }
        }

out:
        mock_timeline_fini(&tl);
        return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
        return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
        struct rnd_state prng;
        struct intel_timeline tl;
        unsigned long end_time, count;
        u64 prng32_1M;
        ktime_t kt;
        int order, last_order;

        mock_timeline_init(&tl, 0);

        /* Lookups from cache are very fast and so the random number generation
         * and the loop itself become a significant factor in the per-iteration
         * timings. We try to compensate for this by measuring the overhead of
         * the prng and subtracting it from the reported results.
         */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 x;

                /* Make sure the compiler doesn't optimise away the prng call */
                WRITE_ONCE(x, prandom_u32_state(&prng));

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_debug("%s: %lu random evaluations, %lluns/prng\n",
                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

        /* Benchmark (only) setting random context ids */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u64 id = i915_prandom_u64_state(&prng);

                __intel_timeline_sync_set(&tl, id, 0);
                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                u64 id = i915_prandom_u64_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
                        mock_timeline_fini(&tl);
                        pr_err("Lookup of %llu failed\n", id);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark setting the first N (in order) contexts */
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                __intel_timeline_sync_set(&tl, count++, 0);
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
                        pr_err("Lookup of %lu failed\n", end_time);
                        mock_timeline_fini(&tl);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark searching for a random context id and maybe changing it */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 id = random_engine(&prng);
                u32 seqno = prandom_u32_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, seqno))
                        __intel_timeline_sync_set(&tl, id, seqno);

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        mock_timeline_fini(&tl);
        cond_resched();

        /* Benchmark searching for a known context id and changing the seqno */
        for (last_order = 1, order = 1; order < 32;
             ({ int tmp = last_order; last_order = order; order += tmp; })) {
                unsigned int mask = BIT(order) - 1;

                mock_timeline_init(&tl, 0);

                count = 0;
                kt = ktime_get();
                end_time = jiffies + HZ/10;
                do {
                        /* Without assuming too many details of the underlying
                         * implementation, try to identify its phase-changes
                         * (if any)!
                         */
                        u64 id = (u64)(count & mask) << order;

                        __intel_timeline_sync_is_later(&tl, id, 0);
                        __intel_timeline_sync_set(&tl, id, 0);

                        count++;
                } while (!time_after(jiffies, end_time));
                kt = ktime_sub(ktime_get(), kt);
                pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
                        __func__, count, order,
                        (long long)div64_ul(ktime_to_ns(kt), count));
                mock_timeline_fini(&tl);
                cond_resched();
        }

        return 0;
}

int intel_timeline_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(mock_hwsp_freelist),
                SUBTEST(igt_sync),
                SUBTEST(bench_sync),
        };

        return i915_subtests(tests, NULL);
}

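/*
 * Emit an MI_STORE_DWORD_IMM into the request's ring to write @value to the
 * GGTT address @addr, using the command encoding for the current gen.
 */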
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (INTEL_GEN(rq->i915) >= 8) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = addr;
                *cs++ = 0;
                *cs++ = value;
        } else if (INTEL_GEN(rq->i915) >= 4) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = 0;
                *cs++ = addr;
                *cs++ = value;
        } else {
                *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                *cs++ = addr;
                *cs++ = value;
                *cs++ = MI_NOOP;
        }

        intel_ring_advance(rq, cs);

        return 0;
}

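/*
 * Submit a kernel request on @engine that writes @value into the timeline's
 * HWSP slot; the returned request holds a reference for the caller to wait on.
 */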
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
        struct i915_request *rq;
        int err;

        err = intel_timeline_pin(tl);
        if (err) {
                rq = ERR_PTR(err);
                goto out;
        }

        rq = intel_engine_create_kernel_request(engine);
        if (IS_ERR(rq))
                goto out_unpin;

        i915_request_get(rq);

        err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
        i915_request_add(rq);
        if (err) {
                i915_request_put(rq);
                rq = ERR_PTR(err);
        }

out_unpin:
        intel_timeline_unpin(tl);
out:
        if (IS_ERR(rq))
                pr_err("Failed to write to timeline!\n");
        return rq;
}

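/*
 * Create a timeline and check that its breadcrumb slot was initialised to
 * the timeline's current seqno.
 */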
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
        struct intel_timeline *tl;

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return tl;

        if (*tl->hwsp_seqno != tl->seqno) {
                pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
                       *tl->hwsp_seqno, tl->seqno);
                intel_timeline_put(tl);
                return ERR_PTR(-EINVAL);
        }

        return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for_each_engine(engine, gt, id) {
                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                for (n = 0; n < NUM_TIMELINES; n++) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots with adjacent
         * engines.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for (n = 0; n < NUM_TIMELINES; n++) {
                for_each_engine(engine, gt, id) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        if (!intel_engine_can_store_dword(engine))
                                continue;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        intel_engine_pm_get(engine);
                        rq = tl_write(tl, engine, count);
                        intel_engine_pm_put(engine);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct intel_timeline *tl;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Across a seqno wrap, we need to keep the old cacheline alive for
         * foreign GPU references.
         */

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                goto out_free;

        err = intel_timeline_pin(tl);
        if (err)
                goto out_free;

        for_each_engine(engine, gt, id) {
                const u32 *hwsp_seqno[2];
                struct i915_request *rq;
                u32 seqno[2];

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out;
                }

                tl->seqno = -4u;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
                         seqno[0], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[0] = tl->hwsp_seqno;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
                         seqno[1], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[1] = tl->hwsp_seqno;

                /* With wrap should come a new hwsp */
                GEM_BUG_ON(seqno[1] >= seqno[0]);
                GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

                i915_request_add(rq);

                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline writes timed out!\n");
                        err = -EIO;
                        goto out;
                }

                if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
                        pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
                               *hwsp_seqno[0], *hwsp_seqno[1],
                               seqno[0], seqno[1]);
                        err = -EINVAL;
                        goto out;
                }

                intel_gt_retire_requests(gt); /* recycle HWSP */
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        intel_timeline_unpin(tl);
out_free:
        intel_timeline_put(tl);
        return err;
}

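/*
 * Park the heartbeat (while holding an engine-pm wakeref) so that no
 * background kernel requests are emitted while we tamper with the kernel
 * context's timeline; engine_heartbeat_enable() undoes both steps.
 */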
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
        engine->props.heartbeat_interval_ms = 0;

        intel_engine_pm_get(engine);
        intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
        intel_engine_pm_put(engine);

        engine->props.heartbeat_interval_ms =
                engine->defaults.heartbeat_interval_ms;
}

static int live_hwsp_rollover_kernel(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Run the host for long enough, and even the kernel context will
         * see a seqno rollover.
         */

        for_each_engine(engine, gt, id) {
                struct intel_context *ce = engine->kernel_context;
                struct intel_timeline *tl = ce->timeline;
                struct i915_request *rq[3] = {};
                int i;

                engine_heartbeat_disable(engine);
                if (intel_gt_wait_for_idle(gt, HZ / 2)) {
                        err = -EIO;
                        goto out;
                }

                GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
                tl->seqno = 0;
                timeline_rollback(tl);
                timeline_rollback(tl);
                WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        struct i915_request *this;

                        this = i915_request_create(ce);
                        if (IS_ERR(this)) {
                                err = PTR_ERR(this);
                                goto out;
                        }

                        pr_debug("%s: create fence.seqno:%d\n",
                                 engine->name,
                                 lower_32_bits(this->fence.seqno));

                        GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

                        rq[i] = i915_request_get(this);
                        i915_request_add(this);
                }

                /* We expected a wrap! */
                GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

                if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline wrap timed out!\n");
                        err = -EIO;
                        goto out;
                }

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        if (!i915_request_completed(rq[i])) {
                                pr_err("Pre-wrap request not completed!\n");
                                err = -EINVAL;
                                goto out;
                        }
                }

out:
                for (i = 0; i < ARRAY_SIZE(rq); i++)
                        i915_request_put(rq[i]);
                engine_heartbeat_enable(engine);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        return err;
}

static int live_hwsp_rollover_user(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Simulate a long running user context, and force the seqno wrap
         * on the user's timeline.
         */

        for_each_engine(engine, gt, id) {
                struct i915_request *rq[3] = {};
                struct intel_timeline *tl;
                struct intel_context *ce;
                int i;

                ce = intel_context_create(engine);
                if (IS_ERR(ce))
                        return PTR_ERR(ce);

                err = intel_context_alloc_state(ce);
                if (err)
                        goto out;

                tl = ce->timeline;
                if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                        goto out;

                timeline_rollback(tl);
                timeline_rollback(tl);
                WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        struct i915_request *this;

                        this = intel_context_create_request(ce);
                        if (IS_ERR(this)) {
                                err = PTR_ERR(this);
                                goto out;
                        }

                        pr_debug("%s: create fence.seqno:%d\n",
                                 engine->name,
                                 lower_32_bits(this->fence.seqno));

                        GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

                        rq[i] = i915_request_get(this);
                        i915_request_add(this);
                }

                /* We expected a wrap! */
                GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

                if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline wrap timed out!\n");
                        err = -EIO;
                        goto out;
                }

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        if (!i915_request_completed(rq[i])) {
                                pr_err("Pre-wrap request not completed!\n");
                                err = -EINVAL;
                                goto out;
                        }
                }

out:
                for (i = 0; i < ARRAY_SIZE(rq); i++)
                        i915_request_put(rq[i]);
                intel_context_put(ce);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        return err;
}

static int live_hwsp_recycle(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count;
        int err = 0;

        /*
         * Check seqno writes into one timeline at a time. We expect to
         * recycle the breadcrumb slot between iterations and want to confuse
         * neither ourselves nor the GPU.
         */

        count = 0;
        for_each_engine(engine, gt, id) {
                IGT_TIMEOUT(end_time);

                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                do {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                pr_err("Wait for timeline writes timed out!\n");
                                i915_request_put(rq);
                                intel_timeline_put(tl);
                                err = -EIO;
                                break;
                        }

                        if (*tl->hwsp_seqno != count) {
                                pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                                       count, *tl->hwsp_seqno);
                                err = -EINVAL;
                        }

                        i915_request_put(rq);
                        intel_timeline_put(tl);
                        count++;

                        if (err)
                                break;
                } while (!__igt_timeout(end_time, NULL));

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_hwsp_recycle),
                SUBTEST(live_hwsp_engine),
                SUBTEST(live_hwsp_alternate),
                SUBTEST(live_hwsp_wrap),
                SUBTEST(live_hwsp_rollover_kernel),
                SUBTEST(live_hwsp_rollover_user),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return intel_gt_live_subtests(tests, &i915->gt);
}