2 * SPDX-License-Identifier: MIT
4 * Copyright © 2017-2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "../i915_selftest.h"
10 #include "i915_random.h"
12 #include "igt_flush_test.h"
13 #include "mock_gem_device.h"
14 #include "mock_timeline.h"
/*
 * hwsp_page() - return the struct page backing a timeline's HWSP.
 *
 * The hardware status page (HWSP) lives in the GEM object attached to
 * tl->hwsp_ggtt, which must already have its pages pinned. Only the
 * first sg entry is consulted — assumes the HWSP object is backed by a
 * single page (NOTE(review): not visible here; confirm at allocation
 * site).
 */
16 static struct page *hwsp_page(struct i915_timeline *tl)
18 struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
20 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
21 return sg_page(obj->mm.pages->sgl);
/*
 * hwsp_cacheline() - compute a system-unique cacheline index for a
 * timeline's HWSP slot.
 *
 * Combines the kernel virtual address of the backing page with the
 * timeline's byte offset into that page, scaled to cacheline units.
 * Used below as a radix-tree key so that two timelines sharing the
 * same cacheline can be detected as a duplicate allocation.
 */
24 static unsigned long hwsp_cacheline(struct i915_timeline *tl)
26 unsigned long address = (unsigned long)page_address(hwsp_page(tl));
28 return (address + tl->hwsp_offset) / CACHELINE_BYTES;
/* Number of HWSP cacheline slots that fit in one page. */
31 #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
/*
 * Shared state for the mock_hwsp_freelist() selftest:
 * @i915:       mock device the timelines are created against
 * @cachelines: radix tree keyed by hwsp_cacheline() — detects two live
 *              timelines being handed the same HWSP cacheline
 * @history:    ring buffer of the @max most recently created timelines
 * @count:      running total of timelines created
 * @max:        capacity of @history
 * @prng:       deterministic random state, seeded from the selftest seed
 */
33 struct mock_hwsp_freelist {
34 struct drm_i915_private *i915;
35 struct radix_tree_root cachelines;
36 struct i915_timeline **history;
37 unsigned long count, max;
38 struct rnd_state prng;
/*
 * __mock_hwsp_record() - install @tl into a history slot, retiring any
 * previous occupant.
 *
 * Atomically exchanges the slot's old timeline for @tl; the evicted
 * timeline (if any) has its cacheline bookkeeping removed from the
 * radix tree and its reference dropped. Passing tl == NULL simply
 * retires the slot.
 */
45 static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
47 struct i915_timeline *tl)
49 tl = xchg(&state->history[idx], tl);
51 radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
52 i915_timeline_put(tl);
/*
 * __mock_hwsp_timeline() - create a batch of timelines, checking that
 * no two live timelines are handed the same HWSP cacheline.
 *
 * Each new timeline's cacheline index is inserted into the radix tree;
 * -EEXIST from radix_tree_insert() means the HWSP allocator handed out
 * a slot that is still in use — a duplicate allocation. Successfully
 * created timelines are recorded into the history ring (evicting and
 * freeing older ones). Depending on the flags (SHUFFLE), the history
 * is randomly permuted and a random number of entries retired, to
 * exercise the allocator's freelist in varied orders.
 * NOTE(review): the count/flags parameters and loop structure are on
 * elided lines — behavior above the slot recycling is inferred; verify
 * against the full source.
 */
56 static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
60 struct i915_timeline *tl;
64 unsigned long cacheline;
67 tl = i915_timeline_create(state->i915, NULL);
71 cacheline = hwsp_cacheline(tl);
72 err = radix_tree_insert(&state->cachelines, cacheline, tl);
75 pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
78 i915_timeline_put(tl);
82 idx = state->count++ % state->max;
83 __mock_hwsp_record(state, idx, tl);
/* Optionally shuffle the live timelines so frees hit the allocator in random order. */
87 i915_prandom_shuffle(state->history,
88 sizeof(*state->history),
89 min(state->count, state->max),
/* Retire a random number of the most recent entries back to the freelist. */
92 count = i915_prandom_u32_max_state(min(state->count, state->max),
95 idx = --state->count % state->max;
96 __mock_hwsp_record(state, idx, NULL);
/*
 * mock_hwsp_freelist() - mock-device selftest for the HWSP allocator.
 *
 * Creates batches of timelines (sizes swept over prime numbers up to
 * twice a page's worth of cachelines) and checks via the radix tree
 * that no two live timelines ever share an HWSP cacheline; then frees
 * some and repeats. Runs each phase in the phases[] table (at least
 * the SHUFFLE variant is visible here).
 */
102 static int mock_hwsp_freelist(void *arg)
104 struct mock_hwsp_freelist state;
110 { "shuffled", SHUFFLE },
116 INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
117 state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
119 state.i915 = mock_gem_device();
124 * Create a bunch of timelines and check that their HWSP do not overlap.
125 * Free some, and try again.
/* History sized so the ring itself occupies exactly one page. */
128 state.max = PAGE_SIZE / sizeof(*state.history);
130 state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
131 if (!state.history) {
/* struct_mutex serialises timeline creation/destruction on the mock device. */
136 mutex_lock(&state.i915->drm.struct_mutex);
137 for (p = phases; p->name; p++) {
138 pr_debug("%s(%s)\n", __func__, p->name);
139 for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
140 err = __mock_hwsp_timeline(&state, na, p->flags);
/* Retire every remaining slot before tearing the state down. */
147 for (na = 0; na < state.max; na++)
148 __mock_hwsp_record(&state, na, NULL);
149 mutex_unlock(&state.i915->drm.struct_mutex);
150 kfree(state.history);
152 drm_dev_put(&state.i915->drm);
/*
 * __igt_sync() - apply one step of a sync-point test vector.
 *
 * Checks that __i915_timeline_sync_is_later(ctx, seqno) matches the
 * vector's expected answer, reporting a mismatch with the pass name
 * (@name) and step name (p->name); then records the seqno with
 * __i915_timeline_sync_set() so subsequent steps observe it.
 * NOTE(review): whether the set is unconditional or gated on a vector
 * field is on an elided line — confirm against the full source.
 */
163 static int __igt_sync(struct i915_timeline *tl,
165 const struct __igt_sync *p,
170 if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
171 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
172 name, p->name, ctx, p->seqno, yesno(p->expected));
177 ret = __i915_timeline_sync_set(tl, ctx, p->seqno);
/*
 * igt_sync() - functional test of timeline sync-point tracking.
 *
 * The pass[] table drives a seqno sequence through one context id,
 * covering monotonic advance, repeats, the signed boundary at INT_MAX,
 * and the full u32 wrap (UINT_MAX -> 0 -> "unwrap"); each entry states
 * whether sync_is_later must already report the point as passed.
 *
 * Two sweeps over context ids of the form BIT(order) + offset
 * (offset in {-1, 0, +1}) probe the id-indexing structure near
 * power-of-two boundaries:
 *   pass "1": for each table entry, visit every context id;
 *   pass "2": for each context id, replay the entire table.
 */
185 static int igt_sync(void *arg)
187 const struct __igt_sync pass[] = {
188 { "unset", 0, false, false },
189 { "new", 0, false, true },
190 { "0a", 0, true, true },
191 { "1a", 1, false, true },
192 { "1b", 1, true, true },
193 { "0b", 0, true, false },
194 { "2a", 2, false, true },
195 { "4", 4, false, true },
196 { "INT_MAX", INT_MAX, false, true },
197 { "INT_MAX-1", INT_MAX-1, true, false },
198 { "INT_MAX+1", (u32)INT_MAX+1, false, true },
199 { "INT_MAX", INT_MAX, true, false },
200 { "UINT_MAX", UINT_MAX, false, true },
201 { "wrap", 0, false, true },
202 { "unwrap", UINT_MAX, true, false },
205 struct i915_timeline tl;
209 mock_timeline_init(&tl, 0);
210 for (p = pass; p->name; p++) {
211 for (order = 1; order < 64; order++) {
/* offset ranges over -1..0 (and +1 once order > 1 avoids overlap with the next power of two). */
212 for (offset = -1; offset <= (order > 1); offset++) {
213 u64 ctx = BIT_ULL(order) + offset;
215 ret = __igt_sync(&tl, ctx, p, "1");
221 mock_timeline_fini(&tl);
/* Second sweep: fresh timeline, iteration order inverted. */
223 mock_timeline_init(&tl, 0);
224 for (order = 1; order < 64; order++) {
225 for (offset = -1; offset <= (order > 1); offset++) {
226 u64 ctx = BIT_ULL(order) + offset;
228 for (p = pass; p->name; p++) {
229 ret = __igt_sync(&tl, ctx, p, "2");
237 mock_timeline_fini(&tl);
/* Pick a uniformly random engine id in [0, I915_NUM_ENGINES). */
241 static unsigned int random_engine(struct rnd_state *rnd)
243 return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
/*
 * bench_sync() - micro-benchmark of timeline sync-point tracking.
 *
 * Each phase runs for ~100ms (jiffies + HZ/10) and reports ns/op.
 * The prng's own cost is measured first and stored as prng32_1M (cost
 * of 2^20 u32 draws), then subtracted from phases that consume random
 * numbers, so the reported figures approximate the tracking structure
 * alone. Phases: random u64 insert, random u64 lookup, in-order
 * insert, in-order lookup, mixed engine-id lookup-or-insert, and a
 * Fibonacci sweep of cyclic id patterns to expose phase changes in the
 * underlying structure.
 */
246 static int bench_sync(void *arg)
248 struct rnd_state prng;
249 struct i915_timeline tl;
250 unsigned long end_time, count;
253 int order, last_order;
255 mock_timeline_init(&tl, 0);
257 /* Lookups from cache are very fast and so the random number generation
258 * and the loop itself becomes a significant factor in the per-iteration
259 * timings. We try to compensate the results by measuring the overhead
260 * of the prng and subtract it from the reported results.
262 prandom_seed_state(&prng, i915_selftest.random_seed);
265 end_time = jiffies + HZ/10;
269 /* Make sure the compiler doesn't optimise away the prng call */
270 WRITE_ONCE(x, prandom_u32_state(&prng));
273 } while (!time_after(jiffies, end_time));
274 kt = ktime_sub(ktime_get(), kt);
275 pr_debug("%s: %lu random evaluations, %lluns/prng\n",
276 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
/* Cost of 2^20 prng draws; later phases subtract multiples of this. */
277 prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
279 /* Benchmark (only) setting random context ids */
280 prandom_seed_state(&prng, i915_selftest.random_seed);
283 end_time = jiffies + HZ/10;
285 u64 id = i915_prandom_u64_state(&prng);
287 __i915_timeline_sync_set(&tl, id, 0);
289 } while (!time_after(jiffies, end_time));
290 kt = ktime_sub(ktime_get(), kt);
/* Each u64 id costs two u32 prng draws; remove that overhead. */
291 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
292 pr_info("%s: %lu random insertions, %lluns/insert\n",
293 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
295 /* Benchmark looking up the exact same context ids as we just set */
/* Reseed so the lookup stream replays the insertion stream exactly. */
296 prandom_seed_state(&prng, i915_selftest.random_seed);
300 u64 id = i915_prandom_u64_state(&prng);
302 if (!__i915_timeline_sync_is_later(&tl, id, 0)) {
303 mock_timeline_fini(&tl);
304 pr_err("Lookup of %llu failed\n", id);
308 kt = ktime_sub(ktime_get(), kt);
309 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
310 pr_info("%s: %lu random lookups, %lluns/lookup\n",
311 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
313 mock_timeline_fini(&tl);
/* Fresh timeline for the sequential-id phases. */
316 mock_timeline_init(&tl, 0);
318 /* Benchmark setting the first N (in order) contexts */
321 end_time = jiffies + HZ/10;
323 __i915_timeline_sync_set(&tl, count++, 0);
324 } while (!time_after(jiffies, end_time));
325 kt = ktime_sub(ktime_get(), kt);
326 pr_info("%s: %lu in-order insertions, %lluns/insert\n",
327 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
329 /* Benchmark looking up the exact same context ids as we just set */
333 if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) {
334 pr_err("Lookup of %lu failed\n", end_time);
335 mock_timeline_fini(&tl);
339 kt = ktime_sub(ktime_get(), kt);
340 pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
341 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
343 mock_timeline_fini(&tl);
346 mock_timeline_init(&tl, 0);
348 /* Benchmark searching for a random context id and maybe changing it */
349 prandom_seed_state(&prng, i915_selftest.random_seed);
352 end_time = jiffies + HZ/10;
354 u32 id = random_engine(&prng);
355 u32 seqno = prandom_u32_state(&prng);
/* Lookup first, insert only on miss — models the common runtime pattern. */
357 if (!__i915_timeline_sync_is_later(&tl, id, seqno))
358 __i915_timeline_sync_set(&tl, id, seqno);
361 } while (!time_after(jiffies, end_time));
362 kt = ktime_sub(ktime_get(), kt);
363 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
364 pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
365 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
366 mock_timeline_fini(&tl);
369 /* Benchmark searching for a known context id and changing the seqno */
/* Fibonacci step through orders: 1, 1, 2, 3, 5, ... up to 32 bits. */
370 for (last_order = 1, order = 1; order < 32;
371 ({ int tmp = last_order; last_order = order; order += tmp; })) {
372 unsigned int mask = BIT(order) - 1;
374 mock_timeline_init(&tl, 0);
378 end_time = jiffies + HZ/10;
380 /* Without assuming too many details of the underlying
381 * implementation, try to identify its phase-changes
/* Cycle through 2^order ids, shifted so consecutive ids are far apart. */
384 u64 id = (u64)(count & mask) << order;
386 __i915_timeline_sync_is_later(&tl, id, 0);
387 __i915_timeline_sync_set(&tl, id, 0);
390 } while (!time_after(jiffies, end_time));
391 kt = ktime_sub(ktime_get(), kt);
392 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
393 __func__, count, order,
394 (long long)div64_ul(ktime_to_ns(kt), count));
395 mock_timeline_fini(&tl);
/*
 * i915_timeline_mock_selftests() - entry point for the mock-device
 * timeline tests (no hardware required). Registers mock_hwsp_freelist
 * here; further SUBTEST entries are on elided lines.
 */
402 int i915_timeline_mock_selftests(void)
404 static const struct i915_subtest tests[] = {
405 SUBTEST(mock_hwsp_freelist),
410 return i915_subtests(tests, NULL);
/*
 * emit_ggtt_store_dw() - emit commands into @rq's ring that store
 * @value to the GGTT address @addr.
 *
 * Reserves 4 dwords and selects the MI_STORE_DWORD encoding for the
 * request's hardware generation: gen8+ and gen4+ both use
 * MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT (with different address
 * layouts on the elided lines); older gens fall back to
 * MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL.
 */
413 static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
417 cs = intel_ring_begin(rq, 4);
421 if (INTEL_GEN(rq->i915) >= 8) {
422 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
426 } else if (INTEL_GEN(rq->i915) >= 4) {
427 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
432 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
438 intel_ring_advance(rq, cs);
/*
 * tl_write() - submit a request on @engine that writes @value into
 * @tl's HWSP slot.
 *
 * Pins the timeline (so hwsp_offset is a valid GGTT address for the
 * duration), allocates a request on the engine's kernel context, emits
 * the GGTT store and submits it. Returns the request (caller observes
 * completion) or an ERR_PTR on failure. Caller must hold
 * struct_mutex — asserted below because the returned request reference
 * is not explicitly taken ("lazy rq refs").
 */
443 static struct i915_request *
444 tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
446 struct i915_request *rq;
449 lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */
451 err = i915_timeline_pin(tl);
457 rq = i915_request_alloc(engine, engine->i915->kernel_context);
461 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
462 i915_request_add(rq);
467 i915_timeline_unpin(tl);
470 pr_err("Failed to write to timeline!\n");
/*
 * checked_i915_timeline_create() - create a timeline and verify its
 * HWSP breadcrumb was initialised to the timeline's current seqno.
 *
 * Returns the new timeline, or ERR_PTR(-EINVAL) (after dropping the
 * reference) if the stored breadcrumb does not match tl->seqno.
 */
474 static struct i915_timeline *
475 checked_i915_timeline_create(struct drm_i915_private *i915)
477 struct i915_timeline *tl;
479 tl = i915_timeline_create(i915, NULL);
483 if (*tl->hwsp_seqno != tl->seqno) {
484 pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
485 *tl->hwsp_seqno, tl->seqno);
486 i915_timeline_put(tl);
487 return ERR_PTR(-EINVAL);
/*
 * live_hwsp_engine() - live (real hardware) test: per-engine HWSP
 * independence.
 *
 * For each engine capable of storing a dword, creates NUM_TIMELINES
 * timelines and submits a write of the timeline's global index into
 * its HWSP slot. After flushing, verifies each timeline's HWSP holds
 * its own index — i.e. no write leaked into a neighbouring slot.
 * Outer loop is per-engine, so each engine's batch of timelines is
 * written back-to-back (contrast live_hwsp_alternate()).
 */
493 static int live_hwsp_engine(void *arg)
495 #define NUM_TIMELINES 4096
496 struct drm_i915_private *i915 = arg;
497 struct i915_timeline **timelines;
498 struct intel_engine_cs *engine;
499 enum intel_engine_id id;
500 intel_wakeref_t wakeref;
501 unsigned long count, n;
505 * Create a bunch of timelines and check we can write
506 * independently to each of their breadcrumb slots.
509 timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
515 mutex_lock(&i915->drm.struct_mutex);
516 wakeref = intel_runtime_pm_get(i915);
519 for_each_engine(engine, i915, id) {
520 if (!intel_engine_can_store_dword(engine))
523 for (n = 0; n < NUM_TIMELINES; n++) {
524 struct i915_timeline *tl;
525 struct i915_request *rq;
527 tl = checked_i915_timeline_create(i915);
/* Write the global index so every slot's expected value is unique. */
533 rq = tl_write(tl, engine, count);
535 i915_timeline_put(tl);
540 timelines[count++] = tl;
/* Wait for all writes to land before inspecting the HWSPs. */
545 if (igt_flush_test(i915, I915_WAIT_LOCKED))
548 for (n = 0; n < count; n++) {
549 struct i915_timeline *tl = timelines[n];
551 if (!err && *tl->hwsp_seqno != n) {
552 pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
556 i915_timeline_put(tl);
559 intel_runtime_pm_put(i915, wakeref);
560 mutex_unlock(&i915->drm.struct_mutex);
/*
 * live_hwsp_alternate() - as live_hwsp_engine(), but with the loop
 * nesting inverted: for each timeline index, every engine writes in
 * turn. Adjacent HWSP writes therefore come from different engines,
 * stressing cross-engine independence of neighbouring breadcrumb
 * slots rather than per-engine batches.
 */
568 static int live_hwsp_alternate(void *arg)
570 #define NUM_TIMELINES 4096
571 struct drm_i915_private *i915 = arg;
572 struct i915_timeline **timelines;
573 struct intel_engine_cs *engine;
574 enum intel_engine_id id;
575 intel_wakeref_t wakeref;
576 unsigned long count, n;
580 * Create a bunch of timelines and check we can write
581 * independently to each of their breadcrumb slots with adjacent
585 timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
591 mutex_lock(&i915->drm.struct_mutex);
592 wakeref = intel_runtime_pm_get(i915);
/* Note: engines iterate in the INNER loop here, unlike live_hwsp_engine(). */
595 for (n = 0; n < NUM_TIMELINES; n++) {
596 for_each_engine(engine, i915, id) {
597 struct i915_timeline *tl;
598 struct i915_request *rq;
600 if (!intel_engine_can_store_dword(engine))
603 tl = checked_i915_timeline_create(i915);
609 rq = tl_write(tl, engine, count);
611 i915_timeline_put(tl);
616 timelines[count++] = tl;
621 if (igt_flush_test(i915, I915_WAIT_LOCKED))
624 for (n = 0; n < count; n++) {
625 struct i915_timeline *tl = timelines[n];
627 if (!err && *tl->hwsp_seqno != n) {
628 pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
632 i915_timeline_put(tl);
635 intel_runtime_pm_put(i915, wakeref);
636 mutex_unlock(&i915->drm.struct_mutex);
/*
 * live_hwsp_wrap() - live test: the old HWSP cacheline must survive a
 * seqno wrap.
 *
 * Forces a wrap by requesting two seqnos around the u32 boundary
 * (verified by the GEM_BUG_ON that seqno[1] < seqno[0]); the wrap must
 * allocate a fresh HWSP cacheline (second GEM_BUG_ON). Each engine
 * then stores both seqnos via their respective hwsp_seqno pointers and
 * we check both values landed — proving the pre-wrap cacheline stayed
 * valid for in-flight ("foreign") GPU references.
 */
644 static int live_hwsp_wrap(void *arg)
646 struct drm_i915_private *i915 = arg;
647 struct intel_engine_cs *engine;
648 struct i915_timeline *tl;
649 enum intel_engine_id id;
650 intel_wakeref_t wakeref;
654 * Across a seqno wrap, we need to keep the old cacheline alive for
655 * foreign GPU references.
658 mutex_lock(&i915->drm.struct_mutex);
659 wakeref = intel_runtime_pm_get(i915);
661 tl = i915_timeline_create(i915, NULL);
/* The wrap machinery only exists for timelines with their own cacheline. */
666 if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
669 err = i915_timeline_pin(tl);
673 for_each_engine(engine, i915, id) {
674 const u32 *hwsp_seqno[2];
675 struct i915_request *rq;
678 if (!intel_engine_can_store_dword(engine))
681 rq = i915_request_alloc(engine, i915->kernel_context);
689 err = i915_timeline_get_seqno(tl, rq, &seqno[0]);
691 i915_request_add(rq);
694 pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
695 seqno[0], tl->hwsp_offset);
697 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
699 i915_request_add(rq);
/* Capture the pre-wrap cacheline pointer before requesting the next seqno. */
702 hwsp_seqno[0] = tl->hwsp_seqno;
704 err = i915_timeline_get_seqno(tl, rq, &seqno[1]);
706 i915_request_add(rq);
709 pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
710 seqno[1], tl->hwsp_offset);
712 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
714 i915_request_add(rq);
717 hwsp_seqno[1] = tl->hwsp_seqno;
719 /* With wrap should come a new hwsp */
720 GEM_BUG_ON(seqno[1] >= seqno[0]);
721 GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
723 i915_request_add(rq);
725 if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
726 pr_err("Wait for timeline writes timed out!\n");
/* Both cachelines must hold the value their store targeted. */
731 if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
732 pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
733 *hwsp_seqno[0], *hwsp_seqno[1],
739 i915_retire_requests(i915); /* recycle HWSP */
743 if (igt_flush_test(i915, I915_WAIT_LOCKED))
746 i915_timeline_unpin(tl);
748 i915_timeline_put(tl);
750 intel_runtime_pm_put(i915, wakeref);
751 mutex_unlock(&i915->drm.struct_mutex);
/*
 * live_hwsp_recycle() - live test: HWSP slot recycling between
 * serial timelines.
 *
 * One timeline at a time per engine: create, write the running count
 * into its HWSP, wait for completion, verify the stored value, then
 * destroy it and park the timelines to push the slot back to the
 * freelist — so the next iteration likely reuses the same cacheline.
 * A stale value after reuse would indicate the GPU and allocator
 * disagree about slot ownership. Repeats until the IGT timeout.
 */
756 static int live_hwsp_recycle(void *arg)
758 struct drm_i915_private *i915 = arg;
759 struct intel_engine_cs *engine;
760 enum intel_engine_id id;
761 intel_wakeref_t wakeref;
766 * Check seqno writes into one timeline at a time. We expect to
767 * recycle the breadcrumb slot between iterations and neither
768 * want to confuse ourselves or the GPU.
771 mutex_lock(&i915->drm.struct_mutex);
772 wakeref = intel_runtime_pm_get(i915);
775 for_each_engine(engine, i915, id) {
776 IGT_TIMEOUT(end_time);
778 if (!intel_engine_can_store_dword(engine))
782 struct i915_timeline *tl;
783 struct i915_request *rq;
785 tl = checked_i915_timeline_create(i915);
791 rq = tl_write(tl, engine, count);
793 i915_timeline_put(tl);
798 if (i915_request_wait(rq,
801 pr_err("Wait for timeline writes timed out!\n");
802 i915_timeline_put(tl);
807 if (*tl->hwsp_seqno != count) {
808 pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
809 count, *tl->hwsp_seqno);
813 i915_timeline_put(tl);
819 i915_timelines_park(i915); /* Encourage recycling! */
820 } while (!__igt_timeout(end_time, NULL));
824 if (igt_flush_test(i915, I915_WAIT_LOCKED))
826 intel_runtime_pm_put(i915, wakeref);
827 mutex_unlock(&i915->drm.struct_mutex);
/*
 * i915_timeline_live_selftests() - entry point for the live (real
 * hardware) timeline tests. Skipped early if the GPU is terminally
 * wedged, since the tests require request submission.
 */
832 int i915_timeline_live_selftests(struct drm_i915_private *i915)
834 static const struct i915_subtest tests[] = {
835 SUBTEST(live_hwsp_recycle),
836 SUBTEST(live_hwsp_engine),
837 SUBTEST(live_hwsp_alternate),
838 SUBTEST(live_hwsp_wrap),
841 if (i915_terminally_wedged(i915))
844 return i915_subtests(tests, i915);