drivers/gpu/drm/i915/gt/intel_timeline.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2016-2018 Intel Corporation
   4  */
   5
   6 #include "i915_drv.h"
   7
   8 #include "i915_active.h"
   9 #include "i915_syncmap.h"
  10 #include "intel_gt.h"
  11 #include "intel_ring.h"
  12 #include "intel_timeline.h"
  13
  14 #define TIMELINE_SEQNO_BYTES 8
  15
  16 static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
  17 {
  18         struct drm_i915_private *i915 = gt->i915;
  19         struct drm_i915_gem_object *obj;
  20         struct i915_vma *vma;
  21
  22         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
  23         if (IS_ERR(obj))
  24                 return ERR_CAST(obj);
  25
  26         i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
  27
  28         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
  29         if (IS_ERR(vma))
  30                 i915_gem_object_put(obj);
  31
  32         return vma;
  33 }
  34
  35 static void __timeline_retire(struct i915_active *active)
  36 {
  37         struct intel_timeline *tl =
  38                 container_of(active, typeof(*tl), active);
  39
  40         i915_vma_unpin(tl->hwsp_ggtt);
  41         intel_timeline_put(tl);
  42 }
  43
  44 static int __timeline_active(struct i915_active *active)
  45 {
  46         struct intel_timeline *tl =
  47                 container_of(active, typeof(*tl), active);
  48
  49         __i915_vma_pin(tl->hwsp_ggtt);
  50         intel_timeline_get(tl);
  51         return 0;
  52 }
  53
  54 I915_SELFTEST_EXPORT int
  55 intel_timeline_pin_map(struct intel_timeline *timeline)
  56 {
  57         struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
  58         u32 ofs = offset_in_page(timeline->hwsp_offset);
  59         void *vaddr;
  60
  61         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
  62         if (IS_ERR(vaddr))
  63                 return PTR_ERR(vaddr);
  64
  65         timeline->hwsp_map = vaddr;
  66         timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
  67         clflush(vaddr + ofs);
  68
  69         return 0;
  70 }
  71
  72 static int intel_timeline_init(struct intel_timeline *timeline,
  73                                struct intel_gt *gt,
  74                                struct i915_vma *hwsp,
  75                                unsigned int offset)
  76 {
  77         kref_init(&timeline->kref);
  78         atomic_set(&timeline->pin_count, 0);
  79
  80         timeline->gt = gt;
  81
  82         if (hwsp) {
  83                 timeline->hwsp_offset = offset;
  84                 timeline->hwsp_ggtt = i915_vma_get(hwsp);
  85         } else {
  86                 timeline->has_initial_breadcrumb = true;
  87                 hwsp = hwsp_alloc(gt);
  88                 if (IS_ERR(hwsp))
  89                         return PTR_ERR(hwsp);
  90                 timeline->hwsp_ggtt = hwsp;
  91         }
  92
  93         timeline->hwsp_map = NULL;
  94         timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;
  95
  96         GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
  97
  98         timeline->fence_context = dma_fence_context_alloc(1);
  99
 100         mutex_init(&timeline->mutex);
 101
 102         INIT_ACTIVE_FENCE(&timeline->last_request);
 103         INIT_LIST_HEAD(&timeline->requests);
 104
 105         i915_syncmap_init(&timeline->sync);
 106         i915_active_init(&timeline->active, __timeline_active,
 107                          __timeline_retire, 0);
 108
 109         return 0;
 110 }
 111
 112 void intel_gt_init_timelines(struct intel_gt *gt)
 113 {
 114         struct intel_gt_timelines *timelines = &gt->timelines;
 115
 116         spin_lock_init(&timelines->lock);
 117         INIT_LIST_HEAD(&timelines->active_list);
 118 }
 119
 120 static void intel_timeline_fini(struct rcu_head *rcu)
 121 {
 122         struct intel_timeline *timeline =
 123                 container_of(rcu, struct intel_timeline, rcu);
 124
 125         if (timeline->hwsp_map)
 126                 i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
 127
 128         i915_vma_put(timeline->hwsp_ggtt);
 129         i915_active_fini(&timeline->active);
 130
 131         /*
 132          * A small race exists between intel_gt_retire_requests_timeout and
 133          * intel_timeline_exit which could result in the syncmap not getting
 134          * free'd. Rather than work to hard to seal this race, simply cleanup
 135          * the syncmap on fini.
 136          */
 137         i915_syncmap_free(&timeline->sync);
 138
 139         kfree(timeline);
 140 }
 141
 142 struct intel_timeline *
 143 __intel_timeline_create(struct intel_gt *gt,
 144                         struct i915_vma *global_hwsp,
 145                         unsigned int offset)
 146 {
 147         struct intel_timeline *timeline;
 148         int err;
 149
 150         timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
 151         if (!timeline)
 152                 return ERR_PTR(-ENOMEM);
 153
 154         err = intel_timeline_init(timeline, gt, global_hwsp, offset);
 155         if (err) {
 156                 kfree(timeline);
 157                 return ERR_PTR(err);
 158         }
 159
 160         return timeline;
 161 }
 162
 163 struct intel_timeline *
 164 intel_timeline_create_from_engine(struct intel_engine_cs *engine,
 165                                   unsigned int offset)
 166 {
 167         struct i915_vma *hwsp = engine->status_page.vma;
 168         struct intel_timeline *tl;
 169
 170         tl = __intel_timeline_create(engine->gt, hwsp, offset);
 171         if (IS_ERR(tl))
 172                 return tl;
 173
 174         /* Borrow a nearby lock; we only create these timelines during init */
 175         mutex_lock(&hwsp->vm->mutex);
 176         list_add_tail(&tl->engine_link, &engine->status_page.timelines);
 177         mutex_unlock(&hwsp->vm->mutex);
 178
 179         return tl;
 180 }
 181
 182 void __intel_timeline_pin(struct intel_timeline *tl)
 183 {
 184         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 185         atomic_inc(&tl->pin_count);
 186 }
 187
 188 int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 189 {
 190         int err;
 191
 192         if (atomic_add_unless(&tl->pin_count, 1, 0))
 193                 return 0;
 194
 195         if (!tl->hwsp_map) {
 196                 err = intel_timeline_pin_map(tl);
 197                 if (err)
 198                         return err;
 199         }
 200
 201         err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
 202         if (err)
 203                 return err;
 204
 205         tl->hwsp_offset =
 206                 i915_ggtt_offset(tl->hwsp_ggtt) +
 207                 offset_in_page(tl->hwsp_offset);
 208         GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 209                  tl->fence_context, tl->hwsp_offset);
 210
 211         i915_active_acquire(&tl->active);
 212         if (atomic_fetch_inc(&tl->pin_count)) {
 213                 i915_active_release(&tl->active);
 214                 __i915_vma_unpin(tl->hwsp_ggtt);
 215         }
 216
 217         return 0;
 218 }
 219
 220 void intel_timeline_reset_seqno(const struct intel_timeline *tl)
 221 {
 222         u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
 223         /* Must be pinned to be writable, and no requests in flight. */
 224         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 225
 226         memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
 227         WRITE_ONCE(*hwsp_seqno, tl->seqno);
 228         clflush(hwsp_seqno);
 229 }
 230
 231 void intel_timeline_enter(struct intel_timeline *tl)
 232 {
 233         struct intel_gt_timelines *timelines = &tl->gt->timelines;
 234
 235         /*
 236          * Pretend we are serialised by the timeline->mutex.
 237          *
 238          * While generally true, there are a few exceptions to the rule
 239          * for the engine->kernel_context being used to manage power
 240          * transitions. As the engine_park may be called from under any
 241          * timeline, it uses the power mutex as a global serialisation
 242          * lock to prevent any other request entering its timeline.
 243          *
 244          * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
 245          *
 246          * However, intel_gt_retire_request() does not know which engine
 247          * it is retiring along and so cannot partake in the engine-pm
 248          * barrier, and there we use the tl->active_count as a means to
 249          * pin the timeline in the active_list while the locks are dropped.
 250          * Ergo, as that is outside of the engine-pm barrier, we need to
 251          * use atomic to manipulate tl->active_count.
 252          */
 253         lockdep_assert_held(&tl->mutex);
 254
 255         if (atomic_add_unless(&tl->active_count, 1, 0))
 256                 return;
 257
 258         spin_lock(&timelines->lock);
 259         if (!atomic_fetch_inc(&tl->active_count)) {
 260                 /*
 261                  * The HWSP is volatile, and may have been lost while inactive,
 262                  * e.g. across suspend/resume. Be paranoid, and ensure that
 263                  * the HWSP value matches our seqno so we don't proclaim
 264                  * the next request as already complete.
 265                  */
 266                 intel_timeline_reset_seqno(tl);
 267                 list_add_tail(&tl->link, &timelines->active_list);
 268         }
 269         spin_unlock(&timelines->lock);
 270 }
 271
 272 void intel_timeline_exit(struct intel_timeline *tl)
 273 {
 274         struct intel_gt_timelines *timelines = &tl->gt->timelines;
 275
 276         /* See intel_timeline_enter() */
 277         lockdep_assert_held(&tl->mutex);
 278
 279         GEM_BUG_ON(!atomic_read(&tl->active_count));
 280         if (atomic_add_unless(&tl->active_count, -1, 1))
 281                 return;
 282
 283         spin_lock(&timelines->lock);
 284         if (atomic_dec_and_test(&tl->active_count))
 285                 list_del(&tl->link);
 286         spin_unlock(&timelines->lock);
 287
 288         /*
 289          * Since this timeline is idle, all bariers upon which we were waiting
 290          * must also be complete and so we can discard the last used barriers
 291          * without loss of information.
 292          */
 293         i915_syncmap_free(&tl->sync);
 294 }
 295
 296 static u32 timeline_advance(struct intel_timeline *tl)
 297 {
 298         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 299         GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
 300
 301         return tl->seqno += 1 + tl->has_initial_breadcrumb;
 302 }
 303
 304 static noinline int
 305 __intel_timeline_get_seqno(struct intel_timeline *tl,
 306                            u32 *seqno)
 307 {
 308         u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);
 309
 310         /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 311         if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
 312                 next_ofs = offset_in_page(next_ofs + BIT(5));
 313
 314         tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
 315         tl->hwsp_seqno = tl->hwsp_map + next_ofs;
 316         intel_timeline_reset_seqno(tl);
 317
 318         *seqno = timeline_advance(tl);
 319         GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
 320         return 0;
 321 }
 322
 323 int intel_timeline_get_seqno(struct intel_timeline *tl,
 324                              struct i915_request *rq,
 325                              u32 *seqno)
 326 {
 327         *seqno = timeline_advance(tl);
 328
 329         /* Replace the HWSP on wraparound for HW semaphores */
 330         if (unlikely(!*seqno && tl->has_initial_breadcrumb))
 331                 return __intel_timeline_get_seqno(tl, seqno);
 332
 333         return 0;
 334 }
 335
 336 int intel_timeline_read_hwsp(struct i915_request *from,
 337                              struct i915_request *to,
 338                              u32 *hwsp)
 339 {
 340         struct intel_timeline *tl;
 341         int err;
 342
 343         rcu_read_lock();
 344         tl = rcu_dereference(from->timeline);
 345         if (i915_request_signaled(from) ||
 346             !i915_active_acquire_if_busy(&tl->active))
 347                 tl = NULL;
 348
 349         if (tl) {
 350                 /* hwsp_offset may wraparound, so use from->hwsp_seqno */
 351                 *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
 352                         offset_in_page(from->hwsp_seqno);
 353         }
 354
 355         /* ensure we wait on the right request, if not, we completed */
 356         if (tl && __i915_request_is_complete(from)) {
 357                 i915_active_release(&tl->active);
 358                 tl = NULL;
 359         }
 360         rcu_read_unlock();
 361
 362         if (!tl)
 363                 return 1;
 364
 365         /* Can't do semaphore waits on kernel context */
 366         if (!tl->has_initial_breadcrumb) {
 367                 err = -EINVAL;
 368                 goto out;
 369         }
 370
 371         err = i915_active_add_request(&tl->active, to);
 372
 373 out:
 374         i915_active_release(&tl->active);
 375         return err;
 376 }
 377
 378 void intel_timeline_unpin(struct intel_timeline *tl)
 379 {
 380         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 381         if (!atomic_dec_and_test(&tl->pin_count))
 382                 return;
 383
 384         i915_active_release(&tl->active);
 385         __i915_vma_unpin(tl->hwsp_ggtt);
 386 }
 387
 388 void __intel_timeline_free(struct kref *kref)
 389 {
 390         struct intel_timeline *timeline =
 391                 container_of(kref, typeof(*timeline), kref);
 392
 393         GEM_BUG_ON(atomic_read(&timeline->pin_count));
 394         GEM_BUG_ON(!list_empty(&timeline->requests));
 395         GEM_BUG_ON(timeline->retire);
 396
 397         call_rcu(&timeline->rcu, intel_timeline_fini);
 398 }
 399
 400 void intel_gt_fini_timelines(struct intel_gt *gt)
 401 {
 402         struct intel_gt_timelines *timelines = &gt->timelines;
 403
 404         GEM_BUG_ON(!list_empty(&timelines->active_list));
 405 }
 406
 407 void intel_gt_show_timelines(struct intel_gt *gt,
 408                              struct drm_printer *m,
 409                              void (*show_request)(struct drm_printer *m,
 410                                                   const struct i915_request *rq,
 411                                                   const char *prefix,
 412                                                   int indent))
 413 {
 414         struct intel_gt_timelines *timelines = &gt->timelines;
 415         struct intel_timeline *tl, *tn;
 416         LIST_HEAD(free);
 417
 418         spin_lock(&timelines->lock);
 419         list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 420                 unsigned long count, ready, inflight;
 421                 struct i915_request *rq, *rn;
 422                 struct dma_fence *fence;
 423
 424                 if (!mutex_trylock(&tl->mutex)) {
 425                         drm_printf(m, "Timeline %llx: busy; skipping\n",
 426                                    tl->fence_context);
 427                         continue;
 428                 }
 429
 430                 intel_timeline_get(tl);
 431                 GEM_BUG_ON(!atomic_read(&tl->active_count));
 432                 atomic_inc(&tl->active_count); /* pin the list element */
 433                 spin_unlock(&timelines->lock);
 434
 435                 count = 0;
 436                 ready = 0;
 437                 inflight = 0;
 438                 list_for_each_entry_safe(rq, rn, &tl->requests, link) {
 439                         if (i915_request_completed(rq))
 440                                 continue;
 441
 442                         count++;
 443                         if (i915_request_is_ready(rq))
 444                                 ready++;
 445                         if (i915_request_is_active(rq))
 446                                 inflight++;
 447                 }
 448
 449                 drm_printf(m, "Timeline %llx: { ", tl->fence_context);
 450                 drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
 451                            count, ready, inflight);
 452                 drm_printf(m, ", seqno: { current: %d, last: %d }",
 453                            *tl->hwsp_seqno, tl->seqno);
 454                 fence = i915_active_fence_get(&tl->last_request);
 455                 if (fence) {
 456                         drm_printf(m, ", engine: %s",
 457                                    to_request(fence)->engine->name);
 458                         dma_fence_put(fence);
 459                 }
 460                 drm_printf(m, " }\n");
 461
 462                 if (show_request) {
 463                         list_for_each_entry_safe(rq, rn, &tl->requests, link)
 464                                 show_request(m, rq, "", 2);
 465                 }
 466
 467                 mutex_unlock(&tl->mutex);
 468                 spin_lock(&timelines->lock);
 469
 470                 /* Resume list iteration after reacquiring spinlock */
 471                 list_safe_reset_next(tl, tn, link);
 472                 if (atomic_dec_and_test(&tl->active_count))
 473                         list_del(&tl->link);
 474
 475                 /* Defer the final release to after the spinlock */
 476                 if (refcount_dec_and_test(&tl->kref.refcount)) {
 477                         GEM_BUG_ON(atomic_read(&tl->active_count));
 478                         list_add(&tl->link, &free);
 479                 }
 480         }
 481         spin_unlock(&timelines->lock);
 482
 483         list_for_each_entry_safe(tl, tn, &free, link)
 484                 __intel_timeline_free(&tl->kref);
 485 }
 486
 487 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 488 #include "gt/selftests/mock_timeline.c"
 489 #include "gt/selftest_timeline.c"
 490 #endif