// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_managed.h>
#include <drm/intel-gtt.h>

#include "intel_gt_debugfs.h"

#include "gem/i915_gem_lmem.h"
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_gt_buffer_pool.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_migrate.h"
#include "intel_mocs.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_uncore.h"
#include "shmem_utils.h"
#include "pxp/intel_pxp.h"
void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
	spin_lock_init(&gt->irq_lock);

	mutex_init(&gt->tlb_invalidate_lock);

	INIT_LIST_HEAD(&gt->closed_vma);
	spin_lock_init(&gt->closed_lock);

	init_llist_head(&gt->watchdog.list);
	INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);

	intel_gt_init_buffer_pool(gt);
	intel_gt_init_reset(gt);
	intel_gt_init_requests(gt);
	intel_gt_init_timelines(gt);
	intel_gt_pm_init_early(gt);

	intel_uc_init_early(&gt->uc);
	intel_rps_init_early(&gt->rps);
}

void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
	gt->i915 = i915;
	gt->uncore = &i915->uncore;
}
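
/*
 * Probe for device local memory (LMEM). If real LMEM is unavailable, fall
 * back to the fake LMEM setup (a testing aid), and register the resulting
 * region in the i915 memory-region table as INTEL_REGION_LMEM.
 */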
int intel_gt_probe_lmem(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_memory_region *mem;
	int id;
	int err;

	mem = intel_gt_setup_lmem(gt);
	if (mem == ERR_PTR(-ENODEV))
		mem = intel_gt_setup_fake_lmem(gt);
	if (IS_ERR(mem)) {
		err = PTR_ERR(mem);
		if (err == -ENODEV)
			return 0;

		drm_err(&i915->drm,
			"Failed to setup region(%d) type=%d\n",
			err, INTEL_MEMORY_LOCAL);
		return err;
	}

	id = INTEL_REGION_LMEM;

	mem->id = id;

	intel_memory_region_set_name(mem, "local%u", mem->instance);

	GEM_BUG_ON(!HAS_REGION(i915, id));
	GEM_BUG_ON(i915->mm.regions[id]);
	i915->mm.regions[id] = mem;

	return 0;
}

int intel_gt_assign_ggtt(struct intel_gt *gt)
{
	gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);

	return gt->ggtt ? 0 : -ENOMEM;
}
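
/*
 * Ranges of multicast/replicated MMIO registers that need explicit read
 * steering on the various platforms. Each table is terminated by an empty
 * sentinel entry.
 */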
static const struct intel_mmio_range icl_l3bank_steering_table[] = {
	{ 0x00B100, 0x00B3FF },
	{},
};

static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
	{ 0x004000, 0x004AFF },
	{ 0x00C800, 0x00CFFF },
	{ 0x00DD00, 0x00DDFF },
	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
	{},
};

static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
	{ 0x00B000, 0x00B0FF },
	{ 0x00D800, 0x00D8FF },
	{},
};

static const struct intel_mmio_range dg2_lncf_steering_table[] = {
	{ 0x00B000, 0x00B0FF },
	{ 0x00D880, 0x00D8FF },
	{},
};

static u16 slicemask(struct intel_gt *gt, int count)
{
	u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);

	return intel_slicemask_from_dssmask(dss_mask, count);
}
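
/*
 * Early MMIO-based GT setup: derive clock frequencies, initialise uC MMIO
 * state and the SSEU info, then select the per-platform steering tables
 * above before probing the engines over MMIO.
 */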
int intel_gt_init_mmio(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	intel_gt_init_clock_frequency(gt);

	intel_uc_init_mmio(&gt->uc);
	intel_sseu_info_init(gt);

	/*
	 * An mslice is unavailable only if both the meml3 for the slice is
	 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
	 */
	if (HAS_MSLICES(i915))
		gt->info.mslice_mask =
			slicemask(gt, GEN_DSS_PER_MSLICE) |
			(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
			 GEN12_MEML3_EN_MASK);

	if (IS_DG2(i915)) {
		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
		gt->steering_table[LNCF] = dg2_lncf_steering_table;
	} else if (IS_XEHPSDV(i915)) {
		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
		gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
	} else if (GRAPHICS_VER(i915) >= 11 &&
		   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
		gt->steering_table[L3BANK] = icl_l3bank_steering_table;
		gt->info.l3bank_mask =
			~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
			GEN10_L3BANK_MASK;
	} else if (HAS_MSLICES(i915)) {
		MISSING_CASE(INTEL_INFO(i915)->platform);
	}

	return intel_engines_init_mmio(gt);
}
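
/*
 * Zero the control registers of rings the driver never uses on legacy
 * (gen2/gen3) platforms so that stale state cannot keep them "active";
 * see the comment at the init_unused_rings() call in intel_gt_init_hw().
 */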
static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (GRAPHICS_VER(i915) == 2) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (GRAPHICS_VER(i915) == 3) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}
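
/*
 * (Re)initialise the GT hardware: apply and verify workarounds, set up
 * swizzling and the PPGTT, then bring up the uC firmware, all under a
 * forcewake "security blanket" (see below).
 */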
int intel_gt_init_hw(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   MI_PREDICATE_RESULT_2,
				   IS_HSW_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Makes sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&gt->uc);
	if (ret) {
		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init(gt);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw(uncore, reg, 0, set);
}

static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw(uncore, reg, clr, 0);
}

static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
{
	intel_uncore_rmw(uncore, reg, 0, 0);
}

static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
{
	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
	GEN6_RING_FAULT_REG_POSTING_READ(engine);
}
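
/*
 * Clear any latched error and fault state so that stale records from a
 * previous hang or reset are not misattributed to new activity.
 */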
static void
intel_gt_clear_error_registers(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	u32 eir;

	if (GRAPHICS_VER(i915) != 2)
		clear_register(uncore, PGTBL_ER);

	if (GRAPHICS_VER(i915) < 4)
		clear_register(uncore, IPEIR(RENDER_RING_BASE));
	else
		clear_register(uncore, IPEIR_I965);

	clear_register(uncore, EIR);
	eir = intel_uncore_read(uncore, EIR);
	if (eir) {
		/*
		 * some errors might have become stuck,
		 * mask them.
		 */
		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
		rmw_set(uncore, EMR, eir);
		intel_uncore_write(uncore, GEN2_IIR,
				   I915_MASTER_ERROR_INTERRUPT);
	}

	if (GRAPHICS_VER(i915) >= 12) {
		rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 8) {
		rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 6) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine_masked(engine, gt, engine_mask, id)
			gen6_clear_engine_error_register(engine);
	}
}

static void gen6_check_faults(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 fault;

	for_each_engine(engine, gt, id) {
		fault = GEN6_RING_FAULT_REG_READ(engine);
		if (fault & RING_FAULT_VALID) {
			drm_dbg(&engine->i915->drm, "Unexpected fault\n"
				"\tAddr: 0x%08lx\n"
				"\tAddress space: %s\n"
				"\tSource ID: %d\n"
				"\tType: %d\n",
				fault & PAGE_MASK,
				fault & RING_FAULT_GTTSEL_MASK ?
				"GGTT" : "PPGTT",
				RING_FAULT_SRCID(fault),
				RING_FAULT_FAULT_TYPE(fault));
		}
	}
}

static void gen8_check_faults(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
	u32 fault;

	if (GRAPHICS_VER(gt->i915) >= 12) {
		fault_reg = GEN12_RING_FAULT_REG;
		fault_data0_reg = GEN12_FAULT_TLB_DATA0;
		fault_data1_reg = GEN12_FAULT_TLB_DATA1;
	} else {
		fault_reg = GEN8_RING_FAULT_REG;
		fault_data0_reg = GEN8_FAULT_TLB_DATA0;
		fault_data1_reg = GEN8_FAULT_TLB_DATA1;
	}

	fault = intel_uncore_read(uncore, fault_reg);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
		fault_data1 = intel_uncore_read(uncore, fault_data1_reg);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
			"\tAddr: 0x%08x_%08x\n"
			"\tAddress space: %s\n"
			"\tEngine ID: %d\n"
			"\tSource ID: %d\n"
			"\tType: %d\n",
			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
			GEN8_RING_FAULT_ENGINE_ID(fault),
			RING_FAULT_SRCID(fault),
			RING_FAULT_FAULT_TYPE(fault));
	}
}

void intel_gt_check_and_clear_faults(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
	if (GRAPHICS_VER(i915) >= 8)
		gen8_check_faults(gt);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_check_faults(gt);
	else
		return;

	intel_gt_clear_error_registers(gt, ALL_ENGINES);
}

void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * became discrete).
	 */

	wmb();

	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
		return;

	intel_gt_chipset_flush(gt);

	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
		unsigned long flags;

		spin_lock_irqsave(&uncore->lock, flags);
		intel_uncore_posting_read_fw(uncore,
					     RING_HEAD(RENDER_RING_BASE));
		spin_unlock_irqrestore(&uncore->lock, flags);
	}
}

void intel_gt_chipset_flush(struct intel_gt *gt)
{
	wmb();
	if (GRAPHICS_VER(gt->i915) < 6)
		intel_gtt_chipset_flush();
}

void intel_gt_driver_register(struct intel_gt *gt)
{
	intel_rps_driver_register(&gt->rps);

	intel_gt_debugfs_register(gt);
}
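
/*
 * Allocate and pin the global scratch page used by the GT, preferring
 * LMEM and falling back to stolen and then internal memory.
 */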
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		drm_err(&i915->drm, "Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (ret)
		goto err_unref;

	gt->scratch = i915_vma_make_unshrinkable(vma);

	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_gt_fini_scratch(struct intel_gt *gt)
{
	i915_vma_unpin_and_release(&gt->scratch, 0);
}
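
/*
 * Select the address space used by the kernel context: a full PPGTT when
 * the platform supports more than aliasing PPGTT, otherwise the GGTT.
 */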
static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
		return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
	else
		return i915_vm_get(&gt->ggtt->vm);
}

static int __engines_record_defaults(struct intel_gt *gt)
{
	struct i915_request *requests[I915_NUM_ENGINES] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its defaults values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_renderstate so;
		struct intel_context *ce;
		struct i915_request *rq;

		/* We must be able to switch to something! */
		GEM_BUG_ON(!engine->kernel_context);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_renderstate_init(&so, ce);
		if (err)
			goto err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_fini;
		}

		err = intel_engine_emit_ctx_wa(rq);
		if (err)
			goto err_rq;

		err = intel_renderstate_emit(&so, rq);
		if (err)
			goto err_rq;

err_rq:
		requests[id] = i915_request_get(rq);
		i915_request_add(rq);
err_fini:
		intel_renderstate_fini(&so, ce);
err:
		if (err) {
			intel_context_put(ce);
			goto out;
		}
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
		err = -EIO;
		goto out;
	}

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct i915_request *rq;
		struct file *state;

		rq = requests[id];
		if (!rq)
			continue;

		if (rq->fence.error) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
		if (!rq->context->state)
			continue;

		/* Keep a copy of the state's backing pages; free the obj */
		state = shmem_create_from_object(rq->context->state->obj);
		if (IS_ERR(state)) {
			err = PTR_ERR(state);
			goto out;
		}
		rq->engine->default_state = state;
	}

out:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	if (err)
		intel_gt_set_wedged(gt);

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct intel_context *ce;
		struct i915_request *rq;

		rq = requests[id];
		if (!rq)
			continue;

		ce = rq->context;
		i915_request_put(rq);
		intel_context_put(ce);
	}
	return err;
}

static int __engines_verify_workarounds(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, gt, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	/* Flush and restore the kernel context for safety */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
		err = -EIO;

	return err;
}

static void __intel_gt_disable(struct intel_gt *gt)
{
	intel_gt_set_wedged_on_fini(gt);

	intel_gt_suspend_prepare(gt);
	intel_gt_suspend_late(gt);

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
}
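
/*
 * Retire requests until the GT is idle (or the timeout expires), then let
 * the uC spend whatever time remains going idle as well. Returns
 * immediately if the GT is already asleep.
 */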
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
{
	long remaining_timeout;

	/* If the device is asleep, we have no requests outstanding */
	if (!intel_gt_pm_is_awake(gt))
		return 0;

	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
							   &remaining_timeout)) > 0) {
		cond_resched();
		if (signal_pending(current))
			return -EINTR;
	}

	return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
							  remaining_timeout);
}
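
/*
 * Main GT initialisation at driver load: workarounds, scratch, PM, the
 * kernel address space, engines and uC, followed by recording the default
 * context image and verifying that the workarounds stuck. On failure the
 * GT is marked wedged.
 */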
int intel_gt_init(struct intel_gt *gt)
{
	int err;

	err = i915_inject_probe_error(gt->i915, -ENODEV);
	if (err)
		return err;

	intel_gt_init_workarounds(gt);

	/*
	 * This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	err = intel_gt_init_scratch(gt,
				    GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
	if (err)
		goto out_fw;

	intel_gt_pm_init(gt);

	gt->vm = kernel_vm(gt);
	if (!gt->vm) {
		err = -ENOMEM;
		goto err_pm;
	}

	intel_set_mocs_index(gt);

	err = intel_engines_init(gt);
	if (err)
		goto err_engines;

	err = intel_uc_init(&gt->uc);
	if (err)
		goto err_engines;

	err = intel_gt_resume(gt);
	if (err)
		goto err_uc_init;

	err = __engines_record_defaults(gt);
	if (err)
		goto err_gt;

	err = __engines_verify_workarounds(gt);
	if (err)
		goto err_gt;

	intel_uc_init_late(&gt->uc);

	err = i915_inject_probe_error(gt->i915, -EIO);
	if (err)
		goto err_gt;

	intel_migrate_init(&gt->migrate, gt);

	intel_pxp_init(&gt->pxp);

	goto out_fw;
err_gt:
	__intel_gt_disable(gt);
	intel_uc_fini_hw(&gt->uc);
err_uc_init:
	intel_uc_fini(&gt->uc);
err_engines:
	intel_engines_release(gt);
	i915_vm_put(fetch_and_zero(&gt->vm));
err_pm:
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
out_fw:
	if (err)
		intel_gt_set_wedged_on_init(gt);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	return err;
}

void intel_gt_driver_remove(struct intel_gt *gt)
{
	__intel_gt_disable(gt);

	intel_migrate_fini(&gt->migrate);
	intel_uc_driver_remove(&gt->uc);

	intel_engines_release(gt);

	intel_gt_flush_buffer_pool(gt);
}

void intel_gt_driver_unregister(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	intel_rps_driver_unregister(&gt->rps);

	intel_pxp_fini(&gt->pxp);

	/*
	 * Upon unregistering the device to prevent any new users, cancel
	 * all in-flight requests so that we can quickly unbind the active
	 * resources.
	 */
	intel_gt_set_wedged_on_fini(gt);

	/* Scrub all HW state upon release */
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_reset(gt, ALL_ENGINES);
}

void intel_gt_driver_release(struct intel_gt *gt)
{
	struct i915_address_space *vm;

	vm = fetch_and_zero(&gt->vm);
	if (vm) /* FIXME being called twice on error paths :( */
		i915_vm_put(vm);

	intel_wa_list_free(&gt->wa_list);
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
	intel_gt_fini_buffer_pool(gt);
}

void intel_gt_driver_late_release(struct intel_gt *gt)
{
	/* We need to wait for inflight RCU frees to release their grip */
	rcu_barrier();

	intel_uc_driver_late_release(&gt->uc);
	intel_gt_fini_requests(gt);
	intel_gt_fini_reset(gt);
	intel_gt_fini_timelines(gt);
	intel_engines_free(gt);
}

/**
 * intel_gt_reg_needs_read_steering - determine whether a register read
 *     requires explicit steering
 * @gt: GT structure
 * @reg: the register to check steering requirements for
 * @type: type of multicast steering to check
 *
 * Determines whether @reg needs explicit steering of a specific type for
 * reads.
 *
 * Returns false if @reg does not belong to a register range of the given
 * steering type, or if the default (subslice-based) steering IDs are suitable
 * for @type steering too.
 */
static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt,
					     i915_reg_t reg,
					     enum intel_steering_type type)
{
	const u32 offset = i915_mmio_reg_offset(reg);
	const struct intel_mmio_range *entry;

	if (likely(!intel_gt_needs_read_steering(gt, type)))
		return false;

	for (entry = gt->steering_table[type]; entry->end; entry++) {
		if (offset >= entry->start && offset <= entry->end)
			return true;
	}

	return false;
}

/**
 * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering
 * @gt: GT structure
 * @type: multicast register type
 * @sliceid: Slice ID returned
 * @subsliceid: Subslice ID returned
 *
 * Determines sliceid and subsliceid values that will steer reads
 * of a specific multicast register class to a valid value.
 */
static void intel_gt_get_valid_steering(struct intel_gt *gt,
					enum intel_steering_type type,
					u8 *sliceid, u8 *subsliceid)
{
	switch (type) {
	case L3BANK:
		GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */

		*sliceid = 0; /* unused */
		*subsliceid = __ffs(gt->info.l3bank_mask);
		break;
	case MSLICE:
		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */

		*sliceid = __ffs(gt->info.mslice_mask);
		*subsliceid = 0; /* unused */
		break;
	case LNCF:
		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */

		/*
		 * An LNCF is always present if its mslice is present, so we
		 * can safely just steer to LNCF 0 in all cases.
		 */
		*sliceid = __ffs(gt->info.mslice_mask) << 1;
		*subsliceid = 0; /* unused */
		break;
	default:
		MISSING_CASE(type);
		*sliceid = 0;
		*subsliceid = 0;
	}
}

/**
 * intel_gt_read_register_fw - reads a GT register with support for multicast
 * @gt: GT structure
 * @reg: register to read
 *
 * This function will read a GT register. If the register is a multicast
 * register, the read will be steered to a valid instance (i.e., one that
 * isn't fused off or powered down by power gating).
 *
 * Returns the value from a valid instance of @reg.
 */
u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
{
	int type;
	u8 sliceid, subsliceid;

	for (type = 0; type < NUM_STEERING_TYPES; type++) {
		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
			intel_gt_get_valid_steering(gt, type, &sliceid,
						    &subsliceid);
			return intel_uncore_read_with_mcr_steering_fw(gt->uncore,
								      reg,
								      sliceid,
								      subsliceid);
		}
	}

	return intel_uncore_read_fw(gt->uncore, reg);
}
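
/*
 * Illustrative usage sketch (not taken from this file): a caller that
 * already holds forcewake and wants the value of a possibly-multicast
 * register can simply do
 *
 *	val = intel_gt_read_register_fw(gt, reg);
 *
 * and, if @reg falls inside one of the steering tables above, the read is
 * transparently steered to an instance that is not fused off.
 */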
void intel_gt_info_print(const struct intel_gt_info *info,
			 struct drm_printer *p)
{
	drm_printf(p, "available engines: %x\n", info->engine_mask);

	intel_sseu_dump(&info->sseu, p);
}
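
/*
 * Helpers for engine TLB invalidation: look up the per-class invalidation
 * register and the bit within it that corresponds to a given engine
 * instance.
 */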
struct reg_and_bit {
	i915_reg_t reg;
	u32 bit;
};

static struct reg_and_bit
get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
		const i915_reg_t *regs, const unsigned int num)
{
	const unsigned int class = engine->class;
	struct reg_and_bit rb = { };

	if (drm_WARN_ON_ONCE(&engine->i915->drm,
			     class >= num || !regs[class].reg))
		return rb;

	rb.reg = regs[class];
	if (gen8 && class == VIDEO_DECODE_CLASS)
		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */

	rb.bit = engine->instance;
	rb.bit = BIT(rb.bit);

	return rb;
}
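
/*
 * Invalidate the TLBs of every engine, serialised by tlb_invalidate_lock:
 * write each engine's invalidation bit and wait for the hardware to
 * acknowledge it by clearing the bit again.
 */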
void intel_gt_invalidate_tlbs(struct intel_gt *gt)
{
	static const i915_reg_t gen8_regs[] = {
		[RENDER_CLASS] = GEN8_RTCR,
		[VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
		[VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
		[COPY_ENGINE_CLASS] = GEN8_BTCR,
	};
	static const i915_reg_t gen12_regs[] = {
		[RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
	};
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const i915_reg_t *regs;
	unsigned int num = 0;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (GRAPHICS_VER(i915) == 12) {
		regs = gen12_regs;
		num = ARRAY_SIZE(gen12_regs);
	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
		regs = gen8_regs;
		num = ARRAY_SIZE(gen8_regs);
	} else if (GRAPHICS_VER(i915) < 8) {
		return;
	}

	if (drm_WARN_ONCE(&i915->drm, !num,
			  "Platform does not implement TLB invalidation!"))
		return;

	assert_rpm_wakelock_held(&i915->runtime_pm);

	mutex_lock(&gt->tlb_invalidate_lock);
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	for_each_engine(engine, gt, id) {
		/*
		 * HW architecture suggests typical invalidation time at 40us,
		 * with pessimistic cases up to 100us and a recommendation to
		 * cap at 1ms. We go a bit higher just in case.
		 */
		const unsigned int timeout_us = 100;
		const unsigned int timeout_ms = 4;
		struct reg_and_bit rb;

		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
		if (!i915_mmio_reg_offset(rb.reg))
			continue;

		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
		if (__intel_wait_for_register_fw(uncore,
						 rb.reg, rb.bit, 0,
						 timeout_us, timeout_ms,
						 NULL))
			drm_err_ratelimited(&gt->i915->drm,
					    "%s TLB invalidation did not complete in %ums!\n",
					    engine->name, timeout_ms);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
	mutex_unlock(&gt->tlb_invalidate_lock);
}