Merge drm/drm-next into drm-intel-gt-next
[linux-2.6-microblaze.git] drivers/gpu/drm/i915/gt/intel_gt.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5
6 #include <drm/drm_managed.h>
7 #include <drm/intel-gtt.h>
8
9 #include "intel_gt_debugfs.h"
10
11 #include "gem/i915_gem_lmem.h"
12 #include "i915_drv.h"
13 #include "intel_context.h"
14 #include "intel_gt.h"
15 #include "intel_gt_buffer_pool.h"
16 #include "intel_gt_clock_utils.h"
17 #include "intel_gt_pm.h"
18 #include "intel_gt_requests.h"
19 #include "intel_migrate.h"
20 #include "intel_mocs.h"
21 #include "intel_pm.h"
22 #include "intel_rc6.h"
23 #include "intel_renderstate.h"
24 #include "intel_rps.h"
25 #include "intel_uncore.h"
26 #include "shmem_utils.h"
27 #include "pxp/intel_pxp.h"
28
29 void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
30 {
31         spin_lock_init(&gt->irq_lock);
32
33         mutex_init(&gt->tlb_invalidate_lock);
34
35         INIT_LIST_HEAD(&gt->closed_vma);
36         spin_lock_init(&gt->closed_lock);
37
38         init_llist_head(&gt->watchdog.list);
39         INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);
40
41         intel_gt_init_buffer_pool(gt);
42         intel_gt_init_reset(gt);
43         intel_gt_init_requests(gt);
44         intel_gt_init_timelines(gt);
45         intel_gt_pm_init_early(gt);
46
47         intel_uc_init_early(&gt->uc);
48         intel_rps_init_early(&gt->rps);
49 }
50
51 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
52 {
53         gt->i915 = i915;
54         gt->uncore = &i915->uncore;
55 }
56
57 int intel_gt_probe_lmem(struct intel_gt *gt)
58 {
59         struct drm_i915_private *i915 = gt->i915;
60         struct intel_memory_region *mem;
61         int id;
62         int err;
63
64         mem = intel_gt_setup_lmem(gt);
65         if (mem == ERR_PTR(-ENODEV))
66                 mem = intel_gt_setup_fake_lmem(gt);
67         if (IS_ERR(mem)) {
68                 err = PTR_ERR(mem);
69                 if (err == -ENODEV)
70                         return 0;
71
72                 drm_err(&i915->drm,
73                         "Failed to setup region(%d) type=%d\n",
74                         err, INTEL_MEMORY_LOCAL);
75                 return err;
76         }
77
78         id = INTEL_REGION_LMEM;
79
80         mem->id = id;
81
82         intel_memory_region_set_name(mem, "local%u", mem->instance);
83
84         GEM_BUG_ON(!HAS_REGION(i915, id));
85         GEM_BUG_ON(i915->mm.regions[id]);
86         i915->mm.regions[id] = mem;
87
88         return 0;
89 }
90
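/*
 * The GGTT descriptor is a DRM-managed allocation, so it is released
 * automatically when the drm_device goes away.
 */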
91 int intel_gt_assign_ggtt(struct intel_gt *gt)
92 {
93         gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
94
95         return gt->ggtt ? 0 : -ENOMEM;
96 }
97
98 static const struct intel_mmio_range icl_l3bank_steering_table[] = {
99         { 0x00B100, 0x00B3FF },
100         {},
101 };
102
103 static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
104         { 0x004000, 0x004AFF },
105         { 0x00C800, 0x00CFFF },
106         { 0x00DD00, 0x00DDFF },
107         { 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
108         {},
109 };
110
111 static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
112         { 0x00B000, 0x00B0FF },
113         { 0x00D800, 0x00D8FF },
114         {},
115 };
116
117 static const struct intel_mmio_range dg2_lncf_steering_table[] = {
118         { 0x00B000, 0x00B0FF },
119         { 0x00D880, 0x00D8FF },
120         {},
121 };
122
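/*
 * Derive a mask of enabled slices from the DSS fuse mask, assuming 'count'
 * DSS per slice; used below to compute the mslice mask on platforms that
 * have mslices.
 */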
123 static u16 slicemask(struct intel_gt *gt, int count)
124 {
125         u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);
126
127         return intel_slicemask_from_dssmask(dss_mask, count);
128 }
129
130 int intel_gt_init_mmio(struct intel_gt *gt)
131 {
132         struct drm_i915_private *i915 = gt->i915;
133
134         intel_gt_init_clock_frequency(gt);
135
136         intel_uc_init_mmio(&gt->uc);
137         intel_sseu_info_init(gt);
138
139         /*
140          * An mslice is unavailable only if both the meml3 for the slice is
141          * disabled *and* all of the DSS in the slice (quadrant) are disabled.
142          */
143         if (HAS_MSLICES(i915))
144                 gt->info.mslice_mask =
145                         slicemask(gt, GEN_DSS_PER_MSLICE) |
146                         (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
147                          GEN12_MEML3_EN_MASK);
148
149         if (IS_DG2(i915)) {
150                 gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
151                 gt->steering_table[LNCF] = dg2_lncf_steering_table;
152         } else if (IS_XEHPSDV(i915)) {
153                 gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
154                 gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
155         } else if (GRAPHICS_VER(i915) >= 11 &&
156                    GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
157                 gt->steering_table[L3BANK] = icl_l3bank_steering_table;
158                 gt->info.l3bank_mask =
159                         ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
160                         GEN10_L3BANK_MASK;
161         } else if (HAS_MSLICES(i915)) {
162                 MISSING_CASE(INTEL_INFO(i915)->platform);
163         }
164
165         return intel_engines_init_mmio(gt);
166 }
167
168 static void init_unused_ring(struct intel_gt *gt, u32 base)
169 {
170         struct intel_uncore *uncore = gt->uncore;
171
172         intel_uncore_write(uncore, RING_CTL(base), 0);
173         intel_uncore_write(uncore, RING_HEAD(base), 0);
174         intel_uncore_write(uncore, RING_TAIL(base), 0);
175         intel_uncore_write(uncore, RING_START(base), 0);
176 }
177
178 static void init_unused_rings(struct intel_gt *gt)
179 {
180         struct drm_i915_private *i915 = gt->i915;
181
182         if (IS_I830(i915)) {
183                 init_unused_ring(gt, PRB1_BASE);
184                 init_unused_ring(gt, SRB0_BASE);
185                 init_unused_ring(gt, SRB1_BASE);
186                 init_unused_ring(gt, SRB2_BASE);
187                 init_unused_ring(gt, SRB3_BASE);
188         } else if (GRAPHICS_VER(i915) == 2) {
189                 init_unused_ring(gt, SRB0_BASE);
190                 init_unused_ring(gt, SRB1_BASE);
191         } else if (GRAPHICS_VER(i915) == 3) {
192                 init_unused_ring(gt, PRB1_BASE);
193                 init_unused_ring(gt, PRB2_BASE);
194         }
195 }
196
197 int intel_gt_init_hw(struct intel_gt *gt)
198 {
199         struct drm_i915_private *i915 = gt->i915;
200         struct intel_uncore *uncore = gt->uncore;
201         int ret;
202
203         gt->last_init_time = ktime_get();
204
205         /* Double layer security blanket, see i915_gem_init() */
206         intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
207
208         if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
209                 intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
210
211         if (IS_HASWELL(i915))
212                 intel_uncore_write(uncore,
213                                    MI_PREDICATE_RESULT_2,
214                                    IS_HSW_GT3(i915) ?
215                                    LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
216
217         /* Apply the GT workarounds... */
218         intel_gt_apply_workarounds(gt);
219         /* ...and determine whether they are sticking. */
220         intel_gt_verify_workarounds(gt, "init");
221
222         intel_gt_init_swizzling(gt);
223
224         /*
225          * At least 830 can leave some of the unused rings
226          * "active" (i.e. head != tail) after resume, which
227          * will prevent C3 entry. Make sure all unused rings
228          * are totally idle.
229          */
230         init_unused_rings(gt);
231
232         ret = i915_ppgtt_init_hw(gt);
233         if (ret) {
234                 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
235                 goto out;
236         }
237
238         /* We can't enable contexts until all firmware is loaded */
239         ret = intel_uc_init_hw(&gt->uc);
240         if (ret) {
241                 i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
242                 goto out;
243         }
244
245         intel_mocs_init(gt);
246
247 out:
248         intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
249         return ret;
250 }
251
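/*
 * Small read-modify-write helpers used below when scrubbing the fault and
 * error registers.
 */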
252 static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
253 {
254         intel_uncore_rmw(uncore, reg, 0, set);
255 }
256
257 static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
258 {
259         intel_uncore_rmw(uncore, reg, clr, 0);
260 }
261
262 static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
263 {
264         intel_uncore_rmw(uncore, reg, 0, 0);
265 }
266
267 static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
268 {
269         GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
270         GEN6_RING_FAULT_REG_POSTING_READ(engine);
271 }
272
273 void
274 intel_gt_clear_error_registers(struct intel_gt *gt,
275                                intel_engine_mask_t engine_mask)
276 {
277         struct drm_i915_private *i915 = gt->i915;
278         struct intel_uncore *uncore = gt->uncore;
279         u32 eir;
280
281         if (GRAPHICS_VER(i915) != 2)
282                 clear_register(uncore, PGTBL_ER);
283
284         if (GRAPHICS_VER(i915) < 4)
285                 clear_register(uncore, IPEIR(RENDER_RING_BASE));
286         else
287                 clear_register(uncore, IPEIR_I965);
288
289         clear_register(uncore, EIR);
290         eir = intel_uncore_read(uncore, EIR);
291         if (eir) {
292                 /*
293                  * Some errors might have become stuck;
294                  * mask them.
295                  */
296                 DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
297                 rmw_set(uncore, EMR, eir);
298                 intel_uncore_write(uncore, GEN2_IIR,
299                                    I915_MASTER_ERROR_INTERRUPT);
300         }
301
302         if (GRAPHICS_VER(i915) >= 12) {
303                 rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
304                 intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
305         } else if (GRAPHICS_VER(i915) >= 8) {
306                 rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
307                 intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
308         } else if (GRAPHICS_VER(i915) >= 6) {
309                 struct intel_engine_cs *engine;
310                 enum intel_engine_id id;
311
312                 for_each_engine_masked(engine, gt, engine_mask, id)
313                         gen6_clear_engine_error_register(engine);
314         }
315 }
316
317 static void gen6_check_faults(struct intel_gt *gt)
318 {
319         struct intel_engine_cs *engine;
320         enum intel_engine_id id;
321         u32 fault;
322
323         for_each_engine(engine, gt, id) {
324                 fault = GEN6_RING_FAULT_REG_READ(engine);
325                 if (fault & RING_FAULT_VALID) {
326                         drm_dbg(&engine->i915->drm, "Unexpected fault\n"
327                                 "\tAddr: 0x%08lx\n"
328                                 "\tAddress space: %s\n"
329                                 "\tSource ID: %d\n"
330                                 "\tType: %d\n",
331                                 fault & PAGE_MASK,
332                                 fault & RING_FAULT_GTTSEL_MASK ?
333                                 "GGTT" : "PPGTT",
334                                 RING_FAULT_SRCID(fault),
335                                 RING_FAULT_FAULT_TYPE(fault));
336                 }
337         }
338 }
339
340 static void gen8_check_faults(struct intel_gt *gt)
341 {
342         struct intel_uncore *uncore = gt->uncore;
343         i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
344         u32 fault;
345
346         if (GRAPHICS_VER(gt->i915) >= 12) {
347                 fault_reg = GEN12_RING_FAULT_REG;
348                 fault_data0_reg = GEN12_FAULT_TLB_DATA0;
349                 fault_data1_reg = GEN12_FAULT_TLB_DATA1;
350         } else {
351                 fault_reg = GEN8_RING_FAULT_REG;
352                 fault_data0_reg = GEN8_FAULT_TLB_DATA0;
353                 fault_data1_reg = GEN8_FAULT_TLB_DATA1;
354         }
355
356         fault = intel_uncore_read(uncore, fault_reg);
357         if (fault & RING_FAULT_VALID) {
358                 u32 fault_data0, fault_data1;
359                 u64 fault_addr;
360
361                 fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
362                 fault_data1 = intel_uncore_read(uncore, fault_data1_reg);
363
364                 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
365                              ((u64)fault_data0 << 12);
366
367                 drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
368                         "\tAddr: 0x%08x_%08x\n"
369                         "\tAddress space: %s\n"
370                         "\tEngine ID: %d\n"
371                         "\tSource ID: %d\n"
372                         "\tType: %d\n",
373                         upper_32_bits(fault_addr), lower_32_bits(fault_addr),
374                         fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
375                         GEN8_RING_FAULT_ENGINE_ID(fault),
376                         RING_FAULT_SRCID(fault),
377                         RING_FAULT_FAULT_TYPE(fault));
378         }
379 }
380
381 void intel_gt_check_and_clear_faults(struct intel_gt *gt)
382 {
383         struct drm_i915_private *i915 = gt->i915;
384
385         /* From GEN8 onwards we only have one 'All Engine Fault Register' */
386         if (GRAPHICS_VER(i915) >= 8)
387                 gen8_check_faults(gt);
388         else if (GRAPHICS_VER(i915) >= 6)
389                 gen6_check_faults(gt);
390         else
391                 return;
392
393         intel_gt_clear_error_registers(gt, ALL_ENGINES);
394 }
395
396 void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
397 {
398         struct intel_uncore *uncore = gt->uncore;
399         intel_wakeref_t wakeref;
400
401         /*
402          * No actual flushing is required for the GTT write domain for reads
403          * from the GTT domain. Writes to it "immediately" go to main memory
404          * as far as we know, so there's no chipset flush. It also doesn't
405          * land in the GPU render cache.
406          *
407          * However, we do have to enforce the order so that all writes through
408          * the GTT land before any writes to the device, such as updates to
409          * the GATT itself.
410          *
411          * We also have to wait a bit for the writes to land from the GTT.
412          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
413          * timing. This issue has only been observed when switching quickly
414          * between GTT writes and CPU reads from inside the kernel on recent hw,
415          * and it appears to only affect discrete GTT blocks (i.e. on LLC
416          * system agents we cannot reproduce this behaviour, until Cannonlake
417          * that was!).
418          */
419
420         wmb();
421
422         if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
423                 return;
424
425         intel_gt_chipset_flush(gt);
426
427         with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
428                 unsigned long flags;
429
430                 spin_lock_irqsave(&uncore->lock, flags);
431                 intel_uncore_posting_read_fw(uncore,
432                                              RING_HEAD(RENDER_RING_BASE));
433                 spin_unlock_irqrestore(&uncore->lock, flags);
434         }
435 }
436
437 void intel_gt_chipset_flush(struct intel_gt *gt)
438 {
439         wmb();
440         if (GRAPHICS_VER(gt->i915) < 6)
441                 intel_gtt_chipset_flush();
442 }
443
444 void intel_gt_driver_register(struct intel_gt *gt)
445 {
446         intel_rps_driver_register(&gt->rps);
447
448         intel_gt_debugfs_register(gt);
449 }
450
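/*
 * Allocate and pin a scratch page in the GGTT, trying local memory first,
 * then stolen, then internal memory.
 */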
451 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
452 {
453         struct drm_i915_private *i915 = gt->i915;
454         struct drm_i915_gem_object *obj;
455         struct i915_vma *vma;
456         int ret;
457
458         obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
459         if (IS_ERR(obj))
460                 obj = i915_gem_object_create_stolen(i915, size);
461         if (IS_ERR(obj))
462                 obj = i915_gem_object_create_internal(i915, size);
463         if (IS_ERR(obj)) {
464                 drm_err(&i915->drm, "Failed to allocate scratch page\n");
465                 return PTR_ERR(obj);
466         }
467
468         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
469         if (IS_ERR(vma)) {
470                 ret = PTR_ERR(vma);
471                 goto err_unref;
472         }
473
474         ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
475         if (ret)
476                 goto err_unref;
477
478         gt->scratch = i915_vma_make_unshrinkable(vma);
479
480         return 0;
481
482 err_unref:
483         i915_gem_object_put(obj);
484         return ret;
485 }
486
487 static void intel_gt_fini_scratch(struct intel_gt *gt)
488 {
489         i915_vma_unpin_and_release(&gt->scratch, 0);
490 }
491
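/*
 * Address space used for the GT's own kernel contexts: a full PPGTT where
 * the platform supports one, otherwise an extra reference to the global GTT.
 */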
492 static struct i915_address_space *kernel_vm(struct intel_gt *gt)
493 {
494         if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
495                 return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
496         else
497                 return i915_vm_get(&gt->ggtt->vm);
498 }
499
500 static int __engines_record_defaults(struct intel_gt *gt)
501 {
502         struct i915_request *requests[I915_NUM_ENGINES] = {};
503         struct intel_engine_cs *engine;
504         enum intel_engine_id id;
505         int err = 0;
506
507         /*
508          * As we reset the GPU during very early sanitisation, the current
509          * register state on the GPU should reflect its default values.
510          * We load a context onto the HW (with restore-inhibit), then switch
511          * over to a second context to save that default register state. We
512          * can then prime every new context with that state so they all start
513          * from the same default HW values.
514          */
515
516         for_each_engine(engine, gt, id) {
517                 struct intel_renderstate so;
518                 struct intel_context *ce;
519                 struct i915_request *rq;
520
521                 /* We must be able to switch to something! */
522                 GEM_BUG_ON(!engine->kernel_context);
523
524                 ce = intel_context_create(engine);
525                 if (IS_ERR(ce)) {
526                         err = PTR_ERR(ce);
527                         goto out;
528                 }
529
530                 err = intel_renderstate_init(&so, ce);
531                 if (err)
532                         goto err;
533
534                 rq = i915_request_create(ce);
535                 if (IS_ERR(rq)) {
536                         err = PTR_ERR(rq);
537                         goto err_fini;
538                 }
539
540                 err = intel_engine_emit_ctx_wa(rq);
541                 if (err)
542                         goto err_rq;
543
544                 err = intel_renderstate_emit(&so, rq);
545                 if (err)
546                         goto err_rq;
547
548 err_rq:
549                 requests[id] = i915_request_get(rq);
550                 i915_request_add(rq);
551 err_fini:
552                 intel_renderstate_fini(&so, ce);
553 err:
554                 if (err) {
555                         intel_context_put(ce);
556                         goto out;
557                 }
558         }
559
560         /* Flush the default context image to memory, and enable powersaving. */
561         if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
562                 err = -EIO;
563                 goto out;
564         }
565
566         for (id = 0; id < ARRAY_SIZE(requests); id++) {
567                 struct i915_request *rq;
568                 struct file *state;
569
570                 rq = requests[id];
571                 if (!rq)
572                         continue;
573
574                 if (rq->fence.error) {
575                         err = -EIO;
576                         goto out;
577                 }
578
579                 GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
580                 if (!rq->context->state)
581                         continue;
582
583                 /* Keep a copy of the state's backing pages; free the obj */
584                 state = shmem_create_from_object(rq->context->state->obj);
585                 if (IS_ERR(state)) {
586                         err = PTR_ERR(state);
587                         goto out;
588                 }
589                 rq->engine->default_state = state;
590         }
591
592 out:
593         /*
594          * If we have to abandon now, we expect the engines to be idle
595          * and ready to be torn down. The quickest way we can accomplish
596          * this is by declaring ourselves wedged.
597          */
598         if (err)
599                 intel_gt_set_wedged(gt);
600
601         for (id = 0; id < ARRAY_SIZE(requests); id++) {
602                 struct intel_context *ce;
603                 struct i915_request *rq;
604
605                 rq = requests[id];
606                 if (!rq)
607                         continue;
608
609                 ce = rq->context;
610                 i915_request_put(rq);
611                 intel_context_put(ce);
612         }
613         return err;
614 }
615
616 static int __engines_verify_workarounds(struct intel_gt *gt)
617 {
618         struct intel_engine_cs *engine;
619         enum intel_engine_id id;
620         int err = 0;
621
622         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
623                 return 0;
624
625         for_each_engine(engine, gt, id) {
626                 if (intel_engine_verify_workarounds(engine, "load"))
627                         err = -EIO;
628         }
629
630         /* Flush and restore the kernel context for safety */
631         if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
632                 err = -EIO;
633
634         return err;
635 }
636
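/*
 * Quiesce the GT for removal: mark it wedged-on-fini and run the suspend
 * sequence so that no engine is left holding a GT PM wakeref.
 */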
637 static void __intel_gt_disable(struct intel_gt *gt)
638 {
639         intel_gt_set_wedged_on_fini(gt);
640
641         intel_gt_suspend_prepare(gt);
642         intel_gt_suspend_late(gt);
643
644         GEM_BUG_ON(intel_gt_pm_is_awake(gt));
645 }
646
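/*
 * Retire outstanding requests and wait for the GT (including the GuC) to
 * become idle within @timeout; returns 0 if idle, or a negative error code.
 */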
647 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
648 {
649         long remaining_timeout;
650
651         /* If the device is asleep, we have no requests outstanding */
652         if (!intel_gt_pm_is_awake(gt))
653                 return 0;
654
655         while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
656                                                            &remaining_timeout)) > 0) {
657                 cond_resched();
658                 if (signal_pending(current))
659                         return -EINTR;
660         }
661
662         return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
663                                                           remaining_timeout);
664 }
665
666 int intel_gt_init(struct intel_gt *gt)
667 {
668         int err;
669
670         err = i915_inject_probe_error(gt->i915, -ENODEV);
671         if (err)
672                 return err;
673
674         intel_gt_init_workarounds(gt);
675
676         /*
677          * This is just a security blanket to placate dragons.
678          * On some systems, we very sporadically observe that the first TLBs
679          * used by the CS may be stale, despite us poking the TLB reset. If
680          * we hold the forcewake during initialisation these problems
681          * just magically go away.
682          */
683         intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
684
685         err = intel_gt_init_scratch(gt,
686                                     GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
687         if (err)
688                 goto out_fw;
689
690         intel_gt_pm_init(gt);
691
692         gt->vm = kernel_vm(gt);
693         if (!gt->vm) {
694                 err = -ENOMEM;
695                 goto err_pm;
696         }
697
698         intel_set_mocs_index(gt);
699
700         err = intel_engines_init(gt);
701         if (err)
702                 goto err_engines;
703
704         err = intel_uc_init(&gt->uc);
705         if (err)
706                 goto err_engines;
707
708         err = intel_gt_resume(gt);
709         if (err)
710                 goto err_uc_init;
711
712         err = __engines_record_defaults(gt);
713         if (err)
714                 goto err_gt;
715
716         err = __engines_verify_workarounds(gt);
717         if (err)
718                 goto err_gt;
719
720         intel_uc_init_late(&gt->uc);
721
722         err = i915_inject_probe_error(gt->i915, -EIO);
723         if (err)
724                 goto err_gt;
725
726         intel_migrate_init(&gt->migrate, gt);
727
728         intel_pxp_init(&gt->pxp);
729
730         goto out_fw;
731 err_gt:
732         __intel_gt_disable(gt);
733         intel_uc_fini_hw(&gt->uc);
734 err_uc_init:
735         intel_uc_fini(&gt->uc);
736 err_engines:
737         intel_engines_release(gt);
738         i915_vm_put(fetch_and_zero(&gt->vm));
739 err_pm:
740         intel_gt_pm_fini(gt);
741         intel_gt_fini_scratch(gt);
742 out_fw:
743         if (err)
744                 intel_gt_set_wedged_on_init(gt);
745         intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
746         return err;
747 }
748
749 void intel_gt_driver_remove(struct intel_gt *gt)
750 {
751         __intel_gt_disable(gt);
752
753         intel_migrate_fini(&gt->migrate);
754         intel_uc_driver_remove(&gt->uc);
755
756         intel_engines_release(gt);
757
758         intel_gt_flush_buffer_pool(gt);
759 }
760
761 void intel_gt_driver_unregister(struct intel_gt *gt)
762 {
763         intel_wakeref_t wakeref;
764
765         intel_rps_driver_unregister(&gt->rps);
766
767         intel_pxp_fini(&gt->pxp);
768
769         /*
770          * Upon unregistering the device (to prevent any new users), cancel
771          * all in-flight requests so that we can quickly unbind the active
772          * resources.
773          */
774         intel_gt_set_wedged_on_fini(gt);
775
776         /* Scrub all HW state upon release */
777         with_intel_runtime_pm(gt->uncore->rpm, wakeref)
778                 __intel_gt_reset(gt, ALL_ENGINES);
779 }
780
781 void intel_gt_driver_release(struct intel_gt *gt)
782 {
783         struct i915_address_space *vm;
784
785         vm = fetch_and_zero(&gt->vm);
786         if (vm) /* FIXME being called twice on error paths :( */
787                 i915_vm_put(vm);
788
789         intel_wa_list_free(&gt->wa_list);
790         intel_gt_pm_fini(gt);
791         intel_gt_fini_scratch(gt);
792         intel_gt_fini_buffer_pool(gt);
793 }
794
795 void intel_gt_driver_late_release(struct intel_gt *gt)
796 {
797         /* We need to wait for inflight RCU frees to release their grip */
798         rcu_barrier();
799
800         intel_uc_driver_late_release(&gt->uc);
801         intel_gt_fini_requests(gt);
802         intel_gt_fini_reset(gt);
803         intel_gt_fini_timelines(gt);
804         intel_engines_free(gt);
805 }
806
807 /**
808  * intel_gt_reg_needs_read_steering - determine whether a register read
809  *     requires explicit steering
810  * @gt: GT structure
811  * @reg: the register to check steering requirements for
812  * @type: type of multicast steering to check
813  *
814  * Determines whether @reg needs explicit steering of a specific type for
815  * reads.
816  *
817  * Returns false if @reg does not belong to a register range of the given
818  * steering type, or if the default (subslice-based) steering IDs are suitable
819  * for @type steering too.
820  */
821 static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt,
822                                              i915_reg_t reg,
823                                              enum intel_steering_type type)
824 {
825         const u32 offset = i915_mmio_reg_offset(reg);
826         const struct intel_mmio_range *entry;
827
828         if (likely(!intel_gt_needs_read_steering(gt, type)))
829                 return false;
830
831         for (entry = gt->steering_table[type]; entry->end; entry++) {
832                 if (offset >= entry->start && offset <= entry->end)
833                         return true;
834         }
835
836         return false;
837 }
838
839 /**
840  * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering
841  * @gt: GT structure
842  * @type: multicast register type
843  * @sliceid: Slice ID returned
844  * @subsliceid: Subslice ID returned
845  *
846  * Determines sliceid and subsliceid values that will steer reads
847  * of a specific multicast register class to a valid value.
848  */
849 static void intel_gt_get_valid_steering(struct intel_gt *gt,
850                                         enum intel_steering_type type,
851                                         u8 *sliceid, u8 *subsliceid)
852 {
853         switch (type) {
854         case L3BANK:
855                 GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */
856
857                 *sliceid = 0;           /* unused */
858                 *subsliceid = __ffs(gt->info.l3bank_mask);
859                 break;
860         case MSLICE:
861                 GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
862
863                 *sliceid = __ffs(gt->info.mslice_mask);
864                 *subsliceid = 0;        /* unused */
865                 break;
866         case LNCF:
867                 GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
868
869                 /*
870                  * An LNCF is always present if its mslice is present, so we
871                  * can safely just steer to LNCF 0 in all cases.
872                  */
873                 *sliceid = __ffs(gt->info.mslice_mask) << 1;
874                 *subsliceid = 0;        /* unused */
875                 break;
876         default:
877                 MISSING_CASE(type);
878                 *sliceid = 0;
879                 *subsliceid = 0;
880         }
881 }
882
883 /**
884  * intel_gt_read_register_fw - reads a GT register with support for multicast
885  * @gt: GT structure
886  * @reg: register to read
887  *
888  * This function will read a GT register.  If the register is a multicast
889  * register, the read will be steered to a valid instance (i.e., one that
890  * isn't fused off or powered down by power gating).
891  *
892  * Returns the value from a valid instance of @reg.
893  */
894 u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
895 {
896         int type;
897         u8 sliceid, subsliceid;
898
899         for (type = 0; type < NUM_STEERING_TYPES; type++) {
900                 if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
901                         intel_gt_get_valid_steering(gt, type, &sliceid,
902                                                     &subsliceid);
903                         return intel_uncore_read_with_mcr_steering_fw(gt->uncore,
904                                                                       reg,
905                                                                       sliceid,
906                                                                       subsliceid);
907                 }
908         }
909
910         return intel_uncore_read_fw(gt->uncore, reg);
911 }
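
/*
 * Illustrative sketch only (not part of the driver): one way a caller that
 * already holds a runtime PM wakeref might use intel_gt_read_register_fw().
 * "example_reg" and the function below are hypothetical, chosen purely for
 * demonstration.
 */
static u32 __maybe_unused example_read_steered(struct intel_gt *gt,
                                               i915_reg_t example_reg)
{
        u32 val;

        /* The _fw variants assume the caller manages forcewake explicitly */
        intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
        val = intel_gt_read_register_fw(gt, example_reg);
        intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

        return val;
}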
912
913 void intel_gt_info_print(const struct intel_gt_info *info,
914                          struct drm_printer *p)
915 {
916         drm_printf(p, "available engines: %x\n", info->engine_mask);
917
918         intel_sseu_dump(&info->sseu, p);
919 }
920
921 struct reg_and_bit {
922         i915_reg_t reg;
923         u32 bit;
924 };
925
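/*
 * Look up the TLB invalidation register and request bit for an engine. On
 * gen8 the second video decode engine uses a separate register (GEN8_M2TCR)
 * rather than a separate bit in GEN8_M1TCR.
 */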
926 static struct reg_and_bit
927 get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
928                 const i915_reg_t *regs, const unsigned int num)
929 {
930         const unsigned int class = engine->class;
931         struct reg_and_bit rb = { };
932
933         if (drm_WARN_ON_ONCE(&engine->i915->drm,
934                              class >= num || !regs[class].reg))
935                 return rb;
936
937         rb.reg = regs[class];
938         if (gen8 && class == VIDEO_DECODE_CLASS)
939                 rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
940         else
941                 rb.bit = engine->instance;
942
943         rb.bit = BIT(rb.bit);
944
945         return rb;
946 }
947
948 void intel_gt_invalidate_tlbs(struct intel_gt *gt)
949 {
950         static const i915_reg_t gen8_regs[] = {
951                 [RENDER_CLASS]                  = GEN8_RTCR,
952                 [VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
953                 [VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
954                 [COPY_ENGINE_CLASS]             = GEN8_BTCR,
955         };
956         static const i915_reg_t gen12_regs[] = {
957                 [RENDER_CLASS]                  = GEN12_GFX_TLB_INV_CR,
958                 [VIDEO_DECODE_CLASS]            = GEN12_VD_TLB_INV_CR,
959                 [VIDEO_ENHANCEMENT_CLASS]       = GEN12_VE_TLB_INV_CR,
960                 [COPY_ENGINE_CLASS]             = GEN12_BLT_TLB_INV_CR,
961         };
962         struct drm_i915_private *i915 = gt->i915;
963         struct intel_uncore *uncore = gt->uncore;
964         struct intel_engine_cs *engine;
965         enum intel_engine_id id;
966         const i915_reg_t *regs;
967         unsigned int num = 0;
968
969         if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
970                 return;
971
972         if (GRAPHICS_VER(i915) == 12) {
973                 regs = gen12_regs;
974                 num = ARRAY_SIZE(gen12_regs);
975         } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
976                 regs = gen8_regs;
977                 num = ARRAY_SIZE(gen8_regs);
978         } else if (GRAPHICS_VER(i915) < 8) {
979                 return;
980         }
981
982         if (drm_WARN_ONCE(&i915->drm, !num,
983                           "Platform does not implement TLB invalidation!"))
984                 return;
985
986         GEM_TRACE("\n");
987
988         assert_rpm_wakelock_held(&i915->runtime_pm);
989
990         mutex_lock(&gt->tlb_invalidate_lock);
991         intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
992
993         for_each_engine(engine, gt, id) {
994                 /*
995                  * The HW architecture suggests a typical invalidation time of
996                  * 40us, with pessimistic cases up to 100us and a recommendation
997                  * to cap at 1ms. We go a bit higher just in case.
998                  */
999                 const unsigned int timeout_us = 100;
1000                 const unsigned int timeout_ms = 4;
1001                 struct reg_and_bit rb;
1002
1003                 rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
1004                 if (!i915_mmio_reg_offset(rb.reg))
1005                         continue;
1006
1007                 intel_uncore_write_fw(uncore, rb.reg, rb.bit);
1008                 if (__intel_wait_for_register_fw(uncore,
1009                                                  rb.reg, rb.bit, 0,
1010                                                  timeout_us, timeout_ms,
1011                                                  NULL))
1012                         drm_err_ratelimited(&gt->i915->drm,
1013                                             "%s TLB invalidation did not complete in %ums!\n",
1014                                             engine->name, timeout_ms);
1015         }
1016
1017         /*
1018          * Use delayed put since a) we mostly expect a flurry of TLB
1019          * invalidations so it is good to avoid paying the forcewake cost and
1020          * b) it works around a bug in Icelake which cannot cope with too rapid
1021          * transitions.
1022          */
1023         intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
1024         mutex_unlock(&gt->tlb_invalidate_lock);
1025 }