drm/i915: Add support for explicit L3BANK steering
drivers/gpu/drm/i915/gt/intel_workarounds.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
        wal->name = name;
        wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
                         wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list) {
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
                        kfree(wal->list);
                }

                wal->list = list;
        }

        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->clr, wa_->set);

                                wa_->set &= ~wa->clr;
                        }

                        wal->wa_count++;
                        wa_->set |= wa->set;
                        wa_->clr |= wa->clr;
                        wa_->read |= wa->read;
                        return;
                }
        }

        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}
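
/*
 * Example (editor's illustration, not part of the original file): adding
 * workarounds for offsets 0xe184, 0xe180 and then 0xe184 again leaves
 * wal->list sorted as { 0xe180, 0xe184 }, with the repeated 0xe184 entry
 * merged into the existing one (set/clr/read OR'ed together) rather than
 * appended; the offsets here are arbitrary.
 */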

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
                   u32 clear, u32 set, u32 read_mask)
{
        struct i915_wa wa = {
                .reg  = reg,
                .clr  = clear,
                .set  = set,
                .read = read_mask,
        };

        _wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
        wa_add(wal, reg, clear, set, clear);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
        wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
        wa_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
        wa_write_clr_set(wal, reg, clr, 0);
}

/*
 * WA operations on "masked registers". A masked register has the upper 16 bits
 * documented as "masked" in the bspec. Its purpose is to allow writing to just
 * a portion of the register without a rmw: you simply write, in the upper 16
 * bits, the mask of the bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
}

static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
                    u32 mask, u32 val)
{
        wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
}
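
/*
 * Illustration (editor's sketch, based on the _MASKED_FIELD() helpers in
 * i915_reg.h; not part of the original file): the calls above expand
 * roughly as
 *
 *   wa_masked_en(wal, reg, BIT(3))  -> .set = 0x00080008
 *   wa_masked_dis(wal, reg, BIT(3)) -> .set = 0x00080000
 *
 * i.e. bit 3 is mirrored into the write-enable mask in bits 31:16, so the
 * hardware updates only that bit, and .read stays BIT(3) for verification.
 */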

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        wa_masked_en(wal, GEN8_ROW_CHICKEN,
                     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        wa_masked_en(wal, HDC_CHICKEN0,
                     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                     HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        wa_masked_field_set(wal, GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in bdw_init_clock_gating()
         * to disable EUTC clock gating.
         */
        wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                     DOP_CLOCK_GATING_DISABLE);

        wa_masked_en(wal, HALF_SLICE_CHICKEN3,
                     GEN8_SAMPLER_POWER_BYPASS_DIS);

        wa_masked_en(wal, HDC_CHICKEN0,
                     /* WaForceContextSaveRestoreNonCoherent:bdw */
                     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                     (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:chv */
        wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN9_PBE_COMPRESSED_HASH_SELECTION);
                wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
                             GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        wa_masked_en(wal, GEN8_ROW_CHICKEN,
                     FLOW_CONTROL_ENABLE |
                     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
                     GEN9_ENABLE_YV12_BUGFIX |
                     GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        wa_masked_en(wal, CACHE_MODE_1,
                     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
                      GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        wa_masked_en(wal, HDC_CHICKEN0,
                     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        wa_masked_en(wal, HDC_CHICKEN0,
                     HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) ||
            IS_KABYLAKE(i915) ||
            IS_COFFEELAKE(i915) ||
            IS_COMETLAKE(i915))
                wa_masked_en(wal, HALF_SLICE_CHICKEN3,
                             GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
                                struct i915_wa_list *wal)
{
        struct intel_gt *gt = engine->gt;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        wa_masked_field_set(wal, GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}
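
/*
 * Worked example (editor's illustration, not part of the original file):
 * if subslice_7eu[0] == BIT(2), exactly one subslice in slice 0 has 7 EUs,
 * so ss = ffs(BIT(2)) - 1 = 2 and vals[0] = 3 - ss = 1, which the function
 * above then programs via GEN9_IZ_HASHING(0, 1).
 */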

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);
        skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bxt */
        wa_masked_en(wal, GEN8_ROW_CHICKEN,
                     STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER))
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
                     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
                     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        /* WaForceContextSaveRestoreNonCoherent:cnl */
        wa_masked_en(wal, CNL_HDC_CHICKEN0,
                     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaDisableBankHangMode:icl */
        wa_write(wal,
                 GEN8_L3CNTLREG,
                 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
                 GEN8_ERRDETBCTRL);

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                             PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                             GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
                             GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        wa_write_clr_set(wal,
                         GEN10_CACHE_MODE_SS,
                         0, /* write-only, so skip validation */
                         _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

        /* WaDisableGPGPUMidThreadPreemption:icl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

        /* allow headerless messages for preemptible GPGPU context */
        wa_masked_en(wal, GEN10_SAMPLER_MODE,
                     GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

        /* Wa_1604278689:icl,ehl */
        wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
        wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
                         0, /* write-only register; skip validation */
                         0xFFFFFFFF);

        /* Wa_1406306137:icl,ehl */
        wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

/*
 * These settings aren't actually workarounds, but general tuning settings that
 * need to be programmed on several platforms.
 */
static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        /*
         * Although some platforms refer to it as Wa_1604555607, we need to
         * program it even on those that don't explicitly list that
         * workaround.
         *
         * Note that the programming of this register is further modified
         * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
         * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
         * value when read. The default value for this register is zero for all
         * fields and there are no bit masks. So instead of doing a RMW we
         * should just write the TDS timer value. For the same reason read
         * verification is ignored.
         */
        wa_add(wal,
               FF_MODE2,
               FF_MODE2_TDS_TIMER_MASK,
               FF_MODE2_TDS_TIMER_128,
               0);
}
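
/*
 * Note (editor's illustration, not part of the original file): the zero
 * read mask passed to wa_add() above makes wa_verify() a no-op for
 * FF_MODE2, since (cur ^ wa->set) & 0 can never flag a mismatch; that is
 * how the bogus readback described by Wa_1608008084 is tolerated.
 */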

static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
                                       struct i915_wa_list *wal)
{
        gen12_ctx_gt_tuning_init(engine, wal);

        /*
         * Wa_1409142259:tgl,dg1,adl-p
         * Wa_1409347922:tgl,dg1,adl-p
         * Wa_1409252684:tgl,dg1,adl-p
         * Wa_1409217633:tgl,dg1,adl-p
         * Wa_1409207793:tgl,dg1,adl-p
         * Wa_1409178076:tgl,dg1,adl-p
         * Wa_1408979724:tgl,dg1,adl-p
         * Wa_14010443199:tgl,rkl,dg1,adl-p
         * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
         * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
         */
        wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
                     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

        /* WaDisableGPGPUMidThreadPreemption:gen12 */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

        /*
         * Wa_16011163337
         *
         * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
         * to Wa_1608008084.
         */
        wa_add(wal,
               FF_MODE2,
               FF_MODE2_GS_TIMER_MASK,
               FF_MODE2_GS_TIMER_224,
               0);
}

static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen12_ctx_workarounds_init(engine, wal);

        /* Wa_1409044764 */
        wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
                      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);

        /* Wa_22010493298 */
        wa_masked_en(wal, HIZ_CHICKEN,
                     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
                           struct i915_wa_list *wal,
                           const char *name)
{
        struct drm_i915_private *i915 = engine->i915;

        if (engine->class != RENDER_CLASS)
                return;

        wa_init_start(wal, name, engine->name);

        if (IS_DG1(i915))
                dg1_ctx_workarounds_init(engine, wal);
        else if (GRAPHICS_VER(i915) == 12)
                gen12_ctx_workarounds_init(engine, wal);
        else if (GRAPHICS_VER(i915) == 11)
                icl_ctx_workarounds_init(engine, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine, wal);
        else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
                cfl_ctx_workarounds_init(engine, wal);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine, wal);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine, wal);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine, wal);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine, wal);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine, wal);
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine, wal);
        else if (GRAPHICS_VER(i915) == 7)
                gen7_ctx_workarounds_init(engine, wal);
        else if (GRAPHICS_VER(i915) == 6)
                gen6_ctx_workarounds_init(engine, wal);
        else if (GRAPHICS_VER(i915) < 8)
                ;
        else
                MISSING_CASE(GRAPHICS_VER(i915));

        wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->set;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}
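
/*
 * For reference (editor's sketch derived from the loop above, not part of
 * the original file), the emitted command stream is:
 *
 *   MI_LOAD_REGISTER_IMM(count)
 *   { register offset, value } x count
 *   MI_NOOP
 *
 * i.e. 2 * count + 2 dwords, matching the intel_ring_begin() reservation.
 */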

static void
gen4_gt_workarounds_init(struct drm_i915_private *i915,
                         struct i915_wa_list *wal)
{
        /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
        wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen4_gt_workarounds_init(i915, wal);

        /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
        wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}

static void
ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        g4x_gt_workarounds_init(i915, wal);

        wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}

static void
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
}

static void
ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
        wa_masked_dis(wal,
                      GEN7_COMMON_SLICE_CHICKEN1,
                      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

        /* WaApplyL3ControlAndL3ChickenMode:ivb */
        wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
        wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

        /* WaForceL3Serialization:ivb */
        wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

static void
vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaForceL3Serialization:vlv */
        wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

        /*
         * WaIncreaseL3CreditsForVLVB0:vlv
         * This is the hardware default actually.
         */
        wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

static void
hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* L3 caching of data atomics doesn't work -- disable it. */
        wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

        wa_add(wal,
               HSW_ROW_CHICKEN3, 0,
               _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
               0 /* XXX does this reg exist? */);

        /* WaVSRefCountFullforceMissDisable:hsw */
        wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_GT_STEP(i915, 0, STEP_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
        unsigned int slice, subslice;
        u32 mcr, mcr_mask;

        GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
        GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
        slice = 0;

        /*
         * Although a platform may have subslices, we need to always steer
         * reads to the lowest instance that isn't fused off.  When Render
         * Power Gating is enabled, grabbing forcewake will only power up a
         * single subslice (the "minconfig") if there isn't a real workload
         * that needs to be run; this means that if we steer register reads to
         * one of the higher subslices, we run the risk of reading back 0's or
         * random garbage.
         */
        subslice = __ffs(intel_sseu_get_subslices(sseu, slice));

        /*
         * If the subslice we picked above also steers us to a valid L3 bank,
         * then we can just rely on the default steering and won't need to
         * worry about explicitly re-steering L3BANK reads later.
         */
        if (i915->gt.info.l3bank_mask & BIT(subslice))
                i915->gt.steering_table[L3BANK] = NULL;

        mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
        mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;

        drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);

        wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
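
/*
 * Worked example (editor's illustration, not part of the original file):
 * with slice 0 and a subslice mask of 0b0110, __ffs() picks subslice 1, so
 * mcr becomes GEN11_MCR_SLICE(0) | GEN11_MCR_SUBSLICE(1) and unicast
 * register reads are steered to a subslice that is guaranteed to be
 * powered.
 */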

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        icl_wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_clr_set(wal,
                         GEN11_GACB_PERF_CTRL,
                         GEN11_HASH_CTRL_MASK,
                         GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);

        /* Wa_1607087056:icl,ehl,jsl */
        if (IS_ICELAKE(i915) ||
            IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}

/*
 * Though there are per-engine instances of these registers,
 * they retain their value through engine resets and should
 * only be provided on the GT workaround list rather than
 * the engine-specific workaround list.
 */
static void
wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        struct intel_engine_cs *engine;
        struct intel_gt *gt = &i915->gt;
        int id;

        for_each_engine(engine, gt, id) {
                if (engine->class != VIDEO_DECODE_CLASS ||
                    (engine->instance % 2))
                        continue;

                wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
                            IECPUNIT_CLKGATE_DIS);
        }
}

static void
gen12_gt_workarounds_init(struct drm_i915_private *i915,
                          struct i915_wa_list *wal)
{
        icl_wa_init_mcr(i915, wal);

        /* Wa_14011060649:tgl,rkl,dg1,adls,adl-p */
        wa_14011060649(i915, wal);
}

static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen12_gt_workarounds_init(i915, wal);

        /* Wa_1409420604:tgl */
        if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0))
                wa_write_or(wal,
                            SUBSLICE_UNIT_LEVEL_CLKGATE2,
                            CPSSUNIT_CLKGATE_DIS);

        /* Wa_1607087056:tgl also known as BUG:1409180338 */
        if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

        /* Wa_1408615072:tgl[a0] */
        if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0))
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
                            VSUNIT_CLKGATE_DIS_TGL);
}

static void
dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen12_gt_workarounds_init(i915, wal);

        /* Wa_1607087056:dg1 */
        if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

        /* Wa_1409420604:dg1 */
        if (IS_DG1(i915))
                wa_write_or(wal,
                            SUBSLICE_UNIT_LEVEL_CLKGATE2,
                            CPSSUNIT_CLKGATE_DIS);

        /* Wa_1408615072:dg1 */
        /* Empirical testing shows this register is unaffected by engine reset. */
        if (IS_DG1(i915))
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
                            VSUNIT_CLKGATE_DIS_TGL);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (IS_DG1(i915))
                dg1_gt_workarounds_init(i915, wal);
        else if (IS_TIGERLAKE(i915))
                tgl_gt_workarounds_init(i915, wal);
        else if (GRAPHICS_VER(i915) == 12)
                gen12_gt_workarounds_init(i915, wal);
        else if (GRAPHICS_VER(i915) == 11)
                icl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (IS_HASWELL(i915))
                hsw_gt_workarounds_init(i915, wal);
        else if (IS_VALLEYVIEW(i915))
                vlv_gt_workarounds_init(i915, wal);
        else if (IS_IVYBRIDGE(i915))
                ivb_gt_workarounds_init(i915, wal);
        else if (GRAPHICS_VER(i915) == 6)
                snb_gt_workarounds_init(i915, wal);
        else if (GRAPHICS_VER(i915) == 5)
                ilk_gt_workarounds_init(i915, wal);
        else if (IS_G4X(i915))
                g4x_gt_workarounds_init(i915, wal);
        else if (GRAPHICS_VER(i915) == 4)
                gen4_gt_workarounds_init(i915, wal);
        else if (GRAPHICS_VER(i915) <= 8)
                ;
        else
                MISSING_CASE(GRAPHICS_VER(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT", "global");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(uncore,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
        if ((cur ^ wa->set) & wa->read) {
                DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg),
                          cur, cur & wa->read, wa->set & wa->read);

                return false;
        }

        return true;
}
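
/*
 * Example (editor's illustration, not part of the original file): for
 * wa->set == 0x8 and wa->read == 0xc, a readback of cur == 0x4 fails,
 * since (0x4 ^ 0x8) & 0xc == 0xc, while cur == 0x9 passes because the
 * differing bit 0 lies outside the read mask.
 */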

static void
wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
{
        struct intel_uncore *uncore = gt->uncore;
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(uncore, wal);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                u32 val, old = 0;

                /* open-coded rmw due to steering */
                old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0;
                val = (old & ~wa->clr) | wa->set;
                if (val != old || !wa->clr)
                        intel_uncore_write_fw(uncore, wa->reg, val);

                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                        wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg),
                                  wal->name, "application");
        }

        intel_uncore_forcewake_put__locked(uncore, fw);
        spin_unlock_irqrestore(&uncore->lock, flags);
}
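
/*
 * Illustration of the open-coded rmw above (editor's example, not part of
 * the original file): for wa->clr == 0xf0 and wa->set == 0x30, an old
 * value of 0xa5 becomes (0xa5 & ~0xf0) | 0x30 == 0x35. The write is
 * skipped when the value is already correct, while pure writes
 * (wa->clr == 0) always go through.
 */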

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
        wa_list_apply(gt, &gt->i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_gt *gt,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct intel_uncore *uncore = gt->uncore;
        struct i915_wa *wa;
        enum forcewake_domains fw;
        unsigned long flags;
        unsigned int i;
        bool ok = true;

        fw = wal_get_fw_for_rmw(uncore, wal);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa,
                                intel_gt_read_register_fw(gt, wa->reg),
                                wal->name, from);

        intel_uncore_forcewake_put__locked(uncore, fw);
        spin_unlock_irqrestore(&uncore->lock, flags);

        return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
        return wa_list_verify(gt, &gt->i915->gt_wa_list, from);
}

__maybe_unused
static bool is_nonpriv_flags_valid(u32 flags)
{
        /* Check only valid flag bits are set */
        if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
                return false;

        /* NB: Only 3 out of 4 enum values are valid for access field */
        if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
            RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
                return false;

        return true;
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
                return;

        wa.reg.reg |= flags;
        _wa_add(wal, &wa);
}
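
/*
 * Note (editor's illustration; the exact bit positions are an assumption
 * based on the RING_FORCE_TO_NONPRIV_* definitions, not stated in this
 * file): the access/range flags are OR'ed into otherwise-unused bits of
 * the register offset itself (the low bits freed by dword alignment and
 * bits above the MMIO range), so a single u32 per slot encodes both the
 * address and its permissions.
 */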

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);

        /* WaSendPushConstantsFromMMIO:skl,bxt */
        whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /*
         * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
         *
         * This covers 4 registers which are next to one another:
         *   - PS_INVOCATION_COUNT
         *   - PS_INVOCATION_COUNT_UDW
         *   - PS_DEPTH_COUNT
         *   - PS_DEPTH_COUNT_UDW
         */
        whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                          RING_FORCE_TO_NONPRIV_ACCESS_RD |
                          RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cml_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                whitelist_reg_ext(w,
                                  RING_CTX_TIMESTAMP(engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);

        cfl_whitelist_build(engine);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
                whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

                /* WaAllowUMDToModifySamplerMode:icl */
                whitelist_reg(w, GEN10_SAMPLER_MODE);

                /* WaEnableStateCacheRedirectToCS:icl */
                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

                /*
                 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
                 *
                 * This covers 4 registers which are next to one another:
                 *   - PS_INVOCATION_COUNT
                 *   - PS_INVOCATION_COUNT_UDW
                 *   - PS_DEPTH_COUNT
                 *   - PS_DEPTH_COUNT_UDW
                 */
                whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD |
                                  RING_FORCE_TO_NONPRIV_RANGE_4);
1459                 break;
1460
1461         case VIDEO_DECODE_CLASS:
1462                 /* hucStatusRegOffset */
1463                 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1464                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1465                 /* hucUKernelHdrInfoRegOffset */
1466                 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1467                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1468                 /* hucStatus2RegOffset */
1469                 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1470                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1471                 whitelist_reg_ext(w,
1472                                   RING_CTX_TIMESTAMP(engine->mmio_base),
1473                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1474                 break;
1475
1476         default:
1477                 whitelist_reg_ext(w,
1478                                   RING_CTX_TIMESTAMP(engine->mmio_base),
1479                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1480                 break;
1481         }
1482 }
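
/*
 * Informational sketch: the HuC registers above are expressed as fixed
 * offsets from the engine's MMIO base, so the absolute address depends on
 * which video engine is being built. For a hypothetical mmio_base of
 * 0x1c0000 (illustrative value only), the entries would decode as
 *
 *   hucStatusRegOffset          -> 0x1c0000 + 0x2000 = 0x1c2000
 *   hucUKernelHdrInfoRegOffset  -> 0x1c0000 + 0x2014 = 0x1c2014
 *   hucStatus2RegOffset         -> 0x1c0000 + 0x23b0 = 0x1c23b0
 */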

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        switch (engine->class) {
        case RENDER_CLASS:
                /*
                 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
                 * Wa_1408556865:tgl
                 *
                 * This covers 4 registers which are next to one another:
                 *   - PS_INVOCATION_COUNT
                 *   - PS_INVOCATION_COUNT_UDW
                 *   - PS_DEPTH_COUNT
                 *   - PS_DEPTH_COUNT_UDW
                 */
                whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD |
                                  RING_FORCE_TO_NONPRIV_RANGE_4);

                /* Wa_1808121037:tgl */
                whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);

                /* Wa_1806527549:tgl */
                whitelist_reg(w, HIZ_CHICKEN);
                break;
        default:
                whitelist_reg_ext(w,
                                  RING_CTX_TIMESTAMP(engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
                break;
        }
}

static void dg1_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        tgl_whitelist_build(engine);

        /* GEN:BUG:1409280441:dg1 */
        if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) &&
            (engine->class == RENDER_CLASS ||
             engine->class == COPY_ENGINE_CLASS))
                whitelist_reg_ext(w, RING_ID(engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *w = &engine->whitelist;

        wa_init_start(w, "whitelist", engine->name);

        if (IS_DG1(i915))
                dg1_whitelist_build(engine);
        else if (GRAPHICS_VER(i915) == 12)
                tgl_whitelist_build(engine);
        else if (GRAPHICS_VER(i915) == 11)
                icl_whitelist_build(engine);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(engine);
        else if (IS_COMETLAKE(i915))
                cml_whitelist_build(engine);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(engine);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(engine);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(engine);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(engine);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(engine);
        else if (GRAPHICS_VER(i915) <= 8)
                ;
        else
                MISSING_CASE(GRAPHICS_VER(i915));

        wa_init_finish(w);
}
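
/*
 * Note (informational): the chain above is deliberately ordered from newest
 * platform to oldest; more specific checks such as IS_DG1() must be tested
 * before the generic GRAPHICS_VER(i915) == 12 branch would also match them.
 * Gen8 and earlier intentionally fall through to the empty statement, since
 * no registers are whitelisted there.
 */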

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
        const struct i915_wa_list *wal = &engine->whitelist;
        struct intel_uncore *uncore = engine->uncore;
        const u32 base = engine->mmio_base;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(wa->reg));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(RING_NOPID(base)));
}
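
/*
 * Informational sketch: for a whitelist with three entries, the two loops
 * above would program something like
 *
 *   RING_FORCE_TO_NONPRIV(base, 0) = <offset of entry 0>
 *   RING_FORCE_TO_NONPRIV(base, 1) = <offset of entry 1>
 *   RING_FORCE_TO_NONPRIV(base, 2) = <offset of entry 2>
 *   RING_FORCE_TO_NONPRIV(base, 3..RING_MAX_NONPRIV_SLOTS - 1)
 *                                  = <offset of RING_NOPID(base)>
 *
 * so stale offsets left over from a previous list can never linger in the
 * unused slots.
 */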

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
            IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) {
                /*
                 * Wa_1607138336:tgl[a0],dg1[a0]
                 * Wa_1607063988:tgl[a0],dg1[a0]
                 */
                wa_write_or(wal,
                            GEN9_CTX_PREEMPT_REG,
                            GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
        }

        if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) {
                /*
                 * Wa_1606679103:tgl
                 * (see also Wa_1606682166:icl)
                 */
                wa_write_or(wal,
                            GEN7_SARCHKMD,
                            GEN7_DISABLE_SAMPLER_PREFETCH);
        }

        if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
                wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);

                /*
                 * Wa_1407928979:tgl A*
                 * Wa_18011464164:tgl[B0+],dg1[B0+]
                 * Wa_22010931296:tgl[B0+],dg1[B0+]
                 * Wa_14010919138:rkl,dg1,adl-s,adl-p
                 */
                wa_write_or(wal, GEN7_FF_THREAD_MODE,
                            GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

                /*
                 * Wa_1606700617:tgl,dg1,adl-p
                 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
                 * Wa_14010826681:tgl,dg1,rkl,adl-p
                 */
                wa_masked_en(wal,
                             GEN9_CS_DEBUG_MODE1,
                             FF_DOP_CLOCK_GATE_DISABLE);
        }

        if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
            IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
                wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                             GEN12_PUSH_CONST_DEREF_HOLD_DIS);

                /*
                 * Wa_1409085225:tgl
                 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
                 */
                wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
        }

        if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /*
                 * Wa_1607030317:tgl
                 * Wa_1607186500:tgl
                 * Wa_1607297627:tgl,rkl,dg1[a0]
                 *
                 * On TGL and RKL there are multiple entries for this WA in the
                 * BSpec; some indicate this is an A0-only WA, others indicate
                 * it applies to all steppings so we trust the "all steppings."
                 * For DG1 this only applies to A0.
                 */
                wa_masked_en(wal,
                             GEN6_RC_SLEEP_PSMI_CONTROL,
                             GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
                             GEN8_RC_SEMA_IDLE_MSG_DISABLE);
        }

        if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1406941453:tgl,rkl,dg1 */
                wa_masked_en(wal,
                             GEN10_SAMPLER_MODE,
                             ENABLE_SMALLPL);
        }

        if (GRAPHICS_VER(i915) == 11) {
                /* This is not a Wa; enable it for better image quality. */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_clr_set(wal,
                                 GEN8_GARBCNTL,
                                 GEN11_HASH_CTRL_EXCL_MASK,
                                 GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_clr_set(wal,
                                 GEN11_GLBLINVL,
                                 GEN11_BANK_HASH_ADDR_EXCL_MASK,
                                 GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                            GEN10_SCRATCH_LNCF2,
                            PMFLUSHDONE_LNICRSDROP |
                            PMFLUSH_GAPL3UNBLOCK |
                            PMFLUSHDONE_LNEBLK);

                /* Wa_1406609255:icl (pre-prod) */
                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                        wa_write_or(wal,
                                    GEN7_SARCHKMD,
                                    GEN7_DISABLE_DEMAND_PREFETCH);

                /* Wa_1606682166:icl */
                wa_write_or(wal,
                            GEN7_SARCHKMD,
                            GEN7_DISABLE_SAMPLER_PREFETCH);

                /* Wa_1409178092:icl */
                wa_write_clr_set(wal,
                                 GEN11_SCRATCH2,
                                 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
                                 0);

                /* WaEnable32PlaneMode:icl */
                wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
                             GEN11_ENABLE_32_PLANE_MODE);

                /*
                 * Wa_1408615072:icl,ehl  (vsunit)
                 * Wa_1407596294:icl,ehl  (hsunit)
                 */
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
                            VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);

                /* Wa_1407352427:icl,ehl */
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
                            PSDUNIT_CLKGATE_DIS);

                /* Wa_1406680159:icl,ehl */
                wa_write_or(wal,
                            SUBSLICE_UNIT_LEVEL_CLKGATE,
                            GWUNIT_CLKGATE_DIS);

                /*
                 * Wa_1408767742:icl[a2..forever],ehl[all]
                 * Wa_1605460711:icl[a0..c0]
                 */
                wa_write_or(wal,
                            GEN7_FF_THREAD_MODE,
                            GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

                /* Wa_22010271021 */
                wa_masked_en(wal,
                             GEN9_CS_DEBUG_MODE1,
                             FF_DOP_CLOCK_GATE_DISABLE);
        }

        if (IS_GRAPHICS_VER(i915, 9, 12)) {
                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) ||
            IS_KABYLAKE(i915) ||
            IS_COFFEELAKE(i915) ||
            IS_COMETLAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                             FF_SLICE_CS_CHICKEN2,
                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (GRAPHICS_VER(i915) == 9) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                             GEN9_CSFE_CHICKEN1_RCS,
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                            BDW_SCRATCH1,
                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_clr_set(wal,
                                         GEN8_L3SQCREG1,
                                         L3_PRIO_CREDITS_MASK,
                                         L3_GENERAL_PRIO_CREDITS(62) |
                                         L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);

                /* Disable atomics in L3 to prevent unrecoverable hangs */
                wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
                                 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
                wa_write_clr_set(wal, GEN8_L3SQCREG4,
                                 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
                wa_write_clr_set(wal, GEN9_SCRATCH1,
                                 EVICTION_PERF_FIX_ENABLE, 0);
        }

        if (IS_HASWELL(i915)) {
                /* WaSampleCChickenBitEnable:hsw */
                wa_masked_en(wal,
                             HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);

                wa_masked_dis(wal,
                              CACHE_MODE_0_GEN7,
                              /* enable HiZ Raw Stall Optimization */
                              HIZ_RAW_STALL_OPT_DISABLE);
        }

        if (IS_VALLEYVIEW(i915)) {
                /* WaDisableEarlyCull:vlv */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

                /*
                 * WaVSThreadDispatchOverride:ivb,vlv
                 *
                 * This actually overrides the dispatch
                 * mode for all thread types.
                 */
                wa_write_clr_set(wal,
                                 GEN7_FF_THREAD_MODE,
                                 GEN7_FF_SCHED_MASK,
                                 GEN7_FF_TS_SCHED_HW |
                                 GEN7_FF_VS_SCHED_HW |
                                 GEN7_FF_DS_SCHED_HW);

                /* WaPsdDispatchEnable:vlv */
                /* WaDisablePSDDualDispatchEnable:vlv */
                wa_masked_en(wal,
                             GEN7_HALF_SLICE_CHICKEN1,
                             GEN7_MAX_PS_THREAD_DEP |
                             GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
        }

        if (IS_IVYBRIDGE(i915)) {
                /* WaDisableEarlyCull:ivb */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

                if (0) { /* causes HiZ corruption on ivb:gt1 */
                        /* enable HiZ Raw Stall Optimization */
                        wa_masked_dis(wal,
                                      CACHE_MODE_0_GEN7,
                                      HIZ_RAW_STALL_OPT_DISABLE);
                }

                /*
                 * WaVSThreadDispatchOverride:ivb,vlv
                 *
                 * This actually overrides the dispatch
                 * mode for all thread types.
                 */
                wa_write_clr_set(wal,
                                 GEN7_FF_THREAD_MODE,
                                 GEN7_FF_SCHED_MASK,
                                 GEN7_FF_TS_SCHED_HW |
                                 GEN7_FF_VS_SCHED_HW |
                                 GEN7_FF_DS_SCHED_HW);

                /* WaDisablePSDDualDispatchEnable:ivb */
                if (IS_IVB_GT1(i915))
                        wa_masked_en(wal,
                                     GEN7_HALF_SLICE_CHICKEN1,
                                     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
        }

        if (GRAPHICS_VER(i915) == 7) {
                /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
                wa_masked_en(wal,
                             GFX_MODE_GEN7,
                             GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);

                /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
                wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);

                /*
                 * BSpec says this must be set, even though
                 * WaDisable4x2SubspanOptimization:ivb,hsw
                 * isn't listed for VLV.
                 */
                wa_masked_en(wal,
                             CACHE_MODE_1,
                             PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);

                /*
                 * BSpec recommends 8x4 when MSAA is used,
                 * however in practice 16x4 seems fastest.
                 *
                 * Note that PS/WM thread counts depend on the WIZ hashing
                 * disable bit, which we don't touch here, but it's good
                 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
                 */
                wa_add(wal, GEN7_GT_MODE, 0,
                       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
                                     GEN6_WIZ_HASHING_16x4),
                       GEN6_WIZ_HASHING_16x4);
        }
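
        /*
         * Informational sketch: GEN7_GT_MODE is a "masked" register, where
         * the high 16 bits of a write select which of the low 16 bits are
         * actually updated. _MASKED_FIELD(mask, value) builds such a write;
         * e.g. for an illustrative mask of 0x000c and value of 0x0004:
         *
         *   _MASKED_FIELD(0x000c, 0x0004) == (0x000c << 16) | 0x0004
         *                                 == 0x000c0004
         *
         * The final wa_add() argument above is the readback mask, so later
         * verification only checks the GEN6_WIZ_HASHING_16x4 bits.
         */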

        if (IS_GRAPHICS_VER(i915, 6, 7))
                /*
                 * We need to disable the AsyncFlip performance optimisations in
                 * order to use MI_WAIT_FOR_EVENT within the CS. It should
                 * already be programmed to '1' on all products.
                 *
                 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
                 */
                wa_masked_en(wal,
                             MI_MODE,
                             ASYNC_FLIP_PERF_DISABLE);

        if (GRAPHICS_VER(i915) == 6) {
                /*
                 * Required for the hardware to program scanline values for
                 * waiting.
                 * WaEnableFlushTlbInvalidationMode:snb
                 */
                wa_masked_en(wal,
                             GFX_MODE,
                             GFX_TLB_INVALIDATE_EXPLICIT);

                /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
                wa_masked_en(wal,
                             _3D_CHICKEN,
                             _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);

                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             /* WaStripsFansDisableFastClipPerformanceFix:snb */
                             _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
                             /*
                              * Bspec says:
                              * "This bit must be set if 3DSTATE_CLIP clip mode is set
                              * to normal and 3DSTATE_SF number of SF output attributes
                              * is more than 16."
                              */
                             _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);

                /*
                 * BSpec recommends 8x4 when MSAA is used,
                 * however in practice 16x4 seems fastest.
                 *
                 * Note that PS/WM thread counts depend on the WIZ hashing
                 * disable bit, which we don't touch here, but it's good
                 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
                 */
                wa_add(wal,
                       GEN6_GT_MODE, 0,
                       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
                       GEN6_WIZ_HASHING_16x4);

                /* WaDisable_RenderCache_OperationalFlush:snb */
                wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);

                /*
                 * From the Sandybridge PRM, volume 1 part 3, page 24:
                 * "If this bit is set, STCunit will have LRA as replacement
                 *  policy. [...] This bit must be reset. LRA replacement
                 *  policy is not supported."
                 */
                wa_masked_dis(wal,
                              CACHE_MODE_0,
                              CM0_STC_EVICT_DISABLE_LRA_SNB);
        }

        if (IS_GRAPHICS_VER(i915, 4, 6))
                /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
                wa_add(wal, MI_MODE,
                       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
                       /* XXX bit doesn't stick on Broadwater */
                       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);

        if (GRAPHICS_VER(i915) == 4)
                /*
                 * Disable CONSTANT_BUFFER before it is loaded from the context
                 * image. As soon as it is loaded, it is executed and the stored
                 * address may no longer be valid, leading to a GPU hang.
                 *
                 * This imposes the requirement that userspace reload their
                 * CONSTANT_BUFFER on every batch; fortunately, that is a
                 * requirement they were already accustomed to from before
                 * contexts were enabled.
                 */
                wa_add(wal, ECOSKPD,
                       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
                       0 /* XXX bit doesn't stick on Broadwater */);
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_E0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
        }
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
                return;

        if (engine->class == RENDER_CLASS)
                rcs_engine_wa_init(engine, wal);
        else
                xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GRAPHICS_VER(engine->i915) < 4)
                return;

        wa_init_start(wal, "engine", engine->name);
        engine_init_workarounds(engine, wal);
        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->gt, &engine->wa_list);
}

struct mcr_range {
        u32 start;
        u32 end;
};

static const struct mcr_range mcr_ranges_gen8[] = {
        { .start = 0x5500, .end = 0x55ff },
        { .start = 0x7000, .end = 0x7fff },
        { .start = 0x9400, .end = 0x97ff },
        { .start = 0xb000, .end = 0xb3ff },
        { .start = 0xe000, .end = 0xe7ff },
        {},
};

static const struct mcr_range mcr_ranges_gen12[] = {
        { .start =  0x8150, .end =  0x815f },
        { .start =  0x9520, .end =  0x955f },
        { .start =  0xb100, .end =  0xb3ff },
        { .start =  0xde80, .end =  0xe8ff },
        { .start = 0x24a00, .end = 0x24a7f },
        {},
};

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
        const struct mcr_range *mcr_ranges;
        int i;

        if (GRAPHICS_VER(i915) >= 12)
                mcr_ranges = mcr_ranges_gen12;
        else if (GRAPHICS_VER(i915) >= 8)
                mcr_ranges = mcr_ranges_gen8;
        else
                return false;

        /*
         * Registers in these ranges are affected by the MCR selector
         * which only controls CPU initiated MMIO. Routing does not
         * work for CS access so we cannot verify them on this path.
         */
        for (i = 0; mcr_ranges[i].start; i++)
                if (offset >= mcr_ranges[i].start &&
                    offset <= mcr_ranges[i].end)
                        return true;

        return false;
}
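
/*
 * Usage sketch (informational): mcr_range() is a pure predicate over the
 * tables above; e.g. on a gen8 platform
 *
 *   mcr_range(i915, 0xb0f0) -> true   (falls inside 0xb000..0xb3ff)
 *   mcr_range(i915, 0x2358) -> false  (outside every listed range)
 *
 * Entries that hit a range are skipped both when emitting the SRM commands
 * below and when comparing the results, so the two loops stay in agreement.
 */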

static int
wa_list_srm(struct i915_request *rq,
            const struct i915_wa_list *wal,
            struct i915_vma *vma)
{
        struct drm_i915_private *i915 = rq->engine->i915;
        unsigned int i, count = 0;
        const struct i915_wa *wa;
        u32 srm, *cs;

        srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
        if (GRAPHICS_VER(i915) >= 8)
                srm++;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
                        count++;
        }

        cs = intel_ring_begin(rq, 4 * count);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                u32 offset = i915_mmio_reg_offset(wa->reg);

                if (mcr_range(i915, offset))
                        continue;

                *cs++ = srm;
                *cs++ = offset;
                *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
                *cs++ = 0;
        }
        intel_ring_advance(rq, cs);

        return 0;
}
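
/*
 * Informational sketch of the command stream built above, per entry:
 *
 *   dw0: MI_STORE_REGISTER_MEM (| MI_SRM_LRM_GLOBAL_GTT)
 *   dw1: MMIO offset of the register to sample
 *   dw2: GGTT address of results[i]
 *   dw3: upper address dword on gen8+, otherwise a zero that pads the
 *        command out to a fixed 4 dwords
 *
 * The "srm++" bumps the opcode's dword-length field to account for the
 * extra address dword on gen8+. Note the store address is indexed by i, the
 * position in the full list, not by the count of emitted commands, so
 * skipped MCR entries simply leave their results slot untouched.
 */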

static int engine_wa_list_verify(struct intel_context *ce,
                                 const struct i915_wa_list * const wal,
                                 const char *from)
{
        const struct i915_wa *wa;
        struct i915_request *rq;
        struct i915_vma *vma;
        struct i915_gem_ww_ctx ww;
        unsigned int i;
        u32 *results;
        int err;

        if (!wal->count)
                return 0;

        vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
                                           wal->count * sizeof(u32));
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        intel_engine_pm_get(ce->engine);
        i915_gem_ww_ctx_init(&ww, false);
retry:
        err = i915_gem_object_lock(vma->obj, &ww);
        if (err == 0)
                err = intel_context_pin_ww(ce, &ww);
        if (err)
                goto err_pm;

        err = i915_vma_pin_ww(vma, &ww, 0, 0,
                              i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
        if (err)
                goto err_unpin;

        rq = i915_request_create(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_vma;
        }

        err = i915_request_await_object(rq, vma->obj, true);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
        if (err == 0)
                err = wa_list_srm(rq, wal, vma);

        i915_request_get(rq);
        if (err)
                i915_request_set_error_once(rq, err);
        i915_request_add(rq);

        if (err)
                goto err_rq;

        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto err_rq;
        }

        results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
        if (IS_ERR(results)) {
                err = PTR_ERR(results);
                goto err_rq;
        }

        err = 0;
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
                        continue;

                if (!wa_verify(wa, results[i], wal->name, from))
                        err = -ENXIO;
        }

        i915_gem_object_unpin_map(vma->obj);

err_rq:
        i915_request_put(rq);
err_vma:
        i915_vma_unpin(vma);
err_unpin:
        intel_context_unpin(ce);
err_pm:
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);
        intel_engine_pm_put(ce->engine);
        i915_vma_put(vma);
        return err;
}
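
/*
 * Locking sketch (informational): the function above follows the usual i915
 * ww transaction shape, where any -EDEADLK from taking a lock unwinds to the
 * err_pm label, backs off and retries the whole acquisition:
 *
 *   i915_gem_ww_ctx_init(&ww, false);
 * retry:
 *   err = i915_gem_object_lock(obj, &ww);
 *   ...
 *   if (err == -EDEADLK) {
 *           err = i915_gem_ww_ctx_backoff(&ww);
 *           if (!err)
 *                   goto retry;
 *   }
 *   i915_gem_ww_ctx_fini(&ww);
 *
 * This is why the error labels run in reverse acquisition order before the
 * backoff check.
 */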

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
                                    const char *from)
{
        return engine_wa_list_verify(engine->kernel_context,
                                     &engine->wa_list,
                                     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif