drivers/gpu/drm/i915/intel_workarounds.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please note that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

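/*
 * Rough usage flow (a sketch, not an exhaustive contract): a list is built
 * once with wa_init_start()/_wa_add()/wa_init_finish(), and is then either
 * emitted into the ring for a context (intel_engine_emit_ctx_wa()) or applied
 * by MMIO read-modify-write (wa_list_apply()) whenever the hardware state
 * needs to be (re)established.
 */
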
static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
	wal->name = name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
			 wal->wa_count, wal->name);
}

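/*
 * _wa_add() keeps wal->list sorted by mmio offset: the array grows in
 * WA_LIST_CHUNK increments (the previous array is freed after its contents
 * are copied), a binary search finds an existing entry for the same register
 * so that masks/values are merged (an entry whose bits would be fully
 * overwritten is reported and discarded), and a final insertion-sort pass
 * bubbles a new entry into place.
 */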
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->mask & ~wa_->mask) == 0) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->mask, wa_->val);

				wa_->val &= ~wa->mask;
			}

			wal->wa_count++;
			wa_->val |= wa->val;
			wa_->mask |= wa->mask;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

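/*
 * Note the rmw semantics used by wa_list_apply(): for each entry the
 * register is updated as (old & ~mask) | val. So wa_write_or() below passes
 * mask == val, i.e. it only sets bits, while wa_write() passes mask == ~0 to
 * replace the whole register.
 */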
static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val)
{
	struct i915_wa wa = {
		.reg = reg,
		.mask = mask,
		.val = val
	};

	_wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, val);
}

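/*
 * The WA_* macros below implicitly use a variable named "wal" from the
 * calling scope; they are only meant for the *_ctx_workarounds_init()
 * helpers in this file. _MASKED_BIT_ENABLE()/_MASKED_FIELD() encode the
 * affected bits into the upper half of these special masked registers.
 */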
#define WA_SET_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen8_ctx_workarounds_init(engine);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen8_ctx_workarounds_init(engine);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace
	 * is still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining the old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

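/*
 * Worked example for the hashing tune below: if subslice_7eu[i] == 0b0100,
 * the only subslice with 7 EUs is ss == 2 (ffs() - 1), so vals[i] = 3 - 2 = 1,
 * which is then programmed via the GEN9_IZ_HASHING() fields of GEN7_GT_MODE.
 */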
static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	gen9_ctx_workarounds_init(engine);
	skl_tune_iz_hashing(engine);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_masked_or(wal,
			   GEN10_CACHE_MODE_SS,
			   0, /* write-only, so skip validation */
			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	wa_init_start(wal, "context");

	if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_ctx_workarounds_init(engine);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

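/*
 * The ring emission below is a single LRI packet:
 *
 *   [ MI_LOAD_REGISTER_IMM(count) ]
 *   [ reg offset ][ value ]            x count
 *   [ MI_NOOP ]                        pad to an even number of dwords
 *
 * hence the (count * 2 + 2) dwords reserved from intel_ring_begin().
 */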
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, wal->count * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->val;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	u32 mcr_slice_subslice_mask;

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 * This might be incompatible with
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
	 * Fortunately, this should not happen in production hardware, so
	 * we only assert that this is the case (instead of implementing
	 * something more complex that requires checking the range of every
	 * MMIO read).
	 */
	if (INTEL_GEN(i915) >= 10 &&
	    is_power_of_2(sseu->slice_mask)) {
		/*
		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
		 * enabled subslice, no need to redirect MCR packet
		 */
		u32 slice = fls(sseu->slice_mask);
		u32 fuse3 =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
		u8 ss_mask = sseu->subslice_mask[slice];

		u8 enabled_mask = (ss_mask | ss_mask >>
				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

		/*
		 * Production silicon should have matched L3Bank and
		 * subslice enabled
		 */
		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
	}

	if (INTEL_GEN(i915) >= 11)
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
	else
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	wa_write_masked_or(wal,
			   GEN8_MCR_SELECTOR,
			   mcr_slice_subslice_mask,
			   intel_calculate_mcr_s_ss_select(i915));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	wa_write_or(wal,
		    SUBSLICE_UNIT_LEVEL_CLKGATE,
		    GWUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

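/*
 * Applying a list by MMIO needs forcewake; compute the union of the
 * forcewake domains for every register up front so the whole rmw loop in
 * wa_list_apply() can run under a single forcewake get/put while holding
 * uncore->lock.
 */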
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->val) & wa->mask) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg), cur,
			  cur & wa->mask, wa->val, wa->mask);

		return false;
	}

	return true;
}

static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
				 const char *from)
{
	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}

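/*
 * Whitelist entries reuse struct i915_wa, but only .reg is meaningful (mask
 * and value stay zero); the register offsets themselves are what get written
 * into the RING_FORCE_TO_NONPRIV slots, of which there are only
 * RING_MAX_NONPRIV_SLOTS per engine.
 */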
static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	_wa_add(wal, &wa);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct i915_wa_list *w)
{
	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct i915_wa_list *w)
{
	/* WaAllowUMDToModifyHalfSliceChicken7:icl */
	whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

	/* WaAllowUMDToModifySamplerMode:icl */
	whitelist_reg(w, GEN10_SAMPLER_MODE);

	/* WaEnableStateCacheRedirectToCS:icl */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	GEM_BUG_ON(engine->id != RCS0);

	wa_init_start(w, "whitelist");

	if (IS_GEN(i915, 11))
		icl_whitelist_build(w);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(w);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(w);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(w);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(w);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(w);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(w);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}

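/*
 * Program one RING_FORCE_TO_NONPRIV slot per whitelisted register, then
 * point any remaining slots at the harmless RING_NOPID register.
 */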
void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}

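/*
 * Engine workarounds are split by class: the render engine (RCS) carries the
 * bulk of them, while the other engines (XCS) currently only need the KBL
 * semaphore-poll fix further below. Both lists are applied through
 * intel_engine_apply_workarounds().
 */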
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_GEN(i915, 11)) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/* WaPipelineFlushCoherentLines:icl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_masked_or(wal,
				   GEN8_GARBCNTL,
				   GEN11_HASH_CTRL_EXCL_MASK,
				   GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_masked_or(wal,
				   GEN11_GLBLINVL,
				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* WaForwardProgressSoftReset:icl */
		wa_write_or(wal,
			    GEN10_SCRATCH_LNCF2,
			    PMFLUSHDONE_LNICRSDROP |
			    PMFLUSH_GAPL3UNBLOCK |
			    PMFLUSHDONE_LNEBLK);

		/* Wa_1406609255:icl (pre-prod) */
		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
			wa_write_or(wal,
				    GEN7_SARCHKMD,
				    GEN7_DISABLE_DEMAND_PREFETCH |
				    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_GEN_RANGE(i915, 9, 11)) {
		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
	}

	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (IS_GEN(i915, 9)) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_write_or(wal,
			    BDW_SCRATCH1,
			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_write_masked_or(wal,
					   GEN8_L3SQCREG1,
					   L3_PRIO_CREDITS_MASK,
					   L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);
	}
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
		return;

	if (engine->id == RCS0)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
		return;

	wa_init_start(wal, engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif