/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
        wal->name = name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
                         wal->wa_count, wal->name);
}
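
/*
 * Editor's note (illustrative, not from the original source): _wa_add()
 * below grows the array in WA_LIST_CHUNK (16) entry steps, so e.g. a list
 * that ends up with 18 entries sits in a 32-entry array; the kmemdup()
 * above then replaces it with an exact-fit 18-entry copy.
 */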

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list) {
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
                        /* Free the old array now that it has been copied. */
                        kfree(wal->list);
                }

                wal->list = list;
        }

        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->mask & ~wa_->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->mask, wa_->val);

                                wa_->val &= ~wa->mask;
                        }

                        wal->wa_count++;
                        wa_->val |= wa->val;
                        wa_->mask |= wa->mask;
                        return;
                }
        }

        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}
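
/*
 * Editor's sketch of the merge semantics above (not part of the original
 * source): adding two workarounds for the same (hypothetical) register, e.g.
 *
 *      wa_write_masked_or(wal, REG, 0x000f, 0x0003);
 *      wa_write_masked_or(wal, REG, 0x00f0, 0x0050);
 *
 * leaves a single sorted entry with mask 0x00ff and val 0x0053, while
 * wa_count still counts both. The "Discarding overwritten w/a" path only
 * fires when the new mask is wholly contained in the existing one, in which
 * case the previously recorded value bits are cleared before merging.
 */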

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
                   u32 val)
{
        struct i915_wa wa = {
                .reg = reg,
                .mask = mask,
                .val = val
        };

        _wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, val);
}
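
/*
 * Editor's sketch of the helper semantics (assuming a register whose current
 * hardware value is 0xffff0000; the RMW itself happens in wa_list_apply()
 * below):
 *
 *      wa_write(wal, REG, 0x1);           mask ~0   -> REG = 0x00000001
 *      wa_write_or(wal, REG, 0x1);        mask 0x1  -> REG = 0xffff0001
 *      wa_write_masked_or(wal, REG, 0xff, 0x12);    -> REG = 0xffff0012
 *
 * wa_masked_en() is different: it is meant for "masked" registers, where the
 * written value itself carries a write-enable mask in its upper 16 bits (see
 * the WA_*_MASKED macros below).
 */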

#define WA_SET_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
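
/*
 * Editor's note on the _MASKED_* encoding used above (sketch): for masked
 * registers the upper 16 bits of the written value select which of the lower
 * 16 bits actually take effect, so no read-modify-write cycle is needed:
 *
 *      _MASKED_BIT_ENABLE(0x0010)  == 0x00100010   set bit 4
 *      _MASKED_BIT_DISABLE(0x0010) == 0x00100000   clear bit 4
 *
 * This self-describing format is what allows the context workaround list to
 * be replayed blindly via MI_LOAD_REGISTER_IMM in intel_engine_emit_ctx_wa().
 */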

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen8_ctx_workarounds_init(engine);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen8_ctx_workarounds_init(engine);

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl behaviour regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}
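
/*
 * Worked example for the loop above (editor's sketch): if slice 0 reports
 * subslice_7eu[0] == 0b0100, exactly one subslice (index 2) has seven EUs,
 * so ss = ffs(4) - 1 = 2 and vals[0] = 3 - 2 = 1, which is then programmed
 * into the IZ hashing field for slice 0. Slices whose 7-EU mask has zero or
 * more than one bit set fail the is_power_of_2() test and are skipped.
 */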

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        gen9_ctx_workarounds_init(engine);
        skl_tune_iz_hashing(engine);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* WaEnableStateCacheRedirectToCS:icl */
        WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
                          GEN11_STATE_CACHE_REDIRECT_TO_CS);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        wa_write_masked_or(wal,
                           GEN10_CACHE_MODE_SS,
                           0, /* write-only, so skip validation */
                           _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

        /* WaDisableGPGPUMidThreadPreemption:icl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        wa_init_start(wal, "context");

        if (IS_ICELAKE(i915))
                icl_ctx_workarounds_init(engine);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine);
        else if (IS_COFFEELAKE(i915))
                cfl_ctx_workarounds_init(engine);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine);
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine);
        else if (INTEL_GEN(i915) < 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->val;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}
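
/*
 * Editor's sketch of the command stream emitted above for a two-entry list
 * (the values already carry the masked-register encoding where needed):
 *
 *      MI_LOAD_REGISTER_IMM(2)
 *      <offset of reg A> <val A>
 *      <offset of reg B> <val B>
 *      MI_NOOP
 *
 * i.e. 2 * count + 2 dwords, matching the intel_ring_begin() request.
 */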

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *dev_priv, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(dev_priv) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
                 * enabled subslice, no need to redirect MCR packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matched L3Bank and
                 * subslice enabled
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        if (INTEL_GEN(dev_priv) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                                          GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                                          GEN8_MCR_SUBSLICE_MASK;
        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, MCR
         * packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. In the rare
         * occasions, such as INSTDONE, where this value is dependent
         * on s/ss combo, the read should be done with read_subslice_reg.
         */
        wa_write_masked_or(wal,
                           GEN8_MCR_SELECTOR,
                           mcr_slice_subslice_mask,
                           intel_calculate_mcr_s_ss_select(dev_priv));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_masked_or(wal,
                           GEN11_GACB_PERF_CTRL,
                           GEN11_HASH_CTRL_MASK,
                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        wa_write_or(wal,
                    SUBSLICE_UNIT_LEVEL_CLKGATE,
                    GWUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (IS_ICELAKE(i915))
                icl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct drm_i915_private *dev_priv,
                   const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(dev_priv,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

static void
wa_list_apply(struct drm_i915_private *dev_priv, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(dev_priv, wal);

        spin_lock_irqsave(&dev_priv->uncore.lock, flags);
        intel_uncore_forcewake_get__locked(&dev_priv->uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                u32 val = I915_READ_FW(wa->reg);

                val &= ~wa->mask;
                val |= wa->val;

                I915_WRITE_FW(wa->reg, val);
        }

        intel_uncore_forcewake_put__locked(&dev_priv->uncore, fw);
        spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
}
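
/*
 * Editor's note: the loop above is a plain read-modify-write, i.e. each
 * register ends up as
 *
 *      new = (old & ~wa->mask) | wa->val;
 *
 * Forcewake is acquired once for the union of domains the whole list touches
 * (via wal_get_fw_for_rmw()) rather than per register, so the entire pass
 * runs under a single uncore lock section.
 */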

void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv)
{
        wa_list_apply(dev_priv, &dev_priv->gt_wa_list);
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
        if ((cur ^ wa->val) & wa->mask) {
                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg), cur,
                          cur & wa->mask, wa->val, wa->mask);

                return false;
        }

        return true;
}
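
/*
 * Editor's sketch: wa_verify() only checks the bits a workaround claims. For
 * example, with mask 0x00f0 and val 0x0050, a current value of 0x1053 passes
 * since (0x1053 ^ 0x0050) & 0x00f0 == 0, while 0x1023 is reported as lost.
 */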

static bool wa_list_verify(struct drm_i915_private *dev_priv,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct i915_wa *wa;
        unsigned int i;
        bool ok = true;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa, I915_READ(wa->reg), wal->name, from);

        return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *dev_priv,
                                 const char *from)
{
        return wa_list_verify(dev_priv, &dev_priv->gt_wa_list, from);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        _wa_add(wal, &wa);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct i915_wa_list *w)
{
        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct i915_wa_list *w)
{
        /* WaAllowUMDToModifyHalfSliceChicken7:icl */
        whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

        /* WaAllowUMDToModifySamplerMode:icl */
        whitelist_reg(w, GEN10_SAMPLER_MODE);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *w = &engine->whitelist;

        GEM_BUG_ON(engine->id != RCS0);

        wa_init_start(w, "whitelist");

        if (IS_ICELAKE(i915))
                icl_whitelist_build(w);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(w);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(w);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(w);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(w);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(w);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(w);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *dev_priv = engine->i915;
        const struct i915_wa_list *wal = &engine->whitelist;
        const u32 base = engine->mmio_base;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                I915_WRITE(RING_FORCE_TO_NONPRIV(base, i),
                           i915_mmio_reg_offset(wa->reg));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                I915_WRITE(RING_FORCE_TO_NONPRIV(base, i),
                           i915_mmio_reg_offset(RING_NOPID(base)));
}
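
/*
 * Editor's sketch: with a two-register whitelist, the loops above program
 *
 *      RING_FORCE_TO_NONPRIV(base, 0) = offset of reg A
 *      RING_FORCE_TO_NONPRIV(base, 1) = offset of reg B
 *
 * and point the remaining RING_MAX_NONPRIV_SLOTS - 2 slots at the harmless
 * RING_NOPID register, so stale slot contents cannot whitelist anything.
 */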

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (IS_ICELAKE(i915)) {
                /* This is not a Wa; enable for better image quality */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_masked_or(wal,
                                   GEN8_GARBCNTL,
                                   GEN11_HASH_CTRL_EXCL_MASK,
                                   GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_masked_or(wal,
                                   GEN11_GLBLINVL,
                                   GEN11_BANK_HASH_ADDR_EXCL_MASK,
                                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                            GEN10_SCRATCH_LNCF2,
                            PMFLUSHDONE_LNICRSDROP |
                            PMFLUSH_GAPL3UNBLOCK |
                            PMFLUSHDONE_LNEBLK);

                /* Wa_1406609255:icl (pre-prod) */
                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                        wa_write_or(wal,
                                    GEN7_SARCHKMD,
                                    GEN7_DISABLE_DEMAND_PREFETCH |
                                    GEN7_DISABLE_SAMPLER_PREFETCH);
        }

        if (IS_GEN_RANGE(i915, 9, 11)) {
                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                             FF_SLICE_CS_CHICKEN2,
                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (IS_GEN(i915, 9)) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                             GEN9_CSFE_CHICKEN1_RCS,
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                            BDW_SCRATCH1,
                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_masked_or(wal,
                                           GEN8_L3SQCREG1,
                                           L3_PRIO_CREDITS_MASK,
                                           L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
        }
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
                return;

        if (engine->id == RCS0)
                rcs_engine_wa_init(engine, wal);
        else
                xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
                return;

        wa_init_start(wal, engine->name);
        engine_init_workarounds(engine, wal);
        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->i915, &engine->wa_list);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif