/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please note that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */
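
/*
 * Each workaround is recorded as a struct i915_wa holding the target
 * register, the bits to modify (mask), the value to write (val) and the
 * bits to check on readback (read). Merging in _wa_add() below means a
 * single register may accumulate bits from several workarounds into one
 * list entry.
 */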

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
        wal->name = name;
}

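/*
 * Workaround lists grow in chunks of this many entries;
 * wa_init_finish() trims the final allocation down to the exact count.
 */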
#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
                         wal->wa_count, wal->name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list)
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);

                wal->list = list;
        }

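        /*
         * The list is kept sorted by mmio offset, so a binary search can
         * find an existing entry for this register and merge into it.
         */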
        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->mask & ~wa_->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->mask, wa_->val);

                                wa_->val &= ~wa->mask;
                        }

                        wal->wa_count++;
                        wa_->val |= wa->val;
                        wa_->mask |= wa->mask;
                        wa_->read |= wa->read;
                        return;
                }
        }

        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

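        /*
         * Bubble the freshly appended entry down into its sorted
         * position; a single pass suffices as the rest is already sorted.
         */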
        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
                   u32 val)
{
        struct i915_wa wa = {
                .reg  = reg,
                .mask = mask,
                .val  = val,
                .read = mask,
        };

        _wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, val);
}

static void
ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
{
        struct i915_wa wa = {
                .reg  = reg,
                .mask = mask,
                .val  = val,
                /* Bonkers HW, skip verifying */
        };

        _wa_add(wal, &wa);
}

#define WA_SET_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
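
/*
 * The _MASKED_* helpers (from i915_reg.h) target "masked" registers,
 * where the upper 16 bits of the written value select which of the
 * lower 16 bits actually change. For example, WA_SET_BIT_MASKED(addr,
 * BIT(5)) writes (BIT(5) << 16) | BIT(5), updating only bit 5 and
 * leaving the rest of the register untouched.
 */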

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl behaviour regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
                                struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);
        skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaDisableBankHangMode:icl */
        wa_write(wal,
                 GEN8_L3CNTLREG,
                 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
                 GEN8_ERRDETBCTRL);

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        wa_write_masked_or(wal,
                           GEN10_CACHE_MODE_SS,
                           0, /* write-only, so skip validation */
                           _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

        /* WaDisableGPGPUMidThreadPreemption:icl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

        /* allow headerless messages for preemptible GPGPU context */
        WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
                          GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
                           struct i915_wa_list *wal,
                           const char *name)
{
        struct drm_i915_private *i915 = engine->i915;

        if (engine->class != RENDER_CLASS)
                return;

        wa_init_start(wal, name);

        if (IS_GEN(i915, 11))
                icl_ctx_workarounds_init(engine, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_ctx_workarounds_init(engine, wal);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine, wal);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine, wal);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine, wal);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine, wal);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine, wal);
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine, wal);
        else if (INTEL_GEN(i915) < 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, wal->count * 2 + 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

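        /*
         * One MI_LOAD_REGISTER_IMM header, a (reg, value) pair per
         * workaround, and a trailing MI_NOOP to keep the command stream
         * an even number of dwords (hence count * 2 + 2 above).
         */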
        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->val;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(i915) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
                 * enabled subslice, no need to redirect MCR packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 =
                        intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matched L3Bank and
                 * subslice enabled
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        if (INTEL_GEN(i915) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                                          GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                                          GEN8_MCR_SUBSLICE_MASK;
        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, MCR
         * packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. On the rare
         * occasions, such as INSTDONE, where this value is dependent
         * on s/ss combo, the read should be done with read_subslice_reg.
         */
        wa_write_masked_or(wal,
                           GEN8_MCR_SELECTOR,
                           mcr_slice_subslice_mask,
                           intel_calculate_mcr_s_ss_select(i915));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_masked_or(wal,
                           GEN11_GACB_PERF_CTRL,
                           GEN11_HASH_CTRL_MASK,
                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        wa_write_or(wal,
                    SUBSLICE_UNIT_LEVEL_CLKGATE,
                    GWUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (IS_GEN(i915, 11))
                icl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(uncore,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
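        /*
         * Only bits selected by wa->read are checked; entries added with
         * a zero read mask (e.g. write-only registers) always pass.
         */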
        if ((cur ^ wa->val) & wa->read) {
                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg),
                          cur, cur & wa->read,
                          wa->val, wa->mask);

                return false;
        }

        return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(uncore, wal);

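        /*
         * Take all required forcewake domains once up front, so the
         * cheaper _fw register accessors can be used for the whole list.
         */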
        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                        wa_verify(wa,
                                  intel_uncore_read_fw(uncore, wa->reg),
                                  wal->name, "application");
        }

        intel_uncore_forcewake_put__locked(uncore, fw);
        spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
        wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct i915_wa *wa;
        unsigned int i;
        bool ok = true;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa,
                                intel_uncore_read(uncore, wa->reg),
                                wal->name, from);

        return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
                                 const char *from)
{
        return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

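        /*
         * The access/range flags are ORed into the stored mmio offset;
         * the combined value is what intel_engine_apply_whitelist()
         * writes into the RING_FORCE_TO_NONPRIV slot.
         */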
        wa.reg.reg |= flags;
        _wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /*
         * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
         *
         * This covers 4 registers which are next to one another:
         *   - PS_INVOCATION_COUNT
         *   - PS_INVOCATION_COUNT_UDW
         *   - PS_DEPTH_COUNT
         *   - PS_DEPTH_COUNT_UDW
         */
        whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                          RING_FORCE_TO_NONPRIV_RD |
                          RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
                whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

                /* WaAllowUMDToModifySamplerMode:icl */
                whitelist_reg(w, GEN10_SAMPLER_MODE);

                /* WaEnableStateCacheRedirectToCS:icl */
                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

                /*
                 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
                 *
                 * This covers 4 registers which are next to one another:
                 *   - PS_INVOCATION_COUNT
                 *   - PS_INVOCATION_COUNT_UDW
                 *   - PS_DEPTH_COUNT
                 *   - PS_DEPTH_COUNT_UDW
                 */
                whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                                  RING_FORCE_TO_NONPRIV_RD |
                                  RING_FORCE_TO_NONPRIV_RANGE_4);
                break;

        case VIDEO_DECODE_CLASS:
                /* hucStatusRegOffset */
                whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                /* hucUKernelHdrInfoRegOffset */
                whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                /* hucStatus2RegOffset */
                whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                break;

        default:
                break;
        }
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *w = &engine->whitelist;

        wa_init_start(w, "whitelist");

        if (IS_GEN(i915, 11))
                icl_whitelist_build(engine);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(engine);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(engine);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(engine);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(engine);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(engine);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(engine);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
        const struct i915_wa_list *wal = &engine->whitelist;
        struct intel_uncore *uncore = engine->uncore;
        const u32 base = engine->mmio_base;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(wa->reg));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(RING_NOPID(base)));
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (IS_GEN(i915, 11)) {
                /* This is not a Wa. Enable for better image quality */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                ignore_wa_write_or(wal,
                                   GEN8_L3SQCREG4,
                                   GEN8_LQSC_FLUSH_COHERENT_LINES,
                                   GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_masked_or(wal,
                                   GEN8_GARBCNTL,
                                   GEN11_HASH_CTRL_EXCL_MASK,
                                   GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_masked_or(wal,
                                   GEN11_GLBLINVL,
                                   GEN11_BANK_HASH_ADDR_EXCL_MASK,
                                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                ignore_wa_write_or(wal,
                                   GEN8_L3SQCREG4,
                                   GEN11_LQSC_CLEAN_EVICT_DISABLE,
                                   GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                            GEN10_SCRATCH_LNCF2,
                            PMFLUSHDONE_LNICRSDROP |
                            PMFLUSH_GAPL3UNBLOCK |
                            PMFLUSHDONE_LNEBLK);

                /* Wa_1406609255:icl (pre-prod) */
                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                        wa_write_or(wal,
                                    GEN7_SARCHKMD,
                                    GEN7_DISABLE_DEMAND_PREFETCH);

                /* Wa_1606682166:icl */
                wa_write_or(wal,
                            GEN7_SARCHKMD,
                            GEN7_DISABLE_SAMPLER_PREFETCH);
        }

        if (IS_GEN_RANGE(i915, 9, 11)) {
                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                             FF_SLICE_CS_CHICKEN2,
                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (IS_GEN(i915, 9)) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                             GEN9_CSFE_CHICKEN1_RCS,
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                            BDW_SCRATCH1,
                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_masked_or(wal,
                                           GEN8_L3SQCREG1,
                                           L3_PRIO_CREDITS_MASK,
                                           L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
        }
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
                return;

        if (engine->id == RCS0)
                rcs_engine_wa_init(engine, wal);
        else
                xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
                return;

        wa_init_start(wal, engine->name);
        engine_init_workarounds(engine, wal);
        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->uncore, &engine->wa_list);
}

static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        unsigned int size;
        int err;

        size = round_up(count * sizeof(u32), PAGE_SIZE);
        obj = i915_gem_object_create_internal(vm->i915, size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_obj;
        }

        err = i915_vma_pin(vma, 0, 0,
                           i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
        if (err)
                goto err_obj;

        return vma;

err_obj:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

static int
wa_list_srm(struct i915_request *rq,
            const struct i915_wa_list *wal,
            struct i915_vma *vma)
{
        const struct i915_wa *wa;
        unsigned int i;
        u32 srm, *cs;

        srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
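        /* gen8+ SRM carries a 64-bit address, hence the extra length dword. */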
        if (INTEL_GEN(rq->i915) >= 8)
                srm++;

        cs = intel_ring_begin(rq, 4 * wal->count);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = srm;
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
                *cs++ = 0;
        }
        intel_ring_advance(rq, cs);

        return 0;
}

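/*
 * Check the workarounds from the GPU's point of view: read each register
 * back via MI_STORE_REGISTER_MEM into a scratch buffer and verify the
 * results against the expected values.
 */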
static int engine_wa_list_verify(struct intel_context *ce,
                                 const struct i915_wa_list * const wal,
                                 const char *from)
{
        const struct i915_wa *wa;
        struct i915_request *rq;
        struct i915_vma *vma;
        unsigned int i;
        u32 *results;
        int err;

        if (!wal->count)
                return 0;

        vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_vma;
        }

        err = wa_list_srm(rq, wal, vma);
        if (err)
                goto err_vma;

        i915_request_add(rq);
        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto err_vma;
        }

        results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
        if (IS_ERR(results)) {
                err = PTR_ERR(results);
                goto err_vma;
        }

        err = 0;
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                if (!wa_verify(wa, results[i], wal->name, from))
                        err = -ENXIO;

        i915_gem_object_unpin_map(vma->obj);

err_vma:
        i915_vma_unpin(vma);
        i915_vma_put(vma);
        return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
                                    const char *from)
{
        return engine_wa_list_verify(engine->kernel_context,
                                     &engine->wa_list,
                                     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif