2 * SPDX-License-Identifier: MIT
4 * Copyright © 2014-2018 Intel Corporation
8 #include "intel_context.h"
9 #include "intel_workarounds.h"
12 * DOC: Hardware workarounds
14 * This file is intended as a central place to implement most [1]_ of the
15 * required workarounds for hardware to work as originally intended. They fall
16 * in five basic categories depending on how/when they are applied:
18 * - Workarounds that touch registers that are saved/restored to/from the HW
19 * context image. The list is emitted (via Load Register Immediate commands)
20 * every time a new context is created.
21 * - GT workarounds. The list of these WAs is applied whenever these registers
22 * revert to default values (on GPU reset, suspend/resume [2]_, etc.).
23 * - Display workarounds. The list is applied during display clock-gating
25 * - Workarounds that whitelist a privileged register, so that UMDs can manage
26 * them directly. This is just a special case of an MMIO workaround (as we
27 * write the list of these to-be-whitelisted registers to some special HW
29 * - Workaround batchbuffers, that get executed automatically by the hardware
30 * on every HW context restore.
32 * .. [1] Please notice that there are other WAs that, due to their nature,
33 * cannot be applied from a central place. Those are peppered around the rest
34 * of the code, as needed.
36 * .. [2] Technically, some registers are powercontext saved & restored, so they
37 * survive a suspend/resume. In practice, writing them again is not too
38 * costly and simplifies things. We can revisit this in the future.
43 * Keep things in this file ordered by WA type, as per the above (context, GT,
44 * display, register whitelist, batchbuffer). Then, inside each type, keep the
47 * - Infrastructure functions and macros
48 * - WAs per platform in standard gen/chrono order
49 * - Public functions to init or apply the given workaround type.
52 static void wa_init_start(struct i915_wa_list *wal, const char *name)
57 #define WA_LIST_CHUNK (1 << 4)
59 static void wa_init_finish(struct i915_wa_list *wal)
61 /* Trim unused entries. */
62 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
63 struct i915_wa *list = kmemdup(wal->list,
64 wal->count * sizeof(*list),
76 DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
77 wal->wa_count, wal->name);
80 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
82 unsigned int addr = i915_mmio_reg_offset(wa->reg);
83 unsigned int start = 0, end = wal->count;
84 const unsigned int grow = WA_LIST_CHUNK;
87 GEM_BUG_ON(!is_power_of_2(grow));
89 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
92 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
95 DRM_ERROR("No space for workaround init!\n");
100 memcpy(list, wal->list, sizeof(*wa) * wal->count);
105 while (start < end) {
106 unsigned int mid = start + (end - start) / 2;
108 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
110 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
113 wa_ = &wal->list[mid];
115 if ((wa->mask & ~wa_->mask) == 0) {
116 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
117 i915_mmio_reg_offset(wa_->reg),
118 wa_->mask, wa_->val);
120 wa_->val &= ~wa->mask;
125 wa_->mask |= wa->mask;
126 wa_->read |= wa->read;
132 wa_ = &wal->list[wal->count++];
135 while (wa_-- > wal->list) {
136 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
137 i915_mmio_reg_offset(wa_[1].reg));
138 if (i915_mmio_reg_offset(wa_[1].reg) >
139 i915_mmio_reg_offset(wa_[0].reg))
142 swap(wa_[1], wa_[0]);
147 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
150 struct i915_wa wa = {
161 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
163 wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
167 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
169 wa_write_masked_or(wal, reg, ~0, val);
173 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
175 wa_write_masked_or(wal, reg, val, val);
179 ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
181 struct i915_wa wa = {
185 /* Bonkers HW, skip verifying */
191 #define WA_SET_BIT_MASKED(addr, mask) \
192 wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
194 #define WA_CLR_BIT_MASKED(addr, mask) \
195 wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
197 #define WA_SET_FIELD_MASKED(addr, mask, value) \
198 wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
200 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
201 struct i915_wa_list *wal)
203 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
205 /* WaDisableAsyncFlipPerfMode:bdw,chv */
206 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
208 /* WaDisablePartialInstShootdown:bdw,chv */
209 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
210 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
212 /* Use Force Non-Coherent whenever executing a 3D context. This is a
213 * workaround for a possible hang in the unlikely event a TLB
214 * invalidation occurs during a PSD flush.
216 /* WaForceEnableNonCoherent:bdw,chv */
217 /* WaHdcDisableFetchWhenMasked:bdw,chv */
218 WA_SET_BIT_MASKED(HDC_CHICKEN0,
219 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
220 HDC_FORCE_NON_COHERENT);
222 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
223 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
224 * polygons in the same 8x4 pixel/sample area to be processed without
225 * stalling waiting for the earlier ones to write to Hierarchical Z
228 * This optimization is off by default for BDW and CHV; turn it on.
230 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
232 /* Wa4x4STCOptimizationDisable:bdw,chv */
233 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
236 * BSpec recommends 8x4 when MSAA is used,
237 * however in practice 16x4 seems fastest.
239 * Note that PS/WM thread counts depend on the WIZ hashing
240 * disable bit, which we don't touch here, but it's good
241 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
243 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
244 GEN6_WIZ_HASHING_MASK,
245 GEN6_WIZ_HASHING_16x4);
248 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
249 struct i915_wa_list *wal)
251 struct drm_i915_private *i915 = engine->i915;
253 gen8_ctx_workarounds_init(engine, wal);
255 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
256 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
258 /* WaDisableDopClockGating:bdw
260 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
261 * to disable EUTC clock gating.
263 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
264 DOP_CLOCK_GATING_DISABLE);
266 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
267 GEN8_SAMPLER_POWER_BYPASS_DIS);
269 WA_SET_BIT_MASKED(HDC_CHICKEN0,
270 /* WaForceContextSaveRestoreNonCoherent:bdw */
271 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
272 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
273 (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
276 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
277 struct i915_wa_list *wal)
279 gen8_ctx_workarounds_init(engine, wal);
281 /* WaDisableThreadStallDopClockGating:chv */
282 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
284 /* Improve HiZ throughput on CHV. */
285 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
288 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
289 struct i915_wa_list *wal)
291 struct drm_i915_private *i915 = engine->i915;
294 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
296 * Must match Display Engine. See
297 * WaCompressedResourceDisplayNewHashMode.
299 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
300 GEN9_PBE_COMPRESSED_HASH_SELECTION);
301 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
302 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
305 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
306 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
307 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
308 FLOW_CONTROL_ENABLE |
309 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
311 /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
312 if (!IS_COFFEELAKE(i915))
313 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
314 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
316 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
317 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
318 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
319 GEN9_ENABLE_YV12_BUGFIX |
320 GEN9_ENABLE_GPGPU_PREEMPTION);
322 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
323 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
324 WA_SET_BIT_MASKED(CACHE_MODE_1,
325 GEN8_4x4_STC_OPTIMIZATION_DISABLE |
326 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
328 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
329 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
330 GEN9_CCS_TLB_PREFETCH_ENABLE);
332 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
333 WA_SET_BIT_MASKED(HDC_CHICKEN0,
334 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
335 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
337 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
338 * both tied to WaForceContextSaveRestoreNonCoherent
339 * in some hsds for skl. We keep the tie for all gen9. The
340 * documentation is a bit hazy and so we want to get common behaviour,
341 * even though there is no clear evidence we would need both on kbl/bxt.
342 * This area has been source of system hangs so we play it safe
343 * and mimic the skl regardless of what bspec says.
345 * Use Force Non-Coherent whenever executing a 3D context. This
346 * is a workaround for a possible hang in the unlikely event
347 * a TLB invalidation occurs during a PSD flush.
350 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
351 WA_SET_BIT_MASKED(HDC_CHICKEN0,
352 HDC_FORCE_NON_COHERENT);
354 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
355 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
356 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
357 GEN8_SAMPLER_POWER_BYPASS_DIS);
359 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
360 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
363 * Supporting preemption with fine-granularity requires changes in the
364 * batch buffer programming. Since we can't break old userspace, we
365 * need to set our default preemption level to a safe value. Userspace is
366 * still able to use more fine-grained preemption levels, since in
367 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
368 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
369 * not real HW workarounds, but merely a way to start using preemption
370 * while maintaining old contract with userspace.
373 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
374 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
376 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
377 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
378 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
379 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
381 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
382 if (IS_GEN9_LP(i915))
383 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
386 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
387 struct i915_wa_list *wal)
389 struct drm_i915_private *i915 = engine->i915;
390 u8 vals[3] = { 0, 0, 0 };
393 for (i = 0; i < 3; i++) {
397 * Only consider slices where one, and only one, subslice has 7
400 if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
404 * subslice_7eu[i] != 0 (because of the check above) and
405 * ss_max == 4 (maximum number of subslices possible per slice)
409 ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
413 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
416 /* Tune IZ hashing. See intel_device_info_runtime_init() */
417 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
418 GEN9_IZ_HASHING_MASK(2) |
419 GEN9_IZ_HASHING_MASK(1) |
420 GEN9_IZ_HASHING_MASK(0),
421 GEN9_IZ_HASHING(2, vals[2]) |
422 GEN9_IZ_HASHING(1, vals[1]) |
423 GEN9_IZ_HASHING(0, vals[0]));
426 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
427 struct i915_wa_list *wal)
429 gen9_ctx_workarounds_init(engine, wal);
430 skl_tune_iz_hashing(engine, wal);
433 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
434 struct i915_wa_list *wal)
436 gen9_ctx_workarounds_init(engine, wal);
438 /* WaDisableThreadStallDopClockGating:bxt */
439 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
440 STALL_DOP_GATING_DISABLE);
442 /* WaToEnableHwFixForPushConstHWBug:bxt */
443 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
444 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
447 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
448 struct i915_wa_list *wal)
450 struct drm_i915_private *i915 = engine->i915;
452 gen9_ctx_workarounds_init(engine, wal);
454 /* WaToEnableHwFixForPushConstHWBug:kbl */
455 if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
456 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
457 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
459 /* WaDisableSbeCacheDispatchPortSharing:kbl */
460 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
461 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
464 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
465 struct i915_wa_list *wal)
467 gen9_ctx_workarounds_init(engine, wal);
469 /* WaToEnableHwFixForPushConstHWBug:glk */
470 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
471 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
474 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
475 struct i915_wa_list *wal)
477 gen9_ctx_workarounds_init(engine, wal);
479 /* WaToEnableHwFixForPushConstHWBug:cfl */
480 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
481 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
483 /* WaDisableSbeCacheDispatchPortSharing:cfl */
484 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
485 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
488 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
489 struct i915_wa_list *wal)
491 struct drm_i915_private *i915 = engine->i915;
493 /* WaForceContextSaveRestoreNonCoherent:cnl */
494 WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
495 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
497 /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
498 if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
499 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
501 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
502 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
503 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
505 /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
506 if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
507 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
508 GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
510 /* WaPushConstantDereferenceHoldDisable:cnl */
511 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
513 /* FtrEnableFastAnisoL1BankingFix:cnl */
514 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
516 /* WaDisable3DMidCmdPreemption:cnl */
517 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
519 /* WaDisableGPGPUMidCmdPreemption:cnl */
520 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
521 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
522 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
524 /* WaDisableEarlyEOT:cnl */
525 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
528 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
529 struct i915_wa_list *wal)
531 struct drm_i915_private *i915 = engine->i915;
533 /* WaDisableBankHangMode:icl */
536 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
539 /* WaDisableBankHangMode:icl */
542 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
545 /* Wa_1604370585:icl (pre-prod)
546 * Formerly known as WaPushConstantDereferenceHoldDisable
548 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
549 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
550 PUSH_CONSTANT_DEREF_DISABLE);
552 /* WaForceEnableNonCoherent:icl
553 * This is not the same workaround as in early Gen9 platforms, where
554 * lacking this could cause system hangs, but coherency performance
555 * overhead is high and only a few compute workloads really need it
556 * (the register is whitelisted in hardware now, so UMDs can opt in
557 * for coherency if they have a good reason).
559 WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
561 /* Wa_2006611047:icl (pre-prod)
562 * Formerly known as WaDisableImprovedTdlClkGating
564 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
565 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
566 GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
568 /* Wa_2006665173:icl (pre-prod) */
569 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
570 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
571 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
573 /* WaEnableFloatBlendOptimization:icl */
574 wa_write_masked_or(wal,
576 0, /* write-only, so skip validation */
577 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
579 /* WaDisableGPGPUMidThreadPreemption:icl */
580 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
581 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
582 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
584 /* allow headerless messages for preemptible GPGPU context */
585 WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
586 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
590 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
591 struct i915_wa_list *wal,
594 struct drm_i915_private *i915 = engine->i915;
596 if (engine->class != RENDER_CLASS)
599 wa_init_start(wal, name);
601 if (IS_GEN(i915, 11))
602 icl_ctx_workarounds_init(engine, wal);
603 else if (IS_CANNONLAKE(i915))
604 cnl_ctx_workarounds_init(engine, wal);
605 else if (IS_COFFEELAKE(i915))
606 cfl_ctx_workarounds_init(engine, wal);
607 else if (IS_GEMINILAKE(i915))
608 glk_ctx_workarounds_init(engine, wal);
609 else if (IS_KABYLAKE(i915))
610 kbl_ctx_workarounds_init(engine, wal);
611 else if (IS_BROXTON(i915))
612 bxt_ctx_workarounds_init(engine, wal);
613 else if (IS_SKYLAKE(i915))
614 skl_ctx_workarounds_init(engine, wal);
615 else if (IS_CHERRYVIEW(i915))
616 chv_ctx_workarounds_init(engine, wal);
617 else if (IS_BROADWELL(i915))
618 bdw_ctx_workarounds_init(engine, wal);
619 else if (INTEL_GEN(i915) < 8)
622 MISSING_CASE(INTEL_GEN(i915));
627 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
629 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
632 int intel_engine_emit_ctx_wa(struct i915_request *rq)
634 struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
643 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
647 cs = intel_ring_begin(rq, (wal->count * 2 + 2));
651 *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
652 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
653 *cs++ = i915_mmio_reg_offset(wa->reg);
658 intel_ring_advance(rq, cs);
660 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
668 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
670 /* WaDisableKillLogic:bxt,skl,kbl */
671 if (!IS_COFFEELAKE(i915))
677 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
679 * Must match Display Engine. See
680 * WaCompressedResourceDisplayNewHashMode.
684 MMCD_PCLA | MMCD_HOTSPOT_EN);
687 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
690 BDW_DISABLE_HDC_INVALIDATION);
694 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
696 gen9_gt_workarounds_init(i915, wal);
698 /* WaDisableGafsUnitClkGating:skl */
701 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
703 /* WaInPlaceDecompressionHang:skl */
704 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
706 GEN9_GAMT_ECO_REG_RW_IA,
707 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
711 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
713 gen9_gt_workarounds_init(i915, wal);
715 /* WaInPlaceDecompressionHang:bxt */
717 GEN9_GAMT_ECO_REG_RW_IA,
718 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
722 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
724 gen9_gt_workarounds_init(i915, wal);
726 /* WaDisableDynamicCreditSharing:kbl */
727 if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
730 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
732 /* WaDisableGafsUnitClkGating:kbl */
735 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
737 /* WaInPlaceDecompressionHang:kbl */
739 GEN9_GAMT_ECO_REG_RW_IA,
740 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
744 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
746 gen9_gt_workarounds_init(i915, wal);
750 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
752 gen9_gt_workarounds_init(i915, wal);
754 /* WaDisableGafsUnitClkGating:cfl */
757 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
759 /* WaInPlaceDecompressionHang:cfl */
761 GEN9_GAMT_ECO_REG_RW_IA,
762 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
766 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
768 const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
769 u32 mcr_slice_subslice_mask;
772 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
773 * L3Banks could be fused off in single slice scenario. If that is
774 * the case, we might need to program MCR select to a valid L3Bank
775 * by default, to make sure we correctly read certain registers
776 * later on (in the range 0xB100 - 0xB3FF).
777 * This might be incompatible with
778 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
779 * Fortunately, this should not happen in production hardware, so
780 * we only assert that this is the case (instead of implementing
781 * something more complex that requires checking the range of every
784 if (INTEL_GEN(i915) >= 10 &&
785 is_power_of_2(sseu->slice_mask)) {
787 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
788 * enabled subslice, no need to redirect MCR packet
790 u32 slice = fls(sseu->slice_mask);
792 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
793 u8 ss_mask = sseu->subslice_mask[slice];
795 u8 enabled_mask = (ss_mask | ss_mask >>
796 GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
797 u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
800 * Production silicon should have matched L3Bank and
803 WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
806 if (INTEL_GEN(i915) >= 11)
807 mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
808 GEN11_MCR_SUBSLICE_MASK;
810 mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
811 GEN8_MCR_SUBSLICE_MASK;
813 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
814 * Before any MMIO read into slice/subslice specific registers, MCR
815 * packet control register needs to be programmed to point to any
816 * enabled s/ss pair. Otherwise, incorrect values will be returned.
817 * This means each subsequent MMIO read will be forwarded to a
818 * specific s/ss combination, but this is OK since these registers
819 * are consistent across s/ss in almost all cases. In the rare
820 * occasions, such as INSTDONE, where this value is dependent
821 * on s/ss combo, the read should be done with read_subslice_reg.
823 wa_write_masked_or(wal,
825 mcr_slice_subslice_mask,
826 intel_calculate_mcr_s_ss_select(i915));
830 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
832 wa_init_mcr(i915, wal);
834 /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
835 if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
838 GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
840 /* WaInPlaceDecompressionHang:cnl */
842 GEN9_GAMT_ECO_REG_RW_IA,
843 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
847 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
849 wa_init_mcr(i915, wal);
851 /* WaInPlaceDecompressionHang:icl */
853 GEN9_GAMT_ECO_REG_RW_IA,
854 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
856 /* WaModifyGamTlbPartitioning:icl */
857 wa_write_masked_or(wal,
858 GEN11_GACB_PERF_CTRL,
859 GEN11_HASH_CTRL_MASK,
860 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
863 * Formerly known as WaCL2SFHalfMaxAlloc
867 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
868 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
871 * Formerly known as WaDisCtxReload
874 GEN8_GAMW_ECO_DEV_RW_IA,
875 GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
877 /* Wa_1405779004:icl (pre-prod) */
878 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
880 SLICE_UNIT_LEVEL_CLKGATE,
881 MSCUNIT_CLKGATE_DIS);
883 /* Wa_1406680159:icl */
885 SUBSLICE_UNIT_LEVEL_CLKGATE,
888 /* Wa_1406838659:icl (pre-prod) */
889 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
891 INF_UNIT_LEVEL_CLKGATE,
895 * Formerly known as WaGamTlbPendError
899 GAMT_CHKN_DISABLE_L3_COH_PIPE);
903 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
905 if (IS_GEN(i915, 11))
906 icl_gt_workarounds_init(i915, wal);
907 else if (IS_CANNONLAKE(i915))
908 cnl_gt_workarounds_init(i915, wal);
909 else if (IS_COFFEELAKE(i915))
910 cfl_gt_workarounds_init(i915, wal);
911 else if (IS_GEMINILAKE(i915))
912 glk_gt_workarounds_init(i915, wal);
913 else if (IS_KABYLAKE(i915))
914 kbl_gt_workarounds_init(i915, wal);
915 else if (IS_BROXTON(i915))
916 bxt_gt_workarounds_init(i915, wal);
917 else if (IS_SKYLAKE(i915))
918 skl_gt_workarounds_init(i915, wal);
919 else if (INTEL_GEN(i915) <= 8)
922 MISSING_CASE(INTEL_GEN(i915));
925 void intel_gt_init_workarounds(struct drm_i915_private *i915)
927 struct i915_wa_list *wal = &i915->gt_wa_list;
929 wa_init_start(wal, "GT");
930 gt_init_workarounds(i915, wal);
934 static enum forcewake_domains
935 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
937 enum forcewake_domains fw = 0;
941 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
942 fw |= intel_uncore_forcewake_for_reg(uncore,
951 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
953 if ((cur ^ wa->val) & wa->read) {
954 DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
955 name, from, i915_mmio_reg_offset(wa->reg),
966 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
968 enum forcewake_domains fw;
976 fw = wal_get_fw_for_rmw(uncore, wal);
978 spin_lock_irqsave(&uncore->lock, flags);
979 intel_uncore_forcewake_get__locked(uncore, fw);
981 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
982 intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
983 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
985 intel_uncore_read_fw(uncore, wa->reg),
986 wal->name, "application");
989 intel_uncore_forcewake_put__locked(uncore, fw);
990 spin_unlock_irqrestore(&uncore->lock, flags);
993 void intel_gt_apply_workarounds(struct drm_i915_private *i915)
995 wa_list_apply(&i915->uncore, &i915->gt_wa_list);
998 static bool wa_list_verify(struct intel_uncore *uncore,
999 const struct i915_wa_list *wal,
1006 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1008 intel_uncore_read(uncore, wa->reg),
1014 bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
1017 return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
1021 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1023 struct i915_wa wa = {
1027 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1030 wa.reg.reg |= flags;
1035 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1037 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
1040 static void gen9_whitelist_build(struct i915_wa_list *w)
1042 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1043 whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1045 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1046 whitelist_reg(w, GEN8_CS_CHICKEN1);
1048 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1049 whitelist_reg(w, GEN8_HDC_CHICKEN1);
1052 static void skl_whitelist_build(struct intel_engine_cs *engine)
1054 struct i915_wa_list *w = &engine->whitelist;
1056 if (engine->class != RENDER_CLASS)
1059 gen9_whitelist_build(w);
1061 /* WaDisableLSQCROPERFforOCL:skl */
1062 whitelist_reg(w, GEN8_L3SQCREG4);
1065 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1067 if (engine->class != RENDER_CLASS)
1070 gen9_whitelist_build(&engine->whitelist);
1073 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1075 struct i915_wa_list *w = &engine->whitelist;
1077 if (engine->class != RENDER_CLASS)
1080 gen9_whitelist_build(w);
1082 /* WaDisableLSQCROPERFforOCL:kbl */
1083 whitelist_reg(w, GEN8_L3SQCREG4);
1086 static void glk_whitelist_build(struct intel_engine_cs *engine)
1088 struct i915_wa_list *w = &engine->whitelist;
1090 if (engine->class != RENDER_CLASS)
1093 gen9_whitelist_build(w);
1095 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1096 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1099 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1101 struct i915_wa_list *w = &engine->whitelist;
1103 if (engine->class != RENDER_CLASS)
1106 gen9_whitelist_build(w);
1109 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1111 * This covers 4 registers which are next to one another:
1112 * - PS_INVOCATION_COUNT
1113 * - PS_INVOCATION_COUNT_UDW
1115 * - PS_DEPTH_COUNT_UDW
1117 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1118 RING_FORCE_TO_NONPRIV_RD |
1119 RING_FORCE_TO_NONPRIV_RANGE_4);
1122 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1124 struct i915_wa_list *w = &engine->whitelist;
1126 if (engine->class != RENDER_CLASS)
1129 /* WaEnablePreemptionGranularityControlByUMD:cnl */
1130 whitelist_reg(w, GEN8_CS_CHICKEN1);
1133 static void icl_whitelist_build(struct intel_engine_cs *engine)
1135 struct i915_wa_list *w = &engine->whitelist;
1137 switch (engine->class) {
1139 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1140 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1142 /* WaAllowUMDToModifySamplerMode:icl */
1143 whitelist_reg(w, GEN10_SAMPLER_MODE);
1145 /* WaEnableStateCacheRedirectToCS:icl */
1146 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1149 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1151 * This covers 4 registers which are next to one another:
1152 * - PS_INVOCATION_COUNT
1153 * - PS_INVOCATION_COUNT_UDW
1155 * - PS_DEPTH_COUNT_UDW
1157 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1158 RING_FORCE_TO_NONPRIV_RD |
1159 RING_FORCE_TO_NONPRIV_RANGE_4);
1162 case VIDEO_DECODE_CLASS:
1163 /* hucStatusRegOffset */
1164 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1165 RING_FORCE_TO_NONPRIV_RD);
1166 /* hucUKernelHdrInfoRegOffset */
1167 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1168 RING_FORCE_TO_NONPRIV_RD);
1169 /* hucStatus2RegOffset */
1170 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1171 RING_FORCE_TO_NONPRIV_RD);
1179 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1181 struct drm_i915_private *i915 = engine->i915;
1182 struct i915_wa_list *w = &engine->whitelist;
1184 wa_init_start(w, "whitelist");
1186 if (IS_GEN(i915, 11))
1187 icl_whitelist_build(engine);
1188 else if (IS_CANNONLAKE(i915))
1189 cnl_whitelist_build(engine);
1190 else if (IS_COFFEELAKE(i915))
1191 cfl_whitelist_build(engine);
1192 else if (IS_GEMINILAKE(i915))
1193 glk_whitelist_build(engine);
1194 else if (IS_KABYLAKE(i915))
1195 kbl_whitelist_build(engine);
1196 else if (IS_BROXTON(i915))
1197 bxt_whitelist_build(engine);
1198 else if (IS_SKYLAKE(i915))
1199 skl_whitelist_build(engine);
1200 else if (INTEL_GEN(i915) <= 8)
1203 MISSING_CASE(INTEL_GEN(i915));
1208 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1210 const struct i915_wa_list *wal = &engine->whitelist;
1211 struct intel_uncore *uncore = engine->uncore;
1212 const u32 base = engine->mmio_base;
1219 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1220 intel_uncore_write(uncore,
1221 RING_FORCE_TO_NONPRIV(base, i),
1222 i915_mmio_reg_offset(wa->reg));
1224 /* And clear the rest just in case of garbage */
1225 for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1226 intel_uncore_write(uncore,
1227 RING_FORCE_TO_NONPRIV(base, i),
1228 i915_mmio_reg_offset(RING_NOPID(base)));
/*
 * rcs_engine_wa_init - add engine workarounds to @wal, grouped by platform.
 *
 * The platform is taken from engine->i915. Entries are organised in this
 * order: Gen11, the Gen9..Gen11 range, Skylake/Kaby Lake/Coffee Lake,
 * Broxton, and finally everything that is Gen9. Within the Gen11 group one
 * entry is further restricted to ICL revisions A0..B0, and within the Gen9
 * group one entry is restricted to IS_GEN9_LP() parts.
 */
1232 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1234 struct drm_i915_private *i915 = engine->i915;
1236 if (IS_GEN(i915, 11)) {
1237 /* This is not a Wa. Enable for better image quality */
1240 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1242 /* WaPipelineFlushCoherentLines:icl */
1243 ignore_wa_write_or(wal,
1245 GEN8_LQSC_FLUSH_COHERENT_LINES,
1246 GEN8_LQSC_FLUSH_COHERENT_LINES);
1250 * Formerly known as WaGAPZPriorityScheme
1254 GEN11_ARBITRATION_PRIO_ORDER_MASK);
1258 * Formerly known as WaL3BankAddressHashing
1260 wa_write_masked_or(wal,
1262 GEN11_HASH_CTRL_EXCL_MASK,
1263 GEN11_HASH_CTRL_EXCL_BIT0);
1264 wa_write_masked_or(wal,
1266 GEN11_BANK_HASH_ADDR_EXCL_MASK,
1267 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1271 * Formerly known as WaDisableCleanEvicts
1273 ignore_wa_write_or(wal,
1275 GEN11_LQSC_CLEAN_EVICT_DISABLE,
1276 GEN11_LQSC_CLEAN_EVICT_DISABLE);
1278 /* WaForwardProgressSoftReset:icl */
1280 GEN10_SCRATCH_LNCF2,
1281 PMFLUSHDONE_LNICRSDROP |
1282 PMFLUSH_GAPL3UNBLOCK |
1283 PMFLUSHDONE_LNEBLK);
1285 /* Wa_1406609255:icl (pre-prod) */
1286 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1289 GEN7_DISABLE_DEMAND_PREFETCH);
1291 /* Wa_1606682166:icl */
1294 GEN7_DISABLE_SAMPLER_PREFETCH);
1297 if (IS_GEN_RANGE(i915, 9, 11)) {
1298 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1300 GEN7_FF_SLICE_CS_CHICKEN1,
1301 GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1304 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1305 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1308 GEN9_GAPS_TSV_CREDIT_DISABLE);
1311 if (IS_BROXTON(i915)) {
1312 /* WaDisablePooledEuLoadBalancingFix:bxt */
1314 FF_SLICE_CS_CHICKEN2,
1315 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1318 if (IS_GEN(i915, 9)) {
1319 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1321 GEN9_CSFE_CHICKEN1_RCS,
1322 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1324 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1327 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1329 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1330 if (IS_GEN9_LP(i915))
1331 wa_write_masked_or(wal,
1333 L3_PRIO_CREDITS_MASK,
1334 L3_GENERAL_PRIO_CREDITS(62) |
1335 L3_HIGH_PRIO_CREDITS(2));
1337 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1340 GEN8_LQSC_FLUSH_COHERENT_LINES);
/*
 * xcs_engine_wa_init - add workarounds for @engine to @wal.
 *
 * The only entry here is WaKBLVECSSemaphoreWaitPoll: it is limited to
 * Kaby Lake revisions A0..E0 and targets the RING_SEMA_WAIT_POLL register
 * relative to this engine's mmio_base.
 */
1345 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1347 struct drm_i915_private *i915 = engine->i915;
1349 /* WaKBLVECSSemaphoreWaitPoll:kbl */
1350 if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1352 RING_SEMA_WAIT_POLL(engine->mmio_base),
/*
 * engine_init_workarounds - select the workaround builder for @engine.
 *
 * A Gen < 8 test wrapped in I915_SELFTEST_ONLY() comes first. The engine
 * whose id is RCS0 is handed to rcs_engine_wa_init(); xcs_engine_wa_init()
 * is the other builder called from here. Both fill @wal.
 */
1358 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1360 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1363 if (engine->id == RCS0)
1364 rcs_engine_wa_init(engine, wal);
1366 xcs_engine_wa_init(engine, wal);
/*
 * intel_engine_init_workarounds - build the workaround list of @engine.
 *
 * After a GEM_WARN_ON() check for Gen < 8, engine->wa_list is started under
 * the engine's name, filled by engine_init_workarounds() and closed with
 * wa_init_finish().
 */
1369 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1371 struct i915_wa_list *wal = &engine->wa_list;
1373 if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
/* Start, populate and finish the list; the three calls bracket the build. */
1376 wa_init_start(wal, engine->name);
1377 engine_init_workarounds(engine, wal);
1378 wa_init_finish(wal);
/*
 * intel_engine_apply_workarounds - apply engine->wa_list.
 *
 * Thin wrapper: hands the engine's uncore and its workaround list to
 * wa_list_apply().
 */
1381 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1383 wa_list_apply(engine->uncore, &engine->wa_list);
/*
 * create_scratch - allocate and pin a buffer with room for @count u32 values.
 *
 * The size is count * sizeof(u32) rounded up to PAGE_SIZE. An internal GEM
 * object of that size is created on vm->i915, switched to I915_CACHE_LLC
 * coherency, wrapped in a vma for @vm and pinned: PIN_GLOBAL when the vma
 * lives in the GGTT, PIN_USER otherwise.
 *
 * Failures are reported as error pointers (ERR_CAST() of the object error,
 * or ERR_PTR(err) after dropping the object reference).
 */
1386 static struct i915_vma *
1387 create_scratch(struct i915_address_space *vm, int count)
1389 struct drm_i915_gem_object *obj;
1390 struct i915_vma *vma;
1394 size = round_up(count * sizeof(u32), PAGE_SIZE);
1395 obj = i915_gem_object_create_internal(vm->i915, size);
1397 return ERR_CAST(obj);
1399 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1401 vma = i915_vma_instance(obj, vm, NULL);
/* The pin flag follows the address space the vma actually belongs to. */
1407 err = i915_vma_pin(vma, 0, 0,
1408 i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1415 i915_gem_object_put(obj);
1416 return ERR_PTR(err);
/*
 * wa_list_srm - emit register-to-memory stores for every entry of @wal.
 *
 * Four dwords are reserved in the ring of @rq per list entry. The command
 * word is built from MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT (with a
 * Gen8+ adjustment). For entry i the operands written are the register
 * offset and the GGTT address of slot i in @vma, i.e. the vma's GGTT offset
 * plus i * sizeof(u32).
 */
1420 wa_list_srm(struct i915_request *rq,
1421 const struct i915_wa_list *wal,
1422 struct i915_vma *vma)
1424 const struct i915_wa *wa;
1428 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1429 if (INTEL_GEN(rq->i915) >= 8)
1432 cs = intel_ring_begin(rq, 4 * wal->count);
1436 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1438 *cs++ = i915_mmio_reg_offset(wa->reg);
1439 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1442 intel_ring_advance(rq, cs);
/*
 * engine_wa_list_verify - read back the registers of @wal and check them.
 *
 * Steps:
 *  - create a GGTT scratch vma with one u32 slot per list entry;
 *  - create a request on @ce and let wa_list_srm() store each register
 *    into its slot;
 *  - submit the request and wait up to HZ / 5 for it;
 *  - map the scratch object (I915_MAP_WB) and pass each stored value to
 *    wa_verify() together with the list name and the caller tag;
 *  - unpin the mapping and the vma again.
 */
1447 static int engine_wa_list_verify(struct intel_context *ce,
1448 const struct i915_wa_list * const wal,
1451 const struct i915_wa *wa;
1452 struct i915_request *rq;
1453 struct i915_vma *vma;
1461 vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
1463 return PTR_ERR(vma);
1465 rq = intel_context_create_request(ce);
1471 err = wa_list_srm(rq, wal, vma);
1475 i915_request_add(rq);
1476 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1481 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1482 if (IS_ERR(results)) {
1483 err = PTR_ERR(results);
/* results[i] is the value stored for list entry i by wa_list_srm(). */
1488 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1489 if (!wa_verify(wa, results[i], wal->name, from))
1492 i915_gem_object_unpin_map(vma->obj);
1495 i915_vma_unpin(vma);
/*
 * intel_engine_verify_workarounds - verify workarounds for @engine.
 *
 * Delegates to engine_wa_list_verify(), using the engine's kernel_context
 * as the context to run the check on, and returns its result.
 */
1500 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1503 return engine_wa_list_verify(engine->kernel_context,
1508 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1509 #include "selftest_workarounds.c"