1 // SPDX-License-Identifier: MIT
3 * Copyright © 2014-2018 Intel Corporation
7 #include "intel_context.h"
8 #include "intel_engine_pm.h"
9 #include "intel_gpu_commands.h"
11 #include "intel_ring.h"
12 #include "intel_workarounds.h"
15 * DOC: Hardware workarounds
17 * This file is intended as a central place to implement most [1]_ of the
18 * required workarounds for hardware to work as originally intended. They fall
19 * in five basic categories depending on how/when they are applied:
21 * - Workarounds that touch registers that are saved/restored to/from the HW
22 * context image. The list is emitted (via Load Register Immediate commands)
23 * every time a new context is created.
24 * - GT workarounds. The list of these WAs is applied whenever these registers
25 * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
26 * - Display workarounds. The list is applied during display clock-gating
28 * - Workarounds that whitelist a privileged register, so that UMDs can manage
29 * them directly. This is just a special case of an MMIO workaround (as we
30 * write the list of these to-be-whitelisted registers to some special HW
32 * - Workaround batchbuffers, that get executed automatically by the hardware
33 * on every HW context restore.
35 * .. [1] Please notice that there are other WAs that, due to their nature,
36 * cannot be applied from a central place. Those are peppered around the rest
37 * of the code, as needed.
39 * .. [2] Technically, some registers are powercontext saved & restored, so they
40 * survive a suspend/resume. In practice, writing them again is not too
41 * costly and simplifies things. We can revisit this in the future.
46 * Keep things in this file ordered by WA type, as per the above (context, GT,
47 * display, register whitelist, batchbuffer). Then, inside each type, keep the
50 * - Infrastructure functions and macros
51 * - WAs per platform in standard gen/chrono order
52 * - Public functions to init or apply the given workaround type.
55 static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
58 wal->engine_name = engine_name;
61 #define WA_LIST_CHUNK (1 << 4)
63 static void wa_init_finish(struct i915_wa_list *wal)
65 /* Trim unused entries. */
66 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
67 struct i915_wa *list = kmemdup(wal->list,
68 wal->count * sizeof(*list),
80 DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
81 wal->wa_count, wal->name, wal->engine_name);
84 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
86 unsigned int addr = i915_mmio_reg_offset(wa->reg);
87 unsigned int start = 0, end = wal->count;
88 const unsigned int grow = WA_LIST_CHUNK;
91 GEM_BUG_ON(!is_power_of_2(grow));
93 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
96 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
99 DRM_ERROR("No space for workaround init!\n");
104 memcpy(list, wal->list, sizeof(*wa) * wal->count);
111 while (start < end) {
112 unsigned int mid = start + (end - start) / 2;
114 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
116 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
119 wa_ = &wal->list[mid];
121 if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
122 DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
123 i915_mmio_reg_offset(wa_->reg),
126 wa_->set &= ~wa->clr;
132 wa_->read |= wa->read;
138 wa_ = &wal->list[wal->count++];
141 while (wa_-- > wal->list) {
142 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
143 i915_mmio_reg_offset(wa_[1].reg));
144 if (i915_mmio_reg_offset(wa_[1].reg) >
145 i915_mmio_reg_offset(wa_[0].reg))
148 swap(wa_[1], wa_[0]);
152 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
153 u32 clear, u32 set, u32 read_mask, bool masked_reg)
155 struct i915_wa wa = {
160 .masked_reg = masked_reg,
167 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
169 wa_add(wal, reg, clear, set, clear, false);
173 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
175 wa_write_clr_set(wal, reg, ~0, set);
179 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
181 wa_write_clr_set(wal, reg, set, set);
185 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
187 wa_write_clr_set(wal, reg, clr, 0);
191 * WA operations on "masked register". A masked register has the upper 16 bits
192 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
193 * portion of the register without a rmw: you simply write in the upper 16 bits
194 * the mask of bits you are going to modify.
196 * The wa_masked_* family of functions already does the necessary operations to
197 * calculate the mask based on the parameters passed, so user only has to
198 * provide the lower 16 bits of that register.
202 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
204 wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
208 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
210 wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
214 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
217 wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
220 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
221 struct i915_wa_list *wal)
223 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
226 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
227 struct i915_wa_list *wal)
229 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
232 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
233 struct i915_wa_list *wal)
235 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
237 /* WaDisableAsyncFlipPerfMode:bdw,chv */
238 wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);
240 /* WaDisablePartialInstShootdown:bdw,chv */
241 wa_masked_en(wal, GEN8_ROW_CHICKEN,
242 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
244 /* Use Force Non-Coherent whenever executing a 3D context. This is a
245 * workaround for a possible hang in the unlikely event a TLB
246 * invalidation occurs during a PSD flush.
248 /* WaForceEnableNonCoherent:bdw,chv */
249 /* WaHdcDisableFetchWhenMasked:bdw,chv */
250 wa_masked_en(wal, HDC_CHICKEN0,
251 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
252 HDC_FORCE_NON_COHERENT);
254 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
255 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
256 * polygons in the same 8x4 pixel/sample area to be processed without
257 * stalling waiting for the earlier ones to write to Hierarchical Z
260 * This optimization is off by default for BDW and CHV; turn it on.
262 wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
264 /* Wa4x4STCOptimizationDisable:bdw,chv */
265 wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
268 * BSpec recommends 8x4 when MSAA is used,
269 * however in practice 16x4 seems fastest.
271 * Note that PS/WM thread counts depend on the WIZ hashing
272 * disable bit, which we don't touch here, but it's good
273 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
275 wa_masked_field_set(wal, GEN7_GT_MODE,
276 GEN6_WIZ_HASHING_MASK,
277 GEN6_WIZ_HASHING_16x4);
280 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
281 struct i915_wa_list *wal)
283 struct drm_i915_private *i915 = engine->i915;
285 gen8_ctx_workarounds_init(engine, wal);
287 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
288 wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
290 /* WaDisableDopClockGating:bdw
292 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
293 * to disable EUTC clock gating.
295 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
296 DOP_CLOCK_GATING_DISABLE);
298 wa_masked_en(wal, HALF_SLICE_CHICKEN3,
299 GEN8_SAMPLER_POWER_BYPASS_DIS);
301 wa_masked_en(wal, HDC_CHICKEN0,
302 /* WaForceContextSaveRestoreNonCoherent:bdw */
303 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
304 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
305 (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
308 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
309 struct i915_wa_list *wal)
311 gen8_ctx_workarounds_init(engine, wal);
313 /* WaDisableThreadStallDopClockGating:chv */
314 wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
316 /* Improve HiZ throughput on CHV. */
317 wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
320 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
321 struct i915_wa_list *wal)
323 struct drm_i915_private *i915 = engine->i915;
326 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
328 * Must match Display Engine. See
329 * WaCompressedResourceDisplayNewHashMode.
331 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
332 GEN9_PBE_COMPRESSED_HASH_SELECTION);
333 wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
334 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
337 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
338 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
339 wa_masked_en(wal, GEN8_ROW_CHICKEN,
340 FLOW_CONTROL_ENABLE |
341 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
343 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
344 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
345 wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
346 GEN9_ENABLE_YV12_BUGFIX |
347 GEN9_ENABLE_GPGPU_PREEMPTION);
349 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
350 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
351 wa_masked_en(wal, CACHE_MODE_1,
352 GEN8_4x4_STC_OPTIMIZATION_DISABLE |
353 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
355 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
356 wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
357 GEN9_CCS_TLB_PREFETCH_ENABLE);
359 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
360 wa_masked_en(wal, HDC_CHICKEN0,
361 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
362 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
364 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
365 * both tied to WaForceContextSaveRestoreNonCoherent
366 * in some hsds for skl. We keep the tie for all gen9. The
367 * documentation is a bit hazy and so we want to get common behaviour,
368 * even though there is no clear evidence we would need both on kbl/bxt.
369 * This area has been source of system hangs so we play it safe
370 * and mimic the skl regardless of what bspec says.
372 * Use Force Non-Coherent whenever executing a 3D context. This
373 * is a workaround for a possible hang in the unlikely event
374 * a TLB invalidation occurs during a PSD flush.
377 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
378 wa_masked_en(wal, HDC_CHICKEN0,
379 HDC_FORCE_NON_COHERENT);
381 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
382 if (IS_SKYLAKE(i915) ||
384 IS_COFFEELAKE(i915) ||
386 wa_masked_en(wal, HALF_SLICE_CHICKEN3,
387 GEN8_SAMPLER_POWER_BYPASS_DIS);
389 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
390 wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
393 * Supporting preemption with fine-granularity requires changes in the
394 * batch buffer programming. Since we can't break old userspace, we
395 * need to set our default preemption level to safe value. Userspace is
396 * still able to use more fine-grained preemption levels, since in
397 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
398 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
399 * not real HW workarounds, but merely a way to start using preemption
400 * while maintaining old contract with userspace.
403 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
404 wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
406 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
407 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
408 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
409 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
411 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
412 if (IS_GEN9_LP(i915))
413 wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
416 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
417 struct i915_wa_list *wal)
419 struct intel_gt *gt = engine->gt;
420 u8 vals[3] = { 0, 0, 0 };
423 for (i = 0; i < 3; i++) {
427 * Only consider slices where one, and only one, subslice has 7
430 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
434 * subslice_7eu[i] != 0 (because of the check above) and
435 * ss_max == 4 (maximum number of subslices possible per slice)
439 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
443 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
446 /* Tune IZ hashing. See intel_device_info_runtime_init() */
447 wa_masked_field_set(wal, GEN7_GT_MODE,
448 GEN9_IZ_HASHING_MASK(2) |
449 GEN9_IZ_HASHING_MASK(1) |
450 GEN9_IZ_HASHING_MASK(0),
451 GEN9_IZ_HASHING(2, vals[2]) |
452 GEN9_IZ_HASHING(1, vals[1]) |
453 GEN9_IZ_HASHING(0, vals[0]));
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}
463 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
464 struct i915_wa_list *wal)
466 gen9_ctx_workarounds_init(engine, wal);
468 /* WaDisableThreadStallDopClockGating:bxt */
469 wa_masked_en(wal, GEN8_ROW_CHICKEN,
470 STALL_DOP_GATING_DISABLE);
472 /* WaToEnableHwFixForPushConstHWBug:bxt */
473 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
474 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
477 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
478 struct i915_wa_list *wal)
480 struct drm_i915_private *i915 = engine->i915;
482 gen9_ctx_workarounds_init(engine, wal);
484 /* WaToEnableHwFixForPushConstHWBug:kbl */
485 if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER))
486 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
487 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
489 /* WaDisableSbeCacheDispatchPortSharing:kbl */
490 wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
491 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
494 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
495 struct i915_wa_list *wal)
497 gen9_ctx_workarounds_init(engine, wal);
499 /* WaToEnableHwFixForPushConstHWBug:glk */
500 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
501 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
504 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
505 struct i915_wa_list *wal)
507 gen9_ctx_workarounds_init(engine, wal);
509 /* WaToEnableHwFixForPushConstHWBug:cfl */
510 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
511 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
513 /* WaDisableSbeCacheDispatchPortSharing:cfl */
514 wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
515 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
518 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
519 struct i915_wa_list *wal)
521 /* Wa_1406697149 (WaDisableBankHangMode:icl) */
524 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
527 /* WaForceEnableNonCoherent:icl
528 * This is not the same workaround as in early Gen9 platforms, where
529 * lacking this could cause system hangs, but coherency performance
530 * overhead is high and only a few compute workloads really need it
531 * (the register is whitelisted in hardware now, so UMDs can opt in
532 * for coherency if they have a good reason).
534 wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
536 /* WaEnableFloatBlendOptimization:icl */
537 wa_add(wal, GEN10_CACHE_MODE_SS, 0,
538 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
539 0 /* write-only, so skip validation */,
542 /* WaDisableGPGPUMidThreadPreemption:icl */
543 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
544 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
545 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
547 /* allow headerless messages for preemptible GPGPU context */
548 wa_masked_en(wal, GEN10_SAMPLER_MODE,
549 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
551 /* Wa_1604278689:icl,ehl */
552 wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
553 wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
554 0, /* write-only register; skip validation */
557 /* Wa_1406306137:icl,ehl */
558 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
562 * These settings aren't actually workarounds, but general tuning settings that
563 * need to be programmed on several platforms.
565 static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
566 struct i915_wa_list *wal)
569 * Although some platforms refer to it as Wa_1604555607, we need to
570 * program it even on those that don't explicitly list that
573 * Note that the programming of this register is further modified
574 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
575 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
576 * value when read. The default value for this register is zero for all
577 * fields and there are no bit masks. So instead of doing a RMW we
578 * should just write TDS timer value. For the same reason read
579 * verification is ignored.
583 FF_MODE2_TDS_TIMER_MASK,
584 FF_MODE2_TDS_TIMER_128,
588 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
589 struct i915_wa_list *wal)
591 gen12_ctx_gt_tuning_init(engine, wal);
594 * Wa_1409142259:tgl,dg1,adl-p
595 * Wa_1409347922:tgl,dg1,adl-p
596 * Wa_1409252684:tgl,dg1,adl-p
597 * Wa_1409217633:tgl,dg1,adl-p
598 * Wa_1409207793:tgl,dg1,adl-p
599 * Wa_1409178076:tgl,dg1,adl-p
600 * Wa_1408979724:tgl,dg1,adl-p
601 * Wa_14010443199:tgl,rkl,dg1,adl-p
602 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
603 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
605 wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
606 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
608 /* WaDisableGPGPUMidThreadPreemption:gen12 */
609 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
610 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
611 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
616 * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
621 FF_MODE2_GS_TIMER_MASK,
622 FF_MODE2_GS_TIMER_224,
627 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
629 wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1,
630 GEN9_RHWO_OPTIMIZATION_DISABLE);
633 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
634 struct i915_wa_list *wal)
636 gen12_ctx_workarounds_init(engine, wal);
639 wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
640 DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
643 wa_masked_en(wal, HIZ_CHICKEN,
644 DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
648 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
649 struct i915_wa_list *wal,
652 struct drm_i915_private *i915 = engine->i915;
654 if (engine->class != RENDER_CLASS)
657 wa_init_start(wal, name, engine->name);
660 dg1_ctx_workarounds_init(engine, wal);
661 else if (GRAPHICS_VER(i915) == 12)
662 gen12_ctx_workarounds_init(engine, wal);
663 else if (GRAPHICS_VER(i915) == 11)
664 icl_ctx_workarounds_init(engine, wal);
665 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
666 cfl_ctx_workarounds_init(engine, wal);
667 else if (IS_GEMINILAKE(i915))
668 glk_ctx_workarounds_init(engine, wal);
669 else if (IS_KABYLAKE(i915))
670 kbl_ctx_workarounds_init(engine, wal);
671 else if (IS_BROXTON(i915))
672 bxt_ctx_workarounds_init(engine, wal);
673 else if (IS_SKYLAKE(i915))
674 skl_ctx_workarounds_init(engine, wal);
675 else if (IS_CHERRYVIEW(i915))
676 chv_ctx_workarounds_init(engine, wal);
677 else if (IS_BROADWELL(i915))
678 bdw_ctx_workarounds_init(engine, wal);
679 else if (GRAPHICS_VER(i915) == 7)
680 gen7_ctx_workarounds_init(engine, wal);
681 else if (GRAPHICS_VER(i915) == 6)
682 gen6_ctx_workarounds_init(engine, wal);
683 else if (GRAPHICS_VER(i915) < 8)
686 MISSING_CASE(GRAPHICS_VER(i915));
691 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
693 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
696 int intel_engine_emit_ctx_wa(struct i915_request *rq)
698 struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
707 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
711 cs = intel_ring_begin(rq, (wal->count * 2 + 2));
715 *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
716 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
717 *cs++ = i915_mmio_reg_offset(wa->reg);
722 intel_ring_advance(rq, cs);
724 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
732 gen4_gt_workarounds_init(struct drm_i915_private *i915,
733 struct i915_wa_list *wal)
735 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
736 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
740 g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
742 gen4_gt_workarounds_init(i915, wal);
744 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
745 wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
749 ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
751 g4x_gt_workarounds_init(i915, wal);
753 wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
/* No GT-list workarounds for snb; its WAs live in the ctx/engine lists. */
static void
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
}
762 ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
764 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
766 GEN7_COMMON_SLICE_CHICKEN1,
767 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
769 /* WaApplyL3ControlAndL3ChickenMode:ivb */
770 wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
771 wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
773 /* WaForceL3Serialization:ivb */
774 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
778 vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
780 /* WaForceL3Serialization:vlv */
781 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
784 * WaIncreaseL3CreditsForVLVB0:vlv
785 * This is the hardware default actually.
787 wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
791 hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
793 /* L3 caching of data atomics doesn't work -- disable it. */
794 wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
798 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
799 0 /* XXX does this reg exist? */, true);
801 /* WaVSRefCountFullforceMissDisable:hsw */
802 wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
806 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
808 /* WaDisableKillLogic:bxt,skl,kbl */
809 if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
815 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
817 * Must match Display Engine. See
818 * WaCompressedResourceDisplayNewHashMode.
822 MMCD_PCLA | MMCD_HOTSPOT_EN);
825 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
828 BDW_DISABLE_HDC_INVALIDATION);
832 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
834 gen9_gt_workarounds_init(i915, wal);
836 /* WaDisableGafsUnitClkGating:skl */
839 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
841 /* WaInPlaceDecompressionHang:skl */
842 if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0))
844 GEN9_GAMT_ECO_REG_RW_IA,
845 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
849 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
851 gen9_gt_workarounds_init(i915, wal);
853 /* WaDisableDynamicCreditSharing:kbl */
854 if (IS_KBL_GT_STEP(i915, 0, STEP_C0))
857 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
859 /* WaDisableGafsUnitClkGating:kbl */
862 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
864 /* WaInPlaceDecompressionHang:kbl */
866 GEN9_GAMT_ECO_REG_RW_IA,
867 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}
877 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
879 gen9_gt_workarounds_init(i915, wal);
881 /* WaDisableGafsUnitClkGating:cfl */
884 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
886 /* WaInPlaceDecompressionHang:cfl */
888 GEN9_GAMT_ECO_REG_RW_IA,
889 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
893 icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
895 const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
896 unsigned int slice, subslice;
899 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
900 GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
904 * Although a platform may have subslices, we need to always steer
905 * reads to the lowest instance that isn't fused off. When Render
906 * Power Gating is enabled, grabbing forcewake will only power up a
907 * single subslice (the "minconfig") if there isn't a real workload
908 * that needs to be run; this means that if we steer register reads to
909 * one of the higher subslices, we run the risk of reading back 0's or
912 subslice = __ffs(intel_sseu_get_subslices(sseu, slice));
915 * If the subslice we picked above also steers us to a valid L3 bank,
916 * then we can just rely on the default steering and won't need to
917 * worry about explicitly re-steering L3BANK reads later.
919 if (i915->gt.info.l3bank_mask & BIT(subslice))
920 i915->gt.steering_table[L3BANK] = NULL;
922 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
923 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
925 drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
927 wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
931 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
933 icl_wa_init_mcr(i915, wal);
935 /* WaModifyGamTlbPartitioning:icl */
936 wa_write_clr_set(wal,
937 GEN11_GACB_PERF_CTRL,
938 GEN11_HASH_CTRL_MASK,
939 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
942 * Formerly known as WaCL2SFHalfMaxAlloc
946 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
947 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
950 * Formerly known as WaDisCtxReload
953 GEN8_GAMW_ECO_DEV_RW_IA,
954 GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
957 * Formerly known as WaGamTlbPendError
961 GAMT_CHKN_DISABLE_L3_COH_PIPE);
963 /* Wa_1607087056:icl,ehl,jsl */
964 if (IS_ICELAKE(i915) ||
965 IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0))
967 SLICE_UNIT_LEVEL_CLKGATE,
968 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
971 * This is not a documented workaround, but rather an optimization
972 * to reduce sampler power.
974 wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
978 * Though there are per-engine instances of these registers,
979 * they retain their value through engine resets and should
980 * only be provided on the GT workaround list rather than
981 * the engine-specific workaround list.
984 wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
986 struct intel_engine_cs *engine;
987 struct intel_gt *gt = &i915->gt;
990 for_each_engine(engine, gt, id) {
991 if (engine->class != VIDEO_DECODE_CLASS ||
992 (engine->instance % 2))
995 wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
996 IECPUNIT_CLKGATE_DIS);
1001 gen12_gt_workarounds_init(struct drm_i915_private *i915,
1002 struct i915_wa_list *wal)
1004 icl_wa_init_mcr(i915, wal);
1006 /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1007 wa_14011060649(i915, wal);
1009 /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1010 wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1014 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1016 gen12_gt_workarounds_init(i915, wal);
1018 /* Wa_1409420604:tgl */
1019 if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
1021 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1022 CPSSUNIT_CLKGATE_DIS);
1024 /* Wa_1607087056:tgl also know as BUG:1409180338 */
1025 if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
1027 SLICE_UNIT_LEVEL_CLKGATE,
1028 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1030 /* Wa_1408615072:tgl[a0] */
1031 if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
1032 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1033 VSUNIT_CLKGATE_DIS_TGL);
1037 dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1039 gen12_gt_workarounds_init(i915, wal);
1041 /* Wa_1607087056:dg1 */
1042 if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
1044 SLICE_UNIT_LEVEL_CLKGATE,
1045 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1047 /* Wa_1409420604:dg1 */
1050 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1051 CPSSUNIT_CLKGATE_DIS);
1053 /* Wa_1408615072:dg1 */
1054 /* Empirical testing shows this register is unaffected by engine reset. */
1056 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1057 VSUNIT_CLKGATE_DIS_TGL);
/* Select and apply the platform-specific GT workaround initializer. */
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_DG1(i915))
		dg1_gt_workarounds_init(i915, wal);
	else if (IS_TIGERLAKE(i915))
		tgl_gt_workarounds_init(i915, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_gt_workarounds_init(i915, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		gen9_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (IS_HASWELL(i915))
		hsw_gt_workarounds_init(i915, wal);
	else if (IS_VALLEYVIEW(i915))
		vlv_gt_workarounds_init(i915, wal);
	else if (IS_IVYBRIDGE(i915))
		ivb_gt_workarounds_init(i915, wal);
	else if (GRAPHICS_VER(i915) == 6)
		snb_gt_workarounds_init(i915, wal);
	else if (GRAPHICS_VER(i915) == 5)
		ilk_gt_workarounds_init(i915, wal);
	else if (IS_G4X(i915))
		g4x_gt_workarounds_init(i915, wal);
	else if (GRAPHICS_VER(i915) == 4)
		gen4_gt_workarounds_init(i915, wal);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));
}
1101 void intel_gt_init_workarounds(struct drm_i915_private *i915)
1103 struct i915_wa_list *wal = &i915->gt_wa_list;
1105 wa_init_start(wal, "GT", "global");
1106 gt_init_workarounds(i915, wal);
1107 wa_init_finish(wal);
1110 static enum forcewake_domains
1111 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1113 enum forcewake_domains fw = 0;
1117 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1118 fw |= intel_uncore_forcewake_for_reg(uncore,
1127 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
1129 if ((cur ^ wa->set) & wa->read) {
1130 DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1131 name, from, i915_mmio_reg_offset(wa->reg),
1132 cur, cur & wa->read, wa->set & wa->read);
1141 wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
1143 struct intel_uncore *uncore = gt->uncore;
1144 enum forcewake_domains fw;
1145 unsigned long flags;
1152 fw = wal_get_fw_for_rmw(uncore, wal);
1154 spin_lock_irqsave(&uncore->lock, flags);
1155 intel_uncore_forcewake_get__locked(uncore, fw);
1157 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1160 /* open-coded rmw due to steering */
1161 old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0;
1162 val = (old & ~wa->clr) | wa->set;
1163 if (val != old || !wa->clr)
1164 intel_uncore_write_fw(uncore, wa->reg, val);
1166 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1167 wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg),
1168 wal->name, "application");
1171 intel_uncore_forcewake_put__locked(uncore, fw);
1172 spin_unlock_irqrestore(&uncore->lock, flags);
1175 void intel_gt_apply_workarounds(struct intel_gt *gt)
1177 wa_list_apply(gt, >->i915->gt_wa_list);
1180 static bool wa_list_verify(struct intel_gt *gt,
1181 const struct i915_wa_list *wal,
1184 struct intel_uncore *uncore = gt->uncore;
1186 enum forcewake_domains fw;
1187 unsigned long flags;
1191 fw = wal_get_fw_for_rmw(uncore, wal);
1193 spin_lock_irqsave(&uncore->lock, flags);
1194 intel_uncore_forcewake_get__locked(uncore, fw);
1196 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1198 intel_gt_read_register_fw(gt, wa->reg),
1201 intel_uncore_forcewake_put__locked(uncore, fw);
1202 spin_unlock_irqrestore(&uncore->lock, flags);
1207 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1209 return wa_list_verify(gt, >->i915->gt_wa_list, from);
1213 static bool is_nonpriv_flags_valid(u32 flags)
1215 /* Check only valid flag bits are set */
1216 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1219 /* NB: Only 3 out of 4 enum values are valid for access field */
1220 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1221 RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1228 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1230 struct i915_wa wa = {
1234 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1237 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1240 wa.reg.reg |= flags;
1245 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1247 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1250 static void gen9_whitelist_build(struct i915_wa_list *w)
1252 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1253 whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1255 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1256 whitelist_reg(w, GEN8_CS_CHICKEN1);
1258 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1259 whitelist_reg(w, GEN8_HDC_CHICKEN1);
1261 /* WaSendPushConstantsFromMMIO:skl,bxt */
1262 whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1265 static void skl_whitelist_build(struct intel_engine_cs *engine)
1267 struct i915_wa_list *w = &engine->whitelist;
1269 if (engine->class != RENDER_CLASS)
1272 gen9_whitelist_build(w);
1274 /* WaDisableLSQCROPERFforOCL:skl */
1275 whitelist_reg(w, GEN8_L3SQCREG4);
1278 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1280 if (engine->class != RENDER_CLASS)
1283 gen9_whitelist_build(&engine->whitelist);
1286 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1288 struct i915_wa_list *w = &engine->whitelist;
1290 if (engine->class != RENDER_CLASS)
1293 gen9_whitelist_build(w);
1295 /* WaDisableLSQCROPERFforOCL:kbl */
1296 whitelist_reg(w, GEN8_L3SQCREG4);
1299 static void glk_whitelist_build(struct intel_engine_cs *engine)
1301 struct i915_wa_list *w = &engine->whitelist;
1303 if (engine->class != RENDER_CLASS)
1306 gen9_whitelist_build(w);
1308 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1309 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1312 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1314 struct i915_wa_list *w = &engine->whitelist;
1316 if (engine->class != RENDER_CLASS)
1319 gen9_whitelist_build(w);
1322 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1324 * This covers 4 register which are next to one another :
1325 * - PS_INVOCATION_COUNT
1326 * - PS_INVOCATION_COUNT_UDW
1328 * - PS_DEPTH_COUNT_UDW
1330 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1331 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1332 RING_FORCE_TO_NONPRIV_RANGE_4);
1335 static void cml_whitelist_build(struct intel_engine_cs *engine)
1337 struct i915_wa_list *w = &engine->whitelist;
1339 if (engine->class != RENDER_CLASS)
1340 whitelist_reg_ext(w,
1341 RING_CTX_TIMESTAMP(engine->mmio_base),
1342 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1344 cfl_whitelist_build(engine);
1347 static void icl_whitelist_build(struct intel_engine_cs *engine)
1349 struct i915_wa_list *w = &engine->whitelist;
1351 switch (engine->class) {
1353 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1354 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1356 /* WaAllowUMDToModifySamplerMode:icl */
1357 whitelist_reg(w, GEN10_SAMPLER_MODE);
1359 /* WaEnableStateCacheRedirectToCS:icl */
1360 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1363 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1365 * This covers 4 register which are next to one another :
1366 * - PS_INVOCATION_COUNT
1367 * - PS_INVOCATION_COUNT_UDW
1369 * - PS_DEPTH_COUNT_UDW
1371 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1372 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1373 RING_FORCE_TO_NONPRIV_RANGE_4);
1376 case VIDEO_DECODE_CLASS:
1377 /* hucStatusRegOffset */
1378 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1379 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1380 /* hucUKernelHdrInfoRegOffset */
1381 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1382 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1383 /* hucStatus2RegOffset */
1384 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1385 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1386 whitelist_reg_ext(w,
1387 RING_CTX_TIMESTAMP(engine->mmio_base),
1388 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1392 whitelist_reg_ext(w,
1393 RING_CTX_TIMESTAMP(engine->mmio_base),
1394 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1399 static void tgl_whitelist_build(struct intel_engine_cs *engine)
1401 struct i915_wa_list *w = &engine->whitelist;
1403 switch (engine->class) {
1406 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1409 * This covers 4 registers which are next to one another :
1410 * - PS_INVOCATION_COUNT
1411 * - PS_INVOCATION_COUNT_UDW
1413 * - PS_DEPTH_COUNT_UDW
1415 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1416 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1417 RING_FORCE_TO_NONPRIV_RANGE_4);
1419 /* Wa_1808121037:tgl */
1420 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
1422 /* Wa_1806527549:tgl */
1423 whitelist_reg(w, HIZ_CHICKEN);
1426 whitelist_reg_ext(w,
1427 RING_CTX_TIMESTAMP(engine->mmio_base),
1428 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1433 static void dg1_whitelist_build(struct intel_engine_cs *engine)
1435 struct i915_wa_list *w = &engine->whitelist;
1437 tgl_whitelist_build(engine);
1439 /* GEN:BUG:1409280441:dg1 */
1440 if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) &&
1441 (engine->class == RENDER_CLASS ||
1442 engine->class == COPY_ENGINE_CLASS))
1443 whitelist_reg_ext(w, RING_ID(engine->mmio_base),
1444 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1447 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1449 struct drm_i915_private *i915 = engine->i915;
1450 struct i915_wa_list *w = &engine->whitelist;
1452 wa_init_start(w, "whitelist", engine->name);
1455 dg1_whitelist_build(engine);
1456 else if (GRAPHICS_VER(i915) == 12)
1457 tgl_whitelist_build(engine);
1458 else if (GRAPHICS_VER(i915) == 11)
1459 icl_whitelist_build(engine);
1460 else if (IS_COMETLAKE(i915))
1461 cml_whitelist_build(engine);
1462 else if (IS_COFFEELAKE(i915))
1463 cfl_whitelist_build(engine);
1464 else if (IS_GEMINILAKE(i915))
1465 glk_whitelist_build(engine);
1466 else if (IS_KABYLAKE(i915))
1467 kbl_whitelist_build(engine);
1468 else if (IS_BROXTON(i915))
1469 bxt_whitelist_build(engine);
1470 else if (IS_SKYLAKE(i915))
1471 skl_whitelist_build(engine);
1472 else if (GRAPHICS_VER(i915) <= 8)
1475 MISSING_CASE(GRAPHICS_VER(i915));
1480 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1482 const struct i915_wa_list *wal = &engine->whitelist;
1483 struct intel_uncore *uncore = engine->uncore;
1484 const u32 base = engine->mmio_base;
1491 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1492 intel_uncore_write(uncore,
1493 RING_FORCE_TO_NONPRIV(base, i),
1494 i915_mmio_reg_offset(wa->reg));
1496 /* And clear the rest just in case of garbage */
1497 for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1498 intel_uncore_write(uncore,
1499 RING_FORCE_TO_NONPRIV(base, i),
1500 i915_mmio_reg_offset(RING_NOPID(base)));
1504 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1506 struct drm_i915_private *i915 = engine->i915;
1508 if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
1509 IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
1511 * Wa_1607138336:tgl[a0],dg1[a0]
1512 * Wa_1607063988:tgl[a0],dg1[a0]
1515 GEN9_CTX_PREEMPT_REG,
1516 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1519 if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
1522 * (see also Wa_1606682166:icl)
1526 GEN7_DISABLE_SAMPLER_PREFETCH);
1529 if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
1530 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1531 /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
1532 wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
1535 * Wa_1407928979:tgl A*
1536 * Wa_18011464164:tgl[B0+],dg1[B0+]
1537 * Wa_22010931296:tgl[B0+],dg1[B0+]
1538 * Wa_14010919138:rkl,dg1,adl-s,adl-p
1540 wa_write_or(wal, GEN7_FF_THREAD_MODE,
1541 GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1544 * Wa_1606700617:tgl,dg1,adl-p
1545 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
1546 * Wa_14010826681:tgl,dg1,rkl,adl-p
1549 GEN9_CS_DEBUG_MODE1,
1550 FF_DOP_CLOCK_GATE_DISABLE);
1553 if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
1554 IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
1555 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1556 /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
1557 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
1558 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
1562 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
1564 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
1568 if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
1569 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1573 * Wa_1607297627:tgl,rkl,dg1[a0]
1575 * On TGL and RKL there are multiple entries for this WA in the
1576 * BSpec; some indicate this is an A0-only WA, others indicate
1577 * it applies to all steppings so we trust the "all steppings."
1578 * For DG1 this only applies to A0.
1581 GEN6_RC_SLEEP_PSMI_CONTROL,
1582 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1583 GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1586 if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
1587 IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
1588 /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
1594 if (GRAPHICS_VER(i915) == 11) {
1595 /* This is not an Wa. Enable for better image quality */
1598 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1602 * Formerly known as WaGAPZPriorityScheme
1606 GEN11_ARBITRATION_PRIO_ORDER_MASK);
1610 * Formerly known as WaL3BankAddressHashing
1612 wa_write_clr_set(wal,
1614 GEN11_HASH_CTRL_EXCL_MASK,
1615 GEN11_HASH_CTRL_EXCL_BIT0);
1616 wa_write_clr_set(wal,
1618 GEN11_BANK_HASH_ADDR_EXCL_MASK,
1619 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1623 * Formerly known as WaDisableCleanEvicts
1627 GEN11_LQSC_CLEAN_EVICT_DISABLE);
1629 /* Wa_1606682166:icl */
1632 GEN7_DISABLE_SAMPLER_PREFETCH);
1634 /* Wa_1409178092:icl */
1635 wa_write_clr_set(wal,
1637 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1640 /* WaEnable32PlaneMode:icl */
1641 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
1642 GEN11_ENABLE_32_PLANE_MODE);
1645 * Wa_1408615072:icl,ehl (vsunit)
1646 * Wa_1407596294:icl,ehl (hsunit)
1648 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1649 VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1651 /* Wa_1407352427:icl,ehl */
1652 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1653 PSDUNIT_CLKGATE_DIS);
1655 /* Wa_1406680159:icl,ehl */
1657 SUBSLICE_UNIT_LEVEL_CLKGATE,
1658 GWUNIT_CLKGATE_DIS);
1661 * Wa_1408767742:icl[a2..forever],ehl[all]
1662 * Wa_1605460711:icl[a0..c0]
1665 GEN7_FF_THREAD_MODE,
1666 GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1668 /* Wa_22010271021 */
1670 GEN9_CS_DEBUG_MODE1,
1671 FF_DOP_CLOCK_GATE_DISABLE);
1674 if (IS_GRAPHICS_VER(i915, 9, 12)) {
1675 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
1677 GEN7_FF_SLICE_CS_CHICKEN1,
1678 GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1681 if (IS_SKYLAKE(i915) ||
1682 IS_KABYLAKE(i915) ||
1683 IS_COFFEELAKE(i915) ||
1684 IS_COMETLAKE(i915)) {
1685 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1688 GEN9_GAPS_TSV_CREDIT_DISABLE);
1691 if (IS_BROXTON(i915)) {
1692 /* WaDisablePooledEuLoadBalancingFix:bxt */
1694 FF_SLICE_CS_CHICKEN2,
1695 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1698 if (GRAPHICS_VER(i915) == 9) {
1699 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1701 GEN9_CSFE_CHICKEN1_RCS,
1702 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1704 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1707 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1709 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1710 if (IS_GEN9_LP(i915))
1711 wa_write_clr_set(wal,
1713 L3_PRIO_CREDITS_MASK,
1714 L3_GENERAL_PRIO_CREDITS(62) |
1715 L3_HIGH_PRIO_CREDITS(2));
1717 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1720 GEN8_LQSC_FLUSH_COHERENT_LINES);
1722 /* Disable atomics in L3 to prevent unrecoverable hangs */
1723 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
1724 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1725 wa_write_clr_set(wal, GEN8_L3SQCREG4,
1726 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1727 wa_write_clr_set(wal, GEN9_SCRATCH1,
1728 EVICTION_PERF_FIX_ENABLE, 0);
1731 if (IS_HASWELL(i915)) {
1732 /* WaSampleCChickenBitEnable:hsw */
1734 HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
1738 /* enable HiZ Raw Stall Optimization */
1739 HIZ_RAW_STALL_OPT_DISABLE);
1742 if (IS_VALLEYVIEW(i915)) {
1743 /* WaDisableEarlyCull:vlv */
1746 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1749 * WaVSThreadDispatchOverride:ivb,vlv
1751 * This actually overrides the dispatch
1752 * mode for all thread types.
1754 wa_write_clr_set(wal,
1755 GEN7_FF_THREAD_MODE,
1757 GEN7_FF_TS_SCHED_HW |
1758 GEN7_FF_VS_SCHED_HW |
1759 GEN7_FF_DS_SCHED_HW);
1761 /* WaPsdDispatchEnable:vlv */
1762 /* WaDisablePSDDualDispatchEnable:vlv */
1764 GEN7_HALF_SLICE_CHICKEN1,
1765 GEN7_MAX_PS_THREAD_DEP |
1766 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1769 if (IS_IVYBRIDGE(i915)) {
1770 /* WaDisableEarlyCull:ivb */
1773 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1775 if (0) { /* causes HiZ corruption on ivb:gt1 */
1776 /* enable HiZ Raw Stall Optimization */
1779 HIZ_RAW_STALL_OPT_DISABLE);
1783 * WaVSThreadDispatchOverride:ivb,vlv
1785 * This actually overrides the dispatch
1786 * mode for all thread types.
1788 wa_write_clr_set(wal,
1789 GEN7_FF_THREAD_MODE,
1791 GEN7_FF_TS_SCHED_HW |
1792 GEN7_FF_VS_SCHED_HW |
1793 GEN7_FF_DS_SCHED_HW);
1795 /* WaDisablePSDDualDispatchEnable:ivb */
1796 if (IS_IVB_GT1(i915))
1798 GEN7_HALF_SLICE_CHICKEN1,
1799 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1802 if (GRAPHICS_VER(i915) == 7) {
1803 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1806 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
1808 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
1809 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
1812 * BSpec says this must be set, even though
1813 * WaDisable4x2SubspanOptimization:ivb,hsw
1814 * WaDisable4x2SubspanOptimization isn't listed for VLV.
1818 PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1821 * BSpec recommends 8x4 when MSAA is used,
1822 * however in practice 16x4 seems fastest.
1824 * Note that PS/WM thread counts depend on the WIZ hashing
1825 * disable bit, which we don't touch here, but it's good
1826 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
1828 wa_masked_field_set(wal,
1830 GEN6_WIZ_HASHING_MASK,
1831 GEN6_WIZ_HASHING_16x4);
1834 if (IS_GRAPHICS_VER(i915, 6, 7))
1836 * We need to disable the AsyncFlip performance optimisations in
1837 * order to use MI_WAIT_FOR_EVENT within the CS. It should
1838 * already be programmed to '1' on all products.
1840 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1844 ASYNC_FLIP_PERF_DISABLE);
1846 if (GRAPHICS_VER(i915) == 6) {
1848 * Required for the hardware to program scanline values for
1850 * WaEnableFlushTlbInvalidationMode:snb
1854 GFX_TLB_INVALIDATE_EXPLICIT);
1856 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
1859 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
1863 /* WaStripsFansDisableFastClipPerformanceFix:snb */
1864 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
1867 * "This bit must be set if 3DSTATE_CLIP clip mode is set
1868 * to normal and 3DSTATE_SF number of SF output attributes
1871 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
1874 * BSpec recommends 8x4 when MSAA is used,
1875 * however in practice 16x4 seems fastest.
1877 * Note that PS/WM thread counts depend on the WIZ hashing
1878 * disable bit, which we don't touch here, but it's good
1879 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
1881 wa_masked_field_set(wal,
1883 GEN6_WIZ_HASHING_MASK,
1884 GEN6_WIZ_HASHING_16x4);
1886 /* WaDisable_RenderCache_OperationalFlush:snb */
1887 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
1890 * From the Sandybridge PRM, volume 1 part 3, page 24:
1891 * "If this bit is set, STCunit will have LRA as replacement
1892 * policy. [...] This bit must be reset. LRA replacement
1893 * policy is not supported."
1897 CM0_STC_EVICT_DISABLE_LRA_SNB);
1900 if (IS_GRAPHICS_VER(i915, 4, 6))
1901 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1902 wa_add(wal, MI_MODE,
1903 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
1904 /* XXX bit doesn't stick on Broadwater */
1905 IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
1907 if (GRAPHICS_VER(i915) == 4)
1909 * Disable CONSTANT_BUFFER before it is loaded from the context
1910 * image. For as it is loaded, it is executed and the stored
1911 * address may no longer be valid, leading to a GPU hang.
1913 * This imposes the requirement that userspace reload their
1914 * CONSTANT_BUFFER on every batch, fortunately a requirement
1915 * they are already accustomed to from before contexts were
1918 wa_add(wal, ECOSKPD,
1919 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
1920 0 /* XXX bit doesn't stick on Broadwater */,
1925 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1927 struct drm_i915_private *i915 = engine->i915;
1929 /* WaKBLVECSSemaphoreWaitPoll:kbl */
1930 if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) {
1932 RING_SEMA_WAIT_POLL(engine->mmio_base),
1938 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1940 if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
1943 if (engine->class == RENDER_CLASS)
1944 rcs_engine_wa_init(engine, wal);
1946 xcs_engine_wa_init(engine, wal);
1949 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1951 struct i915_wa_list *wal = &engine->wa_list;
1953 if (GRAPHICS_VER(engine->i915) < 4)
1956 wa_init_start(wal, "engine", engine->name);
1957 engine_init_workarounds(engine, wal);
1958 wa_init_finish(wal);
1961 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1963 wa_list_apply(engine->gt, &engine->wa_list);
1971 static const struct mcr_range mcr_ranges_gen8[] = {
1972 { .start = 0x5500, .end = 0x55ff },
1973 { .start = 0x7000, .end = 0x7fff },
1974 { .start = 0x9400, .end = 0x97ff },
1975 { .start = 0xb000, .end = 0xb3ff },
1976 { .start = 0xe000, .end = 0xe7ff },
1980 static const struct mcr_range mcr_ranges_gen12[] = {
1981 { .start = 0x8150, .end = 0x815f },
1982 { .start = 0x9520, .end = 0x955f },
1983 { .start = 0xb100, .end = 0xb3ff },
1984 { .start = 0xde80, .end = 0xe8ff },
1985 { .start = 0x24a00, .end = 0x24a7f },
1989 static const struct mcr_range mcr_ranges_xehp[] = {
1990 { .start = 0x4000, .end = 0x4aff },
1991 { .start = 0x5200, .end = 0x52ff },
1992 { .start = 0x5400, .end = 0x7fff },
1993 { .start = 0x8140, .end = 0x815f },
1994 { .start = 0x8c80, .end = 0x8dff },
1995 { .start = 0x94d0, .end = 0x955f },
1996 { .start = 0x9680, .end = 0x96ff },
1997 { .start = 0xb000, .end = 0xb3ff },
1998 { .start = 0xc800, .end = 0xcfff },
1999 { .start = 0xd800, .end = 0xd8ff },
2000 { .start = 0xdc00, .end = 0xffff },
2001 { .start = 0x17000, .end = 0x17fff },
2002 { .start = 0x24a00, .end = 0x24a7f },
2005 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
2007 const struct mcr_range *mcr_ranges;
2010 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
2011 mcr_ranges = mcr_ranges_xehp;
2012 else if (GRAPHICS_VER(i915) >= 12)
2013 mcr_ranges = mcr_ranges_gen12;
2014 else if (GRAPHICS_VER(i915) >= 8)
2015 mcr_ranges = mcr_ranges_gen8;
2020 * Registers in these ranges are affected by the MCR selector
2021 * which only controls CPU initiated MMIO. Routing does not
2022 * work for CS access so we cannot verify them on this path.
2024 for (i = 0; mcr_ranges[i].start; i++)
2025 if (offset >= mcr_ranges[i].start &&
2026 offset <= mcr_ranges[i].end)
2033 wa_list_srm(struct i915_request *rq,
2034 const struct i915_wa_list *wal,
2035 struct i915_vma *vma)
2037 struct drm_i915_private *i915 = rq->engine->i915;
2038 unsigned int i, count = 0;
2039 const struct i915_wa *wa;
2042 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
2043 if (GRAPHICS_VER(i915) >= 8)
2046 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2047 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
2051 cs = intel_ring_begin(rq, 4 * count);
2055 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2056 u32 offset = i915_mmio_reg_offset(wa->reg);
2058 if (mcr_range(i915, offset))
2063 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
2066 intel_ring_advance(rq, cs);
2071 static int engine_wa_list_verify(struct intel_context *ce,
2072 const struct i915_wa_list * const wal,
2075 const struct i915_wa *wa;
2076 struct i915_request *rq;
2077 struct i915_vma *vma;
2078 struct i915_gem_ww_ctx ww;
2086 vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
2087 wal->count * sizeof(u32));
2089 return PTR_ERR(vma);
2091 intel_engine_pm_get(ce->engine);
2092 i915_gem_ww_ctx_init(&ww, false);
2094 err = i915_gem_object_lock(vma->obj, &ww);
2096 err = intel_context_pin_ww(ce, &ww);
2100 err = i915_vma_pin_ww(vma, &ww, 0, 0,
2101 i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
2105 rq = i915_request_create(ce);
2111 err = i915_request_await_object(rq, vma->obj, true);
2113 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
2115 err = wa_list_srm(rq, wal, vma);
2117 i915_request_get(rq);
2119 i915_request_set_error_once(rq, err);
2120 i915_request_add(rq);
2125 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2130 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
2131 if (IS_ERR(results)) {
2132 err = PTR_ERR(results);
2137 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2138 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
2141 if (!wa_verify(wa, results[i], wal->name, from))
2145 i915_gem_object_unpin_map(vma->obj);
2148 i915_request_put(rq);
2150 i915_vma_unpin(vma);
2152 intel_context_unpin(ce);
2154 if (err == -EDEADLK) {
2155 err = i915_gem_ww_ctx_backoff(&ww);
2159 i915_gem_ww_ctx_fini(&ww);
2160 intel_engine_pm_put(ce->engine);
2165 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
2168 return engine_wa_list_verify(engine->kernel_context,
2173 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2174 #include "selftest_workarounds.c"