1 // SPDX-License-Identifier: MIT
3 * Copyright © 2022 Intel Corporation
8 #include "regs/xe_gt_regs.h"
10 #include "xe_device.h"
11 #include "xe_exec_queue.h"
14 #include "xe_platform_types.h"
15 #include "xe_step_types.h"
/*
 * mocs_dbg - debug logging helper used throughout this file.
 *
 * With CONFIG_DRM_XE_DEBUG enabled it is a plain alias for drm_dbg.
 * NOTE(review): the #else/#endif lines and the body of the fallback inline
 * function are elided in this listing; presumably the fallback is an empty
 * stub so the debug calls cost nothing in non-debug builds - confirm against
 * the full file.
 */
17 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
18 #define mocs_dbg drm_dbg
21 static inline void mocs_dbg(const struct drm_device *dev,
22 const char *format, ...)
/*
 * Flag bit in the value returned by get_mocs_settings(); when set,
 * xe_mocs_init() programs the global MOCS control registers.
 */
27 HAS_GLOBAL_MOCS = BIT(0),
/*
 * NOTE(review): lines are elided in this listing. The opening line below
 * belongs to struct xe_mocs_entry (one table row: control_value, l3cc_value
 * and a used marker, as referenced by MOCS_ENTRY() and the get_entry_*()
 * helpers), while the members that follow are the ones accessed through
 * struct xe_mocs_info pointers elsewhere in this file.
 */
30 struct xe_mocs_entry {
/* Number of MOCS entries to program; bounds the register write loops. */
38 unsigned int n_entries;
/* Platform table, indexed by MOCS index in the get_entry_*() helpers. */
39 const struct xe_mocs_entry *table;
/* Row whose values stand in for entries that are undefined or not used. */
42 u8 unused_entries_index;
45 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
/* Each LE_* helper shifts @value to the bit offset of the named field. */
46 #define _LE_CACHEABILITY(value) ((value) << 0)
47 #define _LE_TGT_CACHE(value) ((value) << 2)
48 #define LE_LRUM(value) ((value) << 4)
49 #define LE_AOM(value) ((value) << 6)
50 #define LE_RSC(value) ((value) << 7)
51 #define LE_SCC(value) ((value) << 8)
52 #define LE_PFM(value) ((value) << 11)
53 #define LE_SCF(value) ((value) << 14)
54 #define LE_COS(value) ((value) << 15)
55 #define LE_SSE(value) ((value) << 17)
57 /* Defines for the tables (LNCFCMOCS0 - LNCFCMOCS31) - two entries per word */
/* Each L3_* helper shifts @value to its bit offset within a 16-bit l3cc_value. */
58 #define L3_ESC(value) ((value) << 0)
59 #define L3_SCC(value) ((value) << 1)
60 #define _L3_CACHEABILITY(value) ((value) << 4)
61 #define L3_GLBGO(value) ((value) << 6)
62 #define L3_LKUP(value) ((value) << 7)
64 /* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
/* Unlike the shift helpers above, these are a field mask (bits 3:2) and a single bit (bit 8). */
65 #define _L4_CACHEABILITY REG_GENMASK(3, 2)
66 #define IG_PAT REG_BIT(8)
/*
 * Number of MOCS entries programmed for each table family; assigned to
 * xe_mocs_info.n_entries in get_mocs_settings().
 */
69 #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
70 #define PVC_NUM_MOCS_ENTRIES 3
71 #define MTL_NUM_MOCS_ENTRIES 16
73 /* (e)LLC caching options */
/* NOTE(review): the remainder of the note below is elided in this listing. */
75 * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
78 #define LE_0_PAGETABLE _LE_CACHEABILITY(0)
79 #define LE_1_UC _LE_CACHEABILITY(1)
80 #define LE_2_WT _LE_CACHEABILITY(2)
81 #define LE_3_WB _LE_CACHEABILITY(3)
/* Values for the target-cache field (see _LE_TGT_CACHE). */
84 #define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0)
85 #define LE_TC_1_LLC _LE_TGT_CACHE(1)
86 #define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2)
87 #define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3)
89 /* L3 caching options */
90 #define L3_0_DIRECT _L3_CACHEABILITY(0)
91 #define L3_1_UC _L3_CACHEABILITY(1)
92 #define L3_2_RESERVED _L3_CACHEABILITY(2)
93 #define L3_3_WB _L3_CACHEABILITY(3)
95 /* L4 caching options */
/* Built with REG_FIELD_PREP against the _L4_CACHEABILITY mask rather than a raw shift. */
96 #define L4_0_WB REG_FIELD_PREP(_L4_CACHEABILITY, 0)
97 #define L4_1_WT REG_FIELD_PREP(_L4_CACHEABILITY, 1)
98 #define L4_2_RESERVED REG_FIELD_PREP(_L4_CACHEABILITY, 2)
99 #define L4_3_UC REG_FIELD_PREP(_L4_CACHEABILITY, 3)
/*
 * MOCS_ENTRY - build one table row from a control value and an L3CC value.
 *
 * NOTE(review): some continuation lines of this macro are elided in this
 * listing; how __idx is used and how the initializer is closed is not
 * visible here. Presumably the row is a designated initializer at [__idx]
 * that also marks the entry as used - confirm against the full file.
 */
101 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
103 .control_value = __control_value, \
104 .l3cc_value = __l3cc_value, \
111 * These are the MOCS tables that are programmed across all the rings.
112 * The control value is programmed to all the rings that support the
113 * MOCS registers, while the l3cc_values are only programmed to the
114 * LNCFCMOCS0 - LNCFCMOCS31 registers.
116 * These tables are intended to be kept reasonably consistent across
117 * HW platforms, and for ICL+, be identical across OSes. To achieve
118 * that, for Icelake and above, list of entries is published as part
121 * Entries not part of the following tables are undefined as far as
122 * userspace is concerned and shouldn't be relied upon. For Gen < 12
123 * they will be initialized to PTE. Gen >= 12 don't have a setting for
124 * PTE and those platforms except TGL/RKL will be initialized L3 WB to
125 * catch accidental use of reserved and unused mocs indexes.
127 * The last few entries are reserved by the hardware. For ICL+ they
128 * should be initialized according to bspec and never used, for older
129 * platforms they should never be written to.
131 * NOTE1: These tables are part of bspec and defined as part of hardware
132 * interface for ICL+. For older platforms, they are part of kernel
133 * ABI. It is expected that, for specific hardware platform, existing
134 * entries will remain constant and the table will only be updated by
135 * adding new entries, filling unused positions.
137 * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
138 * indices have been set to L3 WB. These reserved entries should never
139 * be used, they may be changed to low performant variants with better
140 * coherency in the future if more entries are needed.
141 * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
/*
 * GEN11_MOCS_ENTRIES - shared run of table rows, written as a macro so it
 * can be expanded inside a table initializer.
 *
 * NOTE(review): the MOCS_ENTRY( openers, the L3 value of each row and
 * several row comments are elided in this listing; only the control-value
 * expressions remain. The expansion site is not visible here either.
 */
144 #define GEN11_MOCS_ENTRIES \
145 /* Entries 0 and 1 are defined per-platform */ \
146 /* Base - L3 + LLC */ \
148 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
150 /* Base - Uncached */ \
152 LE_1_UC | LE_TC_1_LLC, \
156 LE_1_UC | LE_TC_1_LLC, \
160 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
164 LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
166 /* Age 0 - L3 + LLC */ \
168 LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
170 /* Age: Don't Chg. - LLC */ \
172 LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
174 /* Age: Don't Chg. - L3 + LLC */ \
176 LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
180 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
182 /* No AOM - L3 + LLC */ \
184 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
186 /* No AOM; Age 0 - LLC */ \
188 LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
190 /* No AOM; Age 0 - L3 + LLC */ \
192 LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
194 /* No AOM; Age:DC - LLC */ \
196 LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
198 /* No AOM; Age:DC - L3 + LLC */ \
200 LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
202 /* Self-Snoop - L3 + LLC */ \
204 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
206 /* Skip Caching - L3 + LLC(12.5%) */ \
208 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
210 /* Skip Caching - L3 + LLC(25%) */ \
212 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
214 /* Skip Caching - L3 + LLC(50%) */ \
216 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
218 /* Skip Caching - L3 + LLC(75%) */ \
220 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
222 /* Skip Caching - L3 + LLC(87.5%) */ \
224 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
226 /* HW Reserved - SW program but never use */ \
228 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
230 /* HW Reserved - SW program but never use */ \
232 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
/*
 * MOCS table installed by get_mocs_settings() together with
 * GEN9_NUM_MOCS_ENTRIES and unused_entries_index 5.
 *
 * Every visible row passes 0 as the control value and only sets an L3 value
 * (third MOCS_ENTRY() argument, stored in l3cc_value).
 * NOTE(review): row comments and the closing brace are elided in this listing.
 */
235 static const struct xe_mocs_entry dg1_mocs_desc[] = {
237 MOCS_ENTRY(1, 0, L3_1_UC),
/* Index 5 is the row get_mocs_settings() picks for unused entries. */
239 MOCS_ENTRY(5, 0, L3_3_WB),
241 MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
243 MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
245 MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
248 MOCS_ENTRY(48, 0, L3_3_WB),
250 MOCS_ENTRY(49, 0, L3_1_UC),
253 MOCS_ENTRY(60, 0, L3_1_UC),
254 MOCS_ENTRY(61, 0, L3_1_UC),
255 MOCS_ENTRY(62, 0, L3_1_UC),
256 MOCS_ENTRY(63, 0, L3_1_UC),
/*
 * MOCS table installed by get_mocs_settings() together with
 * GEN9_NUM_MOCS_ENTRIES and unused_entries_index 2.
 *
 * NOTE(review): the MOCS_ENTRY( openers (and with them the row indices),
 * the L3 values and the closing brace are elided in this listing; only the
 * row comments and control-value expressions remain.
 */
259 static const struct xe_mocs_entry gen12_mocs_desc[] = {
261 /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
263 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
265 /* Implicitly enable L1 - HDC:L1 + L3 */
267 LE_1_UC | LE_TC_1_LLC,
269 /* Implicitly enable L1 - HDC:L1 + LLC */
271 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
273 /* Implicitly enable L1 - HDC:L1 */
275 LE_1_UC | LE_TC_1_LLC,
277 /* HW Special Case (CCS) */
279 LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
281 /* HW Special Case (Displayable) */
283 LE_1_UC | LE_TC_1_LLC,
/*
 * Default DG2 MOCS table: get_mocs_settings() uses it unless the
 * G10 A0..B0 stepping check selects dg2_mocs_desc_g10_ax instead.
 *
 * All rows pass 0 as the control value; only the L3 value differs.
 * NOTE(review): the comment for row 3 and the closing brace are elided in
 * this listing.
 */
287 static const struct xe_mocs_entry dg2_mocs_desc[] = {
288 /* UC - Coherent; GO:L3 */
289 MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
290 /* UC - Coherent; GO:Memory */
291 MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
292 /* UC - Non-Coherent; GO:Memory */
293 MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
/* Index 3 is the row get_mocs_settings() picks for unused entries. */
296 MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
/*
 * DG2 workaround table: get_mocs_settings() selects it when the subplatform
 * is XE_SUBPLATFORM_DG2_G10 and the graphics stepping lies in
 * STEP_A0..STEP_B0 (inclusive).
 *
 * It differs from dg2_mocs_desc only in row 0, which additionally sets
 * L3_GLBGO(1).
 * NOTE(review): the comment for row 3 and the closing brace are elided in
 * this listing.
 */
299 static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
300 /* Wa_14011441408: Set Go to Memory for MOCS#0 */
301 MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
302 /* UC - Coherent; GO:Memory */
303 MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
304 /* UC - Non-Coherent; GO:Memory */
305 MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
308 MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
/*
 * MOCS table installed by get_mocs_settings() together with
 * PVC_NUM_MOCS_ENTRIES (3) and unused_entries_index 2.
 *
 * The three rows pass 0 as the control value: rows 0 and 2 are L3 write-back,
 * row 1 is L3 uncached.
 * NOTE(review): row comments and the closing brace are elided in this listing.
 */
311 static const struct xe_mocs_entry pvc_mocs_desc[] = {
313 MOCS_ENTRY(0, 0, L3_3_WB),
316 MOCS_ENTRY(1, 0, L3_1_UC),
319 MOCS_ENTRY(2, 0, L3_3_WB),
/*
 * MOCS table installed by get_mocs_settings() together with
 * MTL_NUM_MOCS_ENTRIES (16) and unused_entries_index 1.
 *
 * NOTE(review): the MOCS_ENTRY( openers, the control-value arguments, some
 * L3 values and row comments, and the closing brace are elided in this
 * listing; only the row comments and trailing L3-value lines remain, so the
 * row indices cannot be read from here.
 */
322 static const struct xe_mocs_entry mtl_mocs_desc[] = {
323 /* Error - Reserved for Non-Use */
326 L3_LKUP(1) | L3_3_WB),
327 /* Cached - L3 + L4 */
330 L3_LKUP(1) | L3_3_WB),
334 L3_LKUP(1) | L3_1_UC),
335 /* Uncached - GO:L3 */
338 L3_LKUP(1) | L3_1_UC),
342 L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
343 /* Uncached - GO:Mem */
346 L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
347 /* L4 - L3:NoLKUP; GO:L3 */
351 /* Uncached - L3:NoLKUP; GO:L3 */
355 /* L4 - L3:NoLKUP; GO:Mem */
358 L3_GLBGO(1) | L3_1_UC),
359 /* Uncached - L3:NoLKUP; GO:Mem */
362 L3_GLBGO(1) | L3_1_UC),
363 /* Display - L3; L4:WT */
366 L3_LKUP(1) | L3_3_WB),
367 /* CCS - Non-Displayable */
370 L3_GLBGO(1) | L3_1_UC),
/*
 * get_mocs_settings - fill @info with the MOCS description for @xe's platform.
 *
 * @xe: device whose platform, subplatform and stepping select the table
 * @info: output; zeroed first, then given the table pointer, its size, the
 *        number of entries to program and the unused-entries row index
 *
 * Returns a flags word; HAS_GLOBAL_MOCS may be set in it.
 *
 * NOTE(review): the case labels, the uc_index/wb_index assignments, the
 * break/return statements and the condition guarding HAS_GLOBAL_MOCS are
 * elided in this listing, so which platform maps to which table (other than
 * by table name) and the exact failure return value cannot be read from here.
 */
373 static unsigned int get_mocs_settings(struct xe_device *xe,
374 struct xe_mocs_info *info)
376 unsigned int flags = 0;
/* Start from a fully zeroed description so unset fields read as 0. */
378 memset(info, 0, sizeof(struct xe_mocs_info));
380 switch (xe->info.platform) {
382 info->size = ARRAY_SIZE(pvc_mocs_desc);
383 info->table = pvc_mocs_desc;
384 info->n_entries = PVC_NUM_MOCS_ENTRIES;
387 info->unused_entries_index = 2;
390 info->size = ARRAY_SIZE(mtl_mocs_desc);
391 info->table = mtl_mocs_desc;
392 info->n_entries = MTL_NUM_MOCS_ENTRIES;
394 info->unused_entries_index = 1;
/* DG2 G10 with graphics stepping A0..B0 (inclusive) gets the workaround table. */
397 if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
398 xe->info.step.graphics >= STEP_A0 &&
399 xe->info.step.graphics <= STEP_B0) {
400 info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
401 info->table = dg2_mocs_desc_g10_ax;
403 info->size = ARRAY_SIZE(dg2_mocs_desc);
404 info->table = dg2_mocs_desc;
407 info->n_entries = GEN9_NUM_MOCS_ENTRIES;
408 info->unused_entries_index = 3;
411 info->size = ARRAY_SIZE(dg1_mocs_desc);
412 info->table = dg1_mocs_desc;
414 info->n_entries = GEN9_NUM_MOCS_ENTRIES;
415 info->unused_entries_index = 5;
422 info->size = ARRAY_SIZE(gen12_mocs_desc);
423 info->table = gen12_mocs_desc;
424 info->n_entries = GEN9_NUM_MOCS_ENTRIES;
426 info->unused_entries_index = 2;
429 drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
434 * Index 0 is a reserved/unused table entry on most platforms, but
435 * even on those where it does represent a legitimate MOCS entry, it
436 * never represents the "most cached, least coherent" behavior we want
437 * to populate undefined table rows with. So if unused_entries_index
438 * is still 0 at this point, we'll assume that it was omitted by
439 * mistake in the switch statement above.
441 XE_WARN_ON(info->unused_entries_index == 0);
/* The table must not describe more rows than will be programmed. */
443 if (XE_WARN_ON(info->size > info->n_entries)) {
449 flags |= HAS_GLOBAL_MOCS;
455 * Get control_value from MOCS entry. If the table entry is not defined, the
456 * settings from unused_entries_index will be returned.
458 static u32 get_entry_control(const struct xe_mocs_info *info,
461 if (index < info->size && info->table[index].used)
462 return info->table[index].control_value;
463 return info->table[info->unused_entries_index].control_value;
466 static void __init_mocs_table(struct xe_gt *gt,
467 const struct xe_mocs_info *info,
470 struct xe_device *xe = gt_to_xe(gt);
475 mocs_dbg(>_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
476 drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
477 "Unused entries index should have been defined\n");
479 i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
481 struct xe_reg reg = XE_REG(addr + i * 4);
483 mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs);
484 xe_mmio_write32(gt, reg, mocs);
489 * Get l3cc_value from MOCS entry taking into account when it's not used
490 * then if unused_entries_index is not zero then its value will be returned
491 * otherwise I915_MOCS_PTE's value is returned in this case.
493 static u16 get_entry_l3cc(const struct xe_mocs_info *info,
496 if (index < info->size && info->table[index].used)
497 return info->table[index].l3cc_value;
498 return info->table[info->unused_entries_index].l3cc_value;
/*
 * l3cc_combine - pack two 16-bit L3CC entries into one 32-bit register value.
 *
 * @low: entry placed in bits 15:0
 * @high: entry placed in bits 31:16
 */
static u32 l3cc_combine(u16 low, u16 high)
{
	/* Widen before shifting so the high half is never truncated. */
	return (u32)low | ((u32)high << 16);
}
506 static void init_l3cc_table(struct xe_gt *gt,
507 const struct xe_mocs_info *info)
512 mocs_dbg(>_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
514 i < (info->n_entries + 1) / 2 ?
515 (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
516 get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
518 mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr,
520 xe_mmio_write32(gt, LNCFCMOCS(i), l3cc);
524 void xe_mocs_init_early(struct xe_gt *gt)
526 struct xe_mocs_info table;
528 get_mocs_settings(gt_to_xe(gt), &table);
529 gt->mocs.uc_index = table.uc_index;
530 gt->mocs.wb_index = table.wb_index;
533 void xe_mocs_init(struct xe_gt *gt)
535 struct xe_mocs_info table;
539 * LLC and eDRAM control values are not applicable to dgfx
541 flags = get_mocs_settings(gt_to_xe(gt), &table);
542 mocs_dbg(>_to_xe(gt)->drm, "flag:0x%x\n", flags);
544 if (flags & HAS_GLOBAL_MOCS)
545 __init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr);
548 * Initialize the L3CC table as part of mocs initalization to make
549 * sure the LNCFCMOCSx registers are programmed for the subsequent
550 * memory transactions including guc transactions
553 init_l3cc_table(gt, &table);