drivers/gpu/drm/i915/gt/intel_lrc.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014 Intel Corporation
4  */
5
6 #include "gem/i915_gem_lmem.h"
7
8 #include "gen8_engine_cs.h"
9 #include "i915_drv.h"
10 #include "i915_perf.h"
11 #include "intel_engine.h"
12 #include "intel_gpu_commands.h"
13 #include "intel_gt.h"
14 #include "intel_lrc.h"
15 #include "intel_lrc_reg.h"
16 #include "intel_ring.h"
17 #include "shmem_utils.h"
18
19 static void set_offsets(u32 *regs,
20                         const u8 *data,
21                         const struct intel_engine_cs *engine,
22                         bool close)
23 #define NOP(x) (BIT(7) | (x))
24 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
25 #define POSTED BIT(0)
26 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
27 #define REG16(x) \
28         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
29         (((x) >> 2) & 0x7f)
30 #define END 0
31 {
32         const u32 base = engine->mmio_base;
33
34         while (*data) {
35                 u8 count, flags;
36
37                 if (*data & BIT(7)) { /* skip */
38                         count = *data++ & ~BIT(7);
39                         regs += count;
40                         continue;
41                 }
42
43                 count = *data & 0x3f;
44                 flags = *data >> 6;
45                 data++;
46
47                 *regs = MI_LOAD_REGISTER_IMM(count);
48                 if (flags & POSTED)
49                         *regs |= MI_LRI_FORCE_POSTED;
50                 if (GRAPHICS_VER(engine->i915) >= 11)
51                         *regs |= MI_LRI_LRM_CS_MMIO;
52                 regs++;
53
54                 GEM_BUG_ON(!count);
55                 do {
56                         u32 offset = 0;
57                         u8 v;
58
59                         do {
60                                 v = *data++;
61                                 offset <<= 7;
62                                 offset |= v & ~BIT(7);
63                         } while (v & BIT(7));
64
65                         regs[0] = base + (offset << 2);
66                         regs += 2;
67                 } while (--count);
68         }
69
70         if (close) {
71                 /* Close the batch; used mainly by live_lrc_layout() */
72                 *regs = MI_BATCH_BUFFER_END;
73                 if (GRAPHICS_VER(engine->i915) >= 10)
74                         *regs |= BIT(0);
75         }
76 }
77
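/*
 * The per-engine offset tables below are compact byte streams decoded by
 * set_offsets() above: a byte with bit 7 set skips that many dwords (NOP),
 * otherwise its low 6 bits give a register count and the top 2 bits carry
 * flags (POSTED -> MI_LRI_FORCE_POSTED). Each register offset then follows
 * as one or two 7-bit groups (REG/REG16), most significant group first,
 * with bit 7 acting as a continuation flag; the decoded value is a dword
 * offset from the engine's mmio_base. A zero byte (END) terminates the
 * table.
 */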
78 static const u8 gen8_xcs_offsets[] = {
79         NOP(1),
80         LRI(11, 0),
81         REG16(0x244),
82         REG(0x034),
83         REG(0x030),
84         REG(0x038),
85         REG(0x03c),
86         REG(0x168),
87         REG(0x140),
88         REG(0x110),
89         REG(0x11c),
90         REG(0x114),
91         REG(0x118),
92
93         NOP(9),
94         LRI(9, 0),
95         REG16(0x3a8),
96         REG16(0x28c),
97         REG16(0x288),
98         REG16(0x284),
99         REG16(0x280),
100         REG16(0x27c),
101         REG16(0x278),
102         REG16(0x274),
103         REG16(0x270),
104
105         NOP(13),
106         LRI(2, 0),
107         REG16(0x200),
108         REG(0x028),
109
110         END
111 };
112
113 static const u8 gen9_xcs_offsets[] = {
114         NOP(1),
115         LRI(14, POSTED),
116         REG16(0x244),
117         REG(0x034),
118         REG(0x030),
119         REG(0x038),
120         REG(0x03c),
121         REG(0x168),
122         REG(0x140),
123         REG(0x110),
124         REG(0x11c),
125         REG(0x114),
126         REG(0x118),
127         REG(0x1c0),
128         REG(0x1c4),
129         REG(0x1c8),
130
131         NOP(3),
132         LRI(9, POSTED),
133         REG16(0x3a8),
134         REG16(0x28c),
135         REG16(0x288),
136         REG16(0x284),
137         REG16(0x280),
138         REG16(0x27c),
139         REG16(0x278),
140         REG16(0x274),
141         REG16(0x270),
142
143         NOP(13),
144         LRI(1, POSTED),
145         REG16(0x200),
146
147         NOP(13),
148         LRI(44, POSTED),
149         REG(0x028),
150         REG(0x09c),
151         REG(0x0c0),
152         REG(0x178),
153         REG(0x17c),
154         REG16(0x358),
155         REG(0x170),
156         REG(0x150),
157         REG(0x154),
158         REG(0x158),
159         REG16(0x41c),
160         REG16(0x600),
161         REG16(0x604),
162         REG16(0x608),
163         REG16(0x60c),
164         REG16(0x610),
165         REG16(0x614),
166         REG16(0x618),
167         REG16(0x61c),
168         REG16(0x620),
169         REG16(0x624),
170         REG16(0x628),
171         REG16(0x62c),
172         REG16(0x630),
173         REG16(0x634),
174         REG16(0x638),
175         REG16(0x63c),
176         REG16(0x640),
177         REG16(0x644),
178         REG16(0x648),
179         REG16(0x64c),
180         REG16(0x650),
181         REG16(0x654),
182         REG16(0x658),
183         REG16(0x65c),
184         REG16(0x660),
185         REG16(0x664),
186         REG16(0x668),
187         REG16(0x66c),
188         REG16(0x670),
189         REG16(0x674),
190         REG16(0x678),
191         REG16(0x67c),
192         REG(0x068),
193
194         END
195 };
196
197 static const u8 gen12_xcs_offsets[] = {
198         NOP(1),
199         LRI(13, POSTED),
200         REG16(0x244),
201         REG(0x034),
202         REG(0x030),
203         REG(0x038),
204         REG(0x03c),
205         REG(0x168),
206         REG(0x140),
207         REG(0x110),
208         REG(0x1c0),
209         REG(0x1c4),
210         REG(0x1c8),
211         REG(0x180),
212         REG16(0x2b4),
213
214         NOP(5),
215         LRI(9, POSTED),
216         REG16(0x3a8),
217         REG16(0x28c),
218         REG16(0x288),
219         REG16(0x284),
220         REG16(0x280),
221         REG16(0x27c),
222         REG16(0x278),
223         REG16(0x274),
224         REG16(0x270),
225
226         END
227 };
228
229 static const u8 gen8_rcs_offsets[] = {
230         NOP(1),
231         LRI(14, POSTED),
232         REG16(0x244),
233         REG(0x034),
234         REG(0x030),
235         REG(0x038),
236         REG(0x03c),
237         REG(0x168),
238         REG(0x140),
239         REG(0x110),
240         REG(0x11c),
241         REG(0x114),
242         REG(0x118),
243         REG(0x1c0),
244         REG(0x1c4),
245         REG(0x1c8),
246
247         NOP(3),
248         LRI(9, POSTED),
249         REG16(0x3a8),
250         REG16(0x28c),
251         REG16(0x288),
252         REG16(0x284),
253         REG16(0x280),
254         REG16(0x27c),
255         REG16(0x278),
256         REG16(0x274),
257         REG16(0x270),
258
259         NOP(13),
260         LRI(1, 0),
261         REG(0x0c8),
262
263         END
264 };
265
266 static const u8 gen9_rcs_offsets[] = {
267         NOP(1),
268         LRI(14, POSTED),
269         REG16(0x244),
270         REG(0x34),
271         REG(0x30),
272         REG(0x38),
273         REG(0x3c),
274         REG(0x168),
275         REG(0x140),
276         REG(0x110),
277         REG(0x11c),
278         REG(0x114),
279         REG(0x118),
280         REG(0x1c0),
281         REG(0x1c4),
282         REG(0x1c8),
283
284         NOP(3),
285         LRI(9, POSTED),
286         REG16(0x3a8),
287         REG16(0x28c),
288         REG16(0x288),
289         REG16(0x284),
290         REG16(0x280),
291         REG16(0x27c),
292         REG16(0x278),
293         REG16(0x274),
294         REG16(0x270),
295
296         NOP(13),
297         LRI(1, 0),
298         REG(0xc8),
299
300         NOP(13),
301         LRI(44, POSTED),
302         REG(0x28),
303         REG(0x9c),
304         REG(0xc0),
305         REG(0x178),
306         REG(0x17c),
307         REG16(0x358),
308         REG(0x170),
309         REG(0x150),
310         REG(0x154),
311         REG(0x158),
312         REG16(0x41c),
313         REG16(0x600),
314         REG16(0x604),
315         REG16(0x608),
316         REG16(0x60c),
317         REG16(0x610),
318         REG16(0x614),
319         REG16(0x618),
320         REG16(0x61c),
321         REG16(0x620),
322         REG16(0x624),
323         REG16(0x628),
324         REG16(0x62c),
325         REG16(0x630),
326         REG16(0x634),
327         REG16(0x638),
328         REG16(0x63c),
329         REG16(0x640),
330         REG16(0x644),
331         REG16(0x648),
332         REG16(0x64c),
333         REG16(0x650),
334         REG16(0x654),
335         REG16(0x658),
336         REG16(0x65c),
337         REG16(0x660),
338         REG16(0x664),
339         REG16(0x668),
340         REG16(0x66c),
341         REG16(0x670),
342         REG16(0x674),
343         REG16(0x678),
344         REG16(0x67c),
345         REG(0x68),
346
347         END
348 };
349
350 static const u8 gen11_rcs_offsets[] = {
351         NOP(1),
352         LRI(15, POSTED),
353         REG16(0x244),
354         REG(0x034),
355         REG(0x030),
356         REG(0x038),
357         REG(0x03c),
358         REG(0x168),
359         REG(0x140),
360         REG(0x110),
361         REG(0x11c),
362         REG(0x114),
363         REG(0x118),
364         REG(0x1c0),
365         REG(0x1c4),
366         REG(0x1c8),
367         REG(0x180),
368
369         NOP(1),
370         LRI(9, POSTED),
371         REG16(0x3a8),
372         REG16(0x28c),
373         REG16(0x288),
374         REG16(0x284),
375         REG16(0x280),
376         REG16(0x27c),
377         REG16(0x278),
378         REG16(0x274),
379         REG16(0x270),
380
381         LRI(1, POSTED),
382         REG(0x1b0),
383
384         NOP(10),
385         LRI(1, 0),
386         REG(0x0c8),
387
388         END
389 };
390
391 static const u8 gen12_rcs_offsets[] = {
392         NOP(1),
393         LRI(13, POSTED),
394         REG16(0x244),
395         REG(0x034),
396         REG(0x030),
397         REG(0x038),
398         REG(0x03c),
399         REG(0x168),
400         REG(0x140),
401         REG(0x110),
402         REG(0x1c0),
403         REG(0x1c4),
404         REG(0x1c8),
405         REG(0x180),
406         REG16(0x2b4),
407
408         NOP(5),
409         LRI(9, POSTED),
410         REG16(0x3a8),
411         REG16(0x28c),
412         REG16(0x288),
413         REG16(0x284),
414         REG16(0x280),
415         REG16(0x27c),
416         REG16(0x278),
417         REG16(0x274),
418         REG16(0x270),
419
420         LRI(3, POSTED),
421         REG(0x1b0),
422         REG16(0x5a8),
423         REG16(0x5ac),
424
425         NOP(6),
426         LRI(1, 0),
427         REG(0x0c8),
428         NOP(3 + 9 + 1),
429
430         LRI(51, POSTED),
431         REG16(0x588),
432         REG16(0x588),
433         REG16(0x588),
434         REG16(0x588),
435         REG16(0x588),
436         REG16(0x588),
437         REG(0x028),
438         REG(0x09c),
439         REG(0x0c0),
440         REG(0x178),
441         REG(0x17c),
442         REG16(0x358),
443         REG(0x170),
444         REG(0x150),
445         REG(0x154),
446         REG(0x158),
447         REG16(0x41c),
448         REG16(0x600),
449         REG16(0x604),
450         REG16(0x608),
451         REG16(0x60c),
452         REG16(0x610),
453         REG16(0x614),
454         REG16(0x618),
455         REG16(0x61c),
456         REG16(0x620),
457         REG16(0x624),
458         REG16(0x628),
459         REG16(0x62c),
460         REG16(0x630),
461         REG16(0x634),
462         REG16(0x638),
463         REG16(0x63c),
464         REG16(0x640),
465         REG16(0x644),
466         REG16(0x648),
467         REG16(0x64c),
468         REG16(0x650),
469         REG16(0x654),
470         REG16(0x658),
471         REG16(0x65c),
472         REG16(0x660),
473         REG16(0x664),
474         REG16(0x668),
475         REG16(0x66c),
476         REG16(0x670),
477         REG16(0x674),
478         REG16(0x678),
479         REG16(0x67c),
480         REG(0x068),
481         REG(0x084),
482         NOP(1),
483
484         END
485 };
486
487 #undef END
488 #undef REG16
489 #undef REG
490 #undef LRI
491 #undef NOP
492
493 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
494 {
495         /*
496          * The gen12+ lists only have the registers we program in the basic
497          * default state. We rely on the context image using relative
498          * addressing to automatically fix up the register state between the
499          * physical engines of a virtual engine.
500          */
501         GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
502                    !intel_engine_has_relative_mmio(engine));
503
504         if (engine->class == RENDER_CLASS) {
505                 if (GRAPHICS_VER(engine->i915) >= 12)
506                         return gen12_rcs_offsets;
507                 else if (GRAPHICS_VER(engine->i915) >= 11)
508                         return gen11_rcs_offsets;
509                 else if (GRAPHICS_VER(engine->i915) >= 9)
510                         return gen9_rcs_offsets;
511                 else
512                         return gen8_rcs_offsets;
513         } else {
514                 if (GRAPHICS_VER(engine->i915) >= 12)
515                         return gen12_xcs_offsets;
516                 else if (GRAPHICS_VER(engine->i915) >= 9)
517                         return gen9_xcs_offsets;
518                 else
519                         return gen8_xcs_offsets;
520         }
521 }
522
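/*
 * The lrc_ring_*() helpers below return the dword index of a particular
 * (register, value) pair within the logical ring context image, or -1 if
 * that register is not part of the context image on the given platform.
 * Callers index the register state as regs[x + 1] to reach the value slot.
 */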
523 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
524 {
525         if (GRAPHICS_VER(engine->i915) >= 12)
526                 return 0x60;
527         else if (GRAPHICS_VER(engine->i915) >= 9)
528                 return 0x54;
529         else if (engine->class == RENDER_CLASS)
530                 return 0x58;
531         else
532                 return -1;
533 }
534
535 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
536 {
537         if (GRAPHICS_VER(engine->i915) >= 12)
538                 return 0x74;
539         else if (GRAPHICS_VER(engine->i915) >= 9)
540                 return 0x68;
541         else if (engine->class == RENDER_CLASS)
542                 return 0xd8;
543         else
544                 return -1;
545 }
546
547 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
548 {
549         if (GRAPHICS_VER(engine->i915) >= 12)
550                 return 0x12;
551         else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
552                 return 0x18;
553         else
554                 return -1;
555 }
556
557 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
558 {
559         int x;
560
561         x = lrc_ring_wa_bb_per_ctx(engine);
562         if (x < 0)
563                 return x;
564
565         return x + 2;
566 }
567
568 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
569 {
570         int x;
571
572         x = lrc_ring_indirect_ptr(engine);
573         if (x < 0)
574                 return x;
575
576         return x + 2;
577 }
578
579 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
580 {
581         if (engine->class != RENDER_CLASS)
582                 return -1;
583
584         if (GRAPHICS_VER(engine->i915) >= 12)
585                 return 0xb6;
586         else if (GRAPHICS_VER(engine->i915) >= 11)
587                 return 0xaa;
588         else
589                 return -1;
590 }
591
592 static u32
593 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
594 {
595         switch (GRAPHICS_VER(engine->i915)) {
596         default:
597                 MISSING_CASE(GRAPHICS_VER(engine->i915));
598                 fallthrough;
599         case 12:
600                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
601         case 11:
602                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
603         case 10:
604                 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
605         case 9:
606                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
607         case 8:
608                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
609         }
610 }
611
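/*
 * INDIRECT_CTX packs the cacheline-aligned GGTT address of the indirect
 * context batch together with its size in cachelines in the low bits,
 * while INDIRECT_CTX_OFFSET tells the CS at which point of the context
 * restore to execute it (per-gen default taken from above).
 */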
612 static void
613 lrc_setup_indirect_ctx(u32 *regs,
614                        const struct intel_engine_cs *engine,
615                        u32 ctx_bb_ggtt_addr,
616                        u32 size)
617 {
618         GEM_BUG_ON(!size);
619         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
620         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
621         regs[lrc_ring_indirect_ptr(engine) + 1] =
622                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
623
624         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
625         regs[lrc_ring_indirect_offset(engine) + 1] =
626                 lrc_ring_indirect_offset_default(engine) << 6;
627 }
628
629 static void init_common_regs(u32 * const regs,
630                              const struct intel_context *ce,
631                              const struct intel_engine_cs *engine,
632                              bool inhibit)
633 {
634         u32 ctl;
635
636         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
637         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
638         if (inhibit)
639                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
640         if (GRAPHICS_VER(engine->i915) < 11)
641                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
642                                            CTX_CTRL_RS_CTX_ENABLE);
643         regs[CTX_CONTEXT_CONTROL] = ctl;
644
645         regs[CTX_TIMESTAMP] = ce->runtime.last;
646 }
647
648 static void init_wa_bb_regs(u32 * const regs,
649                             const struct intel_engine_cs *engine)
650 {
651         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
652
653         if (wa_ctx->per_ctx.size) {
654                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
655
656                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
657                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
658                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
659         }
660
661         if (wa_ctx->indirect_ctx.size) {
662                 lrc_setup_indirect_ctx(regs, engine,
663                                        i915_ggtt_offset(wa_ctx->vma) +
664                                        wa_ctx->indirect_ctx.offset,
665                                        wa_ctx->indirect_ctx.size);
666         }
667 }
668
669 static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
670 {
671         if (i915_vm_is_4lvl(&ppgtt->vm)) {
672                 /* 64b PPGTT (48bit canonical)
673                  * PDP0_DESCRIPTOR contains the base address of the PML4 and
674                  * other PDP Descriptors are ignored.
675                  */
676                 ASSIGN_CTX_PML4(ppgtt, regs);
677         } else {
678                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
679                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
680                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
681                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
682         }
683 }
684
685 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
686 {
687         if (i915_is_ggtt(vm))
688                 return i915_vm_to_ggtt(vm)->alias;
689         else
690                 return i915_vm_to_ppgtt(vm);
691 }
692
693 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
694 {
695         int x;
696
697         x = lrc_ring_mi_mode(engine);
698         if (x != -1) {
699                 regs[x + 1] &= ~STOP_RING;
700                 regs[x + 1] |= STOP_RING << 16;
701         }
702 }
703
704 static void __lrc_init_regs(u32 *regs,
705                             const struct intel_context *ce,
706                             const struct intel_engine_cs *engine,
707                             bool inhibit)
708 {
709         /*
710          * A context is actually a big batch buffer with several
711          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
712          * values we are setting here are only for the first context restore:
713          * on a subsequent save, the GPU will recreate this batchbuffer with new
714          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
715          * we are not initializing here).
716          *
717          * Must keep consistent with virtual_update_register_offsets().
718          */
719
720         if (inhibit)
721                 memset(regs, 0, PAGE_SIZE);
722
723         set_offsets(regs, reg_offsets(engine), engine, inhibit);
724
725         init_common_regs(regs, ce, engine, inhibit);
726         init_ppgtt_regs(regs, vm_alias(ce->vm));
727
728         init_wa_bb_regs(regs, engine);
729
730         __reset_stop_ring(regs, engine);
731 }
732
733 void lrc_init_regs(const struct intel_context *ce,
734                    const struct intel_engine_cs *engine,
735                    bool inhibit)
736 {
737         __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
738 }
739
740 void lrc_reset_regs(const struct intel_context *ce,
741                     const struct intel_engine_cs *engine)
742 {
743         __reset_stop_ring(ce->lrc_reg_state, engine);
744 }
745
746 static void
747 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
748 {
749         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
750                 return;
751
752         vaddr += engine->context_size;
753
754         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
755 }
756
757 static void
758 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
759 {
760         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
761                 return;
762
763         vaddr += engine->context_size;
764
765         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
766                 drm_err_once(&engine->i915->drm,
767                              "%s context redzone overwritten!\n",
768                              engine->name);
769 }
770
771 void lrc_init_state(struct intel_context *ce,
772                     struct intel_engine_cs *engine,
773                     void *state)
774 {
775         bool inhibit = true;
776
777         set_redzone(state, engine);
778
779         if (engine->default_state) {
780                 shmem_read(engine->default_state, 0,
781                            state, engine->context_size);
782                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
783                 inhibit = false;
784         }
785
786         /* Clear the ppHWSP (inc. per-context counters) */
787         memset(state, 0, PAGE_SIZE);
788
789         /*
790          * The second page of the context object contains some registers which
791          * must be set up prior to the first execution.
792          */
793         __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
794 }
795
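/*
 * The context state object is sized to the engine's context image, rounded
 * up to GTT page granularity, plus an extra page for the debug redzone and
 * (on Gen12) one more page for the per-context indirect wa batch. Local
 * memory is preferred when available, with shmem as the fallback.
 */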
796 static struct i915_vma *
797 __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
798 {
799         struct drm_i915_gem_object *obj;
800         struct i915_vma *vma;
801         u32 context_size;
802
803         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
804
805         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
806                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
807
808         if (GRAPHICS_VER(engine->i915) == 12) {
809                 ce->wa_bb_page = context_size / PAGE_SIZE;
810                 context_size += PAGE_SIZE;
811         }
812
813         obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
814         if (IS_ERR(obj))
815                 obj = i915_gem_object_create_shmem(engine->i915, context_size);
816         if (IS_ERR(obj))
817                 return ERR_CAST(obj);
818
819         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
820         if (IS_ERR(vma)) {
821                 i915_gem_object_put(obj);
822                 return vma;
823         }
824
825         return vma;
826 }
827
828 static struct intel_timeline *
829 pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
830 {
831         struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
832
833         return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
834 }
835
836 int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
837 {
838         struct intel_ring *ring;
839         struct i915_vma *vma;
840         int err;
841
842         GEM_BUG_ON(ce->state);
843
844         vma = __lrc_alloc_state(ce, engine);
845         if (IS_ERR(vma))
846                 return PTR_ERR(vma);
847
848         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
849         if (IS_ERR(ring)) {
850                 err = PTR_ERR(ring);
851                 goto err_vma;
852         }
853
854         if (!page_mask_bits(ce->timeline)) {
855                 struct intel_timeline *tl;
856
857                 /*
858                  * Use the static global HWSP for the kernel context, and
859                  * a dynamically allocated cacheline for everyone else.
860                  */
861                 if (unlikely(ce->timeline))
862                         tl = pinned_timeline(ce, engine);
863                 else
864                         tl = intel_timeline_create(engine->gt);
865                 if (IS_ERR(tl)) {
866                         err = PTR_ERR(tl);
867                         goto err_ring;
868                 }
869
870                 ce->timeline = tl;
871         }
872
873         ce->ring = ring;
874         ce->state = vma;
875
876         return 0;
877
878 err_ring:
879         intel_ring_put(ring);
880 err_vma:
881         i915_vma_put(vma);
882         return err;
883 }
884
885 void lrc_reset(struct intel_context *ce)
886 {
887         GEM_BUG_ON(!intel_context_is_pinned(ce));
888
889         intel_ring_reset(ce->ring, ce->ring->emit);
890
891         /* Scrub away the garbage */
892         lrc_init_regs(ce, ce->engine, true);
893         ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
894 }
895
896 int
897 lrc_pre_pin(struct intel_context *ce,
898             struct intel_engine_cs *engine,
899             struct i915_gem_ww_ctx *ww,
900             void **vaddr)
901 {
902         GEM_BUG_ON(!ce->state);
903         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
904
905         *vaddr = i915_gem_object_pin_map(ce->state->obj,
906                                          i915_coherent_map_type(ce->engine->i915,
907                                                                 ce->state->obj,
908                                                                 false) |
909                                          I915_MAP_OVERRIDE);
910
911         return PTR_ERR_OR_ZERO(*vaddr);
912 }
913
914 int
915 lrc_pin(struct intel_context *ce,
916         struct intel_engine_cs *engine,
917         void *vaddr)
918 {
919         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
920
921         if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
922                 lrc_init_state(ce, engine, vaddr);
923
924         ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
925         return 0;
926 }
927
928 void lrc_unpin(struct intel_context *ce)
929 {
930         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
931                       ce->engine);
932 }
933
934 void lrc_post_unpin(struct intel_context *ce)
935 {
936         i915_gem_object_unpin_map(ce->state->obj);
937 }
938
939 void lrc_fini(struct intel_context *ce)
940 {
941         if (!ce->state)
942                 return;
943
944         intel_ring_put(fetch_and_zero(&ce->ring));
945         i915_vma_put(fetch_and_zero(&ce->state));
946 }
947
948 void lrc_destroy(struct kref *kref)
949 {
950         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
951
952         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
953         GEM_BUG_ON(intel_context_is_pinned(ce));
954
955         lrc_fini(ce);
956
957         intel_context_fini(ce);
958         intel_context_free(ce);
959 }
960
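/*
 * The gen12_emit_*() helpers below fill the per-context indirect context
 * buffer (wa_bb_page) executed by the CS during context restore: they
 * reload CTX_TIMESTAMP (and, on the render engine, CMD_BUF_CCTL) from the
 * values saved in the context image, using CS GPR0 as a bounce register,
 * and finally restore GPR0 itself from its saved slot.
 */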
961 static u32 *
962 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
963 {
964         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
965                 MI_SRM_LRM_GLOBAL_GTT |
966                 MI_LRI_LRM_CS_MMIO;
967         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
968         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
969                 CTX_TIMESTAMP * sizeof(u32);
970         *cs++ = 0;
971
972         *cs++ = MI_LOAD_REGISTER_REG |
973                 MI_LRR_SOURCE_CS_MMIO |
974                 MI_LRI_LRM_CS_MMIO;
975         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
976         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
977
978         *cs++ = MI_LOAD_REGISTER_REG |
979                 MI_LRR_SOURCE_CS_MMIO |
980                 MI_LRI_LRM_CS_MMIO;
981         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
982         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
983
984         return cs;
985 }
986
987 static u32 *
988 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
989 {
990         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
991
992         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
993                 MI_SRM_LRM_GLOBAL_GTT |
994                 MI_LRI_LRM_CS_MMIO;
995         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
996         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
997                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
998         *cs++ = 0;
999
1000         return cs;
1001 }
1002
1003 static u32 *
1004 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
1005 {
1006         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
1007
1008         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1009                 MI_SRM_LRM_GLOBAL_GTT |
1010                 MI_LRI_LRM_CS_MMIO;
1011         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1012         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1013                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
1014         *cs++ = 0;
1015
1016         *cs++ = MI_LOAD_REGISTER_REG |
1017                 MI_LRR_SOURCE_CS_MMIO |
1018                 MI_LRI_LRM_CS_MMIO;
1019         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1020         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
1021
1022         return cs;
1023 }
1024
1025 static u32 *
1026 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
1027 {
1028         cs = gen12_emit_timestamp_wa(ce, cs);
1029         cs = gen12_emit_cmd_buf_wa(ce, cs);
1030         cs = gen12_emit_restore_scratch(ce, cs);
1031
1032         return cs;
1033 }
1034
1035 static u32 *
1036 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
1037 {
1038         cs = gen12_emit_timestamp_wa(ce, cs);
1039         cs = gen12_emit_restore_scratch(ce, cs);
1040
1041         return cs;
1042 }
1043
1044 static u32 context_wa_bb_offset(const struct intel_context *ce)
1045 {
1046         return PAGE_SIZE * ce->wa_bb_page;
1047 }
1048
1049 static u32 *context_indirect_bb(const struct intel_context *ce)
1050 {
1051         void *ptr;
1052
1053         GEM_BUG_ON(!ce->wa_bb_page);
1054
1055         ptr = ce->lrc_reg_state;
1056         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
1057         ptr += context_wa_bb_offset(ce);
1058
1059         return ptr;
1060 }
1061
1062 static void
1063 setup_indirect_ctx_bb(const struct intel_context *ce,
1064                       const struct intel_engine_cs *engine,
1065                       u32 *(*emit)(const struct intel_context *, u32 *))
1066 {
1067         u32 * const start = context_indirect_bb(ce);
1068         u32 *cs;
1069
1070         cs = emit(ce, start);
1071         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
1072         while ((unsigned long)cs % CACHELINE_BYTES)
1073                 *cs++ = MI_NOOP;
1074
1075         lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
1076                                i915_ggtt_offset(ce->state) +
1077                                context_wa_bb_offset(ce),
1078                                (cs - start) * sizeof(*cs));
1079 }
1080
1081 /*
1082  * The context descriptor encodes various attributes of a context,
1083  * including its GTT address and some flags. Because it's fairly
1084  * expensive to calculate, we'll just do it once and cache the result,
1085  * which remains valid until the context is unpinned.
1086  *
1087  * This is what a descriptor looks like, from LSB to MSB::
1088  *
1089  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
1090  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
1091  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
1092  *      bits 53-54:    mbz, reserved for use by hardware
1093  *      bits 55-63:    group ID, currently unused and set to 0
1094  *
1095  * Starting from Gen11, the upper dword of the descriptor has a new format:
1096  *
1097  *      bits 32-36:    reserved
1098  *      bits 37-47:    SW context ID
1099  *      bits 48-53:    engine instance
1100  *      bit 54:        mbz, reserved for use by hardware
1101  *      bits 55-60:    SW counter
1102  *      bits 61-63:    engine class
1103  *
1104  * engine info, SW context ID and SW counter need to form a unique number
1105  * (Context ID) per lrc.
1106  */
1107 static u32 lrc_descriptor(const struct intel_context *ce)
1108 {
1109         u32 desc;
1110
1111         desc = INTEL_LEGACY_32B_CONTEXT;
1112         if (i915_vm_is_4lvl(ce->vm))
1113                 desc = INTEL_LEGACY_64B_CONTEXT;
1114         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
1115
1116         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
1117         if (GRAPHICS_VER(ce->vm->i915) == 8)
1118                 desc |= GEN8_CTX_L3LLC_COHERENT;
1119
1120         return i915_ggtt_offset(ce->state) | desc;
1121 }
1122
1123 u32 lrc_update_regs(const struct intel_context *ce,
1124                     const struct intel_engine_cs *engine,
1125                     u32 head)
1126 {
1127         struct intel_ring *ring = ce->ring;
1128         u32 *regs = ce->lrc_reg_state;
1129
1130         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
1131         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
1132
1133         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1134         regs[CTX_RING_HEAD] = head;
1135         regs[CTX_RING_TAIL] = ring->tail;
1136         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1137
1138         /* RPCS */
1139         if (engine->class == RENDER_CLASS) {
1140                 regs[CTX_R_PWR_CLK_STATE] =
1141                         intel_sseu_make_rpcs(engine->gt, &ce->sseu);
1142
1143                 i915_oa_init_reg_state(ce, engine);
1144         }
1145
1146         if (ce->wa_bb_page) {
1147                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
1148
1149                 fn = gen12_emit_indirect_ctx_xcs;
1150                 if (ce->engine->class == RENDER_CLASS)
1151                         fn = gen12_emit_indirect_ctx_rcs;
1152
1153                 /* Mutually exclusive wrt the global indirect bb */
1154                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
1155                 setup_indirect_ctx_bb(ce, engine, fn);
1156         }
1157
1158         return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
1159 }
1160
1161 void lrc_update_offsets(struct intel_context *ce,
1162                         struct intel_engine_cs *engine)
1163 {
1164         set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
1165 }
1166
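/*
 * lrc_check_regs() verifies the ring registers and MI_MODE stored in the
 * context image around submission; any mismatch is repaired in place and
 * reported, as it indicates the context image has been overwritten.
 */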
1167 void lrc_check_regs(const struct intel_context *ce,
1168                     const struct intel_engine_cs *engine,
1169                     const char *when)
1170 {
1171         const struct intel_ring *ring = ce->ring;
1172         u32 *regs = ce->lrc_reg_state;
1173         bool valid = true;
1174         int x;
1175
1176         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1177                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1178                        engine->name,
1179                        regs[CTX_RING_START],
1180                        i915_ggtt_offset(ring->vma));
1181                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1182                 valid = false;
1183         }
1184
1185         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1186             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1187                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1188                        engine->name,
1189                        regs[CTX_RING_CTL],
1190                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1191                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1192                 valid = false;
1193         }
1194
1195         x = lrc_ring_mi_mode(engine);
1196         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1197                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1198                        engine->name, regs[x + 1]);
1199                 regs[x + 1] &= ~STOP_RING;
1200                 regs[x + 1] |= STOP_RING << 16;
1201                 valid = false;
1202         }
1203
1204         WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1205 }
1206
1207 /*
1208  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1209  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1210  * but there is a slight complication as this is applied in WA batch where the
1211  * values are only initialized once so we cannot take register value at the
1212  * beginning and reuse it further; hence we save its value to memory, upload a
1213  * constant value with bit21 set and then we restore it back with the saved value.
1214  * To simplify the WA, a constant value is formed by using the default value
1215  * of this register. This shouldn't be a problem because we are only modifying
1216  * it for a short period and this batch is non-preemptible. We can of course
1217  * use additional instructions that read the actual value of the register
1218  * at that time and set our bit of interest but it makes the WA complicated.
1219  *
1220  * This WA is also required for Gen9 so extracting as a function avoids
1221  * code duplication.
1222  */
1223 static u32 *
1224 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
1225 {
1226         /* NB no one else is allowed to scribble over scratch + 256! */
1227         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1228         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1229         *batch++ = intel_gt_scratch_offset(engine->gt,
1230                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1231         *batch++ = 0;
1232
1233         *batch++ = MI_LOAD_REGISTER_IMM(1);
1234         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1235         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
1236
1237         batch = gen8_emit_pipe_control(batch,
1238                                        PIPE_CONTROL_CS_STALL |
1239                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
1240                                        0);
1241
1242         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1243         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1244         *batch++ = intel_gt_scratch_offset(engine->gt,
1245                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1246         *batch++ = 0;
1247
1248         return batch;
1249 }
1250
1251 /*
1252  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1253  * initialized at the beginning and shared across all contexts but this field
1254  * helps us to have multiple batches at different offsets and select them based
1255  * on some criterion. At the moment this batch always starts at the beginning of the page
1256  * and at this point we don't have multiple wa_ctx batch buffers.
1257  *
1258  * The number of WA applied are not known at the beginning; we use this field
1259  * to return the number of DWORDS written.
1260  *
1261  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
1262  * so it adds NOOPs as padding to make it cacheline aligned.
1263  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them together
1264  * make a complete batch buffer.
1265  */
1266 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1267 {
1268         /* WaDisableCtxRestoreArbitration:bdw,chv */
1269         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1270
1271         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1272         if (IS_BROADWELL(engine->i915))
1273                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1274
1275         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1276         /* Actual scratch location is at 128 bytes offset */
1277         batch = gen8_emit_pipe_control(batch,
1278                                        PIPE_CONTROL_FLUSH_L3 |
1279                                        PIPE_CONTROL_STORE_DATA_INDEX |
1280                                        PIPE_CONTROL_CS_STALL |
1281                                        PIPE_CONTROL_QW_WRITE,
1282                                        LRC_PPHWSP_SCRATCH_ADDR);
1283
1284         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1285
1286         /* Pad to end of cacheline */
1287         while ((unsigned long)batch % CACHELINE_BYTES)
1288                 *batch++ = MI_NOOP;
1289
1290         /*
1291          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1292          * execution depends on the length specified in terms of cache lines
1293          * in the register CTX_RCS_INDIRECT_CTX
1294          */
1295
1296         return batch;
1297 }
1298
1299 struct lri {
1300         i915_reg_t reg;
1301         u32 value;
1302 };
1303
1304 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
1305 {
1306         GEM_BUG_ON(!count || count > 63);
1307
1308         *batch++ = MI_LOAD_REGISTER_IMM(count);
1309         do {
1310                 *batch++ = i915_mmio_reg_offset(lri->reg);
1311                 *batch++ = lri->value;
1312         } while (lri++, --count);
1313         *batch++ = MI_NOOP;
1314
1315         return batch;
1316 }
1317
1318 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1319 {
1320         static const struct lri lri[] = {
1321                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
1322                 {
1323                         COMMON_SLICE_CHICKEN2,
1324                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
1325                                        0),
1326                 },
1327
1328                 /* BSpec: 11391 */
1329                 {
1330                         FF_SLICE_CHICKEN,
1331                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
1332                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
1333                 },
1334
1335                 /* BSpec: 11299 */
1336                 {
1337                         _3D_CHICKEN3,
1338                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
1339                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
1340                 }
1341         };
1342
1343         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1344
1345         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
1346         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1347
1348         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
1349         batch = gen8_emit_pipe_control(batch,
1350                                        PIPE_CONTROL_FLUSH_L3 |
1351                                        PIPE_CONTROL_STORE_DATA_INDEX |
1352                                        PIPE_CONTROL_CS_STALL |
1353                                        PIPE_CONTROL_QW_WRITE,
1354                                        LRC_PPHWSP_SCRATCH_ADDR);
1355
1356         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
1357
1358         /* WaMediaPoolStateCmdInWABB:bxt,glk */
1359         if (HAS_POOLED_EU(engine->i915)) {
1360                 /*
1361                  * EU pool configuration is set up along with the golden context
1362                  * during context initialization. This value depends on
1363                  * device type (2x6 or 3x6) and needs to be updated based
1364                  * on which subslice is disabled especially for 2x6
1365                  * devices, however it is safe to load default
1366                  * configuration of 3x6 device instead of masking off
1367                  * corresponding bits because HW ignores bits of a disabled
1368                  * subslice and drops down to appropriate config. Please
1369                  * see render_state_setup() in i915_gem_render_state.c for
1370                  * possible configurations, to avoid duplication they are
1371                  * not shown here again.
1372                  */
1373                 *batch++ = GEN9_MEDIA_POOL_STATE;
1374                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
1375                 *batch++ = 0x00777000;
1376                 *batch++ = 0;
1377                 *batch++ = 0;
1378                 *batch++ = 0;
1379         }
1380
1381         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1382
1383         /* Pad to end of cacheline */
1384         while ((unsigned long)batch % CACHELINE_BYTES)
1385                 *batch++ = MI_NOOP;
1386
1387         return batch;
1388 }
1389
1390 static u32 *
1391 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1392 {
1393         int i;
1394
1395         /*
1396          * WaPipeControlBefore3DStateSamplePattern: cnl
1397          *
1398          * Ensure the engine is idle prior to programming a
1399          * 3DSTATE_SAMPLE_PATTERN during a context restore.
1400          */
1401         batch = gen8_emit_pipe_control(batch,
1402                                        PIPE_CONTROL_CS_STALL,
1403                                        0);
1404         /*
1405          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
1406          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
1407          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
1408          * confusing. Since gen8_emit_pipe_control() already advances the
1409          * batch by 6 dwords, we advance the other 10 here, completing a
1410          * cacheline. It's not clear if the workaround requires this padding
1411          * before other commands, or if it's just the regular padding we would
1412          * already have for the workaround bb, so leave it here for now.
1413          */
1414         for (i = 0; i < 10; i++)
1415                 *batch++ = MI_NOOP;
1416
1417         /* Pad to end of cacheline */
1418         while ((unsigned long)batch % CACHELINE_BYTES)
1419                 *batch++ = MI_NOOP;
1420
1421         return batch;
1422 }
1423
1424 #define CTX_WA_BB_SIZE (PAGE_SIZE)
1425
1426 static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
1427 {
1428         struct drm_i915_gem_object *obj;
1429         struct i915_vma *vma;
1430         int err;
1431
1432         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1433         if (IS_ERR(obj))
1434                 return PTR_ERR(obj);
1435
1436         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1437         if (IS_ERR(vma)) {
1438                 err = PTR_ERR(vma);
1439                 goto err;
1440         }
1441
1442         engine->wa_ctx.vma = vma;
1443         return 0;
1444
1445 err:
1446         i915_gem_object_put(obj);
1447         return err;
1448 }
1449
1450 void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
1451 {
1452         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1453 }
1454
1455 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1456
1457 void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1458 {
1459         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
1460         struct i915_wa_ctx_bb *wa_bb[] = {
1461                 &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
1462         };
1463         wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
1464         struct i915_gem_ww_ctx ww;
1465         void *batch, *batch_ptr;
1466         unsigned int i;
1467         int err;
1468
1469         if (engine->class != RENDER_CLASS)
1470                 return;
1471
1472         switch (GRAPHICS_VER(engine->i915)) {
1473         case 12:
1474         case 11:
1475                 return;
1476         case 10:
1477                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
1478                 wa_bb_fn[1] = NULL;
1479                 break;
1480         case 9:
1481                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
1482                 wa_bb_fn[1] = NULL;
1483                 break;
1484         case 8:
1485                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
1486                 wa_bb_fn[1] = NULL;
1487                 break;
1488         default:
1489                 MISSING_CASE(GRAPHICS_VER(engine->i915));
1490                 return;
1491         }
1492
1493         err = lrc_create_wa_ctx(engine);
1494         if (err) {
1495                 /*
1496                  * We continue even if we fail to initialize the WA batch,
1497                  * because we only expect rare glitches and nothing critical
1498                  * enough to prevent us from using the GPU.
1499                  */
1500                 drm_err(&engine->i915->drm,
1501                         "Ignoring context switch w/a allocation error:%d\n",
1502                         err);
1503                 return;
1504         }
1505
1506         if (!engine->wa_ctx.vma)
1507                 return;
1508
1509         i915_gem_ww_ctx_init(&ww, true);
1510 retry:
1511         err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
1512         if (!err)
1513                 err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
1514         if (err)
1515                 goto err;
1516
1517         batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
1518         if (IS_ERR(batch)) {
1519                 err = PTR_ERR(batch);
1520                 goto err_unpin;
1521         }
1522
1523         /*
1524          * Emit the two workaround batch buffers, recording the offset from the
1525          * start of the workaround batch buffer object for each and their
1526          * respective sizes.
1527          */
1528         batch_ptr = batch;
1529         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1530                 wa_bb[i]->offset = batch_ptr - batch;
1531                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1532                                                   CACHELINE_BYTES))) {
1533                         err = -EINVAL;
1534                         break;
1535                 }
1536                 if (wa_bb_fn[i])
1537                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1538                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1539         }
1540         GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1541
1542         __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1543         __i915_gem_object_release_map(wa_ctx->vma->obj);
1544
1545         /* Verify that we can handle failure to setup the wa_ctx */
1546         if (!err)
1547                 err = i915_inject_probe_error(engine->i915, -ENODEV);
1548
1549 err_unpin:
1550         if (err)
1551                 i915_vma_unpin(wa_ctx->vma);
1552 err:
1553         if (err == -EDEADLK) {
1554                 err = i915_gem_ww_ctx_backoff(&ww);
1555                 if (!err)
1556                         goto retry;
1557         }
1558         i915_gem_ww_ctx_fini(&ww);
1559
1560         if (err) {
1561                 i915_vma_put(engine->wa_ctx.vma);
1562
1563                 /* Clear all flags to prevent further use */
1564                 memset(wa_ctx, 0, sizeof(*wa_ctx));
1565         }
1566 }
1567
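/*
 * Context runtime is tracked from the CTX_TIMESTAMP value saved in the
 * context image: lrc_update_runtime() accumulates the delta since the last
 * sample into an ewma and a running total. A negative delta (timestamp
 * apparently moving backwards) is traced, and counted under selftests,
 * rather than accumulated.
 */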
1568 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1569 {
1570 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1571         ce->runtime.num_underflow++;
1572         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1573 #endif
1574 }
1575
1576 void lrc_update_runtime(struct intel_context *ce)
1577 {
1578         u32 old;
1579         s32 dt;
1580
1581         if (intel_context_is_barrier(ce))
1582                 return;
1583
1584         old = ce->runtime.last;
1585         ce->runtime.last = lrc_get_runtime(ce);
1586         dt = ce->runtime.last - old;
1587
1588         if (unlikely(dt < 0)) {
1589                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1590                          old, ce->runtime.last, dt);
1591                 st_update_runtime_underflow(ce, dt);
1592                 return;
1593         }
1594
1595         ewma_runtime_add(&ce->runtime.avg, dt);
1596         ce->runtime.total += dt;
1597 }
1598
1599 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1600 #include "selftest_lrc.c"
1601 #endif