drm/i915/gt: Track all timelines created using the HWSP
drivers/gpu/drm/i915/gt/intel_lrc.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

static inline unsigned int dword_in_page(void *addr)
{
        return offset_in_page(addr) / sizeof(u32);
}

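/*
 * set_offsets() consumes a tiny bytecode describing the layout of the
 * MI_LOAD_REGISTER_IMM blocks within the default context image:
 *
 *   NOP(x)        - skip x dwords in the image
 *   LRI(count, f) - emit an MI_LOAD_REGISTER_IMM(count) header
 *                   (POSTED sets MI_LRI_FORCE_POSTED)
 *   REG(x)        - a register offset below 0x200, encoded in one byte
 *   REG16(x)      - a register offset below 0x10000, encoded in two bytes
 *                   with bit 7 as a continuation marker
 *
 * For example, REG16(0x3a8) encodes as { 0x81, 0x6a }: the decoder below
 * accumulates 7 bits per byte while bit 7 is set, giving
 * ((0x01 << 7) | 0x6a) << 2 == 0x3a8, which is then added to mmio_base.
 */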
static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct intel_engine_cs *engine,
                        bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
#define END 0
{
        const u32 base = engine->mmio_base;

        while (*data) {
                u8 count, flags;

                if (*data & BIT(7)) { /* skip */
                        count = *data++ & ~BIT(7);
                        regs += count;
                        continue;
                }

                count = *data & 0x3f;
                flags = *data >> 6;
                data++;

                *regs = MI_LOAD_REGISTER_IMM(count);
                if (flags & POSTED)
                        *regs |= MI_LRI_FORCE_POSTED;
                if (INTEL_GEN(engine->i915) >= 11)
                        *regs |= MI_LRI_LRM_CS_MMIO;
                regs++;

                GEM_BUG_ON(!count);
                do {
                        u32 offset = 0;
                        u8 v;

                        do {
                                v = *data++;
                                offset <<= 7;
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));

                        regs[0] = base + (offset << 2);
                        regs += 2;
                } while (--count);
        }

        if (close) {
                /* Close the batch; used mainly by live_lrc_layout() */
                *regs = MI_BATCH_BUFFER_END;
                if (INTEL_GEN(engine->i915) >= 10)
                        *regs |= BIT(0);
        }
}

static const u8 gen8_xcs_offsets[] = {
        NOP(1),
        LRI(11, 0),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),

        NOP(9),
        LRI(9, 0),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(2, 0),
        REG16(0x200),
        REG(0x028),

        END
};

static const u8 gen9_xcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, POSTED),
        REG16(0x200),

        NOP(13),
        LRI(44, POSTED),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),

        END
};

static const u8 gen12_xcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        END
};

static const u8 gen8_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 gen9_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x34),
        REG(0x30),
        REG(0x38),
        REG(0x3c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, 0),
        REG(0xc8),

        NOP(13),
        LRI(44, POSTED),
        REG(0x28),
        REG(0x9c),
        REG(0xc0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x68),

        END
};

static const u8 gen11_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(1, POSTED),
        REG(0x1b0),

        NOP(10),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 gen12_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),
        NOP(3 + 9 + 1),

        LRI(51, POSTED),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),
        REG(0x084),
        NOP(1),

        END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
        /*
         * The gen12+ lists only have the registers we program in the basic
         * default state. We rely on the context image using relative
         * addressing to automatically fix up the register state between the
         * physical engines of a virtual engine.
         */
        GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
                   !intel_engine_has_relative_mmio(engine));
        if (engine->class == RENDER_CLASS) {
                if (INTEL_GEN(engine->i915) >= 12)
                        return gen12_rcs_offsets;
                else if (INTEL_GEN(engine->i915) >= 11)
                        return gen11_rcs_offsets;
                else if (INTEL_GEN(engine->i915) >= 9)
                        return gen9_rcs_offsets;
                else
                        return gen8_rcs_offsets;
        } else {
                if (INTEL_GEN(engine->i915) >= 12)
                        return gen12_xcs_offsets;
                else if (INTEL_GEN(engine->i915) >= 9)
                        return gen9_xcs_offsets;
                else
                        return gen8_xcs_offsets;
        }
}

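/*
 * The lrc_ring_*() helpers return the dword index of a register's slot
 * within the context image: regs[x] holds the MI_LRI register offset and
 * regs[x + 1] the value to be loaded. A return of -1 means the register
 * is not present in the default context layout for this engine/gen.
 */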
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
        if (INTEL_GEN(engine->i915) >= 12)
                return 0x60;
        else if (INTEL_GEN(engine->i915) >= 9)
                return 0x54;
        else if (engine->class == RENDER_CLASS)
                return 0x58;
        else
                return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
        if (INTEL_GEN(engine->i915) >= 12)
                return 0x74;
        else if (INTEL_GEN(engine->i915) >= 9)
                return 0x68;
        else if (engine->class == RENDER_CLASS)
                return 0xd8;
        else
                return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
        if (INTEL_GEN(engine->i915) >= 12)
                return 0x12;
        else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
                return 0x18;
        else
                return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_wa_bb_per_ctx(engine);
        if (x < 0)
                return x;

        return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_indirect_ptr(engine);
        if (x < 0)
                return x;

        return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
        if (engine->class != RENDER_CLASS)
                return -1;

        if (INTEL_GEN(engine->i915) >= 12)
                return 0xb6;
        else if (INTEL_GEN(engine->i915) >= 11)
                return 0xaa;
        else
                return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
        switch (INTEL_GEN(engine->i915)) {
        default:
                MISSING_CASE(INTEL_GEN(engine->i915));
                fallthrough;
        case 12:
                return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 11:
                return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 10:
                return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 9:
                return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 8:
                return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        }
}

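/*
 * INDIRECT_CTX packs the GGTT address of the batch together with its
 * length: the low 6 bits carry the size in cachelines (hence the
 * alignment assert below), while INDIRECT_CTX_OFFSET takes the start
 * offset within the context image shifted up by 6 bits.
 */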
static void
lrc_setup_indirect_ctx(u32 *regs,
                       const struct intel_engine_cs *engine,
                       u32 ctx_bb_ggtt_addr,
                       u32 size)
{
        GEM_BUG_ON(!size);
        GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
        GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
        regs[lrc_ring_indirect_ptr(engine) + 1] =
                ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

        GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
        regs[lrc_ring_indirect_offset(engine) + 1] =
                lrc_ring_indirect_offset_default(engine) << 6;
}

static void init_common_regs(u32 * const regs,
                             const struct intel_context *ce,
                             const struct intel_engine_cs *engine,
                             bool inhibit)
{
        u32 ctl;

        ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
        ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
        if (inhibit)
                ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
        if (INTEL_GEN(engine->i915) < 11)
                ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
                                           CTX_CTRL_RS_CTX_ENABLE);
        regs[CTX_CONTEXT_CONTROL] = ctl;

        regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
                            const struct intel_engine_cs *engine)
{
        const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

        if (wa_ctx->per_ctx.size) {
                const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

                GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
                regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
                        (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
        }

        if (wa_ctx->indirect_ctx.size) {
                lrc_setup_indirect_ctx(regs, engine,
                                       i915_ggtt_offset(wa_ctx->vma) +
                                       wa_ctx->indirect_ctx.offset,
                                       wa_ctx->indirect_ctx.size);
        }
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                /* 64b PPGTT (48bit canonical)
                 * PDP0_DESCRIPTOR contains the base address to PML4 and
                 * other PDP Descriptors are ignored.
                 */
                ASSIGN_CTX_PML4(ppgtt, regs);
        } else {
                ASSIGN_CTX_PDP(ppgtt, regs, 3);
                ASSIGN_CTX_PDP(ppgtt, regs, 2);
                ASSIGN_CTX_PDP(ppgtt, regs, 1);
                ASSIGN_CTX_PDP(ppgtt, regs, 0);
        }
}

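/* Contexts using the global GTT are backed by the aliasing ppGTT. */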
static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
        if (i915_is_ggtt(vm))
                return i915_vm_to_ggtt(vm)->alias;
        else
                return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_mi_mode(engine);
        if (x != -1) {
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
        }
}

static void __lrc_init_regs(u32 *regs,
                            const struct intel_context *ce,
                            const struct intel_engine_cs *engine,
                            bool inhibit)
{
        /*
         * A context is actually a big batch buffer with several
         * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
         * values we are setting here are only for the first context restore:
         * on a subsequent save, the GPU will recreate this batch buffer with
         * new values (including all the missing MI_LOAD_REGISTER_IMM commands
         * that we are not initializing here).
         *
         * Must be kept consistent with virtual_update_register_offsets().
         */

        if (inhibit)
                memset(regs, 0, PAGE_SIZE);

        set_offsets(regs, reg_offsets(engine), engine, inhibit);

        init_common_regs(regs, ce, engine, inhibit);
        init_ppgtt_regs(regs, vm_alias(ce->vm));

        init_wa_bb_regs(regs, engine);

        __reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
                   const struct intel_engine_cs *engine,
                   bool inhibit)
{
        __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine)
{
        __reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        vaddr += engine->context_size;

        memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        vaddr += engine->context_size;

        if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
                drm_err_once(&engine->i915->drm,
                             "%s context redzone overwritten!\n",
                             engine->name);
}

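/*
 * The first page of the context image is the per-process HWSP; the
 * register state proper starts at LRC_STATE_OFFSET. Seed the image from
 * the engine's golden (default) state when one has been recorded,
 * otherwise start from scratch with the context restore inhibited.
 */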
void lrc_init_state(struct intel_context *ce,
                    struct intel_engine_cs *engine,
                    void *state)
{
        bool inhibit = true;

        set_redzone(state, engine);

        if (engine->default_state) {
                shmem_read(engine->default_state, 0,
                           state, engine->context_size);
                __set_bit(CONTEXT_VALID_BIT, &ce->flags);
                inhibit = false;
        }

        /* Clear the ppHWSP (inc. per-context counters) */
        memset(state, 0, PAGE_SIZE);

        /*
         * The second page of the context object contains some registers which
         * must be set up prior to the first execution.
         */
        __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

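/*
 * The backing store for the context image: engine->context_size rounded
 * up to a page, plus a trailing redzone page on debug builds and, on
 * Gen12, one extra page reserved for the per-context indirect workaround
 * batch (recorded in ce->wa_bb_page).
 */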
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 context_size;

        context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                context_size += I915_GTT_PAGE_SIZE; /* for redzone */

        if (INTEL_GEN(engine->i915) == 12) {
                ce->wa_bb_page = context_size / PAGE_SIZE;
                context_size += PAGE_SIZE;
        }

        obj = i915_gem_object_create_shmem(engine->i915, context_size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return vma;
        }

        return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

        return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_ring *ring;
        struct i915_vma *vma;
        int err;

        GEM_BUG_ON(ce->state);

        vma = __lrc_alloc_state(ce, engine);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
        if (IS_ERR(ring)) {
                err = PTR_ERR(ring);
                goto err_vma;
        }

        if (!page_mask_bits(ce->timeline)) {
                struct intel_timeline *tl;

                /*
                 * Use the static global HWSP for the kernel context, and
                 * a dynamically allocated cacheline for everyone else.
                 */
                if (unlikely(ce->timeline))
                        tl = pinned_timeline(ce, engine);
                else
                        tl = intel_timeline_create(engine->gt);
                if (IS_ERR(tl)) {
                        err = PTR_ERR(tl);
                        goto err_ring;
                }

                ce->timeline = tl;
        }

        ce->ring = ring;
        ce->state = vma;

        return 0;

err_ring:
        intel_ring_put(ring);
err_vma:
        i915_vma_put(vma);
        return err;
}

void lrc_reset(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_pinned(ce));

        intel_ring_reset(ce->ring, ce->ring->emit);

        /* Scrub away the garbage */
        lrc_init_regs(ce, ce->engine, true);
        ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
            struct intel_engine_cs *engine,
            struct i915_gem_ww_ctx *ww,
            void **vaddr)
{
        GEM_BUG_ON(!ce->state);
        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

        *vaddr = i915_gem_object_pin_map(ce->state->obj,
                                         i915_coherent_map_type(ce->engine->i915) |
                                         I915_MAP_OVERRIDE);

        return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
        struct intel_engine_cs *engine,
        void *vaddr)
{
        ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
        ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
        return 0;
}

void lrc_unpin(struct intel_context *ce)
{
        check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
                      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
        i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
        if (!ce->state)
                return;

        intel_ring_put(fetch_and_zero(&ce->ring));
        i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
        struct intel_context *ce = container_of(kref, typeof(*ce), ref);

        GEM_BUG_ON(!i915_active_is_idle(&ce->active));
        GEM_BUG_ON(intel_context_is_pinned(ce));

        lrc_fini(ce);

        intel_context_fini(ce);
        intel_context_free(ce);
}

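/*
 * Reload the saved CTX_TIMESTAMP from the context image into CS GPR0 and
 * then copy it back into RING_CTX_TIMESTAMP; note the MI_LOAD_REGISTER_REG
 * is deliberately emitted twice back-to-back below.
 */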
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                CTX_TIMESTAMP * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

        return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_cmd_buf_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        return cs;
}

static inline u32 context_wa_bb_offset(const struct intel_context *ce)
{
        return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
        void *ptr;

        GEM_BUG_ON(!ce->wa_bb_page);

        ptr = ce->lrc_reg_state;
        ptr -= LRC_STATE_OFFSET; /* back to start of context image */
        ptr += context_wa_bb_offset(ce);

        return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
                      const struct intel_engine_cs *engine,
                      u32 *(*emit)(const struct intel_context *, u32 *))
{
        u32 * const start = context_indirect_bb(ce);
        u32 *cs;

        cs = emit(ce, start);
        GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
        while ((unsigned long)cs % CACHELINE_BYTES)
                *cs++ = MI_NOOP;

        lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
                               i915_ggtt_offset(ce->state) +
                               context_wa_bb_offset(ce),
                               (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static inline u32 lrc_descriptor(const struct intel_context *ce)
{
        u32 desc;

        desc = INTEL_LEGACY_32B_CONTEXT;
        if (i915_vm_is_4lvl(ce->vm))
                desc = INTEL_LEGACY_64B_CONTEXT;
        desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

        desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
        if (IS_GEN(ce->vm->i915, 8))
                desc |= GEN8_CTX_L3LLC_COHERENT;

        return i915_ggtt_offset(ce->state) | desc;
}

u32 lrc_update_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    u32 head)
{
        struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;

        GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

        regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
        regs[CTX_RING_HEAD] = head;
        regs[CTX_RING_TAIL] = ring->tail;
        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

        /* RPCS */
        if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE] =
                        intel_sseu_make_rpcs(engine->gt, &ce->sseu);

                i915_oa_init_reg_state(ce, engine);
        }

        if (ce->wa_bb_page) {
                u32 *(*fn)(const struct intel_context *ce, u32 *cs);

                fn = gen12_emit_indirect_ctx_xcs;
                if (ce->engine->class == RENDER_CLASS)
                        fn = gen12_emit_indirect_ctx_rcs;

                /* Mutually exclusive wrt the global indirect bb */
                GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
                setup_indirect_ctx_bb(ce, engine, fn);
        }

        return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
                        struct intel_engine_cs *engine)
{
        set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    const char *when)
{
        const struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;
        bool valid = true;
        int x;

        if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
                pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_START],
                       i915_ggtt_offset(ring->vma));
                regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
                valid = false;
        }

        if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
            (RING_CTL_SIZE(ring->size) | RING_VALID)) {
                pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_CTL],
                       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
                regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
                valid = false;
        }

        x = lrc_ring_mi_mode(engine);
        if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
                pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
                       engine->name, regs[x + 1]);
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
                valid = false;
        }

        WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication: the WA is applied in a batch
 * where the values are only initialized once, so we cannot read the register
 * at the beginning and reuse it later. Instead we save its value to memory,
 * upload a constant value with bit 21 set, and then restore it from the saved
 * value. To simplify the WA, the constant is formed from the default value of
 * the register. This shouldn't be a problem because we only modify it for a
 * short period and this batch is non-preemptible. We could of course use
 * additional instructions that read the actual value of the register at that
 * time and set our bit of interest, but that would complicate the WA.
 *
 * This WA is also required for Gen9, so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
        /* NB no one else is allowed to scribble over scratch + 256! */
        *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        *batch++ = MI_LOAD_REGISTER_IMM(1);
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_DC_FLUSH_ENABLE,
                                       0);

        *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        return batch;
}

/*
 * Typically we only have one indirect_ctx and one per_ctx batch buffer which
 * are initialized at the beginning and shared across all contexts, but this
 * field helps us to have multiple batches at different offsets and to select
 * them based on some criteria. At the moment the batch always starts at the
 * beginning of the page and we don't have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known up front; we use this field to
 * return the number of DWORDs written.
 *
 * Note that this batch does not contain MI_BATCH_BUFFER_END, so it is padded
 * with NOOPs to make it cacheline aligned. MI_BATCH_BUFFER_END is added to
 * the per-ctx batch, and the two together make a complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        /* WaDisableCtxRestoreArbitration:bdw,chv */
        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
        if (IS_BROADWELL(engine->i915))
                batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
        /* Actual scratch location is at 128 bytes offset */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        /*
         * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
         * execution depends on the length specified in terms of cache lines
         * in the register CTX_RCS_INDIRECT_CTX
         */

        return batch;
}

struct lri {
        i915_reg_t reg;
        u32 value;
};

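/*
 * Expand a table of (reg, value) pairs into a single MI_LOAD_REGISTER_IMM
 * block; count must fit in the 6-bit length field of the LRI header.
 */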
static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
        GEM_BUG_ON(!count || count > 63);

        *batch++ = MI_LOAD_REGISTER_IMM(count);
        do {
                *batch++ = i915_mmio_reg_offset(lri->reg);
                *batch++ = lri->value;
        } while (lri++, --count);
        *batch++ = MI_NOOP;

        return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        static const struct lri lri[] = {
                /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
                {
                        COMMON_SLICE_CHICKEN2,
                        __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
                                       0),
                },

                /* BSpec: 11391 */
                {
                        FF_SLICE_CHICKEN,
                        __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
                                       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
                },

                /* BSpec: 11299 */
                {
                        _3D_CHICKEN3,
                        __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
                                       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
                }
        };

        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
        batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

        /* WaMediaPoolStateCmdInWABB:bxt,glk */
        if (HAS_POOLED_EU(engine->i915)) {
                /*
                 * The EU pool configuration is set up along with the golden
                 * context during context initialization. This value depends
                 * on the device type (2x6 or 3x6) and needs to be updated
                 * based on which subslice is disabled, especially for 2x6
                 * devices. However, it is safe to load the default 3x6
                 * configuration instead of masking off the corresponding
                 * bits, because the HW ignores the bits of a disabled
                 * subslice and drops down to the appropriate config. See
                 * render_state_setup() in i915_gem_render_state.c for the
                 * possible configurations; to avoid duplication they are
                 * not shown here again.
                 */
                *batch++ = GEN9_MEDIA_POOL_STATE;
                *batch++ = GEN9_MEDIA_POOL_ENABLE;
                *batch++ = 0x00777000;
                *batch++ = 0;
                *batch++ = 0;
                *batch++ = 0;
        }

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        return batch;
}

static u32 *
gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        int i;

        /*
         * WaPipeControlBefore3DStateSamplePattern: cnl
         *
         * Ensure the engine is idle prior to programming a
         * 3DSTATE_SAMPLE_PATTERN during a context restore.
         */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_CS_STALL,
                                       0);
        /*
         * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
         * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
         * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
         * confusing. Since gen8_emit_pipe_control() already advances the
         * batch by 6 dwords, we advance the other 10 here, completing a
         * cacheline. It's not clear if the workaround requires this padding
         * before other commands, or if it's just the regular padding we would
         * already have for the workaround bb, so leave it here for now.
         */
        for (i = 0; i < 10; i++)
                *batch++ = MI_NOOP;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int err;

        obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
        if (err)
                goto err;

        engine->wa_ctx.vma = vma;
        return 0;

err:
        i915_gem_object_put(obj);
        return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
        i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

int lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
        struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
        struct i915_wa_ctx_bb *wa_bb[] = {
                &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
        };
        wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
        void *batch, *batch_ptr;
        unsigned int i;
        int ret;

        if (engine->class != RENDER_CLASS)
                return 0;

        switch (INTEL_GEN(engine->i915)) {
        case 12:
        case 11:
                return 0;
        case 10:
                wa_bb_fn[0] = gen10_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
                break;
        case 9:
                wa_bb_fn[0] = gen9_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
                break;
        case 8:
                wa_bb_fn[0] = gen8_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
                break;
        default:
                MISSING_CASE(INTEL_GEN(engine->i915));
                return 0;
        }

        ret = lrc_setup_wa_ctx(engine);
        if (ret) {
                drm_dbg(&engine->i915->drm,
                        "Failed to setup context WA page: %d\n", ret);
                return ret;
        }
        batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
        if (IS_ERR(batch)) {
                lrc_fini_wa_ctx(engine);
                return PTR_ERR(batch);
        }

        /*
         * Emit the two workaround batch buffers, recording the offset from the
         * start of the workaround batch buffer object for each and their
         * respective sizes.
         */
        batch_ptr = batch;
        for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
                wa_bb[i]->offset = batch_ptr - batch;
                if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
                                                  CACHELINE_BYTES))) {
                        ret = -EINVAL;
                        break;
                }
                if (wa_bb_fn[i])
                        batch_ptr = wa_bb_fn[i](engine, batch_ptr);
                wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
        }
        GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

        __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
        __i915_gem_object_release_map(wa_ctx->vma->obj);
        if (ret)
                lrc_fini_wa_ctx(engine);

        return ret;
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
        ce->runtime.num_underflow++;
        ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

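/*
 * Accumulate the CTX_TIMESTAMP delta since the last sample into the
 * context's EWMA and total runtime. The counter is 32 bits wide, so the
 * subtraction is modulo 2^32; an apparently negative delta is reported
 * as an underflow and discarded rather than corrupting the totals.
 */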
void lrc_update_runtime(struct intel_context *ce)
{
        u32 old;
        s32 dt;

        if (intel_context_is_barrier(ce))
                return;

        old = ce->runtime.last;
        ce->runtime.last = lrc_get_runtime(ce);
        dt = ce->runtime.last - old;

        if (unlikely(dt < 0)) {
                CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
                         old, ce->runtime.last, dt);
                st_update_runtime_underflow(ce, dt);
                return;
        }

        ewma_runtime_add(&ce->runtime.avg, dt);
        ce->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif