drivers/gpu/drm/i915/gt/intel_lrc.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014 Intel Corporation
4  */
5
6 #include "gen8_engine_cs.h"
7 #include "i915_drv.h"
8 #include "i915_perf.h"
9 #include "intel_engine.h"
10 #include "intel_gpu_commands.h"
11 #include "intel_gt.h"
12 #include "intel_lrc.h"
13 #include "intel_lrc_reg.h"
14 #include "intel_ring.h"
15 #include "shmem_utils.h"
16
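/*
 * The reg_offsets tables below use a compact byte encoding which
 * set_offsets() expands into the MI_LOAD_REGISTER_IMM layout of the default
 * context image:
 *
 *   NOP(n)          BIT(7) | n: skip n dwords in the image
 *   LRI(count, f)   emit MI_LOAD_REGISTER_IMM(count), flags (e.g. POSTED)
 *                   in the top two bits of the byte
 *   REG(x)          one byte, register offset x (< 0x200) stored as x >> 2
 *   REG16(x)        two bytes for larger offsets, 7 bits each, high part first
 *   END             terminating zero byte
 *
 * Worked example from gen8_xcs_offsets[]: NOP(1) = 0x81 skips one dword,
 * LRI(11, 0) = 0x0b emits MI_LOAD_REGISTER_IMM(11), and REG16(0x244) encodes
 * as the bytes 0x81, 0x11, which the inner decode loop reassembles into
 * offset 0x91, so regs[0] = mmio_base + (0x91 << 2) = mmio_base + 0x244.
 * Each REG/REG16 entry advances regs by two dwords: the offset written here
 * plus a value dword that is left to the default state.
 */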
17 static void set_offsets(u32 *regs,
18                         const u8 *data,
19                         const struct intel_engine_cs *engine,
20                         bool close)
21 #define NOP(x) (BIT(7) | (x))
22 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
23 #define POSTED BIT(0)
24 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
25 #define REG16(x) \
26         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
27         (((x) >> 2) & 0x7f)
28 #define END 0
29 {
30         const u32 base = engine->mmio_base;
31
32         while (*data) {
33                 u8 count, flags;
34
35                 if (*data & BIT(7)) { /* skip */
36                         count = *data++ & ~BIT(7);
37                         regs += count;
38                         continue;
39                 }
40
41                 count = *data & 0x3f;
42                 flags = *data >> 6;
43                 data++;
44
45                 *regs = MI_LOAD_REGISTER_IMM(count);
46                 if (flags & POSTED)
47                         *regs |= MI_LRI_FORCE_POSTED;
48                 if (INTEL_GEN(engine->i915) >= 11)
49                         *regs |= MI_LRI_LRM_CS_MMIO;
50                 regs++;
51
52                 GEM_BUG_ON(!count);
53                 do {
54                         u32 offset = 0;
55                         u8 v;
56
57                         do {
58                                 v = *data++;
59                                 offset <<= 7;
60                                 offset |= v & ~BIT(7);
61                         } while (v & BIT(7));
62
63                         regs[0] = base + (offset << 2);
64                         regs += 2;
65                 } while (--count);
66         }
67
68         if (close) {
69                 /* Close the batch; used mainly by live_lrc_layout() */
70                 *regs = MI_BATCH_BUFFER_END;
71                 if (INTEL_GEN(engine->i915) >= 10)
72                         *regs |= BIT(0);
73         }
74 }
75
76 static const u8 gen8_xcs_offsets[] = {
77         NOP(1),
78         LRI(11, 0),
79         REG16(0x244),
80         REG(0x034),
81         REG(0x030),
82         REG(0x038),
83         REG(0x03c),
84         REG(0x168),
85         REG(0x140),
86         REG(0x110),
87         REG(0x11c),
88         REG(0x114),
89         REG(0x118),
90
91         NOP(9),
92         LRI(9, 0),
93         REG16(0x3a8),
94         REG16(0x28c),
95         REG16(0x288),
96         REG16(0x284),
97         REG16(0x280),
98         REG16(0x27c),
99         REG16(0x278),
100         REG16(0x274),
101         REG16(0x270),
102
103         NOP(13),
104         LRI(2, 0),
105         REG16(0x200),
106         REG(0x028),
107
108         END
109 };
110
111 static const u8 gen9_xcs_offsets[] = {
112         NOP(1),
113         LRI(14, POSTED),
114         REG16(0x244),
115         REG(0x034),
116         REG(0x030),
117         REG(0x038),
118         REG(0x03c),
119         REG(0x168),
120         REG(0x140),
121         REG(0x110),
122         REG(0x11c),
123         REG(0x114),
124         REG(0x118),
125         REG(0x1c0),
126         REG(0x1c4),
127         REG(0x1c8),
128
129         NOP(3),
130         LRI(9, POSTED),
131         REG16(0x3a8),
132         REG16(0x28c),
133         REG16(0x288),
134         REG16(0x284),
135         REG16(0x280),
136         REG16(0x27c),
137         REG16(0x278),
138         REG16(0x274),
139         REG16(0x270),
140
141         NOP(13),
142         LRI(1, POSTED),
143         REG16(0x200),
144
145         NOP(13),
146         LRI(44, POSTED),
147         REG(0x028),
148         REG(0x09c),
149         REG(0x0c0),
150         REG(0x178),
151         REG(0x17c),
152         REG16(0x358),
153         REG(0x170),
154         REG(0x150),
155         REG(0x154),
156         REG(0x158),
157         REG16(0x41c),
158         REG16(0x600),
159         REG16(0x604),
160         REG16(0x608),
161         REG16(0x60c),
162         REG16(0x610),
163         REG16(0x614),
164         REG16(0x618),
165         REG16(0x61c),
166         REG16(0x620),
167         REG16(0x624),
168         REG16(0x628),
169         REG16(0x62c),
170         REG16(0x630),
171         REG16(0x634),
172         REG16(0x638),
173         REG16(0x63c),
174         REG16(0x640),
175         REG16(0x644),
176         REG16(0x648),
177         REG16(0x64c),
178         REG16(0x650),
179         REG16(0x654),
180         REG16(0x658),
181         REG16(0x65c),
182         REG16(0x660),
183         REG16(0x664),
184         REG16(0x668),
185         REG16(0x66c),
186         REG16(0x670),
187         REG16(0x674),
188         REG16(0x678),
189         REG16(0x67c),
190         REG(0x068),
191
192         END
193 };
194
195 static const u8 gen12_xcs_offsets[] = {
196         NOP(1),
197         LRI(13, POSTED),
198         REG16(0x244),
199         REG(0x034),
200         REG(0x030),
201         REG(0x038),
202         REG(0x03c),
203         REG(0x168),
204         REG(0x140),
205         REG(0x110),
206         REG(0x1c0),
207         REG(0x1c4),
208         REG(0x1c8),
209         REG(0x180),
210         REG16(0x2b4),
211
212         NOP(5),
213         LRI(9, POSTED),
214         REG16(0x3a8),
215         REG16(0x28c),
216         REG16(0x288),
217         REG16(0x284),
218         REG16(0x280),
219         REG16(0x27c),
220         REG16(0x278),
221         REG16(0x274),
222         REG16(0x270),
223
224         END
225 };
226
227 static const u8 gen8_rcs_offsets[] = {
228         NOP(1),
229         LRI(14, POSTED),
230         REG16(0x244),
231         REG(0x034),
232         REG(0x030),
233         REG(0x038),
234         REG(0x03c),
235         REG(0x168),
236         REG(0x140),
237         REG(0x110),
238         REG(0x11c),
239         REG(0x114),
240         REG(0x118),
241         REG(0x1c0),
242         REG(0x1c4),
243         REG(0x1c8),
244
245         NOP(3),
246         LRI(9, POSTED),
247         REG16(0x3a8),
248         REG16(0x28c),
249         REG16(0x288),
250         REG16(0x284),
251         REG16(0x280),
252         REG16(0x27c),
253         REG16(0x278),
254         REG16(0x274),
255         REG16(0x270),
256
257         NOP(13),
258         LRI(1, 0),
259         REG(0x0c8),
260
261         END
262 };
263
264 static const u8 gen9_rcs_offsets[] = {
265         NOP(1),
266         LRI(14, POSTED),
267         REG16(0x244),
268         REG(0x34),
269         REG(0x30),
270         REG(0x38),
271         REG(0x3c),
272         REG(0x168),
273         REG(0x140),
274         REG(0x110),
275         REG(0x11c),
276         REG(0x114),
277         REG(0x118),
278         REG(0x1c0),
279         REG(0x1c4),
280         REG(0x1c8),
281
282         NOP(3),
283         LRI(9, POSTED),
284         REG16(0x3a8),
285         REG16(0x28c),
286         REG16(0x288),
287         REG16(0x284),
288         REG16(0x280),
289         REG16(0x27c),
290         REG16(0x278),
291         REG16(0x274),
292         REG16(0x270),
293
294         NOP(13),
295         LRI(1, 0),
296         REG(0xc8),
297
298         NOP(13),
299         LRI(44, POSTED),
300         REG(0x28),
301         REG(0x9c),
302         REG(0xc0),
303         REG(0x178),
304         REG(0x17c),
305         REG16(0x358),
306         REG(0x170),
307         REG(0x150),
308         REG(0x154),
309         REG(0x158),
310         REG16(0x41c),
311         REG16(0x600),
312         REG16(0x604),
313         REG16(0x608),
314         REG16(0x60c),
315         REG16(0x610),
316         REG16(0x614),
317         REG16(0x618),
318         REG16(0x61c),
319         REG16(0x620),
320         REG16(0x624),
321         REG16(0x628),
322         REG16(0x62c),
323         REG16(0x630),
324         REG16(0x634),
325         REG16(0x638),
326         REG16(0x63c),
327         REG16(0x640),
328         REG16(0x644),
329         REG16(0x648),
330         REG16(0x64c),
331         REG16(0x650),
332         REG16(0x654),
333         REG16(0x658),
334         REG16(0x65c),
335         REG16(0x660),
336         REG16(0x664),
337         REG16(0x668),
338         REG16(0x66c),
339         REG16(0x670),
340         REG16(0x674),
341         REG16(0x678),
342         REG16(0x67c),
343         REG(0x68),
344
345         END
346 };
347
348 static const u8 gen11_rcs_offsets[] = {
349         NOP(1),
350         LRI(15, POSTED),
351         REG16(0x244),
352         REG(0x034),
353         REG(0x030),
354         REG(0x038),
355         REG(0x03c),
356         REG(0x168),
357         REG(0x140),
358         REG(0x110),
359         REG(0x11c),
360         REG(0x114),
361         REG(0x118),
362         REG(0x1c0),
363         REG(0x1c4),
364         REG(0x1c8),
365         REG(0x180),
366
367         NOP(1),
368         LRI(9, POSTED),
369         REG16(0x3a8),
370         REG16(0x28c),
371         REG16(0x288),
372         REG16(0x284),
373         REG16(0x280),
374         REG16(0x27c),
375         REG16(0x278),
376         REG16(0x274),
377         REG16(0x270),
378
379         LRI(1, POSTED),
380         REG(0x1b0),
381
382         NOP(10),
383         LRI(1, 0),
384         REG(0x0c8),
385
386         END
387 };
388
389 static const u8 gen12_rcs_offsets[] = {
390         NOP(1),
391         LRI(13, POSTED),
392         REG16(0x244),
393         REG(0x034),
394         REG(0x030),
395         REG(0x038),
396         REG(0x03c),
397         REG(0x168),
398         REG(0x140),
399         REG(0x110),
400         REG(0x1c0),
401         REG(0x1c4),
402         REG(0x1c8),
403         REG(0x180),
404         REG16(0x2b4),
405
406         NOP(5),
407         LRI(9, POSTED),
408         REG16(0x3a8),
409         REG16(0x28c),
410         REG16(0x288),
411         REG16(0x284),
412         REG16(0x280),
413         REG16(0x27c),
414         REG16(0x278),
415         REG16(0x274),
416         REG16(0x270),
417
418         LRI(3, POSTED),
419         REG(0x1b0),
420         REG16(0x5a8),
421         REG16(0x5ac),
422
423         NOP(6),
424         LRI(1, 0),
425         REG(0x0c8),
426         NOP(3 + 9 + 1),
427
428         LRI(51, POSTED),
429         REG16(0x588),
430         REG16(0x588),
431         REG16(0x588),
432         REG16(0x588),
433         REG16(0x588),
434         REG16(0x588),
435         REG(0x028),
436         REG(0x09c),
437         REG(0x0c0),
438         REG(0x178),
439         REG(0x17c),
440         REG16(0x358),
441         REG(0x170),
442         REG(0x150),
443         REG(0x154),
444         REG(0x158),
445         REG16(0x41c),
446         REG16(0x600),
447         REG16(0x604),
448         REG16(0x608),
449         REG16(0x60c),
450         REG16(0x610),
451         REG16(0x614),
452         REG16(0x618),
453         REG16(0x61c),
454         REG16(0x620),
455         REG16(0x624),
456         REG16(0x628),
457         REG16(0x62c),
458         REG16(0x630),
459         REG16(0x634),
460         REG16(0x638),
461         REG16(0x63c),
462         REG16(0x640),
463         REG16(0x644),
464         REG16(0x648),
465         REG16(0x64c),
466         REG16(0x650),
467         REG16(0x654),
468         REG16(0x658),
469         REG16(0x65c),
470         REG16(0x660),
471         REG16(0x664),
472         REG16(0x668),
473         REG16(0x66c),
474         REG16(0x670),
475         REG16(0x674),
476         REG16(0x678),
477         REG16(0x67c),
478         REG(0x068),
479         REG(0x084),
480         NOP(1),
481
482         END
483 };
484
485 #undef END
486 #undef REG16
487 #undef REG
488 #undef LRI
489 #undef NOP
490
491 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
492 {
493         /*
494          * The gen12+ lists only have the registers we program in the basic
495          * default state. We rely on the context image using relative
496          * addressing to automatically fix up the register state between the
497          * physical engines for virtual engines.
498          */
499         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
500                    !intel_engine_has_relative_mmio(engine));
501
502         if (engine->class == RENDER_CLASS) {
503                 if (INTEL_GEN(engine->i915) >= 12)
504                         return gen12_rcs_offsets;
505                 else if (INTEL_GEN(engine->i915) >= 11)
506                         return gen11_rcs_offsets;
507                 else if (INTEL_GEN(engine->i915) >= 9)
508                         return gen9_rcs_offsets;
509                 else
510                         return gen8_rcs_offsets;
511         } else {
512                 if (INTEL_GEN(engine->i915) >= 12)
513                         return gen12_xcs_offsets;
514                 else if (INTEL_GEN(engine->i915) >= 9)
515                         return gen9_xcs_offsets;
516                 else
517                         return gen8_xcs_offsets;
518         }
519 }
520
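/*
 * The lrc_ring_*() helpers below return the dword index, within the context
 * image (ce->lrc_reg_state), of the MI_LRI register-offset slot for the named
 * register; the corresponding value lives at index + 1 (see __reset_stop_ring()
 * and lrc_setup_indirect_ctx()). A return value of -1 means the register is
 * not present in that generation's context layout.
 */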
521 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
522 {
523         if (INTEL_GEN(engine->i915) >= 12)
524                 return 0x60;
525         else if (INTEL_GEN(engine->i915) >= 9)
526                 return 0x54;
527         else if (engine->class == RENDER_CLASS)
528                 return 0x58;
529         else
530                 return -1;
531 }
532
533 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
534 {
535         if (INTEL_GEN(engine->i915) >= 12)
536                 return 0x74;
537         else if (INTEL_GEN(engine->i915) >= 9)
538                 return 0x68;
539         else if (engine->class == RENDER_CLASS)
540                 return 0xd8;
541         else
542                 return -1;
543 }
544
545 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
546 {
547         if (INTEL_GEN(engine->i915) >= 12)
548                 return 0x12;
549         else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
550                 return 0x18;
551         else
552                 return -1;
553 }
554
555 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
556 {
557         int x;
558
559         x = lrc_ring_wa_bb_per_ctx(engine);
560         if (x < 0)
561                 return x;
562
563         return x + 2;
564 }
565
566 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
567 {
568         int x;
569
570         x = lrc_ring_indirect_ptr(engine);
571         if (x < 0)
572                 return x;
573
574         return x + 2;
575 }
576
577 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
578 {
579         if (engine->class != RENDER_CLASS)
580                 return -1;
581
582         if (INTEL_GEN(engine->i915) >= 12)
583                 return 0xb6;
584         else if (INTEL_GEN(engine->i915) >= 11)
585                 return 0xaa;
586         else
587                 return -1;
588 }
589
590 static u32
591 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
592 {
593         switch (INTEL_GEN(engine->i915)) {
594         default:
595                 MISSING_CASE(INTEL_GEN(engine->i915));
596                 fallthrough;
597         case 12:
598                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
599         case 11:
600                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
601         case 10:
602                 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
603         case 9:
604                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
605         case 8:
606                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
607         }
608 }
609
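/*
 * A short note on the encoding used below: the INDIRECT_CTX pointer dword
 * packs the cacheline-aligned GGTT address of the batch together with its
 * size in cachelines in the low bits, while the INDIRECT_CTX_OFFSET dword
 * takes the per-generation default offset shifted left by 6.
 */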
610 static void
611 lrc_setup_indirect_ctx(u32 *regs,
612                        const struct intel_engine_cs *engine,
613                        u32 ctx_bb_ggtt_addr,
614                        u32 size)
615 {
616         GEM_BUG_ON(!size);
617         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
618         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
619         regs[lrc_ring_indirect_ptr(engine) + 1] =
620                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
621
622         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
623         regs[lrc_ring_indirect_offset(engine) + 1] =
624                 lrc_ring_indirect_offset_default(engine) << 6;
625 }
626
627 static void init_common_regs(u32 * const regs,
628                              const struct intel_context *ce,
629                              const struct intel_engine_cs *engine,
630                              bool inhibit)
631 {
632         u32 ctl;
633
634         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
635         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
636         if (inhibit)
637                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
638         if (INTEL_GEN(engine->i915) < 11)
639                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
640                                            CTX_CTRL_RS_CTX_ENABLE);
641         regs[CTX_CONTEXT_CONTROL] = ctl;
642
643         regs[CTX_TIMESTAMP] = ce->runtime.last;
644 }
645
646 static void init_wa_bb_regs(u32 * const regs,
647                             const struct intel_engine_cs *engine)
648 {
649         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
650
651         if (wa_ctx->per_ctx.size) {
652                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
653
654                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
655                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
656                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
657         }
658
659         if (wa_ctx->indirect_ctx.size) {
660                 lrc_setup_indirect_ctx(regs, engine,
661                                        i915_ggtt_offset(wa_ctx->vma) +
662                                        wa_ctx->indirect_ctx.offset,
663                                        wa_ctx->indirect_ctx.size);
664         }
665 }
666
667 static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
668 {
669         if (i915_vm_is_4lvl(&ppgtt->vm)) {
670                 /* 64b PPGTT (48bit canonical)
671                  * PDP0_DESCRIPTOR contains the base address of the PML4 and the
672                  * other PDP descriptors are ignored.
673                  */
674                 ASSIGN_CTX_PML4(ppgtt, regs);
675         } else {
676                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
677                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
678                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
679                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
680         }
681 }
682
683 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
684 {
685         if (i915_is_ggtt(vm))
686                 return i915_vm_to_ggtt(vm)->alias;
687         else
688                 return i915_vm_to_ppgtt(vm);
689 }
690
691 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
692 {
693         int x;
694
695         x = lrc_ring_mi_mode(engine);
696         if (x != -1) {
697                 regs[x + 1] &= ~STOP_RING;
698                 regs[x + 1] |= STOP_RING << 16;
699         }
700 }
701
702 static void __lrc_init_regs(u32 *regs,
703                             const struct intel_context *ce,
704                             const struct intel_engine_cs *engine,
705                             bool inhibit)
706 {
707         /*
708          * A context is actually a big batch buffer with several
709          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
710          * values we are setting here are only for the first context restore:
711  * on a subsequent save, the GPU will recreate this batch buffer with new
712          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
713          * we are not initializing here).
714          *
715          * Must keep consistent with virtual_update_register_offsets().
716          */
717
718         if (inhibit)
719                 memset(regs, 0, PAGE_SIZE);
720
721         set_offsets(regs, reg_offsets(engine), engine, inhibit);
722
723         init_common_regs(regs, ce, engine, inhibit);
724         init_ppgtt_regs(regs, vm_alias(ce->vm));
725
726         init_wa_bb_regs(regs, engine);
727
728         __reset_stop_ring(regs, engine);
729 }
730
731 void lrc_init_regs(const struct intel_context *ce,
732                    const struct intel_engine_cs *engine,
733                    bool inhibit)
734 {
735         __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
736 }
737
738 void lrc_reset_regs(const struct intel_context *ce,
739                     const struct intel_engine_cs *engine)
740 {
741         __reset_stop_ring(ce->lrc_reg_state, engine);
742 }
743
744 static void
745 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
746 {
747         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
748                 return;
749
750         vaddr += engine->context_size;
751
752         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
753 }
754
755 static void
756 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
757 {
758         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
759                 return;
760
761         vaddr += engine->context_size;
762
763         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
764                 drm_err_once(&engine->i915->drm,
765                              "%s context redzone overwritten!\n",
766                              engine->name);
767 }
768
769 void lrc_init_state(struct intel_context *ce,
770                     struct intel_engine_cs *engine,
771                     void *state)
772 {
773         bool inhibit = true;
774
775         set_redzone(state, engine);
776
777         if (engine->default_state) {
778                 shmem_read(engine->default_state, 0,
779                            state, engine->context_size);
780                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
781                 inhibit = false;
782         }
783
784         /* Clear the ppHWSP (inc. per-context counters) */
785         memset(state, 0, PAGE_SIZE);
786
787         /*
788          * The second page of the context object contains some registers which
789          * must be set up prior to the first execution.
790          */
791         __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
792 }
793
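/*
 * Rough layout of the context object allocated below (as implied by the code,
 * sizes rounded to GTT pages): the engine context image starts at offset 0
 * with the per-process HWSP page (cleared in lrc_init_state()), followed by
 * the register state at LRC_STATE_OFFSET; with CONFIG_DRM_I915_DEBUG_GEM an
 * extra redzone page is appended after engine->context_size, and on Gen12 one
 * more page (ce->wa_bb_page) is added at the end for the indirect context
 * workaround batch.
 */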
794 static struct i915_vma *
795 __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
796 {
797         struct drm_i915_gem_object *obj;
798         struct i915_vma *vma;
799         u32 context_size;
800
801         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
802
803         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
804                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
805
806         if (INTEL_GEN(engine->i915) == 12) {
807                 ce->wa_bb_page = context_size / PAGE_SIZE;
808                 context_size += PAGE_SIZE;
809         }
810
811         obj = i915_gem_object_create_shmem(engine->i915, context_size);
812         if (IS_ERR(obj))
813                 return ERR_CAST(obj);
814
815         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
816         if (IS_ERR(vma)) {
817                 i915_gem_object_put(obj);
818                 return vma;
819         }
820
821         return vma;
822 }
823
824 static struct intel_timeline *
825 pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
826 {
827         struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
828
829         return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
830 }
831
832 int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
833 {
834         struct intel_ring *ring;
835         struct i915_vma *vma;
836         int err;
837
838         GEM_BUG_ON(ce->state);
839
840         vma = __lrc_alloc_state(ce, engine);
841         if (IS_ERR(vma))
842                 return PTR_ERR(vma);
843
844         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
845         if (IS_ERR(ring)) {
846                 err = PTR_ERR(ring);
847                 goto err_vma;
848         }
849
850         if (!page_mask_bits(ce->timeline)) {
851                 struct intel_timeline *tl;
852
853                 /*
854                  * Use the static global HWSP for the kernel context, and
855                  * a dynamically allocated cacheline for everyone else.
856                  */
857                 if (unlikely(ce->timeline))
858                         tl = pinned_timeline(ce, engine);
859                 else
860                         tl = intel_timeline_create(engine->gt);
861                 if (IS_ERR(tl)) {
862                         err = PTR_ERR(tl);
863                         goto err_ring;
864                 }
865
866                 ce->timeline = tl;
867         }
868
869         ce->ring = ring;
870         ce->state = vma;
871
872         return 0;
873
874 err_ring:
875         intel_ring_put(ring);
876 err_vma:
877         i915_vma_put(vma);
878         return err;
879 }
880
881 void lrc_reset(struct intel_context *ce)
882 {
883         GEM_BUG_ON(!intel_context_is_pinned(ce));
884
885         intel_ring_reset(ce->ring, ce->ring->emit);
886
887         /* Scrub away the garbage */
888         lrc_init_regs(ce, ce->engine, true);
889         ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
890 }
891
892 int
893 lrc_pre_pin(struct intel_context *ce,
894             struct intel_engine_cs *engine,
895             struct i915_gem_ww_ctx *ww,
896             void **vaddr)
897 {
898         GEM_BUG_ON(!ce->state);
899         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
900
901         *vaddr = i915_gem_object_pin_map(ce->state->obj,
902                                          i915_coherent_map_type(ce->engine->i915) |
903                                          I915_MAP_OVERRIDE);
904
905         return PTR_ERR_OR_ZERO(*vaddr);
906 }
907
908 int
909 lrc_pin(struct intel_context *ce,
910         struct intel_engine_cs *engine,
911         void *vaddr)
912 {
913         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
914
915         if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
916                 lrc_init_state(ce, engine, vaddr);
917
918         ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
919         return 0;
920 }
921
922 void lrc_unpin(struct intel_context *ce)
923 {
924         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
925                       ce->engine);
926 }
927
928 void lrc_post_unpin(struct intel_context *ce)
929 {
930         i915_gem_object_unpin_map(ce->state->obj);
931 }
932
933 void lrc_fini(struct intel_context *ce)
934 {
935         if (!ce->state)
936                 return;
937
938         intel_ring_put(fetch_and_zero(&ce->ring));
939         i915_vma_put(fetch_and_zero(&ce->state));
940 }
941
942 void lrc_destroy(struct kref *kref)
943 {
944         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
945
946         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
947         GEM_BUG_ON(intel_context_is_pinned(ce));
948
949         lrc_fini(ce);
950
951         intel_context_fini(ce);
952         intel_context_free(ce);
953 }
954
955 static u32 *
956 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
957 {
958         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
959                 MI_SRM_LRM_GLOBAL_GTT |
960                 MI_LRI_LRM_CS_MMIO;
961         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
962         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
963                 CTX_TIMESTAMP * sizeof(u32);
964         *cs++ = 0;
965
966         *cs++ = MI_LOAD_REGISTER_REG |
967                 MI_LRR_SOURCE_CS_MMIO |
968                 MI_LRI_LRM_CS_MMIO;
969         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
970         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
971
972         *cs++ = MI_LOAD_REGISTER_REG |
973                 MI_LRR_SOURCE_CS_MMIO |
974                 MI_LRI_LRM_CS_MMIO;
975         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
976         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
977
978         return cs;
979 }
980
981 static u32 *
982 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
983 {
984         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
985
986         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
987                 MI_SRM_LRM_GLOBAL_GTT |
988                 MI_LRI_LRM_CS_MMIO;
989         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
990         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
991                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
992         *cs++ = 0;
993
994         return cs;
995 }
996
997 static u32 *
998 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
999 {
1000         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
1001
1002         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1003                 MI_SRM_LRM_GLOBAL_GTT |
1004                 MI_LRI_LRM_CS_MMIO;
1005         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1006         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1007                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
1008         *cs++ = 0;
1009
1010         *cs++ = MI_LOAD_REGISTER_REG |
1011                 MI_LRR_SOURCE_CS_MMIO |
1012                 MI_LRI_LRM_CS_MMIO;
1013         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1014         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
1015
1016         return cs;
1017 }
1018
1019 static u32 *
1020 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
1021 {
1022         cs = gen12_emit_timestamp_wa(ce, cs);
1023         cs = gen12_emit_cmd_buf_wa(ce, cs);
1024         cs = gen12_emit_restore_scratch(ce, cs);
1025
1026         return cs;
1027 }
1028
1029 static u32 *
1030 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
1031 {
1032         cs = gen12_emit_timestamp_wa(ce, cs);
1033         cs = gen12_emit_restore_scratch(ce, cs);
1034
1035         return cs;
1036 }
1037
1038 static u32 context_wa_bb_offset(const struct intel_context *ce)
1039 {
1040         return PAGE_SIZE * ce->wa_bb_page;
1041 }
1042
1043 static u32 *context_indirect_bb(const struct intel_context *ce)
1044 {
1045         void *ptr;
1046
1047         GEM_BUG_ON(!ce->wa_bb_page);
1048
1049         ptr = ce->lrc_reg_state;
1050         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
1051         ptr += context_wa_bb_offset(ce);
1052
1053         return ptr;
1054 }
1055
1056 static void
1057 setup_indirect_ctx_bb(const struct intel_context *ce,
1058                       const struct intel_engine_cs *engine,
1059                       u32 *(*emit)(const struct intel_context *, u32 *))
1060 {
1061         u32 * const start = context_indirect_bb(ce);
1062         u32 *cs;
1063
1064         cs = emit(ce, start);
1065         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
1066         while ((unsigned long)cs % CACHELINE_BYTES)
1067                 *cs++ = MI_NOOP;
1068
1069         lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
1070                                i915_ggtt_offset(ce->state) +
1071                                context_wa_bb_offset(ce),
1072                                (cs - start) * sizeof(*cs));
1073 }
1074
1075 /*
1076  * The context descriptor encodes various attributes of a context,
1077  * including its GTT address and some flags. Because it's fairly
1078  * expensive to calculate, we'll just do it once and cache the result,
1079  * which remains valid until the context is unpinned.
1080  *
1081  * This is what a descriptor looks like, from LSB to MSB::
1082  *
1083  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
1084  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
1085  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
1086  *      bits 53-54:    mbz, reserved for use by hardware
1087  *      bits 55-63:    group ID, currently unused and set to 0
1088  *
1089  * Starting from Gen11, the upper dword of the descriptor has a new format:
1090  *
1091  *      bits 32-36:    reserved
1092  *      bits 37-47:    SW context ID
1093  *      bits 48-53:    engine instance
1094  *      bit 54:        mbz, reserved for use by hardware
1095  *      bits 55-60:    SW counter
1096  *      bits 61-63:    engine class
1097  *
1098  * engine info, SW context ID and SW counter need to form a unique number
1099  * (Context ID) per lrc.
1100  */
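/*
 * Note that the helper below only assembles the low dword of the descriptor
 * (the GEN8_CTX_* flags and the LRCA); the context ID fields in the upper
 * dword described above are presumably filled in later by the submission
 * backend, outside this file.
 */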
1101 static u32 lrc_descriptor(const struct intel_context *ce)
1102 {
1103         u32 desc;
1104
1105         desc = INTEL_LEGACY_32B_CONTEXT;
1106         if (i915_vm_is_4lvl(ce->vm))
1107                 desc = INTEL_LEGACY_64B_CONTEXT;
1108         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
1109
1110         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
1111         if (IS_GEN(ce->vm->i915, 8))
1112                 desc |= GEN8_CTX_L3LLC_COHERENT;
1113
1114         return i915_ggtt_offset(ce->state) | desc;
1115 }
1116
1117 u32 lrc_update_regs(const struct intel_context *ce,
1118                     const struct intel_engine_cs *engine,
1119                     u32 head)
1120 {
1121         struct intel_ring *ring = ce->ring;
1122         u32 *regs = ce->lrc_reg_state;
1123
1124         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
1125         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
1126
1127         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1128         regs[CTX_RING_HEAD] = head;
1129         regs[CTX_RING_TAIL] = ring->tail;
1130         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1131
1132         /* RPCS */
1133         if (engine->class == RENDER_CLASS) {
1134                 regs[CTX_R_PWR_CLK_STATE] =
1135                         intel_sseu_make_rpcs(engine->gt, &ce->sseu);
1136
1137                 i915_oa_init_reg_state(ce, engine);
1138         }
1139
1140         if (ce->wa_bb_page) {
1141                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
1142
1143                 fn = gen12_emit_indirect_ctx_xcs;
1144                 if (ce->engine->class == RENDER_CLASS)
1145                         fn = gen12_emit_indirect_ctx_rcs;
1146
1147                 /* Mutually exclusive wrt the global indirect bb */
1148                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
1149                 setup_indirect_ctx_bb(ce, engine, fn);
1150         }
1151
1152         return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
1153 }
1154
1155 void lrc_update_offsets(struct intel_context *ce,
1156                         struct intel_engine_cs *engine)
1157 {
1158         set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
1159 }
1160
1161 void lrc_check_regs(const struct intel_context *ce,
1162                     const struct intel_engine_cs *engine,
1163                     const char *when)
1164 {
1165         const struct intel_ring *ring = ce->ring;
1166         u32 *regs = ce->lrc_reg_state;
1167         bool valid = true;
1168         int x;
1169
1170         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1171                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1172                        engine->name,
1173                        regs[CTX_RING_START],
1174                        i915_ggtt_offset(ring->vma));
1175                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1176                 valid = false;
1177         }
1178
1179         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1180             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1181                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1182                        engine->name,
1183                        regs[CTX_RING_CTL],
1184                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1185                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1186                 valid = false;
1187         }
1188
1189         x = lrc_ring_mi_mode(engine);
1190         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1191                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1192                        engine->name, regs[x + 1]);
1193                 regs[x + 1] &= ~STOP_RING;
1194                 regs[x + 1] |= STOP_RING << 16;
1195                 valid = false;
1196         }
1197
1198         WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1199 }
1200
1201 /*
1202  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1203  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1204  * but there is a slight complication as this is applied in WA batch where the
1205  * values are only initialized once so we cannot take register value at the
1206  * beginning and reuse it further; hence we save its value to memory, upload a
1207  * constant value with bit21 set and then we restore it back with the saved value.
1208  * To simplify the WA, a constant value is formed by using the default value
1209  * of this register. This shouldn't be a problem because we are only modifying
1210  * it for a short period and this batch is non-preemptible. We could, of course,
1211  * use additional instructions that read the actual value of the register
1212  * at that time and set our bit of interest, but that makes the WA complicated.
1213  *
1214  * This WA is also required for Gen9 so extracting as a function avoids
1215  * code duplication.
1216  */
1217 static u32 *
1218 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
1219 {
1220         /* NB no one else is allowed to scribble over scratch + 256! */
1221         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1222         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1223         *batch++ = intel_gt_scratch_offset(engine->gt,
1224                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1225         *batch++ = 0;
1226
1227         *batch++ = MI_LOAD_REGISTER_IMM(1);
1228         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1229         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
1230
1231         batch = gen8_emit_pipe_control(batch,
1232                                        PIPE_CONTROL_CS_STALL |
1233                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
1234                                        0);
1235
1236         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1237         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1238         *batch++ = intel_gt_scratch_offset(engine->gt,
1239                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1240         *batch++ = 0;
1241
1242         return batch;
1243 }
1244
1245 /*
1246  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1247  * initialized at the beginning and shared across all contexts, but this field
1248  * helps us to have multiple batches at different offsets and select them based
1249  * on a criterion. At the moment this batch always starts at the beginning of the
1250  * page and at this point we don't have multiple wa_ctx batch buffers.
1251  *
1252  * The number of WAs applied is not known at the beginning; we use this field
1253  * to return the number of DWORDS written.
1254  *
1255  * Note that this batch does not contain MI_BATCH_BUFFER_END, so NOOPs are added
1256  * as padding to make it cacheline aligned.
1257  * MI_BATCH_BUFFER_END will be added to the per-ctx batch, and the two together
1258  * make a complete batch buffer.
1259  */
1260 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1261 {
1262         /* WaDisableCtxRestoreArbitration:bdw,chv */
1263         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1264
1265         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1266         if (IS_BROADWELL(engine->i915))
1267                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1268
1269         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1270         /* Actual scratch location is at 128 bytes offset */
1271         batch = gen8_emit_pipe_control(batch,
1272                                        PIPE_CONTROL_FLUSH_L3 |
1273                                        PIPE_CONTROL_STORE_DATA_INDEX |
1274                                        PIPE_CONTROL_CS_STALL |
1275                                        PIPE_CONTROL_QW_WRITE,
1276                                        LRC_PPHWSP_SCRATCH_ADDR);
1277
1278         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1279
1280         /* Pad to end of cacheline */
1281         while ((unsigned long)batch % CACHELINE_BYTES)
1282                 *batch++ = MI_NOOP;
1283
1284         /*
1285          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1286          * execution depends on the length specified in terms of cache lines
1287          * in the register CTX_RCS_INDIRECT_CTX
1288          */
1289
1290         return batch;
1291 }
1292
1293 struct lri {
1294         i915_reg_t reg;
1295         u32 value;
1296 };
1297
1298 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
1299 {
1300         GEM_BUG_ON(!count || count > 63);
1301
1302         *batch++ = MI_LOAD_REGISTER_IMM(count);
1303         do {
1304                 *batch++ = i915_mmio_reg_offset(lri->reg);
1305                 *batch++ = lri->value;
1306         } while (lri++, --count);
1307         *batch++ = MI_NOOP;
1308
1309         return batch;
1310 }
1311
1312 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1313 {
1314         static const struct lri lri[] = {
1315                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
1316                 {
1317                         COMMON_SLICE_CHICKEN2,
1318                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
1319                                        0),
1320                 },
1321
1322                 /* BSpec: 11391 */
1323                 {
1324                         FF_SLICE_CHICKEN,
1325                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
1326                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
1327                 },
1328
1329                 /* BSpec: 11299 */
1330                 {
1331                         _3D_CHICKEN3,
1332                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
1333                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
1334                 }
1335         };
1336
1337         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1338
1339         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
1340         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1341
1342         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
1343         batch = gen8_emit_pipe_control(batch,
1344                                        PIPE_CONTROL_FLUSH_L3 |
1345                                        PIPE_CONTROL_STORE_DATA_INDEX |
1346                                        PIPE_CONTROL_CS_STALL |
1347                                        PIPE_CONTROL_QW_WRITE,
1348                                        LRC_PPHWSP_SCRATCH_ADDR);
1349
1350         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
1351
1352         /* WaMediaPoolStateCmdInWABB:bxt,glk */
1353         if (HAS_POOLED_EU(engine->i915)) {
1354                 /*
1355                  * EU pool configuration is set up along with the golden context
1356                  * during context initialization. This value depends on the
1357                  * device type (2x6 or 3x6) and needs to be updated based
1358                  * on which subslice is disabled, especially for 2x6
1359                  * devices; however, it is safe to load the default
1360                  * configuration of a 3x6 device instead of masking off
1361                  * the corresponding bits because HW ignores bits of a disabled
1362                  * subslice and drops down to the appropriate config. Please
1363                  * see render_state_setup() in i915_gem_render_state.c for the
1364                  * possible configurations; to avoid duplication they are
1365                  * not shown here again.
1366                  */
1367                 *batch++ = GEN9_MEDIA_POOL_STATE;
1368                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
1369                 *batch++ = 0x00777000;
1370                 *batch++ = 0;
1371                 *batch++ = 0;
1372                 *batch++ = 0;
1373         }
1374
1375         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1376
1377         /* Pad to end of cacheline */
1378         while ((unsigned long)batch % CACHELINE_BYTES)
1379                 *batch++ = MI_NOOP;
1380
1381         return batch;
1382 }
1383
1384 static u32 *
1385 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1386 {
1387         int i;
1388
1389         /*
1390          * WaPipeControlBefore3DStateSamplePattern: cnl
1391          *
1392          * Ensure the engine is idle prior to programming a
1393          * 3DSTATE_SAMPLE_PATTERN during a context restore.
1394          */
1395         batch = gen8_emit_pipe_control(batch,
1396                                        PIPE_CONTROL_CS_STALL,
1397                                        0);
1398         /*
1399          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
1400          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
1401          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
1402          * confusing. Since gen8_emit_pipe_control() already advances the
1403          * batch by 6 dwords, we advance the other 10 here, completing a
1404          * cacheline. It's not clear if the workaround requires this padding
1405          * before other commands, or if it's just the regular padding we would
1406          * already have for the workaround bb, so leave it here for now.
1407          */
1408         for (i = 0; i < 10; i++)
1409                 *batch++ = MI_NOOP;
1410
1411         /* Pad to end of cacheline */
1412         while ((unsigned long)batch % CACHELINE_BYTES)
1413                 *batch++ = MI_NOOP;
1414
1415         return batch;
1416 }
1417
1418 #define CTX_WA_BB_SIZE (PAGE_SIZE)
1419
1420 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
1421 {
1422         struct drm_i915_gem_object *obj;
1423         struct i915_vma *vma;
1424         int err;
1425
1426         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1427         if (IS_ERR(obj))
1428                 return PTR_ERR(obj);
1429
1430         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1431         if (IS_ERR(vma)) {
1432                 err = PTR_ERR(vma);
1433                 goto err;
1434         }
1435
1436         err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
1437         if (err)
1438                 goto err;
1439
1440         engine->wa_ctx.vma = vma;
1441         return 0;
1442
1443 err:
1444         i915_gem_object_put(obj);
1445         return err;
1446 }
1447
1448 void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
1449 {
1450         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1451
1452         /* Called on error unwind, clear all flags to prevent further use */
1453         memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx));
1454 }
1455
1456 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1457
1458 void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1459 {
1460         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
1461         struct i915_wa_ctx_bb *wa_bb[] = {
1462                 &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
1463         };
1464         wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
1465         void *batch, *batch_ptr;
1466         unsigned int i;
1467         int err;
1468
1469         if (engine->class != RENDER_CLASS)
1470                 return;
1471
1472         switch (INTEL_GEN(engine->i915)) {
1473         case 12:
1474         case 11:
1475                 return;
1476         case 10:
1477                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
1478                 wa_bb_fn[1] = NULL;
1479                 break;
1480         case 9:
1481                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
1482                 wa_bb_fn[1] = NULL;
1483                 break;
1484         case 8:
1485                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
1486                 wa_bb_fn[1] = NULL;
1487                 break;
1488         default:
1489                 MISSING_CASE(INTEL_GEN(engine->i915));
1490                 return;
1491         }
1492
1493         err = lrc_setup_wa_ctx(engine);
1494         if (err) {
1495                 /*
1496                  * We continue even if we fail to initialize the WA batch
1497                  * because we only expect rare glitches, nothing critical
1498                  * enough to prevent us from using the GPU.
1499                  */
1500                 drm_err(&engine->i915->drm,
1501                         "Ignoring context switch w/a allocation error:%d\n",
1502                         err);
1503                 return;
1504         }
1505
1506         batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
1507
1508         /*
1509          * Emit the two workaround batch buffers, recording the offset from the
1510          * start of the workaround batch buffer object for each and their
1511          * respective sizes.
1512          */
1513         batch_ptr = batch;
1514         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1515                 wa_bb[i]->offset = batch_ptr - batch;
1516                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1517                                                   CACHELINE_BYTES))) {
1518                         err = -EINVAL;
1519                         break;
1520                 }
1521                 if (wa_bb_fn[i])
1522                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1523                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1524         }
1525         GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1526
1527         __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1528         __i915_gem_object_release_map(wa_ctx->vma->obj);
1529
1530         /* Verify that we can handle failure to setup the wa_ctx */
1531         if (err || i915_inject_probe_error(engine->i915, -ENODEV))
1532                 lrc_fini_wa_ctx(engine);
1533 }
1534
1535 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1536 {
1537 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1538         ce->runtime.num_underflow++;
1539         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1540 #endif
1541 }
1542
1543 void lrc_update_runtime(struct intel_context *ce)
1544 {
1545         u32 old;
1546         s32 dt;
1547
1548         if (intel_context_is_barrier(ce))
1549                 return;
1550
1551         old = ce->runtime.last;
1552         ce->runtime.last = lrc_get_runtime(ce);
1553         dt = ce->runtime.last - old;
1554
1555         if (unlikely(dt < 0)) {
1556                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1557                          old, ce->runtime.last, dt);
1558                 st_update_runtime_underflow(ce, dt);
1559                 return;
1560         }
1561
1562         ewma_runtime_add(&ce->runtime.avg, dt);
1563         ce->runtime.total += dt;
1564 }
1565
1566 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1567 #include "selftest_lrc.c"
1568 #endif