// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

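/*
 * Emit a flush for gen2: a leading MI_FLUSH (with MI_READ_FLUSH added when
 * invalidation is requested), a run of dummy MI_STORE_DWORD_INDEX writes to
 * the scratch slot of the HWSP interleaved with further flushes, seemingly
 * to let the flush settle, and a trailing flush.
 */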
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
        unsigned int num_store_dw = 12;
        u32 cmd, *cs;

        cmd = MI_FLUSH;
        if (mode & EMIT_INVALIDATE)
                cmd |= MI_READ_FLUSH;

        cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = cmd;
        while (num_store_dw--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
                *cs++ = 0;
                *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
        }
        *cs++ = cmd;

        intel_ring_advance(rq, cs);

        return 0;
}

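/*
 * Flush/invalidate the render engine caches; the domain notes below describe
 * what each MI_FLUSH variant is expected to cover.
 */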
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        u32 cmd, *cs;
        int i;

        /*
         * read/write caches:
         *
         * I915_GEM_DOMAIN_RENDER is always invalidated, but is
         * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
         * also flushed at 2d versus 3d pipeline switches.
         *
         * read-only caches:
         *
         * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
         * MI_READ_FLUSH is set, and is always flushed on 965.
         *
         * I915_GEM_DOMAIN_COMMAND may not exist?
         *
         * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
         * invalidated when MI_EXE_FLUSH is set.
         *
         * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
         * invalidated with every MI_FLUSH.
         *
         * TLBs:
         *
         * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
         * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
         * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
         * are flushed at any MI_FLUSH.
         */

        cmd = MI_FLUSH;
        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_EXE_FLUSH;
                if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
                        cmd |= MI_INVALIDATE_ISP;
        }

        i = 2;
        if (mode & EMIT_INVALIDATE)
                i += 20;

        cs = intel_ring_begin(rq, i);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = cmd;

        /*
         * A random delay to let the CS invalidate take effect? Without this
         * delay, the GPU relocation path fails as the CS does not see
         * the updated contents. Just as important, if we apply the flushes
         * to the EMIT_FLUSH branch (i.e. immediately after the relocation
         * write and before the invalidate on the next batch), the relocations
         * still fail. This implies that there is a delay following invalidation
         * that is required to reset the caches as opposed to a delay to
         * ensure the memory is written.
         */
        if (mode & EMIT_INVALIDATE) {
                *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
                *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                                INTEL_GT_SCRATCH_FIELD_DEFAULT) |
                        PIPE_CONTROL_GLOBAL_GTT;
                *cs++ = 0;
                *cs++ = 0;

                for (i = 0; i < 12; i++)
                        *cs++ = MI_FLUSH;

                *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
                *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                                INTEL_GT_SCRATCH_FIELD_DEFAULT) |
                        PIPE_CONTROL_GLOBAL_GTT;
                *cs++ = 0;
                *cs++ = 0;
        }

        *cs++ = cmd;

        intel_ring_advance(rq, cs);

        return 0;
}

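/* The gen4 video engine only needs a plain MI_FLUSH, padded with a NOOP. */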
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_FLUSH;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        return 0;
}

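/*
 * Common breadcrumb emission: an MI_FLUSH, "flush" dummy seqno writes to the
 * scratch slot, "post" seqno writes to the HWSP proper, then a user interrupt.
 */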
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
                                   int flush, int post)
{
        GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
        GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

        *cs++ = MI_FLUSH;

        while (flush--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
                *cs++ = rq->fence.seqno;
        }

        while (post--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SEQNO_ADDR;
                *cs++ = rq->fence.seqno;
        }

        *cs++ = MI_USER_INTERRUPT;

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return cs;
}

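/* gen3 and gen5 differ only in how many dummy flush writes they emit. */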
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just a userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
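/*
 * i830 batch start: first evict invalid PTE TLB entries with a colour blit
 * into the scratch area, then (for unpinned batches) copy the batch into that
 * stable scratch bo and execute the copy instead, so the CS never trips over
 * its TLB invalidation bug.
 */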
int i830_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
        u32 *cs, cs_offset =
                intel_gt_scratch_offset(rq->engine->gt,
                                        INTEL_GT_SCRATCH_FIELD_DEFAULT);

        GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Evict the invalid PTE TLBs */
        *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
        *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
        *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
        *cs++ = cs_offset;
        *cs++ = 0xdeadbeef;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
                if (len > I830_BATCH_LIMIT)
                        return -ENOSPC;

                cs = intel_ring_begin(rq, 6 + 2);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                /*
                 * Blit the batch (which now has all relocs applied) to the
                 * stable batch scratch bo area (so that the CS never
                 * stumbles over its tlb invalidation bug) ...
                 */
                *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
                *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
                *cs++ = cs_offset;
                *cs++ = 4096;
                *cs++ = offset;

                *cs++ = MI_FLUSH;
                *cs++ = MI_NOOP;
                intel_ring_advance(rq, cs);

                /* ... and execute it. */
                offset = cs_offset;
        }

        if (!(dispatch_flags & I915_DISPATCH_SECURE))
                offset |= MI_BATCH_NON_SECURE;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

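/*
 * Plain MI_BATCH_BUFFER_START from the GTT; the non-secure bit is carried in
 * the batch offset.
 */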
int gen3_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
        u32 *cs;

        if (!(dispatch_flags & I915_DISPATCH_SECURE))
                offset |= MI_BATCH_NON_SECURE;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

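/*
 * On gen4+ the non-secure indication moves out of the batch offset and into
 * the MI_BATCH_BUFFER_START command itself (MI_BATCH_NON_SECURE_I965).
 */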
int gen4_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 length,
                       unsigned int dispatch_flags)
{
        u32 security;
        u32 *cs;

        security = MI_BATCH_NON_SECURE_I965;
        if (dispatch_flags & I915_DISPATCH_SECURE)
                security = 0;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

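/*
 * gen2 programs GEN2_IMR through the 16-bit uncore accessors; the posting
 * read flushes the enable before the interrupt is relied upon.
 */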
void gen2_irq_enable(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        i915->irq_mask &= ~engine->irq_enable_mask;
        intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
        ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        i915->irq_mask |= engine->irq_enable_mask;
        intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

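/* gen3 uses the same GEN2_IMR register, but with full 32-bit accesses. */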
void gen3_irq_enable(struct intel_engine_cs *engine)
{
        engine->i915->irq_mask &= ~engine->irq_enable_mask;
        intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
        intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
        engine->i915->irq_mask |= engine->irq_enable_mask;
        intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

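/* gen5 routes engine interrupt masking through the GT irq helpers. */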
void gen5_irq_enable(struct intel_engine_cs *engine)
{
        gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
        gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}