// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

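/*
 * Emit a flush for gen2: a leading MI_FLUSH (with MI_READ_FLUSH added when
 * invalidation is requested), a run of dummy MI_STORE_DWORD_INDEX writes to
 * the scratch slot of the HWSP interleaved with further flushes, seemingly
 * to let the flush settle, and a trailing flush.
 */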
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
        unsigned int num_store_dw = 12;
        u32 cmd, *cs;

        cmd = MI_FLUSH;
        if (mode & EMIT_INVALIDATE)
                cmd |= MI_READ_FLUSH;

        cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = cmd;
        while (num_store_dw--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
                *cs++ = 0;
                *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
        }
        *cs++ = cmd;

        intel_ring_advance(rq, cs);

        return 0;
}

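/*
 * Flush/invalidate the render engine caches; the domain notes below describe
 * what each MI_FLUSH variant is expected to cover.
 */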
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        u32 cmd, *cs;
        int i;

        /*
         * read/write caches:
         *
         * I915_GEM_DOMAIN_RENDER is always invalidated, but is
         * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
         * also flushed at 2d versus 3d pipeline switches.
         *
         * read-only caches:
         *
         * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
         * MI_READ_FLUSH is set, and is always flushed on 965.
         *
         * I915_GEM_DOMAIN_COMMAND may not exist?
         *
         * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
         * invalidated when MI_EXE_FLUSH is set.
         *
         * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
         * invalidated with every MI_FLUSH.
         *
         * TLBs:
         *
         * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
         * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
         * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
         * are flushed at any MI_FLUSH.
         */

        cmd = MI_FLUSH;
        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_EXE_FLUSH;
                if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
                        cmd |= MI_INVALIDATE_ISP;
        }

        i = 2;
        if (mode & EMIT_INVALIDATE)
                i += 20;

        cs = intel_ring_begin(rq, i);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = cmd;

        /*
         * A random delay to let the CS invalidate take effect? Without this
         * delay, the GPU relocation path fails as the CS does not see
         * the updated contents. Just as important, if we apply the flushes
         * to the EMIT_FLUSH branch (i.e. immediately after the relocation
         * write and before the invalidate on the next batch), the relocations
         * still fail. This implies that there is a delay following invalidation
         * that is required to reset the caches as opposed to a delay to
         * ensure the memory is written.
         */
        if (mode & EMIT_INVALIDATE) {
                *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
                *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                                INTEL_GT_SCRATCH_FIELD_DEFAULT) |
                        PIPE_CONTROL_GLOBAL_GTT;
                *cs++ = 0;
                *cs++ = 0;

                for (i = 0; i < 12; i++)
                        *cs++ = MI_FLUSH;

                *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
                *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                                INTEL_GT_SCRATCH_FIELD_DEFAULT) |
                        PIPE_CONTROL_GLOBAL_GTT;
                *cs++ = 0;
                *cs++ = 0;
        }

        *cs++ = cmd;

        intel_ring_advance(rq, cs);

        return 0;
}

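/* The gen4 video engine only needs a plain MI_FLUSH, padded with a NOOP. */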
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_FLUSH;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        return 0;
}

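/*
 * Common breadcrumb emission: an MI_FLUSH, "flush" dummy seqno writes to the
 * scratch slot, "post" seqno writes to the HWSP proper, then a user interrupt.
 */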
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
                                   int flush, int post)
{
        GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
        GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

        *cs++ = MI_FLUSH;

        while (flush--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
                *cs++ = rq->fence.seqno;
        }

        while (post--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SEQNO_ADDR;
                *cs++ = rq->fence.seqno;
        }

        *cs++ = MI_USER_INTERRUPT;

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return cs;
}

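/* gen3 and gen5 differ only in how many dummy flush writes they emit. */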
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just a userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
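/*
 * i830 batch start: first evict invalid PTE TLB entries with a colour blit
 * into the scratch area, then (for unpinned batches) copy the batch into that
 * stable scratch bo and execute the copy instead, so the CS never trips over
 * its TLB invalidation bug.
 */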
int i830_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
        u32 *cs, cs_offset =
                intel_gt_scratch_offset(rq->engine->gt,
                                        INTEL_GT_SCRATCH_FIELD_DEFAULT);

        GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Evict the invalid PTE TLBs */
        *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
        *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
        *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
        *cs++ = cs_offset;
        *cs++ = 0xdeadbeef;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
                if (len > I830_BATCH_LIMIT)
                        return -ENOSPC;

                cs = intel_ring_begin(rq, 6 + 2);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                /*
                 * Blit the batch (which now has all relocs applied) to the
                 * stable batch scratch bo area (so that the CS never
                 * stumbles over its tlb invalidation bug) ...
                 */
                *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
                *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
                *cs++ = cs_offset;
                *cs++ = 4096;
                *cs++ = offset;

                *cs++ = MI_FLUSH;
                *cs++ = MI_NOOP;
                intel_ring_advance(rq, cs);

                /* ... and execute it. */
                offset = cs_offset;
        }

        if (!(dispatch_flags & I915_DISPATCH_SECURE))
                offset |= MI_BATCH_NON_SECURE;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

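/*
 * Plain MI_BATCH_BUFFER_START from the GTT; the non-secure bit is carried in
 * the batch offset.
 */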
int gen3_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
        u32 *cs;

        if (!(dispatch_flags & I915_DISPATCH_SECURE))
                offset |= MI_BATCH_NON_SECURE;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

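/*
 * On gen4+ the non-secure indication moves out of the batch offset and into
 * the MI_BATCH_BUFFER_START command itself (MI_BATCH_NON_SECURE_I965).
 */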
int gen4_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 length,
                       unsigned int dispatch_flags)
{
        u32 security;
        u32 *cs;

        security = MI_BATCH_NON_SECURE_I965;
        if (dispatch_flags & I915_DISPATCH_SECURE)
                security = 0;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

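/*
 * gen2 programs GEN2_IMR through the 16-bit uncore accessors; the posting
 * read flushes the enable before the interrupt is relied upon.
 */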
void gen2_irq_enable(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        i915->irq_mask &= ~engine->irq_enable_mask;
        intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
        ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        i915->irq_mask |= engine->irq_enable_mask;
        intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

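/* gen3 uses the same GEN2_IMR register, but with full 32-bit accesses. */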
void gen3_irq_enable(struct intel_engine_cs *engine)
{
        engine->i915->irq_mask &= ~engine->irq_enable_mask;
        intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
        intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
        engine->i915->irq_mask |= engine->irq_enable_mask;
        intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

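/* gen5 routes engine interrupt masking through the GT irq helpers. */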
void gen5_irq_enable(struct intel_engine_cs *engine)
{
        gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
        gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}