1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before), we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time but is instead kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted, but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first, then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
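 *
 * As an illustrative sketch only (hypothetical helper names, not the
 * driver's actual code), that coalescing step amounts to::
 *
 *      first = head_of_queue();
 *      second = next_request(first);
 *      while (second && same_context_id(first, second)) {
 *              first = second;
 *              second = next_request(second);
 *      }
 *      submit_elsp_pair(first, second);
 *
 * Advancing "first" over requests that share its context ID is the
 * "discarding" described above: submitting the later request's tail runs
 * the earlier ones as well, and "second" may legitimately end up NULL.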
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150 #include "shmem_utils.h"
151
152 #define RING_EXECLIST_QFULL             (1 << 0x2)
153 #define RING_EXECLIST1_VALID            (1 << 0x3)
154 #define RING_EXECLIST0_VALID            (1 << 0x4)
155 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
156 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
157 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
158
159 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
160 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
161 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
162 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
163 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
164 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
165
166 #define GEN8_CTX_STATUS_COMPLETED_MASK \
167          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
168
169 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
170
171 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
172 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
173 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
174 #define GEN12_IDLE_CTX_ID               0x7FF
175 #define GEN12_CSB_CTX_VALID(csb_dw) \
176         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
177
178 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
179 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
180
181 struct virtual_engine {
182         struct intel_engine_cs base;
183         struct intel_context context;
184
185         /*
186          * We allow only a single request through the virtual engine at a time
187          * (each request in the timeline waits for the completion fence of
188          * the previous before being submitted). By restricting ourselves to
189          * only submitting a single request, each request is placed on to a
190  * physical engine to maximise load spreading (by virtue of the late greedy
191          * scheduling -- each real engine takes the next available request
192          * upon idling).
193          */
194         struct i915_request *request;
195
196         /*
197          * We keep a rbtree of available virtual engines inside each physical
198          * engine, sorted by priority. Here we preallocate the nodes we need
199          * for the virtual engine, indexed by physical_engine->id.
200          */
201         struct ve_node {
202                 struct rb_node rb;
203                 int prio;
204         } nodes[I915_NUM_ENGINES];
205
206         /*
207  * Keep track of bonded pairs -- restrictions upon our selection
208          * of physical engines any particular request may be submitted to.
209          * If we receive a submit-fence from a master engine, we will only
210          * use one of sibling_mask physical engines.
211          */
212         struct ve_bond {
213                 const struct intel_engine_cs *master;
214                 intel_engine_mask_t sibling_mask;
215         } *bonds;
216         unsigned int num_bonds;
217
218         /* And finally, which physical engines this virtual engine maps onto. */
219         unsigned int num_siblings;
220         struct intel_engine_cs *siblings[];
221 };
222
223 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
224 {
225         GEM_BUG_ON(!intel_engine_is_virtual(engine));
226         return container_of(engine, struct virtual_engine, base);
227 }
228
229 static int __execlists_context_alloc(struct intel_context *ce,
230                                      struct intel_engine_cs *engine);
231
232 static void execlists_init_reg_state(u32 *reg_state,
233                                      const struct intel_context *ce,
234                                      const struct intel_engine_cs *engine,
235                                      const struct intel_ring *ring,
236                                      bool close);
237 static void
238 __execlists_update_reg_state(const struct intel_context *ce,
239                              const struct intel_engine_cs *engine,
240                              u32 head);
241
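/*
 * The lrc_ring_*() helpers below return the dword index within the logical
 * ring context image at which the (register offset, value) pair for the
 * named register is loaded, or -1 if the register is not present in the
 * image for this engine/platform. Callers poke regs[x + 1] to adjust the
 * saved value.
 */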
242 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
243 {
244         if (INTEL_GEN(engine->i915) >= 12)
245                 return 0x60;
246         else if (INTEL_GEN(engine->i915) >= 9)
247                 return 0x54;
248         else if (engine->class == RENDER_CLASS)
249                 return 0x58;
250         else
251                 return -1;
252 }
253
254 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
255 {
256         if (INTEL_GEN(engine->i915) >= 12)
257                 return 0x74;
258         else if (INTEL_GEN(engine->i915) >= 9)
259                 return 0x68;
260         else if (engine->class == RENDER_CLASS)
261                 return 0xd8;
262         else
263                 return -1;
264 }
265
266 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
267 {
268         if (INTEL_GEN(engine->i915) >= 12)
269                 return 0x12;
270         else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
271                 return 0x18;
272         else
273                 return -1;
274 }
275
276 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
277 {
278         int x;
279
280         x = lrc_ring_wa_bb_per_ctx(engine);
281         if (x < 0)
282                 return x;
283
284         return x + 2;
285 }
286
287 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
288 {
289         int x;
290
291         x = lrc_ring_indirect_ptr(engine);
292         if (x < 0)
293                 return x;
294
295         return x + 2;
296 }
297
298 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
299 {
300         if (engine->class != RENDER_CLASS)
301                 return -1;
302
303         if (INTEL_GEN(engine->i915) >= 12)
304                 return 0xb6;
305         else if (INTEL_GEN(engine->i915) >= 11)
306                 return 0xaa;
307         else
308                 return -1;
309 }
310
311 static u32
312 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
313 {
314         switch (INTEL_GEN(engine->i915)) {
315         default:
316                 MISSING_CASE(INTEL_GEN(engine->i915));
317                 fallthrough;
318         case 12:
319                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
320         case 11:
321                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322         case 10:
323                 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324         case 9:
325                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326         case 8:
327                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328         }
329 }
330
331 static void
332 lrc_ring_setup_indirect_ctx(u32 *regs,
333                             const struct intel_engine_cs *engine,
334                             u32 ctx_bb_ggtt_addr,
335                             u32 size)
336 {
337         GEM_BUG_ON(!size);
338         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
339         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
340         regs[lrc_ring_indirect_ptr(engine) + 1] =
341                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
342
343         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
344         regs[lrc_ring_indirect_offset(engine) + 1] =
345                 lrc_ring_indirect_offset_default(engine) << 6;
346 }
347
348 static u32 intel_context_get_runtime(const struct intel_context *ce)
349 {
350         /*
351          * We can use either ppHWSP[16] which is recorded before the context
352          * switch (and so excludes the cost of context switches) or use the
353          * value from the context image itself, which is saved/restored earlier
354          * and so includes the cost of the save.
355          */
356         return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
357 }
358
359 static void mark_eio(struct i915_request *rq)
360 {
361         if (i915_request_completed(rq))
362                 return;
363
364         GEM_BUG_ON(i915_request_signaled(rq));
365
366         i915_request_set_error_once(rq, -EIO);
367         i915_request_mark_complete(rq);
368 }
369
370 static struct i915_request *
371 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
372 {
373         struct i915_request *active = rq;
374
375         rcu_read_lock();
376         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
377                 if (i915_request_completed(rq))
378                         break;
379
380                 active = rq;
381         }
382         rcu_read_unlock();
383
384         return active;
385 }
386
387 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
388 {
389         return (i915_ggtt_offset(engine->status_page.vma) +
390                 I915_GEM_HWS_PREEMPT_ADDR);
391 }
392
393 static inline void
394 ring_set_paused(const struct intel_engine_cs *engine, int state)
395 {
396         /*
397          * We inspect HWS_PREEMPT with a semaphore inside
398          * engine->emit_fini_breadcrumb. If the dword is true,
399          * the ring is paused as the semaphore will busywait
400          * until the dword is false.
401          */
402         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
403         if (state)
404                 wmb();
405 }
406
407 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
408 {
409         return rb_entry(rb, struct i915_priolist, node);
410 }
411
412 static inline int rq_prio(const struct i915_request *rq)
413 {
414         return READ_ONCE(rq->sched.attr.priority);
415 }
416
417 static int effective_prio(const struct i915_request *rq)
418 {
419         int prio = rq_prio(rq);
420
421         /*
422          * If this request is special and must not be interrupted at any
423          * cost, so be it. Note we are only checking the most recent request
424          * in the context and so may be masking an earlier vip request. It
425          * is hoped that under the conditions where nopreempt is used, this
426          * will not matter (i.e. all requests to that context will be
427          * nopreempt for as long as desired).
428          */
429         if (i915_request_has_nopreempt(rq))
430                 prio = I915_PRIORITY_UNPREEMPTABLE;
431
432         return prio;
433 }
434
435 static int queue_prio(const struct intel_engine_execlists *execlists)
436 {
437         struct i915_priolist *p;
438         struct rb_node *rb;
439
440         rb = rb_first_cached(&execlists->queue);
441         if (!rb)
442                 return INT_MIN;
443
444         /*
445  * As the priolist[] is inverted, with the highest priority in [0],
446  * we have to flip the index value to recover the priority.
447          */
448         p = to_priolist(rb);
449         if (!I915_USER_PRIORITY_SHIFT)
450                 return p->priority;
451
452         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
453 }
454
455 static inline bool need_preempt(const struct intel_engine_cs *engine,
456                                 const struct i915_request *rq,
457                                 struct rb_node *rb)
458 {
459         int last_prio;
460
461         if (!intel_engine_has_semaphores(engine))
462                 return false;
463
464         /*
465          * Check if the current priority hint merits a preemption attempt.
466          *
467          * We record the highest value priority we saw during rescheduling
468          * prior to this dequeue, therefore we know that if it is strictly
469  * less than the current tail of ELSP[0], we do not need to force
470          * a preempt-to-idle cycle.
471          *
472          * However, the priority hint is a mere hint that we may need to
473          * preempt. If that hint is stale or we may be trying to preempt
474          * ourselves, ignore the request.
475          *
476          * More naturally we would write
477          *      prio >= max(0, last);
478          * except that we wish to prevent triggering preemption at the same
479          * priority level: the task that is running should remain running
480          * to preserve FIFO ordering of dependencies.
481          */
482         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
483         if (engine->execlists.queue_priority_hint <= last_prio)
484                 return false;
485
486         /*
487  * Check against the first request in ELSP[1]; it will, thanks to the
488  * power of PI, be the highest priority request of that context.
489          */
490         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
491             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
492                 return true;
493
494         if (rb) {
495                 struct virtual_engine *ve =
496                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
497                 bool preempt = false;
498
499                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
500                         struct i915_request *next;
501
502                         rcu_read_lock();
503                         next = READ_ONCE(ve->request);
504                         if (next)
505                                 preempt = rq_prio(next) > last_prio;
506                         rcu_read_unlock();
507                 }
508
509                 if (preempt)
510                         return preempt;
511         }
512
513         /*
514          * If the inflight context did not trigger the preemption, then maybe
515          * it was the set of queued requests? Pick the highest priority in
516          * the queue (the first active priolist) and see if it deserves to be
517          * running instead of ELSP[0].
518          *
519  * The highest priority request in the queue cannot be in either
520  * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
521  * context, its priority would not exceed ELSP[0] aka last_prio.
522          */
523         return queue_prio(&engine->execlists) > last_prio;
524 }
525
526 __maybe_unused static inline bool
527 assert_priority_queue(const struct i915_request *prev,
528                       const struct i915_request *next)
529 {
530         /*
531          * Without preemption, the prev may refer to the still active element
532          * which we refuse to let go.
533          *
534          * Even with preemption, there are times when we think it is better not
535          * to preempt and leave an ostensibly lower priority request in flight.
536          */
537         if (i915_request_is_active(prev))
538                 return true;
539
540         return rq_prio(prev) >= rq_prio(next);
541 }
542
543 /*
544  * The context descriptor encodes various attributes of a context,
545  * including its GTT address and some flags. Because it's fairly
546  * expensive to calculate, we'll just do it once and cache the result,
547  * which remains valid until the context is unpinned.
548  *
549  * This is what a descriptor looks like, from LSB to MSB::
550  *
551  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
552  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
553  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
554  *      bits 53-54:    mbz, reserved for use by hardware
555  *      bits 55-63:    group ID, currently unused and set to 0
556  *
557  * Starting from Gen11, the upper dword of the descriptor has a new format:
558  *
559  *      bits 32-36:    reserved
560  *      bits 37-47:    SW context ID
561  *      bits 48-53:    engine instance
562  *      bit 54:        mbz, reserved for use by hardware
563  *      bits 55-60:    SW counter
564  *      bits 61-63:    engine class
565  *
566  * engine info, SW context ID and SW counter need to form a unique number
567  * (Context ID) per lrc.
568  */
569 static u32
570 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
571 {
572         u32 desc;
573
574         desc = INTEL_LEGACY_32B_CONTEXT;
575         if (i915_vm_is_4lvl(ce->vm))
576                 desc = INTEL_LEGACY_64B_CONTEXT;
577         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
578
579         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
580         if (IS_GEN(engine->i915, 8))
581                 desc |= GEN8_CTX_L3LLC_COHERENT;
582
583         return i915_ggtt_offset(ce->state) | desc;
584 }
585
586 static inline unsigned int dword_in_page(void *addr)
587 {
588         return offset_in_page(addr) / sizeof(u32);
589 }
590
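/*
 * The gen*_offsets[] tables below describe the register state layout of the
 * default context image using a compact byte encoding, decoded by
 * set_offsets():
 *
 *   NOP(x)          skip x dwords of the image
 *   LRI(count, f)   emit an MI_LOAD_REGISTER_IMM header for count registers,
 *                   with POSTED selecting MI_LRI_FORCE_POSTED
 *   REG(x)/REG16(x) a register offset relative to engine->mmio_base, encoded
 *                   in 7-bit chunks
 *   END(total)      terminate the table, recording the total size of the
 *                   state in dwords so the tail can be cleared to MI_NOOPs
 */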
591 static void set_offsets(u32 *regs,
592                         const u8 *data,
593                         const struct intel_engine_cs *engine,
594                         bool clear)
595 #define NOP(x) (BIT(7) | (x))
596 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
597 #define POSTED BIT(0)
598 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
599 #define REG16(x) \
600         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
601         (((x) >> 2) & 0x7f)
602 #define END(total_state_size) 0, (total_state_size)
603 {
604         const u32 base = engine->mmio_base;
605
606         while (*data) {
607                 u8 count, flags;
608
609                 if (*data & BIT(7)) { /* skip */
610                         count = *data++ & ~BIT(7);
611                         if (clear)
612                                 memset32(regs, MI_NOOP, count);
613                         regs += count;
614                         continue;
615                 }
616
617                 count = *data & 0x3f;
618                 flags = *data >> 6;
619                 data++;
620
621                 *regs = MI_LOAD_REGISTER_IMM(count);
622                 if (flags & POSTED)
623                         *regs |= MI_LRI_FORCE_POSTED;
624                 if (INTEL_GEN(engine->i915) >= 11)
625                         *regs |= MI_LRI_LRM_CS_MMIO;
626                 regs++;
627
628                 GEM_BUG_ON(!count);
629                 do {
630                         u32 offset = 0;
631                         u8 v;
632
633                         do {
634                                 v = *data++;
635                                 offset <<= 7;
636                                 offset |= v & ~BIT(7);
637                         } while (v & BIT(7));
638
639                         regs[0] = base + (offset << 2);
640                         if (clear)
641                                 regs[1] = 0;
642                         regs += 2;
643                 } while (--count);
644         }
645
646         if (clear) {
647                 u8 count = *++data;
648
649                 /* Clear past the tail for HW access */
650                 GEM_BUG_ON(dword_in_page(regs) > count);
651                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
652
653                 /* Close the batch; used mainly by live_lrc_layout() */
654                 *regs = MI_BATCH_BUFFER_END;
655                 if (INTEL_GEN(engine->i915) >= 10)
656                         *regs |= BIT(0);
657         }
658 }
659
660 static const u8 gen8_xcs_offsets[] = {
661         NOP(1),
662         LRI(11, 0),
663         REG16(0x244),
664         REG(0x034),
665         REG(0x030),
666         REG(0x038),
667         REG(0x03c),
668         REG(0x168),
669         REG(0x140),
670         REG(0x110),
671         REG(0x11c),
672         REG(0x114),
673         REG(0x118),
674
675         NOP(9),
676         LRI(9, 0),
677         REG16(0x3a8),
678         REG16(0x28c),
679         REG16(0x288),
680         REG16(0x284),
681         REG16(0x280),
682         REG16(0x27c),
683         REG16(0x278),
684         REG16(0x274),
685         REG16(0x270),
686
687         NOP(13),
688         LRI(2, 0),
689         REG16(0x200),
690         REG(0x028),
691
692         END(80)
693 };
694
695 static const u8 gen9_xcs_offsets[] = {
696         NOP(1),
697         LRI(14, POSTED),
698         REG16(0x244),
699         REG(0x034),
700         REG(0x030),
701         REG(0x038),
702         REG(0x03c),
703         REG(0x168),
704         REG(0x140),
705         REG(0x110),
706         REG(0x11c),
707         REG(0x114),
708         REG(0x118),
709         REG(0x1c0),
710         REG(0x1c4),
711         REG(0x1c8),
712
713         NOP(3),
714         LRI(9, POSTED),
715         REG16(0x3a8),
716         REG16(0x28c),
717         REG16(0x288),
718         REG16(0x284),
719         REG16(0x280),
720         REG16(0x27c),
721         REG16(0x278),
722         REG16(0x274),
723         REG16(0x270),
724
725         NOP(13),
726         LRI(1, POSTED),
727         REG16(0x200),
728
729         NOP(13),
730         LRI(44, POSTED),
731         REG(0x028),
732         REG(0x09c),
733         REG(0x0c0),
734         REG(0x178),
735         REG(0x17c),
736         REG16(0x358),
737         REG(0x170),
738         REG(0x150),
739         REG(0x154),
740         REG(0x158),
741         REG16(0x41c),
742         REG16(0x600),
743         REG16(0x604),
744         REG16(0x608),
745         REG16(0x60c),
746         REG16(0x610),
747         REG16(0x614),
748         REG16(0x618),
749         REG16(0x61c),
750         REG16(0x620),
751         REG16(0x624),
752         REG16(0x628),
753         REG16(0x62c),
754         REG16(0x630),
755         REG16(0x634),
756         REG16(0x638),
757         REG16(0x63c),
758         REG16(0x640),
759         REG16(0x644),
760         REG16(0x648),
761         REG16(0x64c),
762         REG16(0x650),
763         REG16(0x654),
764         REG16(0x658),
765         REG16(0x65c),
766         REG16(0x660),
767         REG16(0x664),
768         REG16(0x668),
769         REG16(0x66c),
770         REG16(0x670),
771         REG16(0x674),
772         REG16(0x678),
773         REG16(0x67c),
774         REG(0x068),
775
776         END(176)
777 };
778
779 static const u8 gen12_xcs_offsets[] = {
780         NOP(1),
781         LRI(13, POSTED),
782         REG16(0x244),
783         REG(0x034),
784         REG(0x030),
785         REG(0x038),
786         REG(0x03c),
787         REG(0x168),
788         REG(0x140),
789         REG(0x110),
790         REG(0x1c0),
791         REG(0x1c4),
792         REG(0x1c8),
793         REG(0x180),
794         REG16(0x2b4),
795
796         NOP(5),
797         LRI(9, POSTED),
798         REG16(0x3a8),
799         REG16(0x28c),
800         REG16(0x288),
801         REG16(0x284),
802         REG16(0x280),
803         REG16(0x27c),
804         REG16(0x278),
805         REG16(0x274),
806         REG16(0x270),
807
808         END(80)
809 };
810
811 static const u8 gen8_rcs_offsets[] = {
812         NOP(1),
813         LRI(14, POSTED),
814         REG16(0x244),
815         REG(0x034),
816         REG(0x030),
817         REG(0x038),
818         REG(0x03c),
819         REG(0x168),
820         REG(0x140),
821         REG(0x110),
822         REG(0x11c),
823         REG(0x114),
824         REG(0x118),
825         REG(0x1c0),
826         REG(0x1c4),
827         REG(0x1c8),
828
829         NOP(3),
830         LRI(9, POSTED),
831         REG16(0x3a8),
832         REG16(0x28c),
833         REG16(0x288),
834         REG16(0x284),
835         REG16(0x280),
836         REG16(0x27c),
837         REG16(0x278),
838         REG16(0x274),
839         REG16(0x270),
840
841         NOP(13),
842         LRI(1, 0),
843         REG(0x0c8),
844
845         END(80)
846 };
847
848 static const u8 gen9_rcs_offsets[] = {
849         NOP(1),
850         LRI(14, POSTED),
851         REG16(0x244),
852         REG(0x34),
853         REG(0x30),
854         REG(0x38),
855         REG(0x3c),
856         REG(0x168),
857         REG(0x140),
858         REG(0x110),
859         REG(0x11c),
860         REG(0x114),
861         REG(0x118),
862         REG(0x1c0),
863         REG(0x1c4),
864         REG(0x1c8),
865
866         NOP(3),
867         LRI(9, POSTED),
868         REG16(0x3a8),
869         REG16(0x28c),
870         REG16(0x288),
871         REG16(0x284),
872         REG16(0x280),
873         REG16(0x27c),
874         REG16(0x278),
875         REG16(0x274),
876         REG16(0x270),
877
878         NOP(13),
879         LRI(1, 0),
880         REG(0xc8),
881
882         NOP(13),
883         LRI(44, POSTED),
884         REG(0x28),
885         REG(0x9c),
886         REG(0xc0),
887         REG(0x178),
888         REG(0x17c),
889         REG16(0x358),
890         REG(0x170),
891         REG(0x150),
892         REG(0x154),
893         REG(0x158),
894         REG16(0x41c),
895         REG16(0x600),
896         REG16(0x604),
897         REG16(0x608),
898         REG16(0x60c),
899         REG16(0x610),
900         REG16(0x614),
901         REG16(0x618),
902         REG16(0x61c),
903         REG16(0x620),
904         REG16(0x624),
905         REG16(0x628),
906         REG16(0x62c),
907         REG16(0x630),
908         REG16(0x634),
909         REG16(0x638),
910         REG16(0x63c),
911         REG16(0x640),
912         REG16(0x644),
913         REG16(0x648),
914         REG16(0x64c),
915         REG16(0x650),
916         REG16(0x654),
917         REG16(0x658),
918         REG16(0x65c),
919         REG16(0x660),
920         REG16(0x664),
921         REG16(0x668),
922         REG16(0x66c),
923         REG16(0x670),
924         REG16(0x674),
925         REG16(0x678),
926         REG16(0x67c),
927         REG(0x68),
928
929         END(176)
930 };
931
932 static const u8 gen11_rcs_offsets[] = {
933         NOP(1),
934         LRI(15, POSTED),
935         REG16(0x244),
936         REG(0x034),
937         REG(0x030),
938         REG(0x038),
939         REG(0x03c),
940         REG(0x168),
941         REG(0x140),
942         REG(0x110),
943         REG(0x11c),
944         REG(0x114),
945         REG(0x118),
946         REG(0x1c0),
947         REG(0x1c4),
948         REG(0x1c8),
949         REG(0x180),
950
951         NOP(1),
952         LRI(9, POSTED),
953         REG16(0x3a8),
954         REG16(0x28c),
955         REG16(0x288),
956         REG16(0x284),
957         REG16(0x280),
958         REG16(0x27c),
959         REG16(0x278),
960         REG16(0x274),
961         REG16(0x270),
962
963         LRI(1, POSTED),
964         REG(0x1b0),
965
966         NOP(10),
967         LRI(1, 0),
968         REG(0x0c8),
969
970         END(80)
971 };
972
973 static const u8 gen12_rcs_offsets[] = {
974         NOP(1),
975         LRI(13, POSTED),
976         REG16(0x244),
977         REG(0x034),
978         REG(0x030),
979         REG(0x038),
980         REG(0x03c),
981         REG(0x168),
982         REG(0x140),
983         REG(0x110),
984         REG(0x1c0),
985         REG(0x1c4),
986         REG(0x1c8),
987         REG(0x180),
988         REG16(0x2b4),
989
990         NOP(5),
991         LRI(9, POSTED),
992         REG16(0x3a8),
993         REG16(0x28c),
994         REG16(0x288),
995         REG16(0x284),
996         REG16(0x280),
997         REG16(0x27c),
998         REG16(0x278),
999         REG16(0x274),
1000         REG16(0x270),
1001
1002         LRI(3, POSTED),
1003         REG(0x1b0),
1004         REG16(0x5a8),
1005         REG16(0x5ac),
1006
1007         NOP(6),
1008         LRI(1, 0),
1009         REG(0x0c8),
1010         NOP(3 + 9 + 1),
1011
1012         LRI(51, POSTED),
1013         REG16(0x588),
1014         REG16(0x588),
1015         REG16(0x588),
1016         REG16(0x588),
1017         REG16(0x588),
1018         REG16(0x588),
1019         REG(0x028),
1020         REG(0x09c),
1021         REG(0x0c0),
1022         REG(0x178),
1023         REG(0x17c),
1024         REG16(0x358),
1025         REG(0x170),
1026         REG(0x150),
1027         REG(0x154),
1028         REG(0x158),
1029         REG16(0x41c),
1030         REG16(0x600),
1031         REG16(0x604),
1032         REG16(0x608),
1033         REG16(0x60c),
1034         REG16(0x610),
1035         REG16(0x614),
1036         REG16(0x618),
1037         REG16(0x61c),
1038         REG16(0x620),
1039         REG16(0x624),
1040         REG16(0x628),
1041         REG16(0x62c),
1042         REG16(0x630),
1043         REG16(0x634),
1044         REG16(0x638),
1045         REG16(0x63c),
1046         REG16(0x640),
1047         REG16(0x644),
1048         REG16(0x648),
1049         REG16(0x64c),
1050         REG16(0x650),
1051         REG16(0x654),
1052         REG16(0x658),
1053         REG16(0x65c),
1054         REG16(0x660),
1055         REG16(0x664),
1056         REG16(0x668),
1057         REG16(0x66c),
1058         REG16(0x670),
1059         REG16(0x674),
1060         REG16(0x678),
1061         REG16(0x67c),
1062         REG(0x068),
1063         REG(0x084),
1064         NOP(1),
1065
1066         END(192)
1067 };
1068
1069 #undef END
1070 #undef REG16
1071 #undef REG
1072 #undef LRI
1073 #undef NOP
1074
1075 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1076 {
1077         /*
1078          * The gen12+ lists only have the registers we program in the basic
1079          * default state. We rely on the context image using relative
1080          * addressing to automatic fixup the register state between the
1081          * physical engines for virtual engine.
1082          */
1083         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1084                    !intel_engine_has_relative_mmio(engine));
1085
1086         if (engine->class == RENDER_CLASS) {
1087                 if (INTEL_GEN(engine->i915) >= 12)
1088                         return gen12_rcs_offsets;
1089                 else if (INTEL_GEN(engine->i915) >= 11)
1090                         return gen11_rcs_offsets;
1091                 else if (INTEL_GEN(engine->i915) >= 9)
1092                         return gen9_rcs_offsets;
1093                 else
1094                         return gen8_rcs_offsets;
1095         } else {
1096                 if (INTEL_GEN(engine->i915) >= 12)
1097                         return gen12_xcs_offsets;
1098                 else if (INTEL_GEN(engine->i915) >= 9)
1099                         return gen9_xcs_offsets;
1100                 else
1101                         return gen8_xcs_offsets;
1102         }
1103 }
1104
1105 static struct i915_request *
1106 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1107 {
1108         struct i915_request *rq, *rn, *active = NULL;
1109         struct list_head *uninitialized_var(pl);
1110         int prio = I915_PRIORITY_INVALID;
1111
1112         lockdep_assert_held(&engine->active.lock);
1113
1114         list_for_each_entry_safe_reverse(rq, rn,
1115                                          &engine->active.requests,
1116                                          sched.link) {
1117                 if (i915_request_completed(rq))
1118                         continue; /* XXX */
1119
1120                 __i915_request_unsubmit(rq);
1121
1122                 /*
1123                  * Push the request back into the queue for later resubmission.
1124                  * If this request is not native to this physical engine (i.e.
1125                  * it came from a virtual source), push it back onto the virtual
1126                  * engine so that it can be moved across onto another physical
1127                  * engine as load dictates.
1128                  */
1129                 if (likely(rq->execution_mask == engine->mask)) {
1130                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1131                         if (rq_prio(rq) != prio) {
1132                                 prio = rq_prio(rq);
1133                                 pl = i915_sched_lookup_priolist(engine, prio);
1134                         }
1135                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1136
1137                         list_move(&rq->sched.link, pl);
1138                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1139
1140                         /* Check in case we roll back so far that we wrap [size/2] */
1141                         if (intel_ring_direction(rq->ring,
1142                                                  intel_ring_wrap(rq->ring,
1143                                                                  rq->tail),
1144                                                  rq->ring->tail) > 0)
1145                                 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1146
1147                         active = rq;
1148                 } else {
1149                         struct intel_engine_cs *owner = rq->context->engine;
1150
1151                         /*
1152                          * Decouple the virtual breadcrumb before moving it
1153                          * back to the virtual engine -- we don't want the
1154                          * request to complete in the background and try
1155                          * and cancel the breadcrumb on the virtual engine
1156                          * (instead of the old engine where it is linked)!
1157                          */
1158                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1159                                      &rq->fence.flags)) {
1160                                 spin_lock_nested(&rq->lock,
1161                                                  SINGLE_DEPTH_NESTING);
1162                                 i915_request_cancel_breadcrumb(rq);
1163                                 spin_unlock(&rq->lock);
1164                         }
1165                         WRITE_ONCE(rq->engine, owner);
1166                         owner->submit_request(rq);
1167                         active = NULL;
1168                 }
1169         }
1170
1171         return active;
1172 }
1173
1174 struct i915_request *
1175 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1176 {
1177         struct intel_engine_cs *engine =
1178                 container_of(execlists, typeof(*engine), execlists);
1179
1180         return __unwind_incomplete_requests(engine);
1181 }
1182
1183 static inline void
1184 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1185 {
1186         /*
1187          * Only used when GVT-g is enabled. When GVT-g is disabled,
1188          * the compiler should eliminate this function as dead code.
1189          */
1190         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1191                 return;
1192
1193         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1194                                    status, rq);
1195 }
1196
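/*
 * Engine busyness accounting: the first context scheduled in starts the
 * busy clock (stats.start), nested schedule-ins merely bump stats.active,
 * and the final schedule-out folds the elapsed time into stats.total.
 * The seqlock allows readers to sample a consistent start/total pair
 * without stalling submission.
 */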
1197 static void intel_engine_context_in(struct intel_engine_cs *engine)
1198 {
1199         unsigned long flags;
1200
1201         if (atomic_add_unless(&engine->stats.active, 1, 0))
1202                 return;
1203
1204         write_seqlock_irqsave(&engine->stats.lock, flags);
1205         if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1206                 engine->stats.start = ktime_get();
1207                 atomic_inc(&engine->stats.active);
1208         }
1209         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1210 }
1211
1212 static void intel_engine_context_out(struct intel_engine_cs *engine)
1213 {
1214         unsigned long flags;
1215
1216         GEM_BUG_ON(!atomic_read(&engine->stats.active));
1217
1218         if (atomic_add_unless(&engine->stats.active, -1, 1))
1219                 return;
1220
1221         write_seqlock_irqsave(&engine->stats.lock, flags);
1222         if (atomic_dec_and_test(&engine->stats.active)) {
1223                 engine->stats.total =
1224                         ktime_add(engine->stats.total,
1225                                   ktime_sub(ktime_get(), engine->stats.start));
1226         }
1227         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1228 }
1229
1230 static void
1231 execlists_check_context(const struct intel_context *ce,
1232                         const struct intel_engine_cs *engine)
1233 {
1234         const struct intel_ring *ring = ce->ring;
1235         u32 *regs = ce->lrc_reg_state;
1236         bool valid = true;
1237         int x;
1238
1239         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1240                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1241                        engine->name,
1242                        regs[CTX_RING_START],
1243                        i915_ggtt_offset(ring->vma));
1244                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1245                 valid = false;
1246         }
1247
1248         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1249             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1250                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1251                        engine->name,
1252                        regs[CTX_RING_CTL],
1253                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1254                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1255                 valid = false;
1256         }
1257
1258         x = lrc_ring_mi_mode(engine);
1259         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1260                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1261                        engine->name, regs[x + 1]);
1262                 regs[x + 1] &= ~STOP_RING;
1263                 regs[x + 1] |= STOP_RING << 16;
1264                 valid = false;
1265         }
1266
1267         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1268 }
1269
1270 static void restore_default_state(struct intel_context *ce,
1271                                   struct intel_engine_cs *engine)
1272 {
1273         u32 *regs;
1274
1275         regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1276         execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1277
1278         ce->runtime.last = intel_context_get_runtime(ce);
1279 }
1280
1281 static void reset_active(struct i915_request *rq,
1282                          struct intel_engine_cs *engine)
1283 {
1284         struct intel_context * const ce = rq->context;
1285         u32 head;
1286
1287         /*
1288          * The executing context has been cancelled. We want to prevent
1289          * further execution along this context and propagate the error on
1290          * to anything depending on its results.
1291          *
1292          * In __i915_request_submit(), we apply the -EIO and remove the
1293          * requests' payloads for any banned requests. But first, we must
1294          * rewind the context back to the start of the incomplete request so
1295          * that we do not jump back into the middle of the batch.
1296          *
1297          * We preserve the breadcrumbs and semaphores of the incomplete
1298          * requests so that inter-timeline dependencies (i.e other timelines)
1299          * remain correctly ordered. And we defer to __i915_request_submit()
1300          * so that all asynchronous waits are correctly handled.
1301          */
1302         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1303                      rq->fence.context, rq->fence.seqno);
1304
1305         /* On resubmission of the active request, payload will be scrubbed */
1306         if (i915_request_completed(rq))
1307                 head = rq->tail;
1308         else
1309                 head = active_request(ce->timeline, rq)->head;
1310         head = intel_ring_wrap(ce->ring, head);
1311
1312         /* Scrub the context image to prevent replaying the previous batch */
1313         restore_default_state(ce, engine);
1314         __execlists_update_reg_state(ce, engine, head);
1315
1316         /* We've switched away, so this should be a no-op, but intent matters */
1317         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1318 }
1319
1320 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1321 {
1322 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1323         ce->runtime.num_underflow += dt < 0;
1324         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1325 #endif
1326 }
1327
1328 static void intel_context_update_runtime(struct intel_context *ce)
1329 {
1330         u32 old;
1331         s32 dt;
1332
1333         if (intel_context_is_barrier(ce))
1334                 return;
1335
1336         old = ce->runtime.last;
1337         ce->runtime.last = intel_context_get_runtime(ce);
1338         dt = ce->runtime.last - old;
1339
1340         if (unlikely(dt <= 0)) {
1341                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1342                          old, ce->runtime.last, dt);
1343                 st_update_runtime_underflow(ce, dt);
1344                 return;
1345         }
1346
1347         ewma_runtime_add(&ce->runtime.avg, dt);
1348         ce->runtime.total += dt;
1349 }
1350
1351 static inline struct intel_engine_cs *
1352 __execlists_schedule_in(struct i915_request *rq)
1353 {
1354         struct intel_engine_cs * const engine = rq->engine;
1355         struct intel_context * const ce = rq->context;
1356
1357         intel_context_get(ce);
1358
1359         if (unlikely(intel_context_is_banned(ce)))
1360                 reset_active(rq, engine);
1361
1362         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1363                 execlists_check_context(ce, engine);
1364
1365         if (ce->tag) {
1366                 /* Use a fixed tag for OA and friends */
1367                 GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1368                 ce->lrc.ccid = ce->tag;
1369         } else {
1370                 /* We don't need a strict matching tag, just different values */
1371                 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1372
1373                 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1374                 clear_bit(tag - 1, &engine->context_tag);
1375                 ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1376
1377                 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1378         }
1379
1380         ce->lrc.ccid |= engine->execlists.ccid;
1381
1382         __intel_gt_pm_get(engine->gt);
1383         if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1384                 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1385         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1386         intel_engine_context_in(engine);
1387
1388         return engine;
1389 }
1390
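/*
 * ce->inflight packs the physical engine on which the context is currently
 * running together with a count, kept in the low pointer bits, of how many
 * ELSP slots it occupies. The first schedule-in installs the engine pointer;
 * subsequent schedule-ins of the same context merely bump the count.
 */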
1391 static inline struct i915_request *
1392 execlists_schedule_in(struct i915_request *rq, int idx)
1393 {
1394         struct intel_context * const ce = rq->context;
1395         struct intel_engine_cs *old;
1396
1397         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1398         trace_i915_request_in(rq, idx);
1399
1400         old = READ_ONCE(ce->inflight);
1401         do {
1402                 if (!old) {
1403                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1404                         break;
1405                 }
1406         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1407
1408         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1409         return i915_request_get(rq);
1410 }
1411
1412 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1413 {
1414         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1415         struct i915_request *next = READ_ONCE(ve->request);
1416
1417         if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1418                 tasklet_hi_schedule(&ve->base.execlists.tasklet);
1419 }
1420
1421 static inline void
1422 __execlists_schedule_out(struct i915_request *rq,
1423                          struct intel_engine_cs * const engine,
1424                          unsigned int ccid)
1425 {
1426         struct intel_context * const ce = rq->context;
1427
1428         /*
1429          * NB process_csb() is not under the engine->active.lock and hence
1430          * schedule_out can race with schedule_in, meaning that we should
1431          * refrain from doing non-trivial work here.
1432          */
1433
1434         /*
1435          * If we have just completed this context, the engine may now be
1436          * idle and we want to re-enter powersaving.
1437          */
1438         if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1439             i915_request_completed(rq))
1440                 intel_engine_add_retire(engine, ce->timeline);
1441
1442         ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1443         ccid &= GEN12_MAX_CONTEXT_HW_ID;
1444         if (ccid < BITS_PER_LONG) {
1445                 GEM_BUG_ON(ccid == 0);
1446                 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1447                 set_bit(ccid - 1, &engine->context_tag);
1448         }
1449
1450         intel_context_update_runtime(ce);
1451         intel_engine_context_out(engine);
1452         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1453         if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1454                 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1455         intel_gt_pm_put_async(engine->gt);
1456
1457         /*
1458          * If this is part of a virtual engine, its next request may
1459          * have been blocked waiting for access to the active context.
1460          * We have to kick all the siblings again in case we need to
1461          * switch (e.g. the next request is not runnable on this
1462          * engine). Hopefully, we will already have submitted the next
1463          * request before the tasklet runs and do not need to rebuild
1464          * each virtual tree and kick everyone again.
1465          */
1466         if (ce->engine != engine)
1467                 kick_siblings(rq, ce);
1468
1469         intel_context_put(ce);
1470 }
1471
1472 static inline void
1473 execlists_schedule_out(struct i915_request *rq)
1474 {
1475         struct intel_context * const ce = rq->context;
1476         struct intel_engine_cs *cur, *old;
1477         u32 ccid;
1478
1479         trace_i915_request_out(rq);
1480
1481         ccid = rq->context->lrc.ccid;
1482         old = READ_ONCE(ce->inflight);
1483         do
1484                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1485         while (!try_cmpxchg(&ce->inflight, &old, cur));
1486         if (!cur)
1487                 __execlists_schedule_out(rq, old, ccid);
1488
1489         i915_request_put(rq);
1490 }
1491
1492 static u64 execlists_update_context(struct i915_request *rq)
1493 {
1494         struct intel_context *ce = rq->context;
1495         u64 desc = ce->lrc.desc;
1496         u32 tail, prev;
1497
1498         /*
1499          * WaIdleLiteRestore:bdw,skl
1500          *
1501          * We should never submit the context with the same RING_TAIL twice
1502          * just in case we submit an empty ring, which confuses the HW.
1503          *
1504          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1505          * the normal request to be able to always advance the RING_TAIL on
1506          * subsequent resubmissions (for lite restore). Should that fail us,
1507          * and we try and submit the same tail again, force the context
1508          * reload.
1509          *
1510          * If we need to return to a preempted context, we need to skip the
1511          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1512          * HW has a tendency to ignore us rewinding the TAIL to the end of
1513          * an earlier request.
1514          */
1515         GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1516         prev = rq->ring->tail;
1517         tail = intel_ring_set_tail(rq->ring, rq->tail);
1518         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1519                 desc |= CTX_DESC_FORCE_RESTORE;
1520         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1521         rq->tail = rq->wa_tail;
1522
1523         /*
1524          * Make sure the context image is complete before we submit it to HW.
1525          *
1526          * Ostensibly, writes (including the WCB) should be flushed prior to
1527          * an uncached write such as our mmio register access. However, the
1528          * empirical evidence (esp. on Braswell) suggests that the WC write into
1529          * memory may not be visible to the HW prior to the completion of the UC
1530          * register write and that we may begin execution from the context
1531          * before its image is complete, leading to invalid PD chasing.
1532          */
1533         wmb();
1534
1535         ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1536         return desc;
1537 }
1538
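/*
 * On parts with the ELSQ control register (execlists->ctrl_reg set), each
 * port has its own pair of submit registers and nothing takes effect until
 * EL_CTRL_LOAD is written in execlists_submit_ports(). On older parts all
 * descriptors are funnelled through the single ELSP register, upper dword
 * first and then the lower dword.
 */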
1539 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1540 {
1541         if (execlists->ctrl_reg) {
1542                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1543                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1544         } else {
1545                 writel(upper_32_bits(desc), execlists->submit_reg);
1546                 writel(lower_32_bits(desc), execlists->submit_reg);
1547         }
1548 }
1549
1550 static __maybe_unused char *
1551 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1552 {
1553         if (!rq)
1554                 return "";
1555
1556         snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1557                  prefix,
1558                  rq->context->lrc.ccid,
1559                  rq->fence.context, rq->fence.seqno,
1560                  i915_request_completed(rq) ? "!" :
1561                  i915_request_started(rq) ? "*" :
1562                  "",
1563                  rq_prio(rq));
1564
1565         return buf;
1566 }
1567
1568 static __maybe_unused void
1569 trace_ports(const struct intel_engine_execlists *execlists,
1570             const char *msg,
1571             struct i915_request * const *ports)
1572 {
1573         const struct intel_engine_cs *engine =
1574                 container_of(execlists, typeof(*engine), execlists);
1575         char __maybe_unused p0[40], p1[40];
1576
1577         if (!ports[0])
1578                 return;
1579
1580         ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1581                      dump_port(p0, sizeof(p0), "", ports[0]),
1582                      dump_port(p1, sizeof(p1), ", ", ports[1]));
1583 }
1584
1585 static inline bool
1586 reset_in_progress(const struct intel_engine_execlists *execlists)
1587 {
1588         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1589 }
1590
1591 static __maybe_unused bool
1592 assert_pending_valid(const struct intel_engine_execlists *execlists,
1593                      const char *msg)
1594 {
1595         struct intel_engine_cs *engine =
1596                 container_of(execlists, typeof(*engine), execlists);
1597         struct i915_request * const *port, *rq;
1598         struct intel_context *ce = NULL;
1599         bool sentinel = false;
1600         u32 ccid = -1;
1601
1602         trace_ports(execlists, msg, execlists->pending);
1603
1604         /* We may be messing around with the lists during reset, lalala */
1605         if (reset_in_progress(execlists))
1606                 return true;
1607
1608         if (!execlists->pending[0]) {
1609                 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1610                               engine->name);
1611                 return false;
1612         }
1613
1614         if (execlists->pending[execlists_num_ports(execlists)]) {
1615                 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1616                               engine->name, execlists_num_ports(execlists));
1617                 return false;
1618         }
1619
1620         for (port = execlists->pending; (rq = *port); port++) {
1621                 unsigned long flags;
1622                 bool ok = true;
1623
1624                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1625                 GEM_BUG_ON(!i915_request_is_active(rq));
1626
1627                 if (ce == rq->context) {
1628                         GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1629                                       engine->name,
1630                                       ce->timeline->fence_context,
1631                                       port - execlists->pending);
1632                         return false;
1633                 }
1634                 ce = rq->context;
1635
1636                 if (ccid == ce->lrc.ccid) {
1637                         GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1638                                       engine->name,
1639                                       ccid, ce->timeline->fence_context,
1640                                       port - execlists->pending);
1641                         return false;
1642                 }
1643                 ccid = ce->lrc.ccid;
1644
1645                 /*
1646                  * Sentinels are supposed to be the last request so they flush
1647                  * the current execution off the HW. Check that they are the only
1648                  * request in the pending submission.
1649                  */
1650                 if (sentinel) {
1651                         GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1652                                       engine->name,
1653                                       ce->timeline->fence_context,
1654                                       port - execlists->pending);
1655                         return false;
1656                 }
1657                 sentinel = i915_request_has_sentinel(rq);
1658
1659                 /* Hold tightly onto the lock to prevent concurrent retires! */
1660                 if (!spin_trylock_irqsave(&rq->lock, flags))
1661                         continue;
1662
1663                 if (i915_request_completed(rq))
1664                         goto unlock;
1665
1666                 if (i915_active_is_idle(&ce->active) &&
1667                     !intel_context_is_barrier(ce)) {
1668                         GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1669                                       engine->name,
1670                                       ce->timeline->fence_context,
1671                                       port - execlists->pending);
1672                         ok = false;
1673                         goto unlock;
1674                 }
1675
1676                 if (!i915_vma_is_pinned(ce->state)) {
1677                         GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1678                                       engine->name,
1679                                       ce->timeline->fence_context,
1680                                       port - execlists->pending);
1681                         ok = false;
1682                         goto unlock;
1683                 }
1684
1685                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1686                         GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1687                                       engine->name,
1688                                       ce->timeline->fence_context,
1689                                       port - execlists->pending);
1690                         ok = false;
1691                         goto unlock;
1692                 }
1693
1694 unlock:
1695                 spin_unlock_irqrestore(&rq->lock, flags);
1696                 if (!ok)
1697                         return false;
1698         }
1699
1700         return ce;
1701 }
1702
1703 static void execlists_submit_ports(struct intel_engine_cs *engine)
1704 {
1705         struct intel_engine_execlists *execlists = &engine->execlists;
1706         unsigned int n;
1707
1708         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1709
1710         /*
1711          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1712          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1713          * not be relinquished until the device is idle (see
1714          * i915_gem_idle_work_handler()). As a precaution, we make sure
1715          * that all ELSP are drained i.e. we have processed the CSB,
1716          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1717          */
1718         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1719
1720         /*
1721          * ELSQ note: the submit queue is not cleared after being submitted
1722          * to the HW so we need to make sure we always clean it up. This is
1723          * currently ensured by the fact that we always write the same number
1724          * of elsq entries; keep this in mind before changing the loop below.
1725          */
1726         for (n = execlists_num_ports(execlists); n--; ) {
1727                 struct i915_request *rq = execlists->pending[n];
1728
1729                 write_desc(execlists,
1730                            rq ? execlists_update_context(rq) : 0,
1731                            n);
1732         }
1733
1734         /* we need to manually load the submit queue */
1735         if (execlists->ctrl_reg)
1736                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1737 }
1738
1739 static bool ctx_single_port_submission(const struct intel_context *ce)
1740 {
1741         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1742                 intel_context_force_single_submission(ce));
1743 }
1744
1745 static bool can_merge_ctx(const struct intel_context *prev,
1746                           const struct intel_context *next)
1747 {
1748         if (prev != next)
1749                 return false;
1750
1751         if (ctx_single_port_submission(prev))
1752                 return false;
1753
1754         return true;
1755 }
1756
1757 static unsigned long i915_request_flags(const struct i915_request *rq)
1758 {
1759         return READ_ONCE(rq->fence.flags);
1760 }
1761
1762 static bool can_merge_rq(const struct i915_request *prev,
1763                          const struct i915_request *next)
1764 {
1765         GEM_BUG_ON(prev == next);
1766         GEM_BUG_ON(!assert_priority_queue(prev, next));
1767
1768         /*
1769          * We do not submit known completed requests. Therefore if the next
1770          * request is already completed, we can pretend to merge it in
1771          * with the previous context (and we will skip updating the ELSP
1772          * and tracking). Thus hopefully keeping the ELSP full with active
1773          * contexts, despite the best efforts of preempt-to-busy to confuse
1774          * us.
1775          */
1776         if (i915_request_completed(next))
1777                 return true;
1778
1779         if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1780                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1781                       BIT(I915_FENCE_FLAG_SENTINEL))))
1782                 return false;
1783
1784         if (!can_merge_ctx(prev->context, next->context))
1785                 return false;
1786
1787         GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1788         return true;
1789 }
1790
1791 static void virtual_update_register_offsets(u32 *regs,
1792                                             struct intel_engine_cs *engine)
1793 {
1794         set_offsets(regs, reg_offsets(engine), engine, false);
1795 }
1796
1797 static bool virtual_matches(const struct virtual_engine *ve,
1798                             const struct i915_request *rq,
1799                             const struct intel_engine_cs *engine)
1800 {
1801         const struct intel_engine_cs *inflight;
1802
1803         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1804                 return false;
1805
1806         /*
1807          * We track when the HW has completed saving the context image
1808          * (i.e. when we have seen the final CS event switching out of
1809          * the context) and must not overwrite the context image before
1810          * then. This restricts us to only using the active engine
1811          * while the previous virtualized request is inflight (so
1812          * we reuse the register offsets). This is a very small
1813          * hysteresis on the greedy selection algorithm.
1814          */
1815         inflight = intel_context_inflight(&ve->context);
1816         if (inflight && inflight != engine)
1817                 return false;
1818
1819         return true;
1820 }
1821
1822 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
1823 {
1824         /*
1825          * All the outstanding signals on ve->siblings[0] must have
1826          * been completed, just pending the interrupt handler. As those
1827          * signals still refer to the old sibling (via rq->engine), we must
1828          * transfer those to the old irq_worker to keep our locking
1829          * consistent.
1830          */
1831         intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
1832 }
1833
1834 #define for_each_waiter(p__, rq__) \
1835         list_for_each_entry_lockless(p__, \
1836                                      &(rq__)->sched.waiters_list, \
1837                                      wait_link)
1838
1839 #define for_each_signaler(p__, rq__) \
1840         list_for_each_entry_rcu(p__, \
1841                                 &(rq__)->sched.signalers_list, \
1842                                 signal_link)
1843
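/*
 * defer_request() pushes the interrupted request to the back of its
 * priority level and, transitively, every ready waiter of equal priority
 * on this engine, so that other contexts get a turn while signalers are
 * still guaranteed to run before their waiters.
 */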
1844 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1845 {
1846         LIST_HEAD(list);
1847
1848         /*
1849          * We want to move the interrupted request to the back of
1850          * the round-robin list (i.e. its priority level), but
1851          * in doing so, we must then move all requests that were in
1852          * flight and were waiting for the interrupted request to
1853          * be run after it again.
1854          */
1855         do {
1856                 struct i915_dependency *p;
1857
1858                 GEM_BUG_ON(i915_request_is_active(rq));
1859                 list_move_tail(&rq->sched.link, pl);
1860
1861                 for_each_waiter(p, rq) {
1862                         struct i915_request *w =
1863                                 container_of(p->waiter, typeof(*w), sched);
1864
1865                         if (p->flags & I915_DEPENDENCY_WEAK)
1866                                 continue;
1867
1868                         /* Leave semaphores spinning on the other engines */
1869                         if (w->engine != rq->engine)
1870                                 continue;
1871
1872                         /* No waiter should start before its signaler */
1873                         GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1874                                    i915_request_started(w) &&
1875                                    !i915_request_completed(rq));
1876
1877                         GEM_BUG_ON(i915_request_is_active(w));
1878                         if (!i915_request_is_ready(w))
1879                                 continue;
1880
1881                         if (rq_prio(w) < rq_prio(rq))
1882                                 continue;
1883
1884                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1885                         list_move_tail(&w->sched.link, &list);
1886                 }
1887
1888                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1889         } while (rq);
1890 }
1891
1892 static void defer_active(struct intel_engine_cs *engine)
1893 {
1894         struct i915_request *rq;
1895
1896         rq = __unwind_incomplete_requests(engine);
1897         if (!rq)
1898                 return;
1899
1900         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1901 }
1902
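/*
 * need_timeslice() checks whether anything of equal or higher effective
 * priority is waiting for this engine -- either the next request on the
 * active list or a matching virtual-engine candidate -- in which case the
 * running context should only be allowed to run until its slice expires.
 */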
1903 static bool
1904 need_timeslice(const struct intel_engine_cs *engine,
1905                const struct i915_request *rq,
1906                const struct rb_node *rb)
1907 {
1908         int hint;
1909
1910         if (!intel_engine_has_timeslices(engine))
1911                 return false;
1912
1913         hint = engine->execlists.queue_priority_hint;
1914
1915         if (rb) {
1916                 const struct virtual_engine *ve =
1917                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1918                 const struct intel_engine_cs *inflight =
1919                         intel_context_inflight(&ve->context);
1920
1921                 if (!inflight || inflight == engine) {
1922                         struct i915_request *next;
1923
1924                         rcu_read_lock();
1925                         next = READ_ONCE(ve->request);
1926                         if (next)
1927                                 hint = max(hint, rq_prio(next));
1928                         rcu_read_unlock();
1929                 }
1930         }
1931
1932         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1933                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1934
1935         GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1936         return hint >= effective_prio(rq);
1937 }
1938
1939 static bool
1940 timeslice_yield(const struct intel_engine_execlists *el,
1941                 const struct i915_request *rq)
1942 {
1943         /*
1944          * Once bitten, forever smitten!
1945          *
1946          * If the active context ever busy-waited on a semaphore,
1947          * it will be treated as a hog until the end of its timeslice (i.e.
1948          * until it is scheduled out and replaced by a new submission,
1949          * possibly even its own lite-restore). The HW only sends an interrupt
1950          * on the first miss, and we do not know if that semaphore has been
1951          * signaled, or even if it is now stuck on another semaphore. Play
1952          * safe, yield if it might be stuck -- it will be given a fresh
1953          * timeslice in the near future.
1954          */
1955         return rq->context->lrc.ccid == READ_ONCE(el->yield);
1956 }
1957
1958 static bool
1959 timeslice_expired(const struct intel_engine_execlists *el,
1960                   const struct i915_request *rq)
1961 {
1962         return timer_expired(&el->timer) || timeslice_yield(el, rq);
1963 }
1964
1965 static int
1966 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1967 {
1968         if (list_is_last(&rq->sched.link, &engine->active.requests))
1969                 return engine->execlists.queue_priority_hint;
1970
1971         return rq_prio(list_next_entry(rq, sched.link));
1972 }
1973
1974 static inline unsigned long
1975 timeslice(const struct intel_engine_cs *engine)
1976 {
1977         return READ_ONCE(engine->props.timeslice_duration_ms);
1978 }
1979
1980 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1981 {
1982         const struct intel_engine_execlists *execlists = &engine->execlists;
1983         const struct i915_request *rq = *execlists->active;
1984
1985         if (!rq || i915_request_completed(rq))
1986                 return 0;
1987
1988         if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1989                 return 0;
1990
1991         return timeslice(engine);
1992 }
1993
1994 static void set_timeslice(struct intel_engine_cs *engine)
1995 {
1996         unsigned long duration;
1997
1998         if (!intel_engine_has_timeslices(engine))
1999                 return;
2000
2001         duration = active_timeslice(engine);
2002         ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2003
2004         set_timer_ms(&engine->execlists.timer, duration);
2005 }
2006
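/*
 * start_timeslice() arms the timeslice timer when we decline to submit
 * right now but know other work of priority @prio is queued; a prio of
 * INT_MIN means there is nothing waiting, so only the switch hint is
 * recorded and the timer is left alone.
 */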
2007 static void start_timeslice(struct intel_engine_cs *engine, int prio)
2008 {
2009         struct intel_engine_execlists *execlists = &engine->execlists;
2010         unsigned long duration;
2011
2012         if (!intel_engine_has_timeslices(engine))
2013                 return;
2014
2015         WRITE_ONCE(execlists->switch_priority_hint, prio);
2016         if (prio == INT_MIN)
2017                 return;
2018
2019         if (timer_pending(&execlists->timer))
2020                 return;
2021
2022         duration = timeslice(engine);
2023         ENGINE_TRACE(engine,
2024                      "start timeslicing, prio:%d, interval:%lu",
2025                      prio, duration);
2026
2027         set_timer_ms(&execlists->timer, duration);
2028 }
2029
2030 static void record_preemption(struct intel_engine_execlists *execlists)
2031 {
2032         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2033 }
2034
2035 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2036                                             const struct i915_request *rq)
2037 {
2038         if (!rq)
2039                 return 0;
2040
2041         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2042         if (unlikely(intel_context_is_banned(rq->context)))
2043                 return 1;
2044
2045         return READ_ONCE(engine->props.preempt_timeout_ms);
2046 }
2047
2048 static void set_preempt_timeout(struct intel_engine_cs *engine,
2049                                 const struct i915_request *rq)
2050 {
2051         if (!intel_engine_has_preempt_reset(engine))
2052                 return;
2053
2054         set_timer_ms(&engine->execlists.preempt,
2055                      active_preempt_timeout(engine, rq));
2056 }
2057
2058 static inline void clear_ports(struct i915_request **ports, int count)
2059 {
2060         memset_p((void **)ports, NULL, count);
2061 }
2062
2063 static void execlists_dequeue(struct intel_engine_cs *engine)
2064 {
2065         struct intel_engine_execlists * const execlists = &engine->execlists;
2066         struct i915_request **port = execlists->pending;
2067         struct i915_request ** const last_port = port + execlists->port_mask;
2068         struct i915_request * const *active;
2069         struct i915_request *last;
2070         struct rb_node *rb;
2071         bool submit = false;
2072
2073         /*
2074          * Hardware submission is through 2 ports. Conceptually each port
2075          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2076          * static for a context, and unique to each, so we only execute
2077          * requests belonging to a single context from each ring. RING_HEAD
2078          * is maintained by the CS in the context image, it marks the place
2079          * where it got up to last time, and through RING_TAIL we tell the CS
2080          * where we want to execute up to this time.
2081          *
2082          * In this list the requests are in order of execution. Consecutive
2083          * requests from the same context are adjacent in the ringbuffer. We
2084          * can combine these requests into a single RING_TAIL update:
2085          *
2086          *              RING_HEAD...req1...req2
2087          *                                    ^- RING_TAIL
2088          * since to execute req2 the CS must first execute req1.
2089          *
2090          * Our goal then is to point each port to the end of a consecutive
2091          * sequence of requests, as that gives the optimal (fewest wake ups
2092          * and context switches) submission.
2093          */
2094
2095         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2096                 struct virtual_engine *ve =
2097                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2098                 struct i915_request *rq = READ_ONCE(ve->request);
2099
2100                 if (!rq) { /* lazily cleanup after another engine handled rq */
2101                         rb_erase_cached(rb, &execlists->virtual);
2102                         RB_CLEAR_NODE(rb);
2103                         rb = rb_first_cached(&execlists->virtual);
2104                         continue;
2105                 }
2106
2107                 if (!virtual_matches(ve, rq, engine)) {
2108                         rb = rb_next(rb);
2109                         continue;
2110                 }
2111
2112                 break;
2113         }
2114
2115         /*
2116          * If the queue is higher priority than the last
2117          * request in the currently active context, submit afresh.
2118          * We will resubmit again afterwards in case we need to split
2119          * the active context to interject the preemption request,
2120          * i.e. we will retrigger preemption following the ack in case
2121          * of trouble.
2122          */
2123         active = READ_ONCE(execlists->active);
2124
2125         /*
2126          * In theory we can skip over completed contexts that have not
2127          * yet been processed by events (as those events are in flight):
2128          *
2129          * while ((last = *active) && i915_request_completed(last))
2130          *      active++;
2131          *
2132          * However, the GPU cannot handle this as it will ultimately
2133          * find itself trying to jump back into a context it has just
2134          * completed and barf.
2135          */
2136
2137         if ((last = *active)) {
2138                 if (need_preempt(engine, last, rb)) {
2139                         if (i915_request_completed(last)) {
2140                                 tasklet_hi_schedule(&execlists->tasklet);
2141                                 return;
2142                         }
2143
2144                         ENGINE_TRACE(engine,
2145                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2146                                      last->fence.context,
2147                                      last->fence.seqno,
2148                                      last->sched.attr.priority,
2149                                      execlists->queue_priority_hint);
2150                         record_preemption(execlists);
2151
2152                         /*
2153                          * Don't let the RING_HEAD advance past the breadcrumb
2154                          * as we unwind (and until we resubmit) so that we do
2155                          * not accidentally tell it to go backwards.
2156                          */
2157                         ring_set_paused(engine, 1);
2158
2159                         /*
2160                          * Note that we have not stopped the GPU at this point,
2161                          * so we are unwinding the incomplete requests as they
2162                          * remain inflight and so by the time we do complete
2163                          * the preemption, some of the unwound requests may
2164                          * complete!
2165                          */
2166                         __unwind_incomplete_requests(engine);
2167
2168                         last = NULL;
2169                 } else if (need_timeslice(engine, last, rb) &&
2170                            timeslice_expired(execlists, last)) {
2171                         if (i915_request_completed(last)) {
2172                                 tasklet_hi_schedule(&execlists->tasklet);
2173                                 return;
2174                         }
2175
2176                         ENGINE_TRACE(engine,
2177                                      "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2178                                      last->fence.context,
2179                                      last->fence.seqno,
2180                                      last->sched.attr.priority,
2181                                      execlists->queue_priority_hint,
2182                                      yesno(timeslice_yield(execlists, last)));
2183
2184                         ring_set_paused(engine, 1);
2185                         defer_active(engine);
2186
2187                         /*
2188                          * Unlike for preemption, if we rewind and continue
2189                          * executing the same context as previously active,
2190                          * the order of execution will remain the same and
2191                          * the tail will only advance. We do not need to
2192                          * force a full context restore, as a lite-restore
2193                          * is sufficient to resample the monotonic TAIL.
2194                          *
2195                          * If we switch to any other context, similarly we
2196                          * will not rewind TAIL of current context, and
2197                          * normal save/restore will preserve state and allow
2198                          * us to later continue executing the same request.
2199                          */
2200                         last = NULL;
2201                 } else {
2202                         /*
2203                          * Otherwise if we already have a request pending
2204                          * for execution after the current one, we can
2205                          * just wait until the next CS event before
2206                          * queuing more. In either case we will force a
2207                          * lite-restore preemption event, but if we wait
2208                          * we hopefully coalesce several updates into a single
2209                          * submission.
2210                          */
2211                         if (!list_is_last(&last->sched.link,
2212                                           &engine->active.requests)) {
2213                                 /*
2214                                  * Even if ELSP[1] is occupied and not worthy
2215                                  * of timeslices, our queue might be.
2216                                  */
2217                                 start_timeslice(engine, queue_prio(execlists));
2218                                 return;
2219                         }
2220                 }
2221         }
2222
2223         while (rb) { /* XXX virtual is always taking precedence */
2224                 struct virtual_engine *ve =
2225                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2226                 struct i915_request *rq;
2227
2228                 spin_lock(&ve->base.active.lock);
2229
2230                 rq = ve->request;
2231                 if (unlikely(!rq)) { /* lost the race to a sibling */
2232                         spin_unlock(&ve->base.active.lock);
2233                         rb_erase_cached(rb, &execlists->virtual);
2234                         RB_CLEAR_NODE(rb);
2235                         rb = rb_first_cached(&execlists->virtual);
2236                         continue;
2237                 }
2238
2239                 GEM_BUG_ON(rq != ve->request);
2240                 GEM_BUG_ON(rq->engine != &ve->base);
2241                 GEM_BUG_ON(rq->context != &ve->context);
2242
2243                 if (rq_prio(rq) >= queue_prio(execlists)) {
2244                         if (!virtual_matches(ve, rq, engine)) {
2245                                 spin_unlock(&ve->base.active.lock);
2246                                 rb = rb_next(rb);
2247                                 continue;
2248                         }
2249
2250                         if (last && !can_merge_rq(last, rq)) {
2251                                 spin_unlock(&ve->base.active.lock);
2252                                 start_timeslice(engine, rq_prio(rq));
2253                                 return; /* leave this for another sibling */
2254                         }
2255
2256                         ENGINE_TRACE(engine,
2257                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
2258                                      rq->fence.context,
2259                                      rq->fence.seqno,
2260                                      i915_request_completed(rq) ? "!" :
2261                                      i915_request_started(rq) ? "*" :
2262                                      "",
2263                                      yesno(engine != ve->siblings[0]));
2264
2265                         WRITE_ONCE(ve->request, NULL);
2266                         WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2267                                    INT_MIN);
2268                         rb_erase_cached(rb, &execlists->virtual);
2269                         RB_CLEAR_NODE(rb);
2270
2271                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2272                         WRITE_ONCE(rq->engine, engine);
2273
2274                         if (engine != ve->siblings[0]) {
2275                                 u32 *regs = ve->context.lrc_reg_state;
2276                                 unsigned int n;
2277
2278                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2279
2280                                 if (!intel_engine_has_relative_mmio(engine))
2281                                         virtual_update_register_offsets(regs,
2282                                                                         engine);
2283
2284                                 if (!list_empty(&ve->context.signals))
2285                                         virtual_xfer_breadcrumbs(ve);
2286
2287                                 /*
2288                                  * Move the bound engine to the top of the list
2289                                  * for future execution. We then kick this
2290                                  * tasklet first before checking others, so that
2291                                  * we preferentially reuse this set of bound
2292                                  * registers.
2293                                  */
2294                                 for (n = 1; n < ve->num_siblings; n++) {
2295                                         if (ve->siblings[n] == engine) {
2296                                                 swap(ve->siblings[n],
2297                                                      ve->siblings[0]);
2298                                                 break;
2299                                         }
2300                                 }
2301
2302                                 GEM_BUG_ON(ve->siblings[0] != engine);
2303                         }
2304
2305                         if (__i915_request_submit(rq)) {
2306                                 submit = true;
2307                                 last = rq;
2308                         }
2309                         i915_request_put(rq);
2310
2311                         /*
2312                          * Hmm, we have a bunch of virtual engine requests,
2313                          * but the first one was already completed (thanks
2314                          * preempt-to-busy!). Keep looking at the veng queue
2315                          * until we have no more relevant requests (i.e.
2316                          * the normal submit queue has higher priority).
2317                          */
2318                         if (!submit) {
2319                                 spin_unlock(&ve->base.active.lock);
2320                                 rb = rb_first_cached(&execlists->virtual);
2321                                 continue;
2322                         }
2323                 }
2324
2325                 spin_unlock(&ve->base.active.lock);
2326                 break;
2327         }
2328
2329         while ((rb = rb_first_cached(&execlists->queue))) {
2330                 struct i915_priolist *p = to_priolist(rb);
2331                 struct i915_request *rq, *rn;
2332                 int i;
2333
2334                 priolist_for_each_request_consume(rq, rn, p, i) {
2335                         bool merge = true;
2336
2337                         /*
2338                          * Can we combine this request with the current port?
2339                          * It has to be the same context/ringbuffer and not
2340                          * have any exceptions (e.g. GVT saying never to
2341                          * combine contexts).
2342                          *
2343                          * If we can combine the requests, we can execute both
2344                          * by updating the RING_TAIL to point to the end of the
2345                          * second request, and so we never need to tell the
2346                          * hardware about the first.
2347                          */
2348                         if (last && !can_merge_rq(last, rq)) {
2349                                 /*
2350                                  * If we are on the second port and cannot
2351                                  * combine this request with the last, then we
2352                                  * are done.
2353                                  */
2354                                 if (port == last_port)
2355                                         goto done;
2356
2357                                 /*
2358                                  * We must not populate both ELSP[] with the
2359                                  * same LRCA, i.e. we must submit 2 different
2360                                  * contexts if we submit 2 ELSP.
2361                                  */
2362                                 if (last->context == rq->context)
2363                                         goto done;
2364
2365                                 if (i915_request_has_sentinel(last))
2366                                         goto done;
2367
2368                                 /*
2369                                  * If GVT overrides us we only ever submit
2370                                  * port[0], leaving port[1] empty. Note that we
2371                                  * also have to be careful that we don't queue
2372                                  * the same context (even though a different
2373                                  * request) to the second port.
2374                                  */
2375                                 if (ctx_single_port_submission(last->context) ||
2376                                     ctx_single_port_submission(rq->context))
2377                                         goto done;
2378
2379                                 merge = false;
2380                         }
2381
2382                         if (__i915_request_submit(rq)) {
2383                                 if (!merge) {
2384                                         *port = execlists_schedule_in(last, port - execlists->pending);
2385                                         port++;
2386                                         last = NULL;
2387                                 }
2388
2389                                 GEM_BUG_ON(last &&
2390                                            !can_merge_ctx(last->context,
2391                                                           rq->context));
2392                                 GEM_BUG_ON(last &&
2393                                            i915_seqno_passed(last->fence.seqno,
2394                                                              rq->fence.seqno));
2395
2396                                 submit = true;
2397                                 last = rq;
2398                         }
2399                 }
2400
2401                 rb_erase_cached(&p->node, &execlists->queue);
2402                 i915_priolist_free(p);
2403         }
2404
2405 done:
2406         /*
2407          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2408          *
2409          * We choose the priority hint such that if we add a request of greater
2410          * priority than this, we kick the submission tasklet to decide on
2411          * the right order of submitting the requests to hardware. We must
2412          * also be prepared to reorder requests as they are in-flight on the
2413          * HW. We derive the priority hint then as the first "hole" in
2414          * the HW submission ports and if there are no available slots,
2415          * the priority of the lowest executing request, i.e. last.
2416          *
2417          * When we do receive a higher priority request ready to run from the
2418          * user, see queue_request(), the priority hint is bumped to that
2419          * request triggering preemption on the next dequeue (or subsequent
2420          * interrupt for secondary ports).
2421          */
2422         execlists->queue_priority_hint = queue_prio(execlists);
2423
2424         if (submit) {
2425                 *port = execlists_schedule_in(last, port - execlists->pending);
2426                 execlists->switch_priority_hint =
2427                         switch_prio(engine, *execlists->pending);
2428
2429                 /*
2430                  * Skip if we ended up with exactly the same set of requests,
2431                  * e.g. trying to timeslice a pair of ordered contexts
2432                  */
2433                 if (!memcmp(active, execlists->pending,
2434                             (port - execlists->pending + 1) * sizeof(*port))) {
2435                         do
2436                                 execlists_schedule_out(fetch_and_zero(port));
2437                         while (port-- != execlists->pending);
2438
2439                         goto skip_submit;
2440                 }
2441                 clear_ports(port + 1, last_port - port);
2442
2443                 WRITE_ONCE(execlists->yield, -1);
2444                 set_preempt_timeout(engine, *active);
2445                 execlists_submit_ports(engine);
2446         } else {
2447                 start_timeslice(engine, execlists->queue_priority_hint);
2448 skip_submit:
2449                 ring_set_paused(engine, 0);
2450         }
2451 }
2452
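/*
 * execlists->active always points at a NULL-terminated array of in-flight
 * requests (normally ->inflight, transiently ->pending during promotion);
 * readers sample it via execlists_active() under what is effectively a
 * seqlock, hence the smp_wmb() before the pointer is republished below.
 */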
2453 static void
2454 cancel_port_requests(struct intel_engine_execlists * const execlists)
2455 {
2456         struct i915_request * const *port;
2457
2458         for (port = execlists->pending; *port; port++)
2459                 execlists_schedule_out(*port);
2460         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2461
2462         /* Mark the end of active before we overwrite *active */
2463         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2464                 execlists_schedule_out(*port);
2465         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2466
2467         smp_wmb(); /* complete the seqlock for execlists_active() */
2468         WRITE_ONCE(execlists->active, execlists->inflight);
2469 }
2470
2471 static inline void
2472 invalidate_csb_entries(const u32 *first, const u32 *last)
2473 {
2474         clflush((void *)first);
2475         clflush((void *)last);
2476 }
2477
2478 /*
2479  * Starting with Gen12, the status has a new format:
2480  *
2481  *     bit  0:     switched to new queue
2482  *     bit  1:     reserved
2483  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2484  *                 switch detail is set to "wait on semaphore"
2485  *     bits 3-5:   engine class
2486  *     bits 6-11:  engine instance
2487  *     bits 12-14: reserved
2488  *     bits 15-25: sw context id of the lrc the GT switched to
2489  *     bits 26-31: sw counter of the lrc the GT switched to
2490  *     bits 32-35: context switch detail
2491  *                  - 0: ctx complete
2492  *                  - 1: wait on sync flip
2493  *                  - 2: wait on vblank
2494  *                  - 3: wait on scanline
2495  *                  - 4: wait on semaphore
2496  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2497  *                       WAIT_FOR_EVENT)
2498  *     bit  36:    reserved
2499  *     bits 37-43: wait detail (for switch detail 1 to 4)
2500  *     bits 44-46: reserved
2501  *     bits 47-57: sw context id of the lrc the GT switched away from
2502  *     bits 58-63: sw counter of the lrc the GT switched away from
2503  */
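/*
 * Purely illustrative (not used by the driver): the fields documented
 * above could be pulled out of the 64b event along the lines of
 *
 *	u64 entry = ((u64)csb[1] << 32) | csb[0];
 *	u32 to_id   = (entry >> 15) & 0x7ff;	(bits 15-25)
 *	u32 detail  = (entry >> 32) & 0xf;	(bits 32-35)
 *	u32 from_id = (entry >> 47) & 0x7ff;	(bits 47-57)
 *
 * gen12_csb_parse() below only needs the context-valid bits and the
 * "switched to new queue" flag, for which GEN12_CSB_CTX_VALID() and
 * GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE are already provided.
 */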
2504 static inline bool
2505 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2506 {
2507         u32 lower_dw = csb[0];
2508         u32 upper_dw = csb[1];
2509         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2510         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2511         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2512
2513         /*
2514          * The context switch detail is not guaranteed to be 5 when a preemption
2515          * occurs, so we can't just check for that. The check below works for
2516          * all the cases we care about, including preemptions of WAIT
2517          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2518          * would require some extra handling, but we don't support that.
2519          */
2520         if (!ctx_away_valid || new_queue) {
2521                 GEM_BUG_ON(!ctx_to_valid);
2522                 return true;
2523         }
2524
2525         /*
2526          * switch detail = 5 is covered by the case above and we do not expect a
2527          * context switch on an unsuccessful wait instruction since we always
2528          * use polling mode.
2529          */
2530         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2531         return false;
2532 }
2533
2534 static inline bool
2535 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2536 {
2537         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2538 }
2539
2540 static void process_csb(struct intel_engine_cs *engine)
2541 {
2542         struct intel_engine_execlists * const execlists = &engine->execlists;
2543         const u32 * const buf = execlists->csb_status;
2544         const u8 num_entries = execlists->csb_size;
2545         u8 head, tail;
2546
2547         /*
2548          * As we modify our execlists state tracking we require exclusive
2549          * access. Either we are inside the tasklet, or the tasklet is disabled
2550          * and we assume that is only inside the reset paths and so serialised.
2551          */
2552         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2553                    !reset_in_progress(execlists));
2554         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2555
2556         /*
2557          * Note that csb_write, csb_status may be either in HWSP or mmio.
2558          * When reading from the csb_write mmio register, we have to be
2559          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2560          * the low 4 bits. As it happens we know the next 4 bits are always
2561          * zero, and so we can simply mask off the low u8 of the register
2562          * and treat it identically to reading from the HWSP (without having
2563          * to use explicit shifting and masking, and probably bifurcating
2564          * the code to handle the legacy mmio read).
2565          */
2566         head = execlists->csb_head;
2567         tail = READ_ONCE(*execlists->csb_write);
2568         if (unlikely(head == tail))
2569                 return;
2570
2571         /*
2572          * We will consume all events from HW, or at least pretend to.
2573          *
2574          * The sequence of events from the HW is deterministic, and derived
2575          * from our writes to the ELSP, with a smidgen of variability for
2576          * the arrival of the asynchronous requests wrt the inflight
2577          * execution. If the HW sends an event that does not correspond with
2578          * the one we are expecting, we have to abandon all hope as we lose
2579          * all tracking of what the engine is actually executing. We will
2580          * only detect we are out of sequence with the HW when we get an
2581          * 'impossible' event because we have already drained our own
2582          * preemption/promotion queue. If this occurs, we know that we likely
2583          * lost track of execution earlier and must unwind and restart; the
2584          * simplest way is to stop processing the event queue and force the
2585          * engine to reset.
2586          */
2587         execlists->csb_head = tail;
2588         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2589
2590         /*
2591          * Hopefully paired with a wmb() in HW!
2592          *
2593          * We must complete the read of the write pointer before any reads
2594          * from the CSB, so that we do not see stale values. Without an rmb
2595          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2596          * we perform the READ_ONCE(*csb_write).
2597          */
2598         rmb();
2599         do {
2600                 bool promote;
2601
2602                 if (++head == num_entries)
2603                         head = 0;
2604
2605                 /*
2606                  * We are flying near dragons again.
2607                  *
2608                  * We hold a reference to the request in execlist_port[]
2609                  * but no more than that. We are operating in softirq
2610                  * context and so cannot hold any mutex or sleep. That
2611                  * prevents us stopping the requests we are processing
2612                  * in port[] from being retired simultaneously (the
2613                  * breadcrumb will be complete before we see the
2614                  * context-switch). As we only hold the reference to the
2615                  * request, any pointer chasing underneath the request
2616                  * is subject to a potential use-after-free. Thus we
2617                  * store all of the bookkeeping within port[] as
2618                  * required, and avoid using unguarded pointers beneath
2619                  * request itself. The same applies to the atomic
2620                  * status notifier.
2621                  */
2622
2623                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2624                              head, buf[2 * head + 0], buf[2 * head + 1]);
2625
2626                 if (INTEL_GEN(engine->i915) >= 12)
2627                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2628                 else
2629                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2630                 if (promote) {
2631                         struct i915_request * const *old = execlists->active;
2632
2633                         if (GEM_WARN_ON(!*execlists->pending)) {
2634                                 execlists->error_interrupt |= ERROR_CSB;
2635                                 break;
2636                         }
2637
2638                         ring_set_paused(engine, 0);
2639
2640                         /* Point active to the new ELSP; prevent overwriting */
2641                         WRITE_ONCE(execlists->active, execlists->pending);
2642                         smp_wmb(); /* notify execlists_active() */
2643
2644                         /* cancel old inflight, prepare for switch */
2645                         trace_ports(execlists, "preempted", old);
2646                         while (*old)
2647                                 execlists_schedule_out(*old++);
2648
2649                         /* switch pending to inflight */
2650                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2651                         memcpy(execlists->inflight,
2652                                execlists->pending,
2653                                execlists_num_ports(execlists) *
2654                                sizeof(*execlists->pending));
2655                         smp_wmb(); /* complete the seqlock */
2656                         WRITE_ONCE(execlists->active, execlists->inflight);
2657
2658                         WRITE_ONCE(execlists->pending[0], NULL);
2659                 } else {
2660                         if (GEM_WARN_ON(!*execlists->active)) {
2661                                 execlists->error_interrupt |= ERROR_CSB;
2662                                 break;
2663                         }
2664
2665                         /* port0 completed, advanced to port1 */
2666                         trace_ports(execlists, "completed", execlists->active);
2667
2668                         /*
2669                          * We rely on the hardware being strongly
2670                          * ordered, that the breadcrumb write is
2671                          * coherent (visible from the CPU) before the
2672                          * user interrupt is processed. One might assume that
2673                          * the breadcrumb write, being before both the user
2674                          * interrupt and the CS event for the context switch,
2675                          * would therefore be visible before the CS event
2676                          * itself...
2677                          */
2678                         if (GEM_SHOW_DEBUG() &&
2679                             !i915_request_completed(*execlists->active)) {
2680                                 struct i915_request *rq = *execlists->active;
2681                                 const u32 *regs __maybe_unused =
2682                                         rq->context->lrc_reg_state;
2683
2684                                 ENGINE_TRACE(engine,
2685                                              "context completed before request!\n");
2686                                 ENGINE_TRACE(engine,
2687                                              "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2688                                              ENGINE_READ(engine, RING_START),
2689                                              ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2690                                              ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2691                                              ENGINE_READ(engine, RING_CTL),
2692                                              ENGINE_READ(engine, RING_MI_MODE));
2693                                 ENGINE_TRACE(engine,
2694                                              "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2695                                              i915_ggtt_offset(rq->ring->vma),
2696                                              rq->head, rq->tail,
2697                                              rq->fence.context,
2698                                              lower_32_bits(rq->fence.seqno),
2699                                              hwsp_seqno(rq));
2700                                 ENGINE_TRACE(engine,
2701                                              "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2702                                              regs[CTX_RING_START],
2703                                              regs[CTX_RING_HEAD],
2704                                              regs[CTX_RING_TAIL]);
2705                         }
2706
2707                         execlists_schedule_out(*execlists->active++);
2708
2709                         GEM_BUG_ON(execlists->active - execlists->inflight >
2710                                    execlists_num_ports(execlists));
2711                 }
2712         } while (head != tail);
2713
2714         set_timeslice(engine);
2715
2716         /*
2717          * Gen11 has proven to fail, wrt the global observation point
2718          * between the entry write and the tail update, on the ordering,
2719          * and thus we may see a stale entry in the context status buffer.
2720          *
2721          * Forcibly evict the entries before the next GPU CSB update, to
2722          * increase the odds that we get fresh entries from the
2723          * misbehaving hardware. The cost of doing so comes out mostly in
2724          * the wash, as the hardware, working or not, will need to do the
2725          * invalidation anyway.
2726          */
2727         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2728 }
2729
2730 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2731 {
2732         lockdep_assert_held(&engine->active.lock);
2733         if (!READ_ONCE(engine->execlists.pending[0])) {
2734                 rcu_read_lock(); /* protect peeking at execlists->active */
2735                 execlists_dequeue(engine);
2736                 rcu_read_unlock();
2737         }
2738 }
2739
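/*
 * __execlists_hold() transitively unsubmits rq and every ready, incomplete
 * waiter bound to the same engine, parking them on engine->active.hold so
 * that none of them can be resubmitted to the HW until the hold is lifted.
 */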
2740 static void __execlists_hold(struct i915_request *rq)
2741 {
2742         LIST_HEAD(list);
2743
2744         do {
2745                 struct i915_dependency *p;
2746
2747                 if (i915_request_is_active(rq))
2748                         __i915_request_unsubmit(rq);
2749
2750                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2751                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2752                 i915_request_set_hold(rq);
2753                 RQ_TRACE(rq, "on hold\n");
2754
2755                 for_each_waiter(p, rq) {
2756                         struct i915_request *w =
2757                                 container_of(p->waiter, typeof(*w), sched);
2758
2759                         /* Leave semaphores spinning on the other engines */
2760                         if (w->engine != rq->engine)
2761                                 continue;
2762
2763                         if (!i915_request_is_ready(w))
2764                                 continue;
2765
2766                         if (i915_request_completed(w))
2767                                 continue;
2768
2769                         if (i915_request_on_hold(w))
2770                                 continue;
2771
2772                         list_move_tail(&w->sched.link, &list);
2773                 }
2774
2775                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2776         } while (rq);
2777 }
2778
2779 static bool execlists_hold(struct intel_engine_cs *engine,
2780                            struct i915_request *rq)
2781 {
2782         spin_lock_irq(&engine->active.lock);
2783
2784         if (i915_request_completed(rq)) { /* too late! */
2785                 rq = NULL;
2786                 goto unlock;
2787         }
2788
2789         if (rq->engine != engine) { /* preempted virtual engine */
2790                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2791
2792                 /*
2793                  * intel_context_inflight() is only protected by virtue
2794                  * of process_csb() being called only by the tasklet (or
2795                  * directly from inside reset while the tasklet is suspended).
2796                  * Assert that neither of those are allowed to run while we
2797                  * poke at the request queues.
2798                  */
2799                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2800
2801                 /*
2802                  * An unsubmitted request along a virtual engine will
2803                  * remain on the active (this) engine until we are able
2804                  * to process the context switch away (and so mark the
2805                  * context as no longer in flight). That cannot have happened
2806                  * yet, otherwise we would not be hanging!
2807                  */
2808                 spin_lock(&ve->base.active.lock);
2809                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2810                 GEM_BUG_ON(ve->request != rq);
2811                 ve->request = NULL;
2812                 spin_unlock(&ve->base.active.lock);
2813                 i915_request_put(rq);
2814
2815                 rq->engine = engine;
2816         }
2817
2818         /*
2819          * Transfer this request onto the hold queue to prevent it
2820          * being resubmitted to HW (and potentially completed) before we have
2821          * released it. Since we may have already submitted following
2822          * requests, we need to remove those as well.
2823          */
2824         GEM_BUG_ON(i915_request_on_hold(rq));
2825         GEM_BUG_ON(rq->engine != engine);
2826         __execlists_hold(rq);
2827         GEM_BUG_ON(list_empty(&engine->active.hold));
2828
2829 unlock:
2830         spin_unlock_irq(&engine->active.lock);
2831         return rq;
2832 }
2833
2834 static bool hold_request(const struct i915_request *rq)
2835 {
2836         struct i915_dependency *p;
2837         bool result = false;
2838
2839         /*
2840          * If one of our ancestors is on hold, we must also be on hold,
2841          * otherwise we will bypass it and execute before it.
2842          */
2843         rcu_read_lock();
2844         for_each_signaler(p, rq) {
2845                 const struct i915_request *s =
2846                         container_of(p->signaler, typeof(*s), sched);
2847
2848                 if (s->engine != rq->engine)
2849                         continue;
2850
2851                 result = i915_request_on_hold(s);
2852                 if (result)
2853                         break;
2854         }
2855         rcu_read_unlock();
2856
2857         return result;
2858 }
2859
2860 static void __execlists_unhold(struct i915_request *rq)
2861 {
2862         LIST_HEAD(list);
2863
2864         do {
2865                 struct i915_dependency *p;
2866
2867                 RQ_TRACE(rq, "hold release\n");
2868
2869                 GEM_BUG_ON(!i915_request_on_hold(rq));
2870                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2871
2872                 i915_request_clear_hold(rq);
2873                 list_move_tail(&rq->sched.link,
2874                                i915_sched_lookup_priolist(rq->engine,
2875                                                           rq_prio(rq)));
2876                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2877
2878                 /* Also release any children on this engine that are ready */
2879                 for_each_waiter(p, rq) {
2880                         struct i915_request *w =
2881                                 container_of(p->waiter, typeof(*w), sched);
2882
2883                         /* Propagate any change in error status */
2884                         if (rq->fence.error)
2885                                 i915_request_set_error_once(w, rq->fence.error);
2886
2887                         if (w->engine != rq->engine)
2888                                 continue;
2889
2890                         if (!i915_request_on_hold(w))
2891                                 continue;
2892
2893                         /* Check that no other parents are also on hold */
2894                         if (hold_request(w))
2895                                 continue;
2896
2897                         list_move_tail(&w->sched.link, &list);
2898                 }
2899
2900                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2901         } while (rq);
2902 }
2903
2904 static void execlists_unhold(struct intel_engine_cs *engine,
2905                              struct i915_request *rq)
2906 {
2907         spin_lock_irq(&engine->active.lock);
2908
2909         /*
2910          * Move this request back to the priority queue, and all of its
2911          * children and grandchildren that were suspended along with it.
2912          */
2913         __execlists_unhold(rq);
2914
2915         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2916                 engine->execlists.queue_priority_hint = rq_prio(rq);
2917                 tasklet_hi_schedule(&engine->execlists.tasklet);
2918         }
2919
2920         spin_unlock_irq(&engine->active.lock);
2921 }
2922
2923 struct execlists_capture {
2924         struct work_struct work;
2925         struct i915_request *rq;
2926         struct i915_gpu_coredump *error;
2927 };
2928
2929 static void execlists_capture_work(struct work_struct *work)
2930 {
2931         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2932         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2933         struct intel_engine_cs *engine = cap->rq->engine;
2934         struct intel_gt_coredump *gt = cap->error->gt;
2935         struct intel_engine_capture_vma *vma;
2936
2937         /* Compress all the objects attached to the request, slow! */
2938         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2939         if (vma) {
2940                 struct i915_vma_compress *compress =
2941                         i915_vma_capture_prepare(gt);
2942
2943                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2944                 i915_vma_capture_finish(gt, compress);
2945         }
2946
2947         gt->simulated = gt->engine->simulated;
2948         cap->error->simulated = gt->simulated;
2949
2950         /* Publish the error state, and announce it to the world */
2951         i915_error_state_store(cap->error);
2952         i915_gpu_coredump_put(cap->error);
2953
2954         /* Return this request and all that depend upon it for signaling */
2955         execlists_unhold(engine, cap->rq);
2956         i915_request_put(cap->rq);
2957
2958         kfree(cap);
2959 }
2960
2961 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2962 {
2963         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2964         struct execlists_capture *cap;
2965
2966         cap = kmalloc(sizeof(*cap), gfp);
2967         if (!cap)
2968                 return NULL;
2969
2970         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2971         if (!cap->error)
2972                 goto err_cap;
2973
2974         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2975         if (!cap->error->gt)
2976                 goto err_gpu;
2977
2978         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2979         if (!cap->error->gt->engine)
2980                 goto err_gt;
2981
2982         return cap;
2983
2984 err_gt:
2985         kfree(cap->error->gt);
2986 err_gpu:
2987         kfree(cap->error);
2988 err_cap:
2989         kfree(cap);
2990         return NULL;
2991 }
2992
2993 static struct i915_request *
2994 active_context(struct intel_engine_cs *engine, u32 ccid)
2995 {
2996         const struct intel_engine_execlists * const el = &engine->execlists;
2997         struct i915_request * const *port, *rq;
2998
2999         /*
3000          * Use the most recent result from process_csb(), but just in case
3001          * we trigger an error (via interrupt) before the first CS event has
3002          * been written, peek at the next submission.
3003          */
3004
3005         for (port = el->active; (rq = *port); port++) {
3006                 if (rq->context->lrc.ccid == ccid) {
3007                         ENGINE_TRACE(engine,
3008                                      "ccid found at active:%zd\n",
3009                                      port - el->active);
3010                         return rq;
3011                 }
3012         }
3013
3014         for (port = el->pending; (rq = *port); port++) {
3015                 if (rq->context->lrc.ccid == ccid) {
3016                         ENGINE_TRACE(engine,
3017                                      "ccid found at pending:%zd\n",
3018                                      port - el->pending);
3019                         return rq;
3020                 }
3021         }
3022
3023         ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3024         return NULL;
3025 }
3026
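/*
 * The upper dword of the execlists status register reports the hardware's
 * currently active context ID (CCID), which is what active_context() above
 * matches against rq->context->lrc.ccid.
 */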
3027 static u32 active_ccid(struct intel_engine_cs *engine)
3028 {
3029         return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3030 }
3031
3032 static void execlists_capture(struct intel_engine_cs *engine)
3033 {
3034         struct execlists_capture *cap;
3035
3036         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3037                 return;
3038
3039         /*
3040          * We need to _quickly_ capture the engine state before we reset.
3041          * We are inside an atomic section (softirq) here and we are delaying
3042          * the forced preemption event.
3043          */
3044         cap = capture_regs(engine);
3045         if (!cap)
3046                 return;
3047
3048         spin_lock_irq(&engine->active.lock);
3049         cap->rq = active_context(engine, active_ccid(engine));
3050         if (cap->rq) {
3051                 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3052                 cap->rq = i915_request_get_rcu(cap->rq);
3053         }
3054         spin_unlock_irq(&engine->active.lock);
3055         if (!cap->rq)
3056                 goto err_free;
3057
3058         /*
3059          * Remove the request from the execlists queue, and take ownership
3060          * of the request. We pass it to our worker who will _slowly_ compress
3061          * all the pages the _user_ requested for debugging their batch, after
3062          * which we return it to the queue for signaling.
3063          *
3064          * By removing them from the execlists queue, we also remove the
3065          * requests from being processed by __unwind_incomplete_requests()
3066          * during the intel_engine_reset(), and so they will *not* be replayed
3067          * afterwards.
3068          *
3069          * Note that because we have not yet reset the engine at this point,
3070          * it is possible that the request we have identified as being
3071          * guilty did in fact complete, and we will then hit an arbitration
3072          * point allowing the outstanding preemption to succeed. The likelihood
3073          * of that is very low (as capturing of the engine registers should be
3074          * fast enough to run inside an irq-off atomic section!), so we will
3075          * simply hold that request accountable for being non-preemptible
3076          * long enough to force the reset.
3077          */
3078         if (!execlists_hold(engine, cap->rq))
3079                 goto err_rq;
3080
3081         INIT_WORK(&cap->work, execlists_capture_work);
3082         schedule_work(&cap->work);
3083         return;
3084
3085 err_rq:
3086         i915_request_put(cap->rq);
3087 err_free:
3088         i915_gpu_coredump_put(cap->error);
3089         kfree(cap);
3090 }
3091
3092 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3093 {
3094         const unsigned int bit = I915_RESET_ENGINE + engine->id;
3095         unsigned long *lock = &engine->gt->reset.flags;
3096
3097         if (!intel_has_reset_engine(engine->gt))
3098                 return;
3099
3100         if (test_and_set_bit(bit, lock))
3101                 return;
3102
3103         ENGINE_TRACE(engine, "reset for %s\n", msg);
3104
3105         /* Mark this tasklet as disabled to avoid waiting for it to complete */
3106         tasklet_disable_nosync(&engine->execlists.tasklet);
3107
3108         ring_set_paused(engine, 1); /* Freeze the current request in place */
3109         execlists_capture(engine);
3110         intel_engine_reset(engine, msg);
3111
3112         tasklet_enable(&engine->execlists.tasklet);
3113         clear_and_wake_up_bit(bit, lock);
3114 }
3115
3116 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3117 {
3118         const struct timer_list *t = &engine->execlists.preempt;
3119
3120         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3121                 return false;
3122
3123         if (!timer_expired(t))
3124                 return false;
3125
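        /*
         * Only treat the expired timer as a preemption timeout while the
         * preemption (the ELSP write) is still outstanding, i.e. pending[0]
         * has not yet been promoted to active[] by process_csb().
         */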
3126         return READ_ONCE(engine->execlists.pending[0]);
3127 }
3128
3129 /*
3130  * Check the unread Context Status Buffers and manage the submission of new
3131  * contexts to the ELSP accordingly.
3132  */
3133 static void execlists_submission_tasklet(unsigned long data)
3134 {
3135         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3136         bool timeout = preempt_timeout(engine);
3137
3138         process_csb(engine);
3139
3140         if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3141                 const char *msg;
3142
3143                 /* Generate the error message in priority wrt the user! */
3144                 if (engine->execlists.error_interrupt & GENMASK(15, 0))
3145                         msg = "CS error"; /* thrown by a user payload */
3146                 else if (engine->execlists.error_interrupt & ERROR_CSB)
3147                         msg = "invalid CSB event";
3148                 else
3149                         msg = "internal error";
3150
3151                 engine->execlists.error_interrupt = 0;
3152                 execlists_reset(engine, msg);
3153         }
3154
3155         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3156                 unsigned long flags;
3157
3158                 spin_lock_irqsave(&engine->active.lock, flags);
3159                 __execlists_submission_tasklet(engine);
3160                 spin_unlock_irqrestore(&engine->active.lock, flags);
3161
3162                 /* Recheck after serialising with direct-submission */
3163                 if (unlikely(timeout && preempt_timeout(engine)))
3164                         execlists_reset(engine, "preemption time out");
3165         }
3166 }
3167
3168 static void __execlists_kick(struct intel_engine_execlists *execlists)
3169 {
3170         /* Kick the tasklet for some interrupt coalescing and reset handling */
3171         tasklet_hi_schedule(&execlists->tasklet);
3172 }
3173
3174 #define execlists_kick(t, member) \
3175         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
3176
3177 static void execlists_timeslice(struct timer_list *timer)
3178 {
3179         execlists_kick(timer, timer);
3180 }
3181
3182 static void execlists_preempt(struct timer_list *timer)
3183 {
3184         execlists_kick(timer, preempt);
3185 }
3186
3187 static void queue_request(struct intel_engine_cs *engine,
3188                           struct i915_request *rq)
3189 {
3190         GEM_BUG_ON(!list_empty(&rq->sched.link));
3191         list_add_tail(&rq->sched.link,
3192                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
3193         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3194 }
3195
3196 static void __submit_queue_imm(struct intel_engine_cs *engine)
3197 {
3198         struct intel_engine_execlists * const execlists = &engine->execlists;
3199
3200         if (reset_in_progress(execlists))
3201                 return; /* defer until we restart the engine following reset */
3202
3203         __execlists_submission_tasklet(engine);
3204 }
3205
3206 static void submit_queue(struct intel_engine_cs *engine,
3207                          const struct i915_request *rq)
3208 {
3209         struct intel_engine_execlists *execlists = &engine->execlists;
3210
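        /*
         * Only force an immediate submission run if this request is of
         * higher priority than anything already queued; lower or equal
         * priority work will be picked up on a later dequeue anyway.
         */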
3211         if (rq_prio(rq) <= execlists->queue_priority_hint)
3212                 return;
3213
3214         execlists->queue_priority_hint = rq_prio(rq);
3215         __submit_queue_imm(engine);
3216 }
3217
3218 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3219                              const struct i915_request *rq)
3220 {
3221         GEM_BUG_ON(i915_request_on_hold(rq));
3222         return !list_empty(&engine->active.hold) && hold_request(rq);
3223 }
3224
3225 static void flush_csb(struct intel_engine_cs *engine)
3226 {
3227         struct intel_engine_execlists *el = &engine->execlists;
3228
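        /*
         * Opportunistically drain the CSB from the submitting thread: if an
         * ELSP write is still marked pending and the tasklet is idle (and no
         * reset holds it off), processing the CSB here may clear pending[]
         * in time for a direct submission below.
         */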
3229         if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3230                 if (!reset_in_progress(el))
3231                         process_csb(engine);
3232                 tasklet_unlock(&el->tasklet);
3233         }
3234 }
3235
3236 static void execlists_submit_request(struct i915_request *request)
3237 {
3238         struct intel_engine_cs *engine = request->engine;
3239         unsigned long flags;
3240
3241         /* Hopefully we clear execlists->pending[] to let us through */
3242         flush_csb(engine);
3243
3244         /* Will be called from irq-context when using foreign fences. */
3245         spin_lock_irqsave(&engine->active.lock, flags);
3246
3247         if (unlikely(ancestor_on_hold(engine, request))) {
3248                 RQ_TRACE(request, "ancestor on hold\n");
3249                 list_add_tail(&request->sched.link, &engine->active.hold);
3250                 i915_request_set_hold(request);
3251         } else {
3252                 queue_request(engine, request);
3253
3254                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3255                 GEM_BUG_ON(list_empty(&request->sched.link));
3256
3257                 submit_queue(engine, request);
3258         }
3259
3260         spin_unlock_irqrestore(&engine->active.lock, flags);
3261 }
3262
3263 static void __execlists_context_fini(struct intel_context *ce)
3264 {
3265         intel_ring_put(ce->ring);
3266         i915_vma_put(ce->state);
3267 }
3268
3269 static void execlists_context_destroy(struct kref *kref)
3270 {
3271         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3272
3273         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3274         GEM_BUG_ON(intel_context_is_pinned(ce));
3275
3276         if (ce->state)
3277                 __execlists_context_fini(ce);
3278
3279         intel_context_fini(ce);
3280         intel_context_free(ce);
3281 }
3282
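/*
 * With CONFIG_DRM_I915_DEBUG_GEM, a page of CONTEXT_REDZONE poison is placed
 * immediately after the context image so that check_redzone() can detect any
 * overrun of the image when the context is unpinned.
 */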
3283 static void
3284 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3285 {
3286         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3287                 return;
3288
3289         vaddr += engine->context_size;
3290
3291         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3292 }
3293
3294 static void
3295 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3296 {
3297         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3298                 return;
3299
3300         vaddr += engine->context_size;
3301
3302         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3303                 drm_err_once(&engine->i915->drm,
3304                              "%s context redzone overwritten!\n",
3305                              engine->name);
3306 }
3307
3308 static void execlists_context_unpin(struct intel_context *ce)
3309 {
3310         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3311                       ce->engine);
3312
3313         i915_gem_object_unpin_map(ce->state->obj);
3314 }
3315
3316 static u32 *
3317 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3318 {
3319         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3320                 MI_SRM_LRM_GLOBAL_GTT |
3321                 MI_LRI_LRM_CS_MMIO;
3322         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3323         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3324                 CTX_TIMESTAMP * sizeof(u32);
3325         *cs++ = 0;
3326
3327         *cs++ = MI_LOAD_REGISTER_REG |
3328                 MI_LRR_SOURCE_CS_MMIO |
3329                 MI_LRI_LRM_CS_MMIO;
3330         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3331         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3332
3333         *cs++ = MI_LOAD_REGISTER_REG |
3334                 MI_LRR_SOURCE_CS_MMIO |
3335                 MI_LRI_LRM_CS_MMIO;
3336         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3337         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3338
3339         return cs;
3340 }
3341
3342 static u32 *
3343 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3344 {
3345         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3346
3347         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3348                 MI_SRM_LRM_GLOBAL_GTT |
3349                 MI_LRI_LRM_CS_MMIO;
3350         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3351         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3352                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3353         *cs++ = 0;
3354
3355         return cs;
3356 }
3357
3358 static u32 *
3359 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3360 {
3361         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3362
3363         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3364                 MI_SRM_LRM_GLOBAL_GTT |
3365                 MI_LRI_LRM_CS_MMIO;
3366         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3367         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3368                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3369         *cs++ = 0;
3370
3371         *cs++ = MI_LOAD_REGISTER_REG |
3372                 MI_LRR_SOURCE_CS_MMIO |
3373                 MI_LRI_LRM_CS_MMIO;
3374         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3375         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3376
3377         return cs;
3378 }
3379
3380 static u32 *
3381 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3382 {
3383         cs = gen12_emit_timestamp_wa(ce, cs);
3384         cs = gen12_emit_cmd_buf_wa(ce, cs);
3385         cs = gen12_emit_restore_scratch(ce, cs);
3386
3387         return cs;
3388 }
3389
3390 static u32 *
3391 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3392 {
3393         cs = gen12_emit_timestamp_wa(ce, cs);
3394         cs = gen12_emit_restore_scratch(ce, cs);
3395
3396         return cs;
3397 }
3398
3399 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3400 {
3401         return PAGE_SIZE * ce->wa_bb_page;
3402 }
3403
3404 static u32 *context_indirect_bb(const struct intel_context *ce)
3405 {
3406         void *ptr;
3407
3408         GEM_BUG_ON(!ce->wa_bb_page);
3409
3410         ptr = ce->lrc_reg_state;
3411         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3412         ptr += context_wa_bb_offset(ce);
3413
3414         return ptr;
3415 }
3416
3417 static void
3418 setup_indirect_ctx_bb(const struct intel_context *ce,
3419                       const struct intel_engine_cs *engine,
3420                       u32 *(*emit)(const struct intel_context *, u32 *))
3421 {
3422         u32 * const start = context_indirect_bb(ce);
3423         u32 *cs;
3424
3425         cs = emit(ce, start);
3426         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
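        /*
         * Pad with MI_NOOPs up to a cacheline boundary; the indirect-ctx
         * setup below specifies the batch length in terms of cachelines
         * (see the note in gen8_init_indirectctx_bb()).
         */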
3427         while ((unsigned long)cs % CACHELINE_BYTES)
3428                 *cs++ = MI_NOOP;
3429
3430         lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3431                                     i915_ggtt_offset(ce->state) +
3432                                     context_wa_bb_offset(ce),
3433                                     (cs - start) * sizeof(*cs));
3434 }
3435
3436 static void
3437 __execlists_update_reg_state(const struct intel_context *ce,
3438                              const struct intel_engine_cs *engine,
3439                              u32 head)
3440 {
3441         struct intel_ring *ring = ce->ring;
3442         u32 *regs = ce->lrc_reg_state;
3443
3444         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3445         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3446
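        /*
         * These slots in the register state are the context-image copies of
         * the RING_* registers that the hardware reloads on context restore.
         */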
3447         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3448         regs[CTX_RING_HEAD] = head;
3449         regs[CTX_RING_TAIL] = ring->tail;
3450         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3451
3452         /* RPCS */
3453         if (engine->class == RENDER_CLASS) {
3454                 regs[CTX_R_PWR_CLK_STATE] =
3455                         intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3456
3457                 i915_oa_init_reg_state(ce, engine);
3458         }
3459
3460         if (ce->wa_bb_page) {
3461                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3462
3463                 fn = gen12_emit_indirect_ctx_xcs;
3464                 if (ce->engine->class == RENDER_CLASS)
3465                         fn = gen12_emit_indirect_ctx_rcs;
3466
3467                 /* Mutually exclusive wrt to global indirect bb */
3468                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3469                 setup_indirect_ctx_bb(ce, engine, fn);
3470         }
3471 }
3472
3473 static int
3474 __execlists_context_pin(struct intel_context *ce,
3475                         struct intel_engine_cs *engine)
3476 {
3477         void *vaddr;
3478
3479         GEM_BUG_ON(!ce->state);
3480         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3481
3482         vaddr = i915_gem_object_pin_map(ce->state->obj,
3483                                         i915_coherent_map_type(engine->i915) |
3484                                         I915_MAP_OVERRIDE);
3485         if (IS_ERR(vaddr))
3486                 return PTR_ERR(vaddr);
3487
3488         ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3489         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3490         __execlists_update_reg_state(ce, engine, ce->ring->tail);
3491
3492         return 0;
3493 }
3494
3495 static int execlists_context_pin(struct intel_context *ce)
3496 {
3497         return __execlists_context_pin(ce, ce->engine);
3498 }
3499
3500 static int execlists_context_alloc(struct intel_context *ce)
3501 {
3502         return __execlists_context_alloc(ce, ce->engine);
3503 }
3504
3505 static void execlists_context_reset(struct intel_context *ce)
3506 {
3507         CE_TRACE(ce, "reset\n");
3508         GEM_BUG_ON(!intel_context_is_pinned(ce));
3509
3510         intel_ring_reset(ce->ring, ce->ring->emit);
3511
3512         /* Scrub away the garbage */
3513         execlists_init_reg_state(ce->lrc_reg_state,
3514                                  ce, ce->engine, ce->ring, true);
3515         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3516
3517         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3518 }
3519
3520 static const struct intel_context_ops execlists_context_ops = {
3521         .alloc = execlists_context_alloc,
3522
3523         .pin = execlists_context_pin,
3524         .unpin = execlists_context_unpin,
3525
3526         .enter = intel_context_enter_engine,
3527         .exit = intel_context_exit_engine,
3528
3529         .reset = execlists_context_reset,
3530         .destroy = execlists_context_destroy,
3531 };
3532
3533 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3534 {
3535         u32 *cs;
3536
3537         GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3538         if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3539                 return 0;
3540
3541         cs = intel_ring_begin(rq, 6);
3542         if (IS_ERR(cs))
3543                 return PTR_ERR(cs);
3544
3545         /*
3546          * Check if we have been preempted before we even get started.
3547          *
3548          * After this point i915_request_started() reports true, even if
3549          * we get preempted and so are no longer running.
3550          */
3551         *cs++ = MI_ARB_CHECK;
3552         *cs++ = MI_NOOP;
3553
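        /*
         * Store seqno-1 into the timeline HWSP: i915_request_started()
         * reports the request as started once the HWSP has advanced to
         * one before its own seqno.
         */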
3554         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3555         *cs++ = i915_request_timeline(rq)->hwsp_offset;
3556         *cs++ = 0;
3557         *cs++ = rq->fence.seqno - 1;
3558
3559         intel_ring_advance(rq, cs);
3560
3561         /* Record the updated position of the request's payload */
3562         rq->infix = intel_ring_offset(rq, cs);
3563
3564         __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3565
3566         return 0;
3567 }
3568
3569 static int emit_pdps(struct i915_request *rq)
3570 {
3571         const struct intel_engine_cs * const engine = rq->engine;
3572         struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3573         int err, i;
3574         u32 *cs;
3575
3576         GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3577
3578         /*
3579          * Beware ye of the dragons, this sequence is magic!
3580          *
3581          * Small changes to this sequence can cause anything from
3582          * GPU hangs to forcewake errors and machine lockups!
3583          */
3584
3585         /* Flush any residual operations from the context load */
3586         err = engine->emit_flush(rq, EMIT_FLUSH);
3587         if (err)
3588                 return err;
3589
3590         /* Magic required to prevent forcewake errors! */
3591         err = engine->emit_flush(rq, EMIT_INVALIDATE);
3592         if (err)
3593                 return err;
3594
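        /*
         * With GEN8_3LVL_PDPES (4) page directories, each programmed as an
         * upper/lower dword pair, we emit one LRI header + 16 dwords of
         * payload + a trailing MI_NOOP = 4 * GEN8_3LVL_PDPES + 2 dwords.
         */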
3595         cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3596         if (IS_ERR(cs))
3597                 return PTR_ERR(cs);
3598
3599         /* Ensure the LRI have landed before we invalidate & continue */
3600         *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3601         for (i = GEN8_3LVL_PDPES; i--; ) {
3602                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3603                 u32 base = engine->mmio_base;
3604
3605                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3606                 *cs++ = upper_32_bits(pd_daddr);
3607                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3608                 *cs++ = lower_32_bits(pd_daddr);
3609         }
3610         *cs++ = MI_NOOP;
3611
3612         intel_ring_advance(rq, cs);
3613
3614         return 0;
3615 }
3616
3617 static int execlists_request_alloc(struct i915_request *request)
3618 {
3619         int ret;
3620
3621         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3622
3623         /*
3624          * Flush enough space to reduce the likelihood of waiting after
3625          * we start building the request - in which case we will just
3626          * have to repeat work.
3627          */
3628         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3629
3630         /*
3631          * Note that after this point, we have committed to using
3632          * this request as it is being used to both track the
3633          * state of engine initialisation and liveness of the
3634          * golden renderstate above. Think twice before you try
3635          * to cancel/unwind this request now.
3636          */
3637
3638         if (!i915_vm_is_4lvl(request->context->vm)) {
3639                 ret = emit_pdps(request);
3640                 if (ret)
3641                         return ret;
3642         }
3643
3644         /* Unconditionally invalidate GPU caches and TLBs. */
3645         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3646         if (ret)
3647                 return ret;
3648
3649         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3650         return 0;
3651 }
3652
3653 /*
3654  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3655  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3656  * but there is a slight complication as this is applied in a WA batch where the
3657  * values are only initialized once, so we cannot take the register value at the
3658  * beginning and reuse it further; hence we save its value to memory, upload a
3659  * constant value with bit21 set and then we restore it back with the saved value.
3660  * To simplify the WA, a constant value is formed by using the default value
3661  * of this register. This shouldn't be a problem because we are only modifying
3662  * it for a short period and this batch is non-preemptible. We can of course
3663  * use additional instructions that read the actual value of the register
3664  * at that time and set our bit of interest but it makes the WA complicated.
3665  *
3666  * This WA is also required for Gen9 so extracting as a function avoids
3667  * code duplication.
3668  */
3669 static u32 *
3670 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3671 {
3672         /* NB no one else is allowed to scribble over scratch + 256! */
3673         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3674         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3675         *batch++ = intel_gt_scratch_offset(engine->gt,
3676                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3677         *batch++ = 0;
3678
3679         *batch++ = MI_LOAD_REGISTER_IMM(1);
3680         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3681         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3682
3683         batch = gen8_emit_pipe_control(batch,
3684                                        PIPE_CONTROL_CS_STALL |
3685                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3686                                        0);
3687
3688         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3689         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3690         *batch++ = intel_gt_scratch_offset(engine->gt,
3691                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3692         *batch++ = 0;
3693
3694         return batch;
3695 }
3696
3697 /*
3698  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3699  * initialized at the beginning and shared across all contexts, but this field
3700  * helps us to have multiple batches at different offsets and select them based
3701  * on some criteria. At the moment this batch always starts at the beginning of the page
3702  * and at this point we don't have multiple wa_ctx batch buffers.
3703  *
3704  * The number of WAs applied is not known at the beginning; we use this field
3705  * to return the number of DWORDS written.
3706  *
3707  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3708  * so it adds NOOPs as padding to make it cacheline aligned.
3709  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them
3710  * together make a complete batch buffer.
3711  */
3712 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3713 {
3714         /* WaDisableCtxRestoreArbitration:bdw,chv */
3715         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3716
3717         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3718         if (IS_BROADWELL(engine->i915))
3719                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3720
3721         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3722         /* Actual scratch location is at 128 bytes offset */
3723         batch = gen8_emit_pipe_control(batch,
3724                                        PIPE_CONTROL_FLUSH_L3 |
3725                                        PIPE_CONTROL_STORE_DATA_INDEX |
3726                                        PIPE_CONTROL_CS_STALL |
3727                                        PIPE_CONTROL_QW_WRITE,
3728                                        LRC_PPHWSP_SCRATCH_ADDR);
3729
3730         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3731
3732         /* Pad to end of cacheline */
3733         while ((unsigned long)batch % CACHELINE_BYTES)
3734                 *batch++ = MI_NOOP;
3735
3736         /*
3737          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3738          * execution depends on the length specified in terms of cache lines
3739          * in the register CTX_RCS_INDIRECT_CTX
3740          */
3741
3742         return batch;
3743 }
3744
3745 struct lri {
3746         i915_reg_t reg;
3747         u32 value;
3748 };
3749
3750 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3751 {
3752         GEM_BUG_ON(!count || count > 63);
3753
3754         *batch++ = MI_LOAD_REGISTER_IMM(count);
3755         do {
3756                 *batch++ = i915_mmio_reg_offset(lri->reg);
3757                 *batch++ = lri->value;
3758         } while (lri++, --count);
3759         *batch++ = MI_NOOP;
3760
3761         return batch;
3762 }
3763
3764 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3765 {
3766         static const struct lri lri[] = {
3767                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3768                 {
3769                         COMMON_SLICE_CHICKEN2,
3770                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3771                                        0),
3772                 },
3773
3774                 /* BSpec: 11391 */
3775                 {
3776                         FF_SLICE_CHICKEN,
3777                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3778                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3779                 },
3780
3781                 /* BSpec: 11299 */
3782                 {
3783                         _3D_CHICKEN3,
3784                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3785                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3786                 }
3787         };
3788
3789         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3790
3791         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3792         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3793
3794         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3795         batch = gen8_emit_pipe_control(batch,
3796                                        PIPE_CONTROL_FLUSH_L3 |
3797                                        PIPE_CONTROL_STORE_DATA_INDEX |
3798                                        PIPE_CONTROL_CS_STALL |
3799                                        PIPE_CONTROL_QW_WRITE,
3800                                        LRC_PPHWSP_SCRATCH_ADDR);
3801
3802         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3803
3804         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3805         if (HAS_POOLED_EU(engine->i915)) {
3806                 /*
3807                  * EU pool configuration is set up along with the golden
3808                  * context during context initialization. This value
3809                  * depends on the device type (2x6 or 3x6) and needs to be
3810                  * updated based on which subslice is disabled, especially
3811                  * for 2x6 devices. However, it is safe to load the default
3812                  * configuration of a 3x6 device instead of masking off the
3813                  * corresponding bits, because the HW ignores bits of a
3814                  * disabled subslice and drops down to the appropriate
3815                  * config. Please see render_state_setup() in
3816                  * i915_gem_render_state.c for possible configurations;
3817                  * to avoid duplication they are not shown here
3818                  * again.
3819                 *batch++ = GEN9_MEDIA_POOL_STATE;
3820                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3821                 *batch++ = 0x00777000;
3822                 *batch++ = 0;
3823                 *batch++ = 0;
3824                 *batch++ = 0;
3825         }
3826
3827         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3828
3829         /* Pad to end of cacheline */
3830         while ((unsigned long)batch % CACHELINE_BYTES)
3831                 *batch++ = MI_NOOP;
3832
3833         return batch;
3834 }
3835
3836 static u32 *
3837 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3838 {
3839         int i;
3840
3841         /*
3842          * WaPipeControlBefore3DStateSamplePattern: cnl
3843          *
3844          * Ensure the engine is idle prior to programming a
3845          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3846          */
3847         batch = gen8_emit_pipe_control(batch,
3848                                        PIPE_CONTROL_CS_STALL,
3849                                        0);
3850         /*
3851          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3852          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3853          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3854          * confusing. Since gen8_emit_pipe_control() already advances the
3855          * batch by 6 dwords, we advance the other 10 here, completing a
3856          * cacheline. It's not clear if the workaround requires this padding
3857          * before other commands, or if it's just the regular padding we would
3858          * already have for the workaround bb, so leave it here for now.
3859          */
3860         for (i = 0; i < 10; i++)
3861                 *batch++ = MI_NOOP;
3862
3863         /* Pad to end of cacheline */
3864         while ((unsigned long)batch % CACHELINE_BYTES)
3865                 *batch++ = MI_NOOP;
3866
3867         return batch;
3868 }
3869
3870 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3871
3872 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3873 {
3874         struct drm_i915_gem_object *obj;
3875         struct i915_vma *vma;
3876         int err;
3877
3878         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3879         if (IS_ERR(obj))
3880                 return PTR_ERR(obj);
3881
3882         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3883         if (IS_ERR(vma)) {
3884                 err = PTR_ERR(vma);
3885                 goto err;
3886         }
3887
3888         err = i915_ggtt_pin(vma, 0, PIN_HIGH);
3889         if (err)
3890                 goto err;
3891
3892         engine->wa_ctx.vma = vma;
3893         return 0;
3894
3895 err:
3896         i915_gem_object_put(obj);
3897         return err;
3898 }
3899
3900 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3901 {
3902         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3903 }
3904
3905 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3906
3907 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3908 {
3909         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3910         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3911                                             &wa_ctx->per_ctx };
3912         wa_bb_func_t wa_bb_fn[2];
3913         void *batch, *batch_ptr;
3914         unsigned int i;
3915         int ret;
3916
3917         if (engine->class != RENDER_CLASS)
3918                 return 0;
3919
3920         switch (INTEL_GEN(engine->i915)) {
3921         case 12:
3922         case 11:
3923                 return 0;
3924         case 10:
3925                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3926                 wa_bb_fn[1] = NULL;
3927                 break;
3928         case 9:
3929                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3930                 wa_bb_fn[1] = NULL;
3931                 break;
3932         case 8:
3933                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3934                 wa_bb_fn[1] = NULL;
3935                 break;
3936         default:
3937                 MISSING_CASE(INTEL_GEN(engine->i915));
3938                 return 0;
3939         }
3940
3941         ret = lrc_setup_wa_ctx(engine);
3942         if (ret) {
3943                 drm_dbg(&engine->i915->drm,
3944                         "Failed to setup context WA page: %d\n", ret);
3945                 return ret;
3946         }
3947
3948         batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
        if (IS_ERR(batch)) {
                lrc_destroy_wa_ctx(engine);
                return PTR_ERR(batch);
        }
3949
3950         /*
3951          * Emit the two workaround batch buffers, recording the offset from the
3952          * start of the workaround batch buffer object for each and their
3953          * respective sizes.
3954          */
3955         batch_ptr = batch;
3956         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3957                 wa_bb[i]->offset = batch_ptr - batch;
3958                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3959                                                   CACHELINE_BYTES))) {
3960                         ret = -EINVAL;
3961                         break;
3962                 }
3963                 if (wa_bb_fn[i])
3964                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3965                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3966         }
3967         GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3968
3969         __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
3970         __i915_gem_object_release_map(wa_ctx->vma->obj);
3971         if (ret)
3972                 lrc_destroy_wa_ctx(engine);
3973
3974         return ret;
3975 }
3976
3977 static void reset_csb_pointers(struct intel_engine_cs *engine)
3978 {
3979         struct intel_engine_execlists * const execlists = &engine->execlists;
3980         const unsigned int reset_value = execlists->csb_size - 1;
3981
3982         ring_set_paused(engine, 0);
3983
3984         /*
3985          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3986          * Bludgeon them with a mmio update to be sure.
3987          */
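        /*
         * RING_CONTEXT_STATUS_PTR packs the CSB read and write pointers into
         * the low word (one per byte), with their write-enable mask in the
         * high word. For example, with a 12-entry Gen11+ CSB, reset_value is
         * 11 and the write below is 0xffff0b0b, parking both pointers on the
         * final entry.
         */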
3988         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3989                      0xffff << 16 | reset_value << 8 | reset_value);
3990         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3991
3992         /*
3993          * After a reset, the HW starts writing into CSB entry [0]. We
3994          * therefore have to set our HEAD pointer back one entry so that
3995          * the *first* entry we check is entry 0. To complicate this further,
3996          * as we don't wait for the first interrupt after reset, we have to
3997          * fake the HW write to point back to the last entry so that our
3998          * inline comparison of our cached head position against the last HW
3999          * write works even before the first interrupt.
4000          */
4001         execlists->csb_head = reset_value;
4002         WRITE_ONCE(*execlists->csb_write, reset_value);
4003         wmb(); /* Make sure this is visible to HW (paranoia?) */
4004
4005         invalidate_csb_entries(&execlists->csb_status[0],
4006                                &execlists->csb_status[reset_value]);
4007
4008         /* Once more for luck and our trusty paranoia */
4009         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4010                      0xffff << 16 | reset_value << 8 | reset_value);
4011         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4012
4013         GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4014 }
4015
4016 static void execlists_sanitize(struct intel_engine_cs *engine)
4017 {
4018         /*
4019          * Poison residual state on resume, in case the suspend didn't!
4020          *
4021          * We have to assume that across suspend/resume (or other loss
4022          * of control) that the contents of our pinned buffers have been
4023          * lost, replaced by garbage. Since this doesn't always happen,
4024          * let's poison such state so that we more quickly spot when
4025          * we falsely assume it has been preserved.
4026          */
4027         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4028                 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4029
4030         reset_csb_pointers(engine);
4031
4032         /*
4033          * The kernel_context HWSP is stored in the status_page. As above,
4034          * that may be lost on resume/initialisation, and so we need to
4035          * reset the value in the HWSP.
4036          */
4037         intel_timeline_reset_seqno(engine->kernel_context->timeline);
4038
4039         /* And scrub the dirty cachelines for the HWSP */
4040         clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4041 }
4042
4043 static void enable_error_interrupt(struct intel_engine_cs *engine)
4044 {
4045         u32 status;
4046
4047         engine->execlists.error_interrupt = 0;
4048         ENGINE_WRITE(engine, RING_EMR, ~0u);
4049         ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4050
4051         status = ENGINE_READ(engine, RING_ESR);
4052         if (unlikely(status)) {
4053                 drm_err(&engine->i915->drm,
4054                         "engine '%s' resumed still in error: %08x\n",
4055                         engine->name, status);
4056                 __intel_gt_reset(engine->gt, engine->mask);
4057         }
4058
4059         /*
4060          * On current gen8+, we have 2 signals to play with
4061          *
4062          * - I915_ERROR_INSTRUCTION (bit 0)
4063          *
4064          *    Generate an error if the command parser encounters an invalid
4065          *    instruction
4066          *
4067          *    This is a fatal error.
4068          *
4069          * - CP_PRIV (bit 2)
4070          *
4071          *    Generate an error on privilege violation (where the CP replaces
4072          *    the instruction with a no-op). This also fires for writes into
4073          *    read-only scratch pages.
4074          *
4075          *    This is a non-fatal error, parsing continues.
4076          *
4077          * * there are a few others defined for odd HW that we do not use
4078          *
4079          * Since CP_PRIV fires for cases where we have chosen to ignore the
4080          * error (as the HW is validating and suppressing the mistakes), we
4081          * only unmask the instruction error bit.
4082          */
4083         ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4084 }
4085
4086 static void enable_execlists(struct intel_engine_cs *engine)
4087 {
4088         u32 mode;
4089
4090         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4091
4092         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4093
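        /*
         * Gen11+ selects execlists by disabling the legacy ringbuffer mode;
         * earlier gens explicitly enable the execlist run list instead.
         */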
4094         if (INTEL_GEN(engine->i915) >= 11)
4095                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4096         else
4097                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4098         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4099
4100         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4101
4102         ENGINE_WRITE_FW(engine,
4103                         RING_HWS_PGA,
4104                         i915_ggtt_offset(engine->status_page.vma));
4105         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4106
4107         enable_error_interrupt(engine);
4108
4109         engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4110 }
4111
4112 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4113 {
4114         bool unexpected = false;
4115
4116         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4117                 drm_dbg(&engine->i915->drm,
4118                         "STOP_RING still set in RING_MI_MODE\n");
4119                 unexpected = true;
4120         }
4121
4122         return unexpected;
4123 }
4124
4125 static int execlists_resume(struct intel_engine_cs *engine)
4126 {
4127         intel_mocs_init_engine(engine);
4128
4129         intel_engine_reset_breadcrumbs(engine);
4130
4131         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4132                 struct drm_printer p = drm_debug_printer(__func__);
4133
4134                 intel_engine_dump(engine, &p, NULL);
4135         }
4136
4137         enable_execlists(engine);
4138
4139         return 0;
4140 }
4141
4142 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4143 {
4144         struct intel_engine_execlists * const execlists = &engine->execlists;
4145         unsigned long flags;
4146
4147         ENGINE_TRACE(engine, "depth<-%d\n",
4148                      atomic_read(&execlists->tasklet.count));
4149
4150         /*
4151          * Prevent request submission to the hardware until we have
4152          * completed the reset in i915_gem_reset_finish(). If a request
4153          * is completed by one engine, it may then queue a request
4154          * to a second via its execlists->tasklet *just* as we are
4155          * calling engine->resume() and also writing the ELSP.
4156          * Turning off the execlists->tasklet until the reset is over
4157          * prevents the race.
4158          */
4159         __tasklet_disable_sync_once(&execlists->tasklet);
4160         GEM_BUG_ON(!reset_in_progress(execlists));
4161
4162         /* And flush any current direct submission. */
4163         spin_lock_irqsave(&engine->active.lock, flags);
4164         spin_unlock_irqrestore(&engine->active.lock, flags);
4165
4166         /*
4167          * We stop engines, otherwise we might get failed reset and a
4168          * dead gpu (on elk). Also a gpu as modern as kbl can suffer
4169          * from a system hang if a batchbuffer is progressing when
4170          * the reset is issued, regardless of READY_TO_RESET ack.
4171          * Thus assume it is best to stop engines on all gens
4172          * where we have a gpu reset.
4173          *
4174          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4175          *
4176          * FIXME: Wa for more modern gens needs to be validated
4177          */
4178         ring_set_paused(engine, 1);
4179         intel_engine_stop_cs(engine);
4180
4181         engine->execlists.reset_ccid = active_ccid(engine);
4182 }
4183
4184 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4185 {
4186         int x;
4187
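        /*
         * RING_MI_MODE is a masked register (the upper 16 bits select which
         * of the lower 16 bits a write updates). Clearing the STOP_RING value
         * bit while setting its mask bit in the context image ensures
         * STOP_RING is cleared when this context is next restored.
         */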
4188         x = lrc_ring_mi_mode(engine);
4189         if (x != -1) {
4190                 regs[x + 1] &= ~STOP_RING;
4191                 regs[x + 1] |= STOP_RING << 16;
4192         }
4193 }
4194
4195 static void __execlists_reset_reg_state(const struct intel_context *ce,
4196                                         const struct intel_engine_cs *engine)
4197 {
4198         u32 *regs = ce->lrc_reg_state;
4199
4200         __reset_stop_ring(regs, engine);
4201 }
4202
4203 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4204 {
4205         struct intel_engine_execlists * const execlists = &engine->execlists;
4206         struct intel_context *ce;
4207         struct i915_request *rq;
4208         u32 head;
4209
4210         mb(); /* paranoia: read the CSB pointers from after the reset */
4211         clflush(execlists->csb_write);
4212         mb();
4213
4214         process_csb(engine); /* drain preemption events */
4215
4216         /* Following the reset, we need to reload the CSB read/write pointers */
4217         reset_csb_pointers(engine);
4218
4219         /*
4220          * Save the currently executing context, even if we completed
4221          * its request, it was still running at the time of the
4222          * reset and will have been clobbered.
4223          */
4224         rq = active_context(engine, engine->execlists.reset_ccid);
4225         if (!rq)
4226                 goto unwind;
4227
4228         ce = rq->context;
4229         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4230
4231         if (i915_request_completed(rq)) {
4232                 /* Idle context; tidy up the ring so we can restart afresh */
4233                 head = intel_ring_wrap(ce->ring, rq->tail);
4234                 goto out_replay;
4235         }
4236
4237         /* We still have requests in-flight; the engine should be active */
4238         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4239
4240         /* Context has requests still in-flight; it should not be idle! */
4241         GEM_BUG_ON(i915_active_is_idle(&ce->active));
4242
4243         rq = active_request(ce->timeline, rq);
4244         head = intel_ring_wrap(ce->ring, rq->head);
4245         GEM_BUG_ON(head == ce->ring->tail);
4246
4247         /*
4248          * If this request hasn't started yet, e.g. it is waiting on a
4249          * semaphore, we need to avoid skipping the request or else we
4250          * break the signaling chain. However, if the context is corrupt
4251          * the request will not restart and we will be stuck with a wedged
4252          * device. It is quite often the case that if we issue a reset
4253          * while the GPU is loading the context image, the context
4254          * image becomes corrupt.
4255          *
4256          * Otherwise, if we have not started yet, the request should replay
4257          * perfectly and we do not need to flag the result as being erroneous.
4258          */
4259         if (!i915_request_started(rq))
4260                 goto out_replay;
4261
4262         /*
4263          * If the request was innocent, we leave the request in the ELSP
4264          * and will try to replay it on restarting. The context image may
4265          * have been corrupted by the reset, in which case we may have
4266          * to service a new GPU hang, but more likely we can continue on
4267          * without impact.
4268          *
4269          * If the request was guilty, we presume the context is corrupt
4270          * and have to at least restore the RING register in the context
4271          * image back to the expected values to skip over the guilty request.
4272          */
4273         __i915_request_reset(rq, stalled);
4274
4275         /*
4276          * We want a simple context + ring to execute the breadcrumb update.
4277          * We cannot rely on the context being intact across the GPU hang,
4278          * so clear it and rebuild just what we need for the breadcrumb.
4279          * All pending requests for this context will be zapped, and any
4280          * future request will come after userspace has had the opportunity
4281          * to recreate its own state.
4282          */
4283 out_replay:
4284         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4285                      head, ce->ring->tail);
4286         __execlists_reset_reg_state(ce, engine);
4287         __execlists_update_reg_state(ce, engine, head);
4288         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4289
4290 unwind:
4291         /* Push back any incomplete requests for replay after the reset. */
4292         cancel_port_requests(execlists);
4293         __unwind_incomplete_requests(engine);
4294 }
4295
4296 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4297 {
4298         unsigned long flags;
4299
4300         ENGINE_TRACE(engine, "\n");
4301
4302         spin_lock_irqsave(&engine->active.lock, flags);
4303
4304         __execlists_reset(engine, stalled);
4305
4306         spin_unlock_irqrestore(&engine->active.lock, flags);
4307 }
4308
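/*
 * Once the driver is wedged, execlists_reset_cancel() points the submission
 * tasklet at this nop so that no further ELSP writes are attempted.
 */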
4309 static void nop_submission_tasklet(unsigned long data)
4310 {
4311         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4312
4313         /* The driver is wedged; don't process any more events. */
4314         WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4315 }
4316
4317 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4318 {
4319         struct intel_engine_execlists * const execlists = &engine->execlists;
4320         struct i915_request *rq, *rn;
4321         struct rb_node *rb;
4322         unsigned long flags;
4323
4324         ENGINE_TRACE(engine, "\n");
4325
4326         /*
4327          * Before we call engine->cancel_requests(), we should have exclusive
4328          * access to the submission state. This is arranged for us by the
4329          * caller disabling the interrupt generation, the tasklet and other
4330          * threads that may then access the same state, giving us a free hand
4331          * to reset state. However, we still need to let lockdep know that
4332          * this state may be accessed in hardirq context, so we
4333          * disable irqs around this manipulation. We also want to keep
4334          * the spinlock focused on its duties and not accidentally extend
4335          * its coverage to the submission's irq state. (Similarly, although we
4336          * shouldn't need to disable irq around the manipulation of the
4337          * submission's irq state, we also wish to remind ourselves that
4338          * it is irq state.)
4339          */
4340         spin_lock_irqsave(&engine->active.lock, flags);
4341
4342         __execlists_reset(engine, true);
4343
4344         /* Mark all executing requests as skipped. */
4345         list_for_each_entry(rq, &engine->active.requests, sched.link)
4346                 mark_eio(rq);
4347
4348         /* Flush the queued requests to the timeline list (for retiring). */
4349         while ((rb = rb_first_cached(&execlists->queue))) {
4350                 struct i915_priolist *p = to_priolist(rb);
4351                 int i;
4352
4353                 priolist_for_each_request_consume(rq, rn, p, i) {
4354                         mark_eio(rq);
4355                         __i915_request_submit(rq);
4356                 }
4357
4358                 rb_erase_cached(&p->node, &execlists->queue);
4359                 i915_priolist_free(p);
4360         }
4361
4362         /* On-hold requests will be flushed to timeline upon their release */
4363         list_for_each_entry(rq, &engine->active.hold, sched.link)
4364                 mark_eio(rq);
4365
4366         /* Cancel all attached virtual engines */
4367         while ((rb = rb_first_cached(&execlists->virtual))) {
4368                 struct virtual_engine *ve =
4369                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4370
4371                 rb_erase_cached(rb, &execlists->virtual);
4372                 RB_CLEAR_NODE(rb);
4373
4374                 spin_lock(&ve->base.active.lock);
4375                 rq = fetch_and_zero(&ve->request);
4376                 if (rq) {
4377                         mark_eio(rq);
4378
4379                         rq->engine = engine;
4380                         __i915_request_submit(rq);
4381                         i915_request_put(rq);
4382
4383                         ve->base.execlists.queue_priority_hint = INT_MIN;
4384                 }
4385                 spin_unlock(&ve->base.active.lock);
4386         }
4387
4388         /* Remaining _unready_ requests will be nop'ed when submitted */
4389
4390         execlists->queue_priority_hint = INT_MIN;
4391         execlists->queue = RB_ROOT_CACHED;
4392
4393         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4394         execlists->tasklet.func = nop_submission_tasklet;
4395
4396         spin_unlock_irqrestore(&engine->active.lock, flags);
4397 }
4398
4399 static void execlists_reset_finish(struct intel_engine_cs *engine)
4400 {
4401         struct intel_engine_execlists * const execlists = &engine->execlists;
4402
4403         /*
4404          * After a GPU reset, we may have requests to replay. Do so now while
4405          * we still have the forcewake to be sure that the GPU is not allowed
4406          * to sleep before we restart and reload a context.
4407          */
4408         GEM_BUG_ON(!reset_in_progress(execlists));
4409         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4410                 execlists->tasklet.func(execlists->tasklet.data);
4411
4412         if (__tasklet_enable(&execlists->tasklet))
4413                 /* And kick in case we missed a new request submission. */
4414                 tasklet_hi_schedule(&execlists->tasklet);
4415         ENGINE_TRACE(engine, "depth->%d\n",
4416                      atomic_read(&execlists->tasklet.count));
4417 }
4418
4419 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4420                                     u64 offset, u32 len,
4421                                     const unsigned int flags)
4422 {
4423         u32 *cs;
4424
4425         cs = intel_ring_begin(rq, 4);
4426         if (IS_ERR(cs))
4427                 return PTR_ERR(cs);
4428
4429         /*
4430          * WaDisableCtxRestoreArbitration:bdw,chv
4431          *
4432          * We don't need to perform MI_ARB_ENABLE as often as we do (in
4433          * particular all the gens that do not need the w/a at all!), if we
4434          * took care to make sure that on every switch into this context
4435          * (both ordinary and for preemption) arbitration was enabled,
4436          * we would be fine.  However, for gen8 there is another w/a that
4437          * requires us to not preempt inside GPGPU execution, so we keep
4438          * arbitration disabled for gen8 batches. Arbitration will be
4439          * re-enabled before we close the request
4440          * (engine->emit_fini_breadcrumb).
4441          */
4442         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4443
4444         /* FIXME(BDW+): Address space and security selectors. */
4445         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4446                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4447         *cs++ = lower_32_bits(offset);
4448         *cs++ = upper_32_bits(offset);
4449
4450         intel_ring_advance(rq, cs);
4451
4452         return 0;
4453 }
4454
4455 static int gen8_emit_bb_start(struct i915_request *rq,
4456                               u64 offset, u32 len,
4457                               const unsigned int flags)
4458 {
4459         u32 *cs;
4460
4461         cs = intel_ring_begin(rq, 6);
4462         if (IS_ERR(cs))
4463                 return PTR_ERR(cs);
4464
4465         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4466
4467         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4468                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4469         *cs++ = lower_32_bits(offset);
4470         *cs++ = upper_32_bits(offset);
4471
4472         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4473         *cs++ = MI_NOOP;
4474
4475         intel_ring_advance(rq, cs);
4476
4477         return 0;
4478 }
4479
4480 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4481 {
4482         ENGINE_WRITE(engine, RING_IMR,
4483                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
4484         ENGINE_POSTING_READ(engine, RING_IMR);
4485 }
4486
4487 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4488 {
4489         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4490 }
4491
4492 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4493 {
4494         u32 cmd, *cs;
4495
4496         cs = intel_ring_begin(request, 4);
4497         if (IS_ERR(cs))
4498                 return PTR_ERR(cs);
4499
4500         cmd = MI_FLUSH_DW + 1;
4501
4502         /* We always require a command barrier so that subsequent
4503          * commands, such as breadcrumb interrupts, are strictly ordered
4504          * wrt the contents of the write cache being flushed to memory
4505          * (and thus being coherent from the CPU).
4506          */
4507         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4508
4509         if (mode & EMIT_INVALIDATE) {
4510                 cmd |= MI_INVALIDATE_TLB;
4511                 if (request->engine->class == VIDEO_DECODE_CLASS)
4512                         cmd |= MI_INVALIDATE_BSD;
4513         }
4514
4515         *cs++ = cmd;
4516         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4517         *cs++ = 0; /* upper addr */
4518         *cs++ = 0; /* value */
4519         intel_ring_advance(request, cs);
4520
4521         return 0;
4522 }
4523
4524 static int gen8_emit_flush_render(struct i915_request *request,
4525                                   u32 mode)
4526 {
4527         bool vf_flush_wa = false, dc_flush_wa = false;
4528         u32 *cs, flags = 0;
4529         int len;
4530
4531         flags |= PIPE_CONTROL_CS_STALL;
4532
4533         if (mode & EMIT_FLUSH) {
4534                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4535                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4536                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4537                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4538         }
4539
4540         if (mode & EMIT_INVALIDATE) {
4541                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4542                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4543                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4544                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4545                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4546                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4547                 flags |= PIPE_CONTROL_QW_WRITE;
4548                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4549
4550                 /*
4551                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4552                  * pipe control.
4553                  */
4554                 if (IS_GEN(request->engine->i915, 9))
4555                         vf_flush_wa = true;
4556
4557                 /* WaForGAMHang:kbl */
4558                 if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0))
4559                         dc_flush_wa = true;
4560         }
4561
4562         len = 6;
4563
4564         if (vf_flush_wa)
4565                 len += 6;
4566
4567         if (dc_flush_wa)
4568                 len += 12;
4569
4570         cs = intel_ring_begin(request, len);
4571         if (IS_ERR(cs))
4572                 return PTR_ERR(cs);
4573
4574         if (vf_flush_wa)
4575                 cs = gen8_emit_pipe_control(cs, 0, 0);
4576
4577         if (dc_flush_wa)
4578                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4579                                             0);
4580
4581         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4582
4583         if (dc_flush_wa)
4584                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4585
4586         intel_ring_advance(request, cs);
4587
4588         return 0;
4589 }
4590
4591 static int gen11_emit_flush_render(struct i915_request *request,
4592                                    u32 mode)
4593 {
4594         if (mode & EMIT_FLUSH) {
4595                 u32 *cs;
4596                 u32 flags = 0;
4597
4598                 flags |= PIPE_CONTROL_CS_STALL;
4599
4600                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4601                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4602                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4603                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4604                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4605                 flags |= PIPE_CONTROL_QW_WRITE;
4606                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4607
4608                 cs = intel_ring_begin(request, 6);
4609                 if (IS_ERR(cs))
4610                         return PTR_ERR(cs);
4611
4612                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4613                 intel_ring_advance(request, cs);
4614         }
4615
4616         if (mode & EMIT_INVALIDATE) {
4617                 u32 *cs;
4618                 u32 flags = 0;
4619
4620                 flags |= PIPE_CONTROL_CS_STALL;
4621
4622                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4623                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4624                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4625                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4626                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4627                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4628                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4629                 flags |= PIPE_CONTROL_QW_WRITE;
4630                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4631
4632                 cs = intel_ring_begin(request, 6);
4633                 if (IS_ERR(cs))
4634                         return PTR_ERR(cs);
4635
4636                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4637                 intel_ring_advance(request, cs);
4638         }
4639
4640         return 0;
4641 }
4642
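/*
 * Encode the gen12 pre-parser toggle as an MI_ARB_CHECK: bit 8 selects the
 * pre-parser control and bit 0 carries the new disable state. See the
 * bracketing around the TLB invalidation in gen12_emit_flush_render().
 */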
4643 static u32 preparser_disable(bool state)
4644 {
4645         return MI_ARB_CHECK | 1 << 8 | state;
4646 }
4647
4648 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4649 {
4650         static const i915_reg_t vd[] = {
4651                 GEN12_VD0_AUX_NV,
4652                 GEN12_VD1_AUX_NV,
4653                 GEN12_VD2_AUX_NV,
4654                 GEN12_VD3_AUX_NV,
4655         };
4656
4657         static const i915_reg_t ve[] = {
4658                 GEN12_VE0_AUX_NV,
4659                 GEN12_VE1_AUX_NV,
4660         };
4661
4662         if (engine->class == VIDEO_DECODE_CLASS)
4663                 return vd[engine->instance];
4664
4665         if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4666                 return ve[engine->instance];
4667
4668         GEM_BUG_ON("unknown aux_inv_reg\n");
4669
4670         return INVALID_MMIO_REG;
4671 }
4672
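/*
 * Emit a one-register LRI that writes AUX_INV into the engine's AUX_NV
 * register, invalidating its AUX table (hsdes: 1809175790).
 */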
4673 static u32 *
4674 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4675 {
4676         *cs++ = MI_LOAD_REGISTER_IMM(1);
4677         *cs++ = i915_mmio_reg_offset(inv_reg);
4678         *cs++ = AUX_INV;
4679         *cs++ = MI_NOOP;
4680
4681         return cs;
4682 }
4683
4684 static int gen12_emit_flush_render(struct i915_request *request,
4685                                    u32 mode)
4686 {
4687         if (mode & EMIT_FLUSH) {
4688                 u32 flags = 0;
4689                 u32 *cs;
4690
4691                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4692                 flags |= PIPE_CONTROL_FLUSH_L3;
4693                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4694                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4695                 /* Wa_1409600907:tgl */
4696                 flags |= PIPE_CONTROL_DEPTH_STALL;
4697                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4698                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4699
4700                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4701                 flags |= PIPE_CONTROL_QW_WRITE;
4702
4703                 flags |= PIPE_CONTROL_CS_STALL;
4704
4705                 cs = intel_ring_begin(request, 6);
4706                 if (IS_ERR(cs))
4707                         return PTR_ERR(cs);
4708
4709                 cs = gen12_emit_pipe_control(cs,
4710                                              PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4711                                              flags, LRC_PPHWSP_SCRATCH_ADDR);
4712                 intel_ring_advance(request, cs);
4713         }
4714
4715         if (mode & EMIT_INVALIDATE) {
4716                 u32 flags = 0;
4717                 u32 *cs;
4718
4719                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4720                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4721                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4722                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4723                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4724                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4725                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4726
4727                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4728                 flags |= PIPE_CONTROL_QW_WRITE;
4729
4730                 flags |= PIPE_CONTROL_CS_STALL;
4731
4732                 cs = intel_ring_begin(request, 8 + 4);
4733                 if (IS_ERR(cs))
4734                         return PTR_ERR(cs);
4735
4736                 /*
4737                  * Prevent the pre-parser from skipping past the TLB
4738                  * invalidate and loading a stale page for the batch
4739                  * buffer / request payload.
4740                  */
4741                 *cs++ = preparser_disable(true);
4742
4743                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4744
4745                 /* hsdes: 1809175790 */
4746                 cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4747
4748                 *cs++ = preparser_disable(false);
4749                 intel_ring_advance(request, cs);
4750         }
4751
4752         return 0;
4753 }
4754
4755 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4756 {
4757         intel_engine_mask_t aux_inv = 0;
4758         u32 cmd, *cs;
4759
4760         if (mode & EMIT_INVALIDATE)
4761                 aux_inv = request->engine->mask & ~BIT(BCS0);
4762
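        /*
         * 4 dwords for the MI_FLUSH_DW, plus, if any engines need an AUX
         * invalidation, an LRI header and trailing MI_NOOP (2 dwords) and
         * one (register, AUX_INV) pair per engine.
         */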
4763         cs = intel_ring_begin(request,
4764                               4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
4765         if (IS_ERR(cs))
4766                 return PTR_ERR(cs);
4767
4768         cmd = MI_FLUSH_DW + 1;
4769
4770         /* We always require a command barrier so that subsequent
4771          * commands, such as breadcrumb interrupts, are strictly ordered
4772          * wrt the contents of the write cache being flushed to memory
4773          * (and thus being coherent from the CPU).
4774          */
4775         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4776
4777         if (mode & EMIT_INVALIDATE) {
4778                 cmd |= MI_INVALIDATE_TLB;
4779                 if (request->engine->class == VIDEO_DECODE_CLASS)
4780                         cmd |= MI_INVALIDATE_BSD;
4781         }
4782
4783         *cs++ = cmd;
4784         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4785         *cs++ = 0; /* upper addr */
4786         *cs++ = 0; /* value */
4787
4788         if (aux_inv) { /* hsdes: 1809175790 */
4789                 struct intel_engine_cs *engine;
4790                 unsigned int tmp;
4791
4792                 *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4793                 for_each_engine_masked(engine, request->engine->gt,
4794                                        aux_inv, tmp) {
4795                         *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4796                         *cs++ = AUX_INV;
4797                 }
4798                 *cs++ = MI_NOOP;
4799         }
4800         intel_ring_advance(request, cs);
4801
4802         return 0;
4803 }
4804
4805 static void assert_request_valid(struct i915_request *rq)
4806 {
4807         struct intel_ring *ring __maybe_unused = rq->ring;
4808
4809         /* Can we unwind this request without appearing to go forwards? */
4810         GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4811 }
4812
4813 /*
4814  * Reserve space for 2 NOOPs at the end of each request to be
4815  * used as a workaround for not being allowed to do lite
4816  * restore with HEAD==TAIL (WaIdleLiteRestore).
4817  */
4818 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4819 {
4820         /* Ensure there's always at least one preemption point per-request. */
4821         *cs++ = MI_ARB_CHECK;
4822         *cs++ = MI_NOOP;
4823         request->wa_tail = intel_ring_offset(request, cs);
4824
4825         /* Check that entire request is less than half the ring */
4826         assert_request_valid(request);
4827
4828         return cs;
4829 }
4830
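/*
 * Emit the preempt-to-busy semaphore: the CS polls the engine's preempt
 * slot in the HWSP and only proceeds once it reads back zero, i.e. once
 * ring_set_paused() has released it.
 */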
4831 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4832 {
4833         *cs++ = MI_SEMAPHORE_WAIT |
4834                 MI_SEMAPHORE_GLOBAL_GTT |
4835                 MI_SEMAPHORE_POLL |
4836                 MI_SEMAPHORE_SAD_EQ_SDD;
4837         *cs++ = 0;
4838         *cs++ = intel_hws_preempt_address(request->engine);
4839         *cs++ = 0;
4840
4841         return cs;
4842 }
4843
4844 static __always_inline u32*
4845 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4846 {
4847         *cs++ = MI_USER_INTERRUPT;
4848
4849         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4850         if (intel_engine_has_semaphores(request->engine))
4851                 cs = emit_preempt_busywait(request, cs);
4852
4853         request->tail = intel_ring_offset(request, cs);
4854         assert_ring_tail_valid(request->ring, request->tail);
4855
4856         return gen8_emit_wa_tail(request, cs);
4857 }
4858
4859 static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4860 {
4861         u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4862
4863         return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4864 }
4865
4866 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4867 {
4868         return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4869 }
4870
4871 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4872 {
4873         cs = gen8_emit_pipe_control(cs,
4874                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4875                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4876                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4877                                     0);
4878
4879         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4880         cs = gen8_emit_ggtt_write_rcs(cs,
4881                                       request->fence.seqno,
4882                                       i915_request_active_timeline(request)->hwsp_offset,
4883                                       PIPE_CONTROL_FLUSH_ENABLE |
4884                                       PIPE_CONTROL_CS_STALL);
4885
4886         return gen8_emit_fini_breadcrumb_tail(request, cs);
4887 }
4888
4889 static u32 *
4890 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4891 {
4892         cs = gen8_emit_ggtt_write_rcs(cs,
4893                                       request->fence.seqno,
4894                                       i915_request_active_timeline(request)->hwsp_offset,
4895                                       PIPE_CONTROL_CS_STALL |
4896                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4897                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4898                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4899                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4900                                       PIPE_CONTROL_FLUSH_ENABLE);
4901
4902         return gen8_emit_fini_breadcrumb_tail(request, cs);
4903 }
4904
4905 /*
4906  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4907  * flush and will continue pre-fetching the instructions after it before the
4908  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4909  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4910  * of the next request before the memory has been flushed, we're guaranteed that
4911  * we won't access the batch itself too early.
4912  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4913  * so, if the current request is modifying an instruction in the next request on
4914  * the same intel_context, we might pre-fetch and then execute the pre-update
4915  * instruction. To avoid this, the users of self-modifying code should either
4916  * disable the parser around the code emitting the memory writes, via a new flag
4917  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4918  * the in-kernel use-cases we've opted to use a separate context, see
4919  * reloc_gpu() as an example.
4920  * All the above applies only to the instructions themselves. Non-inline data
4921  * used by the instructions is not pre-fetched.
4922  */
4923
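
/*
 * Illustrative only (not a path taken here): a gen12 emitter of
 * self-modifying code would bracket its memory writes with the helper
 * above, e.g.
 *
 *        *cs++ = preparser_disable(true);
 *        ... emit the writes that patch the following request ...
 *        *cs++ = preparser_disable(false);
 *
 * which is how gen12_emit_flush_render() protects its TLB invalidation.
 */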
4924 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4925 {
4926         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4927                 MI_SEMAPHORE_GLOBAL_GTT |
4928                 MI_SEMAPHORE_POLL |
4929                 MI_SEMAPHORE_SAD_EQ_SDD;
4930         *cs++ = 0;
4931         *cs++ = intel_hws_preempt_address(request->engine);
4932         *cs++ = 0;
4933         *cs++ = 0;
4934         *cs++ = MI_NOOP;
4935
4936         return cs;
4937 }
4938
4939 static __always_inline u32*
4940 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4941 {
4942         *cs++ = MI_USER_INTERRUPT;
4943
4944         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4945         if (intel_engine_has_semaphores(request->engine))
4946                 cs = gen12_emit_preempt_busywait(request, cs);
4947
4948         request->tail = intel_ring_offset(request, cs);
4949         assert_ring_tail_valid(request->ring, request->tail);
4950
4951         return gen8_emit_wa_tail(request, cs);
4952 }
4953
4954 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4955 {
4956         return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4957 }
4958
4959 static u32 *
4960 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4961 {
4962         cs = gen12_emit_ggtt_write_rcs(cs,
4963                                        request->fence.seqno,
4964                                        i915_request_active_timeline(request)->hwsp_offset,
4965                                        PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4966                                        PIPE_CONTROL_CS_STALL |
4967                                        PIPE_CONTROL_TILE_CACHE_FLUSH |
4968                                        PIPE_CONTROL_FLUSH_L3 |
4969                                        PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4970                                        PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4971                                        /* Wa_1409600907:tgl */
4972                                        PIPE_CONTROL_DEPTH_STALL |
4973                                        PIPE_CONTROL_DC_FLUSH_ENABLE |
4974                                        PIPE_CONTROL_FLUSH_ENABLE);
4975
4976         return gen12_emit_fini_breadcrumb_tail(request, cs);
4977 }
4978
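/*
 * On parking (engine idling) stop the timeslice and preempt-timeout timers;
 * they are re-armed as needed when requests are next submitted.
 */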
4979 static void execlists_park(struct intel_engine_cs *engine)
4980 {
4981         cancel_timer(&engine->execlists.timer);
4982         cancel_timer(&engine->execlists.preempt);
4983 }
4984
4985 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4986 {
4987         engine->submit_request = execlists_submit_request;
4988         engine->schedule = i915_schedule;
4989         engine->execlists.tasklet.func = execlists_submission_tasklet;
4990
4991         engine->reset.prepare = execlists_reset_prepare;
4992         engine->reset.rewind = execlists_reset_rewind;
4993         engine->reset.cancel = execlists_reset_cancel;
4994         engine->reset.finish = execlists_reset_finish;
4995
4996         engine->park = execlists_park;
4997         engine->unpark = NULL;
4998
4999         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5000         if (!intel_vgpu_active(engine->i915)) {
5001                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5002                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5003                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5004                         if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5005                                 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5006                 }
5007         }
5008
5009         if (INTEL_GEN(engine->i915) >= 12)
5010                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5011
5012         if (intel_engine_has_preemption(engine))
5013                 engine->emit_bb_start = gen8_emit_bb_start;
5014         else
5015                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
5016 }
5017
5018 static void execlists_shutdown(struct intel_engine_cs *engine)
5019 {
5020         /* Synchronise with residual timers and any softirq they raise */
5021         del_timer_sync(&engine->execlists.timer);
5022         del_timer_sync(&engine->execlists.preempt);
5023         tasklet_kill(&engine->execlists.tasklet);
5024 }
5025
5026 static void execlists_release(struct intel_engine_cs *engine)
5027 {
5028         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5029
5030         execlists_shutdown(engine);
5031
5032         intel_engine_cleanup_common(engine);
5033         lrc_destroy_wa_ctx(engine);
5034 }
5035
5036 static void
5037 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5038 {
5039         /* Default vfuncs which can be overridden by each engine. */
5040
5041         engine->resume = execlists_resume;
5042
5043         engine->cops = &execlists_context_ops;
5044         engine->request_alloc = execlists_request_alloc;
5045
5046         engine->emit_flush = gen8_emit_flush;
5047         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5048         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5049         if (INTEL_GEN(engine->i915) >= 12) {
5050                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5051                 engine->emit_flush = gen12_emit_flush;
5052         }
5053         engine->set_default_submission = intel_execlists_set_default_submission;
5054
5055         if (INTEL_GEN(engine->i915) < 11) {
5056                 engine->irq_enable = gen8_logical_ring_enable_irq;
5057                 engine->irq_disable = gen8_logical_ring_disable_irq;
5058         } else {
5059                 /*
5060                  * TODO: On Gen11 interrupt masks need to be clear
5061                  * to allow C6 entry. Keep interrupts enabled
5062                  * and take the hit of generating extra interrupts
5063                  * until a more refined solution exists.
5064                  */
5065         }
5066 }
5067
5068 static inline void
5069 logical_ring_default_irqs(struct intel_engine_cs *engine)
5070 {
5071         unsigned int shift = 0;
5072
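        /*
         * Gen11+ exposes per-class/instance interrupt registers, so the
         * per-engine bit shift is only required on gen8/gen9.
         */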
5073         if (INTEL_GEN(engine->i915) < 11) {
5074                 const u8 irq_shifts[] = {
5075                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
5076                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
5077                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5078                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5079                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
5080                 };
5081
5082                 shift = irq_shifts[engine->id];
5083         }
5084
5085         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5086         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5087         engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5088         engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5089 }
5090
5091 static void rcs_submission_override(struct intel_engine_cs *engine)
5092 {
5093         switch (INTEL_GEN(engine->i915)) {
5094         case 12:
5095                 engine->emit_flush = gen12_emit_flush_render;
5096                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5097                 break;
5098         case 11:
5099                 engine->emit_flush = gen11_emit_flush_render;
5100                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5101                 break;
5102         default:
5103                 engine->emit_flush = gen8_emit_flush_render;
5104                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5105                 break;
5106         }
5107 }
5108
5109 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5110 {
5111         struct intel_engine_execlists * const execlists = &engine->execlists;
5112         struct drm_i915_private *i915 = engine->i915;
5113         struct intel_uncore *uncore = engine->uncore;
5114         u32 base = engine->mmio_base;
5115
5116         tasklet_init(&engine->execlists.tasklet,
5117                      execlists_submission_tasklet, (unsigned long)engine);
5118         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5119         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5120
5121         logical_ring_default_vfuncs(engine);
5122         logical_ring_default_irqs(engine);
5123
5124         if (engine->class == RENDER_CLASS)
5125                 rcs_submission_override(engine);
5126
5127         if (intel_init_workaround_bb(engine))
5128                 /*
5129                  * We continue even if we fail to initialize the WA batch,
5130                  * because we only expect rare glitches and nothing
5131                  * critical that would prevent us from using the GPU.
5132                  */
5133                 drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5134
5135         if (HAS_LOGICAL_RING_ELSQ(i915)) {
5136                 execlists->submit_reg = uncore->regs +
5137                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5138                 execlists->ctrl_reg = uncore->regs +
5139                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5140         } else {
5141                 execlists->submit_reg = uncore->regs +
5142                         i915_mmio_reg_offset(RING_ELSP(base));
5143         }
5144
5145         execlists->csb_status =
5146                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5147
5148         execlists->csb_write =
5149                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5150
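        /* The CSB ring grew from 6 entries on gen8/9 to 12 on gen11+. */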
5151         if (INTEL_GEN(i915) < 11)
5152                 execlists->csb_size = GEN8_CSB_ENTRIES;
5153         else
5154                 execlists->csb_size = GEN11_CSB_ENTRIES;
5155
5156         if (INTEL_GEN(engine->i915) >= 11) {
5157                 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5158                 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5159         }
5160
5161         /* Finally, take ownership and responsibility for cleanup! */
5162         engine->sanitize = execlists_sanitize;
5163         engine->release = execlists_release;
5164
5165         return 0;
5166 }
5167
5168 static void init_common_reg_state(u32 * const regs,
5169                                   const struct intel_engine_cs *engine,
5170                                   const struct intel_ring *ring,
5171                                   bool inhibit)
5172 {
5173         u32 ctl;
5174
5175         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5176         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5177         if (inhibit)
5178                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5179         if (INTEL_GEN(engine->i915) < 11)
5180                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5181                                            CTX_CTRL_RS_CTX_ENABLE);
5182         regs[CTX_CONTEXT_CONTROL] = ctl;
5183
5184         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5185         regs[CTX_TIMESTAMP] = 0;
5186 }
5187
5188 static void init_wa_bb_reg_state(u32 * const regs,
5189                                  const struct intel_engine_cs *engine)
5190 {
5191         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5192
5193         if (wa_ctx->per_ctx.size) {
5194                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5195
5196                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5197                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5198                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5199         }
5200
5201         if (wa_ctx->indirect_ctx.size) {
5202                 lrc_ring_setup_indirect_ctx(regs, engine,
5203                                             i915_ggtt_offset(wa_ctx->vma) +
5204                                             wa_ctx->indirect_ctx.offset,
5205                                             wa_ctx->indirect_ctx.size);
5206         }
5207 }
5208
5209 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5210 {
5211         if (i915_vm_is_4lvl(&ppgtt->vm)) {
5212                 /* 64b PPGTT (48bit canonical)
5213                  * PDP0_DESCRIPTOR contains the base address to PML4 and
5214                  * other PDP Descriptors are ignored.
5215                  */
5216                 ASSIGN_CTX_PML4(ppgtt, regs);
5217         } else {
5218                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
5219                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
5220                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
5221                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
5222         }
5223 }
5224
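/*
 * Contexts always execute with a ppgtt; if the caller handed us the GGTT,
 * substitute the aliasing ppgtt that backs it.
 */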
5225 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5226 {
5227         if (i915_is_ggtt(vm))
5228                 return i915_vm_to_ggtt(vm)->alias;
5229         else
5230                 return i915_vm_to_ppgtt(vm);
5231 }
5232
5233 static void execlists_init_reg_state(u32 *regs,
5234                                      const struct intel_context *ce,
5235                                      const struct intel_engine_cs *engine,
5236                                      const struct intel_ring *ring,
5237                                      bool inhibit)
5238 {
5239         /*
5240          * A context is actually a big batch buffer with several
5241          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5242          * values we are setting here are only for the first context restore:
5243          * on a subsequent save, the GPU will recreate this batchbuffer with new
5244          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5245          * we are not initializing here).
5246          *
5247          * Must keep consistent with virtual_update_register_offsets().
5248          */
5249         set_offsets(regs, reg_offsets(engine), engine, inhibit);
5250
5251         init_common_reg_state(regs, engine, ring, inhibit);
5252         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5253
5254         init_wa_bb_reg_state(regs, engine);
5255
5256         __reset_stop_ring(regs, engine);
5257 }
5258
5259 static int
5260 populate_lr_context(struct intel_context *ce,
5261                     struct drm_i915_gem_object *ctx_obj,
5262                     struct intel_engine_cs *engine,
5263                     struct intel_ring *ring)
5264 {
5265         bool inhibit = true;
5266         void *vaddr;
5267
5268         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5269         if (IS_ERR(vaddr)) {
5270                 drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5271                 return PTR_ERR(vaddr);
5272         }
5273
5274         set_redzone(vaddr, engine);
5275
5276         if (engine->default_state) {
5277                 shmem_read(engine->default_state, 0,
5278                            vaddr, engine->context_size);
5279                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5280                 inhibit = false;
5281         }
5282
5283         /* Clear the ppHWSP (inc. per-context counters) */
5284         memset(vaddr, 0, PAGE_SIZE);
5285
5286         /*
5287          * The second page of the context object contains some registers which
5288          * must be set up prior to the first execution.
5289          */
5290         execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5291                                  ce, engine, ring, inhibit);
5292
5293         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5294         i915_gem_object_unpin_map(ctx_obj);
5295         return 0;
5296 }
5297
5298 static int __execlists_context_alloc(struct intel_context *ce,
5299                                      struct intel_engine_cs *engine)
5300 {
5301         struct drm_i915_gem_object *ctx_obj;
5302         struct intel_ring *ring;
5303         struct i915_vma *vma;
5304         u32 context_size;
5305         int ret;
5306
5307         GEM_BUG_ON(ce->state);
5308         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5309
5310         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5311                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5312
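        /*
         * Gen12 contexts get an extra per-context workaround batch page
         * tacked onto the end of the context image; record its page index
         * for later use.
         */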
5313         if (INTEL_GEN(engine->i915) == 12) {
5314                 ce->wa_bb_page = context_size / PAGE_SIZE;
5315                 context_size += PAGE_SIZE;
5316         }
5317
5318         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5319         if (IS_ERR(ctx_obj))
5320                 return PTR_ERR(ctx_obj);
5321
5322         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5323         if (IS_ERR(vma)) {
5324                 ret = PTR_ERR(vma);
5325                 goto error_deref_obj;
5326         }
5327
5328         if (!ce->timeline) {
5329                 struct intel_timeline *tl;
5330                 struct i915_vma *hwsp;
5331
5332                 /*
5333                  * Use the static global HWSP for the kernel context, and
5334                  * a dynamically allocated cacheline for everyone else.
5335                  */
5336                 hwsp = NULL;
5337                 if (unlikely(intel_context_is_barrier(ce)))
5338                         hwsp = engine->status_page.vma;
5339
5340                 tl = intel_timeline_create(engine->gt, hwsp);
5341                 if (IS_ERR(tl)) {
5342                         ret = PTR_ERR(tl);
5343                         goto error_deref_obj;
5344                 }
5345
5346                 ce->timeline = tl;
5347         }
5348
5349         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5350         if (IS_ERR(ring)) {
5351                 ret = PTR_ERR(ring);
5352                 goto error_deref_obj;
5353         }
5354
5355         ret = populate_lr_context(ce, ctx_obj, engine, ring);
5356         if (ret) {
5357                 drm_dbg(&engine->i915->drm,
5358                         "Failed to populate LRC: %d\n", ret);
5359                 goto error_ring_free;
5360         }
5361
5362         ce->ring = ring;
5363         ce->state = vma;
5364
5365         return 0;
5366
5367 error_ring_free:
5368         intel_ring_put(ring);
5369 error_deref_obj:
5370         i915_gem_object_put(ctx_obj);
5371         return ret;
5372 }
5373
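/*
 * A virtual engine carries at most one ready request at a time; it is
 * parked on the default priolist here until a sibling dequeues it.
 */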
5374 static struct list_head *virtual_queue(struct virtual_engine *ve)
5375 {
5376         return &ve->base.execlists.default_priolist.requests[0];
5377 }
5378
5379 static void virtual_context_destroy(struct kref *kref)
5380 {
5381         struct virtual_engine *ve =
5382                 container_of(kref, typeof(*ve), context.ref);
5383         unsigned int n;
5384
5385         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5386         GEM_BUG_ON(ve->request);
5387         GEM_BUG_ON(ve->context.inflight);
5388
5389         for (n = 0; n < ve->num_siblings; n++) {
5390                 struct intel_engine_cs *sibling = ve->siblings[n];
5391                 struct rb_node *node = &ve->nodes[sibling->id].rb;
5392                 unsigned long flags;
5393
5394                 if (RB_EMPTY_NODE(node))
5395                         continue;
5396
5397                 spin_lock_irqsave(&sibling->active.lock, flags);
5398
5399                 /* Detachment is lazily performed in the execlists tasklet */
5400                 if (!RB_EMPTY_NODE(node))
5401                         rb_erase_cached(node, &sibling->execlists.virtual);
5402
5403                 spin_unlock_irqrestore(&sibling->active.lock, flags);
5404         }
5405         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5406
5407         if (ve->context.state)
5408                 __execlists_context_fini(&ve->context);
5409         intel_context_fini(&ve->context);
5410
5411         intel_engine_free_request_pool(&ve->base);
5412
5413         kfree(ve->bonds);
5414         kfree(ve);
5415 }
5416
5417 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5418 {
5419         int swp;
5420
5421         /*
5422          * Pick a random sibling on starting to help spread the load around.
5423          *
5424          * New contexts are typically created with exactly the same order
5425          * of siblings, and often started in batches. Due to the way we iterate
5426          * the array of siblings when submitting requests, sibling[0] is
5427          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5428          * randomised across the system, we also help spread the load because
5429          * the first engine we inspect differs each time.
5430          *
5431          * NB This does not force us to execute on this engine; it will just
5432          * typically be the first we inspect for submission.
5433          */
5434         swp = prandom_u32_max(ve->num_siblings);
5435         if (!swp)
5436                 return;
5437
5438         swap(ve->siblings[swp], ve->siblings[0]);
5439         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
5440                 virtual_update_register_offsets(ve->context.lrc_reg_state,
5441                                                 ve->siblings[0]);
5442 }
5443
5444 static int virtual_context_alloc(struct intel_context *ce)
5445 {
5446         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5447
5448         return __execlists_context_alloc(ce, ve->siblings[0]);
5449 }
5450
5451 static int virtual_context_pin(struct intel_context *ce)
5452 {
5453         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5454         int err;
5455
5456         /* Note: we must use a real engine class for setting up reg state */
5457         err = __execlists_context_pin(ce, ve->siblings[0]);
5458         if (err)
5459                 return err;
5460
5461         virtual_engine_initial_hint(ve);
5462         return 0;
5463 }
5464
5465 static void virtual_context_enter(struct intel_context *ce)
5466 {
5467         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5468         unsigned int n;
5469
5470         for (n = 0; n < ve->num_siblings; n++)
5471                 intel_engine_pm_get(ve->siblings[n]);
5472
5473         intel_timeline_enter(ce->timeline);
5474 }
5475
5476 static void virtual_context_exit(struct intel_context *ce)
5477 {
5478         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5479         unsigned int n;
5480
5481         intel_timeline_exit(ce->timeline);
5482
5483         for (n = 0; n < ve->num_siblings; n++)
5484                 intel_engine_pm_put(ve->siblings[n]);
5485 }
5486
5487 static const struct intel_context_ops virtual_context_ops = {
5488         .alloc = virtual_context_alloc,
5489
5490         .pin = virtual_context_pin,
5491         .unpin = execlists_context_unpin,
5492
5493         .enter = virtual_context_enter,
5494         .exit = virtual_context_exit,
5495
5496         .destroy = virtual_context_destroy,
5497 };
5498
5499 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5500 {
5501         struct i915_request *rq;
5502         intel_engine_mask_t mask;
5503
5504         rq = READ_ONCE(ve->request);
5505         if (!rq)
5506                 return 0;
5507
5508         /* The rq is ready for submission; rq->execution_mask is now stable. */
5509         mask = rq->execution_mask;
5510         if (unlikely(!mask)) {
5511                 /* Invalid selection, submit to a random engine in error */
5512                 i915_request_set_error_once(rq, -ENODEV);
5513                 mask = ve->siblings[0]->mask;
5514         }
5515
5516         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5517                      rq->fence.context, rq->fence.seqno,
5518                      mask, ve->base.execlists.queue_priority_hint);
5519
5520         return mask;
5521 }
5522
5523 static void virtual_submission_tasklet(unsigned long data)
5524 {
5525         struct virtual_engine * const ve = (struct virtual_engine *)data;
5526         const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5527         intel_engine_mask_t mask;
5528         unsigned int n;
5529
5530         rcu_read_lock();
5531         mask = virtual_submission_mask(ve);
5532         rcu_read_unlock();
5533         if (unlikely(!mask))
5534                 return;
5535
5536         local_irq_disable();
5537         for (n = 0; n < ve->num_siblings; n++) {
5538                 struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5539                 struct ve_node * const node = &ve->nodes[sibling->id];
5540                 struct rb_node **parent, *rb;
5541                 bool first;
5542
5543                 if (!READ_ONCE(ve->request))
5544                         break; /* already handled by a sibling's tasklet */
5545
5546                 if (unlikely(!(mask & sibling->mask))) {
5547                         if (!RB_EMPTY_NODE(&node->rb)) {
5548                                 spin_lock(&sibling->active.lock);
5549                                 rb_erase_cached(&node->rb,
5550                                                 &sibling->execlists.virtual);
5551                                 RB_CLEAR_NODE(&node->rb);
5552                                 spin_unlock(&sibling->active.lock);
5553                         }
5554                         continue;
5555                 }
5556
5557                 spin_lock(&sibling->active.lock);
5558
5559                 if (!RB_EMPTY_NODE(&node->rb)) {
5560                         /*
5561                          * Cheat and avoid rebalancing the tree if we can
5562                          * reuse this node in situ.
5563                          */
5564                         first = rb_first_cached(&sibling->execlists.virtual) ==
5565                                 &node->rb;
5566                         if (prio == node->prio || (prio > node->prio && first))
5567                                 goto submit_engine;
5568
5569                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5570                 }
5571
5572                 rb = NULL;
5573                 first = true;
5574                 parent = &sibling->execlists.virtual.rb_root.rb_node;
5575                 while (*parent) {
5576                         struct ve_node *other;
5577
5578                         rb = *parent;
5579                         other = rb_entry(rb, typeof(*other), rb);
5580                         if (prio > other->prio) {
5581                                 parent = &rb->rb_left;
5582                         } else {
5583                                 parent = &rb->rb_right;
5584                                 first = false;
5585                         }
5586                 }
5587
5588                 rb_link_node(&node->rb, rb, parent);
5589                 rb_insert_color_cached(&node->rb,
5590                                        &sibling->execlists.virtual,
5591                                        first);
5592
5593 submit_engine:
5594                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5595                 node->prio = prio;
5596                 if (first && prio > sibling->execlists.queue_priority_hint)
5597                         tasklet_hi_schedule(&sibling->execlists.tasklet);
5598
5599                 spin_unlock(&sibling->active.lock);
5600         }
5601         local_irq_enable();
5602 }
5603
5604 static void virtual_submit_request(struct i915_request *rq)
5605 {
5606         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5607         struct i915_request *old;
5608         unsigned long flags;
5609
5610         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5611                      rq->fence.context,
5612                      rq->fence.seqno);
5613
5614         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5615
5616         spin_lock_irqsave(&ve->base.active.lock, flags);
5617
5618         old = ve->request;
5619         if (old) { /* background completion event from preempt-to-busy */
5620                 GEM_BUG_ON(!i915_request_completed(old));
5621                 __i915_request_submit(old);
5622                 i915_request_put(old);
5623         }
5624
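             /*
              * An already completed request can be submitted (and retired)
              * immediately; otherwise it becomes the single pending request
              * for the virtual engine and we kick the submission tasklet.
              */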
5625         if (i915_request_completed(rq)) {
5626                 __i915_request_submit(rq);
5627
5628                 ve->base.execlists.queue_priority_hint = INT_MIN;
5629                 ve->request = NULL;
5630         } else {
5631                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
5632                 ve->request = i915_request_get(rq);
5633
5634                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5635                 list_move_tail(&rq->sched.link, virtual_queue(ve));
5636
5637                 tasklet_hi_schedule(&ve->base.execlists.tasklet);
5638         }
5639
5640         spin_unlock_irqrestore(&ve->base.active.lock, flags);
5641 }
5642
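     /*
      * Bonds are keyed by the master engine; attaching another sibling to an
      * existing master merges the masks, so a linear scan of the small array
      * suffices.
      */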
5643 static struct ve_bond *
5644 virtual_find_bond(struct virtual_engine *ve,
5645                   const struct intel_engine_cs *master)
5646 {
5647         int i;
5648
5649         for (i = 0; i < ve->num_bonds; i++) {
5650                 if (ve->bonds[i].master == master)
5651                         return &ve->bonds[i];
5652         }
5653
5654         return NULL;
5655 }
5656
5657 static void
5658 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5659 {
5660         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5661         intel_engine_mask_t allowed, exec;
5662         struct ve_bond *bond;
5663
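             /* Never run the bonded request on the engine that ran the master. */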
5664         allowed = ~to_request(signal)->engine->mask;
5665
5666         bond = virtual_find_bond(ve, to_request(signal)->engine);
5667         if (bond)
5668                 allowed &= bond->sibling_mask;
5669
5670         /* Restrict the bonded request to run on only the available engines */
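             /* execution_mask may be narrowed concurrently; retry until our update lands */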
5671         exec = READ_ONCE(rq->execution_mask);
5672         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5673                 ;
5674
5675         /* Prevent the master from being re-run on the bonded engines */
5676         to_request(signal)->execution_mask &= ~allowed;
5677 }
5678
5679 struct intel_context *
5680 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5681                                unsigned int count)
5682 {
5683         struct virtual_engine *ve;
5684         unsigned int n;
5685         int err;
5686
5687         if (count == 0)
5688                 return ERR_PTR(-EINVAL);
5689
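             /* A single sibling needs no load balancing; use a plain context. */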
5690         if (count == 1)
5691                 return intel_context_create(siblings[0]);
5692
5693         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5694         if (!ve)
5695                 return ERR_PTR(-ENOMEM);
5696
5697         ve->base.i915 = siblings[0]->i915;
5698         ve->base.gt = siblings[0]->gt;
5699         ve->base.uncore = siblings[0]->uncore;
5700         ve->base.id = -1;
5701
5702         ve->base.class = OTHER_CLASS;
5703         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5704         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5705         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5706
5707         /*
5708          * The decision on whether to submit a request using semaphores
5709          * depends on the saturated state of the engine. We only compute
5710          * this during HW submission of the request, and we need this
5711          * state to be globally applied to all requests being submitted
5712          * to this engine. Virtual engines encompass more than one physical
5713          * engine, so we cannot accurately tell in advance whether one of those
5714          * engines is already saturated and hence whether we can afford to use a
5715          * semaphore and be pessimized in priority for doing so -- if we are the
5716          * only context using semaphores after all other clients have stopped, we
5717          * will be starved on the saturated system. Such a global switch for
5718          * semaphores is less than ideal, but alas is the current compromise.
5719          */
5720         ve->base.saturated = ALL_ENGINES;
5721
5722         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5723
5724         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5725         intel_engine_init_breadcrumbs(&ve->base);
5726         intel_engine_init_execlists(&ve->base);
5727         ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
5728
5729         ve->base.cops = &virtual_context_ops;
5730         ve->base.request_alloc = execlists_request_alloc;
5731
5732         ve->base.schedule = i915_schedule;
5733         ve->base.submit_request = virtual_submit_request;
5734         ve->base.bond_execute = virtual_bond_execute;
5735
5736         INIT_LIST_HEAD(virtual_queue(ve));
5737         ve->base.execlists.queue_priority_hint = INT_MIN;
5738         tasklet_init(&ve->base.execlists.tasklet,
5739                      virtual_submission_tasklet,
5740                      (unsigned long)ve);
5741
5742         intel_context_init(&ve->context, &ve->base);
5743
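             /*
              * Validate each sibling (no duplicates, execlists backend only,
              * matching class) and inherit the emission hooks from the first.
              */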
5744         for (n = 0; n < count; n++) {
5745                 struct intel_engine_cs *sibling = siblings[n];
5746
5747                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5748                 if (sibling->mask & ve->base.mask) {
5749                         DRM_DEBUG("duplicate %s entry in load balancer\n",
5750                                   sibling->name);
5751                         err = -EINVAL;
5752                         goto err_put;
5753                 }
5754
5755                 /*
5756                  * The virtual engine implementation is tightly coupled to
5757          * the execlists backend -- we push requests directly
5758                  * into a tree inside each physical engine. We could support
5759                  * layering if we handle cloning of the requests and
5760                  * submitting a copy into each backend.
5761                  */
5762                 if (sibling->execlists.tasklet.func !=
5763                     execlists_submission_tasklet) {
5764                         err = -ENODEV;
5765                         goto err_put;
5766                 }
5767
5768                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5769                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5770
5771                 ve->siblings[ve->num_siblings++] = sibling;
5772                 ve->base.mask |= sibling->mask;
5773
5774                 /*
5775                  * All physical engines must be compatible for their emission
5776                  * functions (as we build the instructions during request
5777                  * construction and do not alter them before submission
5778                  * on the physical engine). We use the engine class as a guide
5779                  * here, although that could be refined.
5780                  */
5781                 if (ve->base.class != OTHER_CLASS) {
5782                         if (ve->base.class != sibling->class) {
5783                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5784                                           sibling->class, ve->base.class);
5785                                 err = -EINVAL;
5786                                 goto err_put;
5787                         }
5788                         continue;
5789                 }
5790
5791                 ve->base.class = sibling->class;
5792                 ve->base.uabi_class = sibling->uabi_class;
5793                 snprintf(ve->base.name, sizeof(ve->base.name),
5794                          "v%dx%d", ve->base.class, count);
5795                 ve->base.context_size = sibling->context_size;
5796
5797                 ve->base.emit_bb_start = sibling->emit_bb_start;
5798                 ve->base.emit_flush = sibling->emit_flush;
5799                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5800                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5801                 ve->base.emit_fini_breadcrumb_dw =
5802                         sibling->emit_fini_breadcrumb_dw;
5803
5804                 ve->base.flags = sibling->flags;
5805         }
5806
5807         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5808
5809         return &ve->context;
5810
5811 err_put:
5812         intel_context_put(&ve->context);
5813         return ERR_PTR(err);
5814 }
5815
5816 struct intel_context *
5817 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5818 {
5819         struct virtual_engine *se = to_virtual_engine(src);
5820         struct intel_context *dst;
5821
5822         dst = intel_execlists_create_virtual(se->siblings,
5823                                              se->num_siblings);
5824         if (IS_ERR(dst))
5825                 return dst;
5826
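             /* create_virtual() does not copy bonds; duplicate them by hand. */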
5827         if (se->num_bonds) {
5828                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5829
5830                 de->bonds = kmemdup(se->bonds,
5831                                     sizeof(*se->bonds) * se->num_bonds,
5832                                     GFP_KERNEL);
5833                 if (!de->bonds) {
5834                         intel_context_put(dst);
5835                         return ERR_PTR(-ENOMEM);
5836                 }
5837
5838                 de->num_bonds = se->num_bonds;
5839         }
5840
5841         return dst;
5842 }
5843
5844 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5845                                      const struct intel_engine_cs *master,
5846                                      const struct intel_engine_cs *sibling)
5847 {
5848         struct virtual_engine *ve = to_virtual_engine(engine);
5849         struct ve_bond *bond;
5850         int n;
5851
5852         /* Sanity check the sibling is part of the virtual engine */
5853         for (n = 0; n < ve->num_siblings; n++)
5854                 if (sibling == ve->siblings[n])
5855                         break;
5856         if (n == ve->num_siblings)
5857                 return -EINVAL;
5858
5859         bond = virtual_find_bond(ve, master);
5860         if (bond) {
5861                 bond->sibling_mask |= sibling->mask;
5862                 return 0;
5863         }
5864
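             /* No bond for this master yet; grow the array by one entry. */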
5865         bond = krealloc(ve->bonds,
5866                         sizeof(*bond) * (ve->num_bonds + 1),
5867                         GFP_KERNEL);
5868         if (!bond)
5869                 return -ENOMEM;
5870
5871         bond[ve->num_bonds].master = master;
5872         bond[ve->num_bonds].sibling_mask = sibling->mask;
5873
5874         ve->bonds = bond;
5875         ve->num_bonds++;
5876
5877         return 0;
5878 }
5879
5880 struct intel_engine_cs *
5881 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5882                                  unsigned int sibling)
5883 {
5884         struct virtual_engine *ve = to_virtual_engine(engine);
5885
5886         if (sibling >= ve->num_siblings)
5887                 return NULL;
5888
5889         return ve->siblings[sibling];
5890 }
5891
5892 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5893                                    struct drm_printer *m,
5894                                    void (*show_request)(struct drm_printer *m,
5895                                                         struct i915_request *rq,
5896                                                         const char *prefix),
5897                                    unsigned int max)
5898 {
5899         const struct intel_engine_execlists *execlists = &engine->execlists;
5900         struct i915_request *rq, *last;
5901         unsigned long flags;
5902         unsigned int count;
5903         struct rb_node *rb;
5904
5905         spin_lock_irqsave(&engine->active.lock, flags);
5906
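             /* Requests already submitted to the HW ("E" prefix). */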
5907         last = NULL;
5908         count = 0;
5909         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5910                 if (count++ < max - 1)
5911                         show_request(m, rq, "\t\tE ");
5912                 else
5913                         last = rq;
5914         }
5915         if (last) {
5916                 if (count > max) {
5917                         drm_printf(m,
5918                                    "\t\t...skipping %d executing requests...\n",
5919                                    count - max);
5920                 }
5921                 show_request(m, last, "\t\tE ");
5922         }
5923
5924         if (execlists->switch_priority_hint != INT_MIN)
5925                 drm_printf(m, "\t\tSwitch priority hint: %d\n",
5926                            READ_ONCE(execlists->switch_priority_hint));
5927         if (execlists->queue_priority_hint != INT_MIN)
5928                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5929                            READ_ONCE(execlists->queue_priority_hint));
5930
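             /* The priority-sorted queue of requests awaiting submission ("Q" prefix). */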
5931         last = NULL;
5932         count = 0;
5933         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5934                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5935                 int i;
5936
5937                 priolist_for_each_request(rq, p, i) {
5938                         if (count++ < max - 1)
5939                                 show_request(m, rq, "\t\tQ ");
5940                         else
5941                                 last = rq;
5942                 }
5943         }
5944         if (last) {
5945                 if (count > max) {
5946                         drm_printf(m,
5947                                    "\t\t...skipping %d queued requests...\n",
5948                                    count - max);
5949                 }
5950                 show_request(m, last, "\t\tQ ");
5951         }
5952
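             /* Pending virtual engine requests targeting this sibling ("V" prefix). */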
5953         last = NULL;
5954         count = 0;
5955         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5956                 struct virtual_engine *ve =
5957                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5958                 struct i915_request *rq = READ_ONCE(ve->request);
5959
5960                 if (rq) {
5961                         if (count++ < max - 1)
5962                                 show_request(m, rq, "\t\tV ");
5963                         else
5964                                 last = rq;
5965                 }
5966         }
5967         if (last) {
5968                 if (count > max) {
5969                         drm_printf(m,
5970                                    "\t\t...skipping %d virtual requests...\n",
5971                                    count - max);
5972                 }
5973                 show_request(m, last, "\t\tV ");
5974         }
5975
5976         spin_unlock_irqrestore(&engine->active.lock, flags);
5977 }
5978
5979 void intel_lr_context_reset(struct intel_engine_cs *engine,
5980                             struct intel_context *ce,
5981                             u32 head,
5982                             bool scrub)
5983 {
5984         GEM_BUG_ON(!intel_context_is_pinned(ce));
5985
5986         /*
5987          * We want a simple context + ring to execute the breadcrumb update.
5988          * We cannot rely on the context being intact across the GPU hang,
5989          * so clear it and rebuild just what we need for the breadcrumb.
5990          * All pending requests for this context will be zapped, and any
5991          * future request will be after userspace has had the opportunity
5992          * to recreate its own state.
5993          */
5994         if (scrub)
5995                 restore_default_state(ce, engine);
5996
5997         /* Rerun the request; its payload has been neutered (if guilty). */
5998         __execlists_update_reg_state(ce, engine, head);
5999 }
6000
6001 bool
6002 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
6003 {
6004         return engine->set_default_submission ==
6005                intel_execlists_set_default_submission;
6006 }
6007
6008 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6009 #include "selftest_lrc.c"
6010 #endif