1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, such
41  * as PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong to the context (and not to the engine, like
67  * before) and that contexts are uniquely tied to a given engine (and not
68  * reusable, like before), we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time but is instead kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
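/*
 * A rough sketch (pseudo-code, not the actual dequeue implementation) of the
 * ELSP pairing rule described above: requests at the head of the queue that
 * share a context are folded into a single port, so the two submitted ports
 * always reference distinct contexts.
 *
 *	port[0] = first request in the queue;
 *	while (next request belongs to the same context as port[0])
 *		port[0] = that request;  (its tail supersedes the previous one)
 *	port[1] = the following request, or NULL if none remains;
 */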
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150 #include "shmem_utils.h"
151
152 #define RING_EXECLIST_QFULL             (1 << 0x2)
153 #define RING_EXECLIST1_VALID            (1 << 0x3)
154 #define RING_EXECLIST0_VALID            (1 << 0x4)
155 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
156 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
157 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
158
159 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
160 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
161 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
162 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
163 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
164 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
165
166 #define GEN8_CTX_STATUS_COMPLETED_MASK \
167          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
168
169 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
170
171 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
172 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
173 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
174 #define GEN12_IDLE_CTX_ID               0x7FF
175 #define GEN12_CSB_CTX_VALID(csb_dw) \
176         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
177
178 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
179 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
180
181 struct virtual_engine {
182         struct intel_engine_cs base;
183         struct intel_context context;
184
185         /*
186          * We allow only a single request through the virtual engine at a time
187          * (each request in the timeline waits for the completion fence of
188          * the previous before being submitted). By restricting ourselves to
189          * only submitting a single request, each request is placed onto a
190          * physical engine to maximise load spreading (by virtue of the late greedy
191          * scheduling -- each real engine takes the next available request
192          * upon idling).
193          */
194         struct i915_request *request;
195
196         /*
197          * We keep a rbtree of available virtual engines inside each physical
198          * engine, sorted by priority. Here we preallocate the nodes we need
199          * for the virtual engine, indexed by physical_engine->id.
200          */
201         struct ve_node {
202                 struct rb_node rb;
203                 int prio;
204         } nodes[I915_NUM_ENGINES];
205
206         /*
207          * Keep track of bonded pairs -- restrictions upon our selection
208          * of physical engines any particular request may be submitted to.
209          * If we receive a submit-fence from a master engine, we will only
210          * use one of the sibling_mask physical engines.
211          */
212         struct ve_bond {
213                 const struct intel_engine_cs *master;
214                 intel_engine_mask_t sibling_mask;
215         } *bonds;
216         unsigned int num_bonds;
217
218         /* And finally, which physical engines this virtual engine maps onto. */
219         unsigned int num_siblings;
220         struct intel_engine_cs *siblings[];
221 };
222
223 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
224 {
225         GEM_BUG_ON(!intel_engine_is_virtual(engine));
226         return container_of(engine, struct virtual_engine, base);
227 }
228
229 static int __execlists_context_alloc(struct intel_context *ce,
230                                      struct intel_engine_cs *engine);
231
232 static void execlists_init_reg_state(u32 *reg_state,
233                                      const struct intel_context *ce,
234                                      const struct intel_engine_cs *engine,
235                                      const struct intel_ring *ring,
236                                      bool close);
237 static void
238 __execlists_update_reg_state(const struct intel_context *ce,
239                              const struct intel_engine_cs *engine,
240                              u32 head);
241
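/*
 * The lrc_ring_*() helpers below return the dword index of a register within
 * the context image (ce->lrc_reg_state), i.e. the slot holding the register
 * offset in its MI_LOAD_REGISTER_IMM block, with the register's value stored
 * at index + 1, or -1 if the register is not present in this engine's
 * context layout.
 */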
242 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
243 {
244         if (INTEL_GEN(engine->i915) >= 12)
245                 return 0x60;
246         else if (INTEL_GEN(engine->i915) >= 9)
247                 return 0x54;
248         else if (engine->class == RENDER_CLASS)
249                 return 0x58;
250         else
251                 return -1;
252 }
253
254 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
255 {
256         if (INTEL_GEN(engine->i915) >= 12)
257                 return 0x74;
258         else if (INTEL_GEN(engine->i915) >= 9)
259                 return 0x68;
260         else if (engine->class == RENDER_CLASS)
261                 return 0xd8;
262         else
263                 return -1;
264 }
265
266 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
267 {
268         if (INTEL_GEN(engine->i915) >= 12)
269                 return 0x12;
270         else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
271                 return 0x18;
272         else
273                 return -1;
274 }
275
276 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
277 {
278         int x;
279
280         x = lrc_ring_wa_bb_per_ctx(engine);
281         if (x < 0)
282                 return x;
283
284         return x + 2;
285 }
286
287 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
288 {
289         int x;
290
291         x = lrc_ring_indirect_ptr(engine);
292         if (x < 0)
293                 return x;
294
295         return x + 2;
296 }
297
298 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
299 {
300         if (engine->class != RENDER_CLASS)
301                 return -1;
302
303         if (INTEL_GEN(engine->i915) >= 12)
304                 return 0xb6;
305         else if (INTEL_GEN(engine->i915) >= 11)
306                 return 0xaa;
307         else
308                 return -1;
309 }
310
311 static u32
312 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
313 {
314         switch (INTEL_GEN(engine->i915)) {
315         default:
316                 MISSING_CASE(INTEL_GEN(engine->i915));
317                 fallthrough;
318         case 12:
319                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
320         case 11:
321                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322         case 10:
323                 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324         case 9:
325                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326         case 8:
327                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328         }
329 }
330
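/*
 * Point the context image at a per-context indirect context batch: the
 * pointer entry takes the batch's ggtt address with its size in cachelines
 * encoded in the low bits, while the offset entry is programmed with the
 * per-gen default indirect context offset.
 */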
331 static void
332 lrc_ring_setup_indirect_ctx(u32 *regs,
333                             const struct intel_engine_cs *engine,
334                             u32 ctx_bb_ggtt_addr,
335                             u32 size)
336 {
337         GEM_BUG_ON(!size);
338         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
339         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
340         regs[lrc_ring_indirect_ptr(engine) + 1] =
341                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
342
343         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
344         regs[lrc_ring_indirect_offset(engine) + 1] =
345                 lrc_ring_indirect_offset_default(engine) << 6;
346 }
347
348 static u32 intel_context_get_runtime(const struct intel_context *ce)
349 {
350         /*
351          * We can use either ppHWSP[16] which is recorded before the context
352          * switch (and so excludes the cost of context switches) or use the
353          * value from the context image itself, which is saved/restored earlier
354          * and so includes the cost of the save.
355          */
356         return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
357 }
358
359 static void mark_eio(struct i915_request *rq)
360 {
361         if (i915_request_completed(rq))
362                 return;
363
364         GEM_BUG_ON(i915_request_signaled(rq));
365
366         i915_request_set_error_once(rq, -EIO);
367         i915_request_mark_complete(rq);
368 }
369
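/*
 * Walk back along the timeline from @rq to find the oldest request that has
 * not yet completed, i.e. the point the context must be rewound to.
 */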
370 static struct i915_request *
371 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
372 {
373         struct i915_request *active = rq;
374
375         rcu_read_lock();
376         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
377                 if (i915_request_completed(rq))
378                         break;
379
380                 active = rq;
381         }
382         rcu_read_unlock();
383
384         return active;
385 }
386
387 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
388 {
389         return (i915_ggtt_offset(engine->status_page.vma) +
390                 I915_GEM_HWS_PREEMPT_ADDR);
391 }
392
393 static inline void
394 ring_set_paused(const struct intel_engine_cs *engine, int state)
395 {
396         /*
397          * We inspect HWS_PREEMPT with a semaphore inside
398          * engine->emit_fini_breadcrumb. If the dword is true,
399          * the ring is paused as the semaphore will busywait
400          * until the dword is false.
401          */
402         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
403         if (state)
404                 wmb();
405 }
406
407 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
408 {
409         return rb_entry(rb, struct i915_priolist, node);
410 }
411
412 static inline int rq_prio(const struct i915_request *rq)
413 {
414         return READ_ONCE(rq->sched.attr.priority);
415 }
416
417 static int effective_prio(const struct i915_request *rq)
418 {
419         int prio = rq_prio(rq);
420
421         /*
422          * If this request is special and must not be interrupted at any
423          * cost, so be it. Note we are only checking the most recent request
424          * in the context and so may be masking an earlier vip request. It
425          * is hoped that under the conditions where nopreempt is used, this
426          * will not matter (i.e. all requests to that context will be
427          * nopreempt for as long as desired).
428          */
429         if (i915_request_has_nopreempt(rq))
430                 prio = I915_PRIORITY_UNPREEMPTABLE;
431
432         return prio;
433 }
434
435 static int queue_prio(const struct intel_engine_execlists *execlists)
436 {
437         struct i915_priolist *p;
438         struct rb_node *rb;
439
440         rb = rb_first_cached(&execlists->queue);
441         if (!rb)
442                 return INT_MIN;
443
444         /*
445          * As the priolist[] are inverted, with the highest priority in [0],
446          * we have to flip the index value to recover the priority.
447          */
448         p = to_priolist(rb);
449         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
450 }
451
452 static inline bool need_preempt(const struct intel_engine_cs *engine,
453                                 const struct i915_request *rq,
454                                 struct rb_node *rb)
455 {
456         int last_prio;
457
458         if (!intel_engine_has_semaphores(engine))
459                 return false;
460
461         /*
462          * Check if the current priority hint merits a preemption attempt.
463          *
464          * We record the highest value priority we saw during rescheduling
465          * prior to this dequeue, therefore we know that if it is strictly
466          * less than the current tail of ELSP[0], we do not need to force
467          * a preempt-to-idle cycle.
468          *
469          * However, the priority hint is a mere hint that we may need to
470          * preempt. If that hint is stale or we may be trying to preempt
471          * ourselves, ignore the request.
472          *
473          * More naturally we would write
474          *      prio >= max(0, last);
475          * except that we wish to prevent triggering preemption at the same
476          * priority level: the task that is running should remain running
477          * to preserve FIFO ordering of dependencies.
478          */
479         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
480         if (engine->execlists.queue_priority_hint <= last_prio)
481                 return false;
482
483         /*
484          * Check against the first request in ELSP[1], it will, thanks to the
485          * power of PI, be the highest priority of that context.
486          */
487         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
488             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
489                 return true;
490
491         if (rb) {
492                 struct virtual_engine *ve =
493                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
494                 bool preempt = false;
495
496                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
497                         struct i915_request *next;
498
499                         rcu_read_lock();
500                         next = READ_ONCE(ve->request);
501                         if (next)
502                                 preempt = rq_prio(next) > last_prio;
503                         rcu_read_unlock();
504                 }
505
506                 if (preempt)
507                         return preempt;
508         }
509
510         /*
511          * If the inflight context did not trigger the preemption, then maybe
512          * it was the set of queued requests? Pick the highest priority in
513          * the queue (the first active priolist) and see if it deserves to be
514          * running instead of ELSP[0].
515          *
516          * The highest priority request in the queue cannot be either
517          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
518          * context, its priority would not exceed ELSP[0] aka last_prio.
519          */
520         return queue_prio(&engine->execlists) > last_prio;
521 }
522
523 __maybe_unused static inline bool
524 assert_priority_queue(const struct i915_request *prev,
525                       const struct i915_request *next)
526 {
527         /*
528          * Without preemption, the prev may refer to the still active element
529          * which we refuse to let go.
530          *
531          * Even with preemption, there are times when we think it is better not
532          * to preempt and leave an ostensibly lower priority request in flight.
533          */
534         if (i915_request_is_active(prev))
535                 return true;
536
537         return rq_prio(prev) >= rq_prio(next);
538 }
539
540 /*
541  * The context descriptor encodes various attributes of a context,
542  * including its GTT address and some flags. Because it's fairly
543  * expensive to calculate, we'll just do it once and cache the result,
544  * which remains valid until the context is unpinned.
545  *
546  * This is what a descriptor looks like, from LSB to MSB::
547  *
548  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
549  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
550  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
551  *      bits 53-54:    mbz, reserved for use by hardware
552  *      bits 55-63:    group ID, currently unused and set to 0
553  *
554  * Starting from Gen11, the upper dword of the descriptor has a new format:
555  *
556  *      bits 32-36:    reserved
557  *      bits 37-47:    SW context ID
558  *      bits 48-53:    engine instance
559  *      bit 54:        mbz, reserved for use by hardware
560  *      bits 55-60:    SW counter
561  *      bits 61-63:    engine class
562  *
563  * engine info, SW context ID and SW counter need to form a unique number
564  * (Context ID) per lrc.
565  */
566 static u32
567 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
568 {
569         u32 desc;
570
571         desc = INTEL_LEGACY_32B_CONTEXT;
572         if (i915_vm_is_4lvl(ce->vm))
573                 desc = INTEL_LEGACY_64B_CONTEXT;
574         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
575
576         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
577         if (IS_GEN(engine->i915, 8))
578                 desc |= GEN8_CTX_L3LLC_COHERENT;
579
580         return i915_ggtt_offset(ce->state) | desc;
581 }
582
583 static inline unsigned int dword_in_page(void *addr)
584 {
585         return offset_in_page(addr) / sizeof(u32);
586 }
587
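/*
 * set_offsets() expands a compact byte-coded description of an engine's
 * context-image layout (see the NOP/LRI/REG/REG16/END macros) into the
 * register state: a byte with bit 7 set skips that many dwords, otherwise
 * the byte is an MI_LOAD_REGISTER_IMM header (count in the low 6 bits,
 * POSTED flag in the top bits) followed by register offsets encoded in
 * 7-bit chunks relative to the engine's mmio base. A zero byte terminates
 * the table; the byte after it gives the total state size in dwords, used
 * to pad out the remainder with MI_NOOPs when @clear is set.
 */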
588 static void set_offsets(u32 *regs,
589                         const u8 *data,
590                         const struct intel_engine_cs *engine,
591                         bool clear)
592 #define NOP(x) (BIT(7) | (x))
593 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
594 #define POSTED BIT(0)
595 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
596 #define REG16(x) \
597         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
598         (((x) >> 2) & 0x7f)
599 #define END(total_state_size) 0, (total_state_size)
600 {
601         const u32 base = engine->mmio_base;
602
603         while (*data) {
604                 u8 count, flags;
605
606                 if (*data & BIT(7)) { /* skip */
607                         count = *data++ & ~BIT(7);
608                         if (clear)
609                                 memset32(regs, MI_NOOP, count);
610                         regs += count;
611                         continue;
612                 }
613
614                 count = *data & 0x3f;
615                 flags = *data >> 6;
616                 data++;
617
618                 *regs = MI_LOAD_REGISTER_IMM(count);
619                 if (flags & POSTED)
620                         *regs |= MI_LRI_FORCE_POSTED;
621                 if (INTEL_GEN(engine->i915) >= 11)
622                         *regs |= MI_LRI_LRM_CS_MMIO;
623                 regs++;
624
625                 GEM_BUG_ON(!count);
626                 do {
627                         u32 offset = 0;
628                         u8 v;
629
630                         do {
631                                 v = *data++;
632                                 offset <<= 7;
633                                 offset |= v & ~BIT(7);
634                         } while (v & BIT(7));
635
636                         regs[0] = base + (offset << 2);
637                         if (clear)
638                                 regs[1] = 0;
639                         regs += 2;
640                 } while (--count);
641         }
642
643         if (clear) {
644                 u8 count = *++data;
645
646                 /* Clear past the tail for HW access */
647                 GEM_BUG_ON(dword_in_page(regs) > count);
648                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
649
650                 /* Close the batch; used mainly by live_lrc_layout() */
651                 *regs = MI_BATCH_BUFFER_END;
652                 if (INTEL_GEN(engine->i915) >= 10)
653                         *regs |= BIT(0);
654         }
655 }
656
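/*
 * The per-gen tables below describe the MI_LOAD_REGISTER_IMM layout of the
 * default context image for each engine class, encoded with the macros above
 * and consumed by set_offsets(); the layout is cross-checked against the
 * hardware-generated image by the live_lrc_layout selftest.
 */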
657 static const u8 gen8_xcs_offsets[] = {
658         NOP(1),
659         LRI(11, 0),
660         REG16(0x244),
661         REG(0x034),
662         REG(0x030),
663         REG(0x038),
664         REG(0x03c),
665         REG(0x168),
666         REG(0x140),
667         REG(0x110),
668         REG(0x11c),
669         REG(0x114),
670         REG(0x118),
671
672         NOP(9),
673         LRI(9, 0),
674         REG16(0x3a8),
675         REG16(0x28c),
676         REG16(0x288),
677         REG16(0x284),
678         REG16(0x280),
679         REG16(0x27c),
680         REG16(0x278),
681         REG16(0x274),
682         REG16(0x270),
683
684         NOP(13),
685         LRI(2, 0),
686         REG16(0x200),
687         REG(0x028),
688
689         END(80)
690 };
691
692 static const u8 gen9_xcs_offsets[] = {
693         NOP(1),
694         LRI(14, POSTED),
695         REG16(0x244),
696         REG(0x034),
697         REG(0x030),
698         REG(0x038),
699         REG(0x03c),
700         REG(0x168),
701         REG(0x140),
702         REG(0x110),
703         REG(0x11c),
704         REG(0x114),
705         REG(0x118),
706         REG(0x1c0),
707         REG(0x1c4),
708         REG(0x1c8),
709
710         NOP(3),
711         LRI(9, POSTED),
712         REG16(0x3a8),
713         REG16(0x28c),
714         REG16(0x288),
715         REG16(0x284),
716         REG16(0x280),
717         REG16(0x27c),
718         REG16(0x278),
719         REG16(0x274),
720         REG16(0x270),
721
722         NOP(13),
723         LRI(1, POSTED),
724         REG16(0x200),
725
726         NOP(13),
727         LRI(44, POSTED),
728         REG(0x028),
729         REG(0x09c),
730         REG(0x0c0),
731         REG(0x178),
732         REG(0x17c),
733         REG16(0x358),
734         REG(0x170),
735         REG(0x150),
736         REG(0x154),
737         REG(0x158),
738         REG16(0x41c),
739         REG16(0x600),
740         REG16(0x604),
741         REG16(0x608),
742         REG16(0x60c),
743         REG16(0x610),
744         REG16(0x614),
745         REG16(0x618),
746         REG16(0x61c),
747         REG16(0x620),
748         REG16(0x624),
749         REG16(0x628),
750         REG16(0x62c),
751         REG16(0x630),
752         REG16(0x634),
753         REG16(0x638),
754         REG16(0x63c),
755         REG16(0x640),
756         REG16(0x644),
757         REG16(0x648),
758         REG16(0x64c),
759         REG16(0x650),
760         REG16(0x654),
761         REG16(0x658),
762         REG16(0x65c),
763         REG16(0x660),
764         REG16(0x664),
765         REG16(0x668),
766         REG16(0x66c),
767         REG16(0x670),
768         REG16(0x674),
769         REG16(0x678),
770         REG16(0x67c),
771         REG(0x068),
772
773         END(176)
774 };
775
776 static const u8 gen12_xcs_offsets[] = {
777         NOP(1),
778         LRI(13, POSTED),
779         REG16(0x244),
780         REG(0x034),
781         REG(0x030),
782         REG(0x038),
783         REG(0x03c),
784         REG(0x168),
785         REG(0x140),
786         REG(0x110),
787         REG(0x1c0),
788         REG(0x1c4),
789         REG(0x1c8),
790         REG(0x180),
791         REG16(0x2b4),
792
793         NOP(5),
794         LRI(9, POSTED),
795         REG16(0x3a8),
796         REG16(0x28c),
797         REG16(0x288),
798         REG16(0x284),
799         REG16(0x280),
800         REG16(0x27c),
801         REG16(0x278),
802         REG16(0x274),
803         REG16(0x270),
804
805         END(80)
806 };
807
808 static const u8 gen8_rcs_offsets[] = {
809         NOP(1),
810         LRI(14, POSTED),
811         REG16(0x244),
812         REG(0x034),
813         REG(0x030),
814         REG(0x038),
815         REG(0x03c),
816         REG(0x168),
817         REG(0x140),
818         REG(0x110),
819         REG(0x11c),
820         REG(0x114),
821         REG(0x118),
822         REG(0x1c0),
823         REG(0x1c4),
824         REG(0x1c8),
825
826         NOP(3),
827         LRI(9, POSTED),
828         REG16(0x3a8),
829         REG16(0x28c),
830         REG16(0x288),
831         REG16(0x284),
832         REG16(0x280),
833         REG16(0x27c),
834         REG16(0x278),
835         REG16(0x274),
836         REG16(0x270),
837
838         NOP(13),
839         LRI(1, 0),
840         REG(0x0c8),
841
842         END(80)
843 };
844
845 static const u8 gen9_rcs_offsets[] = {
846         NOP(1),
847         LRI(14, POSTED),
848         REG16(0x244),
849         REG(0x34),
850         REG(0x30),
851         REG(0x38),
852         REG(0x3c),
853         REG(0x168),
854         REG(0x140),
855         REG(0x110),
856         REG(0x11c),
857         REG(0x114),
858         REG(0x118),
859         REG(0x1c0),
860         REG(0x1c4),
861         REG(0x1c8),
862
863         NOP(3),
864         LRI(9, POSTED),
865         REG16(0x3a8),
866         REG16(0x28c),
867         REG16(0x288),
868         REG16(0x284),
869         REG16(0x280),
870         REG16(0x27c),
871         REG16(0x278),
872         REG16(0x274),
873         REG16(0x270),
874
875         NOP(13),
876         LRI(1, 0),
877         REG(0xc8),
878
879         NOP(13),
880         LRI(44, POSTED),
881         REG(0x28),
882         REG(0x9c),
883         REG(0xc0),
884         REG(0x178),
885         REG(0x17c),
886         REG16(0x358),
887         REG(0x170),
888         REG(0x150),
889         REG(0x154),
890         REG(0x158),
891         REG16(0x41c),
892         REG16(0x600),
893         REG16(0x604),
894         REG16(0x608),
895         REG16(0x60c),
896         REG16(0x610),
897         REG16(0x614),
898         REG16(0x618),
899         REG16(0x61c),
900         REG16(0x620),
901         REG16(0x624),
902         REG16(0x628),
903         REG16(0x62c),
904         REG16(0x630),
905         REG16(0x634),
906         REG16(0x638),
907         REG16(0x63c),
908         REG16(0x640),
909         REG16(0x644),
910         REG16(0x648),
911         REG16(0x64c),
912         REG16(0x650),
913         REG16(0x654),
914         REG16(0x658),
915         REG16(0x65c),
916         REG16(0x660),
917         REG16(0x664),
918         REG16(0x668),
919         REG16(0x66c),
920         REG16(0x670),
921         REG16(0x674),
922         REG16(0x678),
923         REG16(0x67c),
924         REG(0x68),
925
926         END(176)
927 };
928
929 static const u8 gen11_rcs_offsets[] = {
930         NOP(1),
931         LRI(15, POSTED),
932         REG16(0x244),
933         REG(0x034),
934         REG(0x030),
935         REG(0x038),
936         REG(0x03c),
937         REG(0x168),
938         REG(0x140),
939         REG(0x110),
940         REG(0x11c),
941         REG(0x114),
942         REG(0x118),
943         REG(0x1c0),
944         REG(0x1c4),
945         REG(0x1c8),
946         REG(0x180),
947
948         NOP(1),
949         LRI(9, POSTED),
950         REG16(0x3a8),
951         REG16(0x28c),
952         REG16(0x288),
953         REG16(0x284),
954         REG16(0x280),
955         REG16(0x27c),
956         REG16(0x278),
957         REG16(0x274),
958         REG16(0x270),
959
960         LRI(1, POSTED),
961         REG(0x1b0),
962
963         NOP(10),
964         LRI(1, 0),
965         REG(0x0c8),
966
967         END(80)
968 };
969
970 static const u8 gen12_rcs_offsets[] = {
971         NOP(1),
972         LRI(13, POSTED),
973         REG16(0x244),
974         REG(0x034),
975         REG(0x030),
976         REG(0x038),
977         REG(0x03c),
978         REG(0x168),
979         REG(0x140),
980         REG(0x110),
981         REG(0x1c0),
982         REG(0x1c4),
983         REG(0x1c8),
984         REG(0x180),
985         REG16(0x2b4),
986
987         NOP(5),
988         LRI(9, POSTED),
989         REG16(0x3a8),
990         REG16(0x28c),
991         REG16(0x288),
992         REG16(0x284),
993         REG16(0x280),
994         REG16(0x27c),
995         REG16(0x278),
996         REG16(0x274),
997         REG16(0x270),
998
999         LRI(3, POSTED),
1000         REG(0x1b0),
1001         REG16(0x5a8),
1002         REG16(0x5ac),
1003
1004         NOP(6),
1005         LRI(1, 0),
1006         REG(0x0c8),
1007         NOP(3 + 9 + 1),
1008
1009         LRI(51, POSTED),
1010         REG16(0x588),
1011         REG16(0x588),
1012         REG16(0x588),
1013         REG16(0x588),
1014         REG16(0x588),
1015         REG16(0x588),
1016         REG(0x028),
1017         REG(0x09c),
1018         REG(0x0c0),
1019         REG(0x178),
1020         REG(0x17c),
1021         REG16(0x358),
1022         REG(0x170),
1023         REG(0x150),
1024         REG(0x154),
1025         REG(0x158),
1026         REG16(0x41c),
1027         REG16(0x600),
1028         REG16(0x604),
1029         REG16(0x608),
1030         REG16(0x60c),
1031         REG16(0x610),
1032         REG16(0x614),
1033         REG16(0x618),
1034         REG16(0x61c),
1035         REG16(0x620),
1036         REG16(0x624),
1037         REG16(0x628),
1038         REG16(0x62c),
1039         REG16(0x630),
1040         REG16(0x634),
1041         REG16(0x638),
1042         REG16(0x63c),
1043         REG16(0x640),
1044         REG16(0x644),
1045         REG16(0x648),
1046         REG16(0x64c),
1047         REG16(0x650),
1048         REG16(0x654),
1049         REG16(0x658),
1050         REG16(0x65c),
1051         REG16(0x660),
1052         REG16(0x664),
1053         REG16(0x668),
1054         REG16(0x66c),
1055         REG16(0x670),
1056         REG16(0x674),
1057         REG16(0x678),
1058         REG16(0x67c),
1059         REG(0x068),
1060         REG(0x084),
1061         NOP(1),
1062
1063         END(192)
1064 };
1065
1066 #undef END
1067 #undef REG16
1068 #undef REG
1069 #undef LRI
1070 #undef NOP
1071
1072 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1073 {
1074         /*
1075          * The gen12+ lists only have the registers we program in the basic
1076          * default state. We rely on the context image using relative
1077          * addressing to automatically fix up the register state between the
1078          * physical engines for virtual engines.
1079          */
1080         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1081                    !intel_engine_has_relative_mmio(engine));
1082
1083         if (engine->class == RENDER_CLASS) {
1084                 if (INTEL_GEN(engine->i915) >= 12)
1085                         return gen12_rcs_offsets;
1086                 else if (INTEL_GEN(engine->i915) >= 11)
1087                         return gen11_rcs_offsets;
1088                 else if (INTEL_GEN(engine->i915) >= 9)
1089                         return gen9_rcs_offsets;
1090                 else
1091                         return gen8_rcs_offsets;
1092         } else {
1093                 if (INTEL_GEN(engine->i915) >= 12)
1094                         return gen12_xcs_offsets;
1095                 else if (INTEL_GEN(engine->i915) >= 9)
1096                         return gen9_xcs_offsets;
1097                 else
1098                         return gen8_xcs_offsets;
1099         }
1100 }
1101
1102 static struct i915_request *
1103 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1104 {
1105         struct i915_request *rq, *rn, *active = NULL;
1106         struct list_head *uninitialized_var(pl);
1107         int prio = I915_PRIORITY_INVALID;
1108
1109         lockdep_assert_held(&engine->active.lock);
1110
1111         list_for_each_entry_safe_reverse(rq, rn,
1112                                          &engine->active.requests,
1113                                          sched.link) {
1114                 if (i915_request_completed(rq))
1115                         continue; /* XXX */
1116
1117                 __i915_request_unsubmit(rq);
1118
1119                 /*
1120                  * Push the request back into the queue for later resubmission.
1121                  * If this request is not native to this physical engine (i.e.
1122                  * it came from a virtual source), push it back onto the virtual
1123                  * engine so that it can be moved across onto another physical
1124                  * engine as load dictates.
1125                  */
1126                 if (likely(rq->execution_mask == engine->mask)) {
1127                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1128                         if (rq_prio(rq) != prio) {
1129                                 prio = rq_prio(rq);
1130                                 pl = i915_sched_lookup_priolist(engine, prio);
1131                         }
1132                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1133
1134                         list_move(&rq->sched.link, pl);
1135                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1136
1137                         active = rq;
1138                 } else {
1139                         struct intel_engine_cs *owner = rq->context->engine;
1140
1141                         /*
1142                          * Decouple the virtual breadcrumb before moving it
1143                          * back to the virtual engine -- we don't want the
1144                          * request to complete in the background and try
1145                          * and cancel the breadcrumb on the virtual engine
1146                          * (instead of the old engine where it is linked)!
1147                          */
1148                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1149                                      &rq->fence.flags)) {
1150                                 spin_lock_nested(&rq->lock,
1151                                                  SINGLE_DEPTH_NESTING);
1152                                 i915_request_cancel_breadcrumb(rq);
1153                                 spin_unlock(&rq->lock);
1154                         }
1155                         WRITE_ONCE(rq->engine, owner);
1156                         owner->submit_request(rq);
1157                         active = NULL;
1158                 }
1159         }
1160
1161         return active;
1162 }
1163
1164 struct i915_request *
1165 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1166 {
1167         struct intel_engine_cs *engine =
1168                 container_of(execlists, typeof(*engine), execlists);
1169
1170         return __unwind_incomplete_requests(engine);
1171 }
1172
1173 static inline void
1174 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1175 {
1176         /*
1177          * This is currently only used when GVT-g is enabled. When GVT-g is
1178          * disabled, the compiler should eliminate this function as dead code.
1179          */
1180         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1181                 return;
1182
1183         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1184                                    status, rq);
1185 }
1186
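/*
 * Engine busyness accounting: stats.active counts the contexts currently
 * executing on the engine. The first context in starts the busy timer and
 * the last context out folds the elapsed time into stats.total, with the
 * seqlock allowing readers to sample a consistent snapshot.
 */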
1187 static void intel_engine_context_in(struct intel_engine_cs *engine)
1188 {
1189         unsigned long flags;
1190
1191         if (atomic_add_unless(&engine->stats.active, 1, 0))
1192                 return;
1193
1194         write_seqlock_irqsave(&engine->stats.lock, flags);
1195         if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1196                 engine->stats.start = ktime_get();
1197                 atomic_inc(&engine->stats.active);
1198         }
1199         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1200 }
1201
1202 static void intel_engine_context_out(struct intel_engine_cs *engine)
1203 {
1204         unsigned long flags;
1205
1206         GEM_BUG_ON(!atomic_read(&engine->stats.active));
1207
1208         if (atomic_add_unless(&engine->stats.active, -1, 1))
1209                 return;
1210
1211         write_seqlock_irqsave(&engine->stats.lock, flags);
1212         if (atomic_dec_and_test(&engine->stats.active)) {
1213                 engine->stats.total =
1214                         ktime_add(engine->stats.total,
1215                                   ktime_sub(ktime_get(), engine->stats.start));
1216         }
1217         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1218 }
1219
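/*
 * Sanity check (and repair) the context image before submission: verify that
 * RING_START, RING_CTL and RING_MI_MODE hold the values we expect, fixing
 * them up and warning if the image appears corrupted.
 */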
1220 static void
1221 execlists_check_context(const struct intel_context *ce,
1222                         const struct intel_engine_cs *engine)
1223 {
1224         const struct intel_ring *ring = ce->ring;
1225         u32 *regs = ce->lrc_reg_state;
1226         bool valid = true;
1227         int x;
1228
1229         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1230                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1231                        engine->name,
1232                        regs[CTX_RING_START],
1233                        i915_ggtt_offset(ring->vma));
1234                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1235                 valid = false;
1236         }
1237
1238         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1239             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1240                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1241                        engine->name,
1242                        regs[CTX_RING_CTL],
1243                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1244                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1245                 valid = false;
1246         }
1247
1248         x = lrc_ring_mi_mode(engine);
1249         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1250                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1251                        engine->name, regs[x + 1]);
1252                 regs[x + 1] &= ~STOP_RING;
1253                 regs[x + 1] |= STOP_RING << 16;
1254                 valid = false;
1255         }
1256
1257         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1258 }
1259
1260 static void restore_default_state(struct intel_context *ce,
1261                                   struct intel_engine_cs *engine)
1262 {
1263         u32 *regs;
1264
1265         regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1266         execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1267
1268         ce->runtime.last = intel_context_get_runtime(ce);
1269 }
1270
1271 static void reset_active(struct i915_request *rq,
1272                          struct intel_engine_cs *engine)
1273 {
1274         struct intel_context * const ce = rq->context;
1275         u32 head;
1276
1277         /*
1278          * The executing context has been cancelled. We want to prevent
1279          * further execution along this context and propagate the error on
1280          * to anything depending on its results.
1281          *
1282          * In __i915_request_submit(), we apply the -EIO and remove the
1283          * requests' payloads for any banned requests. But first, we must
1284          * rewind the context back to the start of the incomplete request so
1285          * that we do not jump back into the middle of the batch.
1286          *
1287          * We preserve the breadcrumbs and semaphores of the incomplete
1288          * requests so that inter-timeline dependencies (i.e. other timelines)
1289          * remain correctly ordered. And we defer to __i915_request_submit()
1290          * so that all asynchronous waits are correctly handled.
1291          */
1292         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1293                      rq->fence.context, rq->fence.seqno);
1294
1295         /* On resubmission of the active request, payload will be scrubbed */
1296         if (i915_request_completed(rq))
1297                 head = rq->tail;
1298         else
1299                 head = active_request(ce->timeline, rq)->head;
1300         head = intel_ring_wrap(ce->ring, head);
1301
1302         /* Scrub the context image to prevent replaying the previous batch */
1303         restore_default_state(ce, engine);
1304         __execlists_update_reg_state(ce, engine, head);
1305
1306         /* We've switched away, so this should be a no-op, but intent matters */
1307         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1308 }
1309
1310 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1311 {
1312 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1313         ce->runtime.num_underflow += dt < 0;
1314         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1315 #endif
1316 }
1317
1318 static void intel_context_update_runtime(struct intel_context *ce)
1319 {
1320         u32 old;
1321         s32 dt;
1322
1323         if (intel_context_is_barrier(ce))
1324                 return;
1325
1326         old = ce->runtime.last;
1327         ce->runtime.last = intel_context_get_runtime(ce);
1328         dt = ce->runtime.last - old;
1329
1330         if (unlikely(dt <= 0)) {
1331                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1332                          old, ce->runtime.last, dt);
1333                 st_update_runtime_underflow(ce, dt);
1334                 return;
1335         }
1336
1337         ewma_runtime_add(&ce->runtime.avg, dt);
1338         ce->runtime.total += dt;
1339 }
1340
1341 static inline struct intel_engine_cs *
1342 __execlists_schedule_in(struct i915_request *rq)
1343 {
1344         struct intel_engine_cs * const engine = rq->engine;
1345         struct intel_context * const ce = rq->context;
1346
1347         intel_context_get(ce);
1348
1349         if (unlikely(intel_context_is_banned(ce)))
1350                 reset_active(rq, engine);
1351
1352         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1353                 execlists_check_context(ce, engine);
1354
1355         if (ce->tag) {
1356                 /* Use a fixed tag for OA and friends */
1357                 GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1358                 ce->lrc.ccid = ce->tag;
1359         } else {
1360                 /* We don't need a strict matching tag, just different values */
1361                 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1362
1363                 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1364                 clear_bit(tag - 1, &engine->context_tag);
1365                 ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1366
1367                 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1368         }
1369
1370         ce->lrc.ccid |= engine->execlists.ccid;
1371
1372         __intel_gt_pm_get(engine->gt);
1373         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1374         intel_engine_context_in(engine);
1375
1376         return engine;
1377 }
1378
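/*
 * ce->inflight tracks the physical engine the context is currently executing
 * on, with the low bits of the pointer used as a count of how many ELSP
 * ports the context occupies (see ptr_inc/ptr_dec in execlists_schedule_out).
 */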
1379 static inline struct i915_request *
1380 execlists_schedule_in(struct i915_request *rq, int idx)
1381 {
1382         struct intel_context * const ce = rq->context;
1383         struct intel_engine_cs *old;
1384
1385         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1386         trace_i915_request_in(rq, idx);
1387
1388         old = READ_ONCE(ce->inflight);
1389         do {
1390                 if (!old) {
1391                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1392                         break;
1393                 }
1394         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1395
1396         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1397         return i915_request_get(rq);
1398 }
1399
1400 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1401 {
1402         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1403         struct i915_request *next = READ_ONCE(ve->request);
1404
1405         if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1406                 tasklet_hi_schedule(&ve->base.execlists.tasklet);
1407 }
1408
1409 static inline void
1410 __execlists_schedule_out(struct i915_request *rq,
1411                          struct intel_engine_cs * const engine,
1412                          unsigned int ccid)
1413 {
1414         struct intel_context * const ce = rq->context;
1415
1416         /*
1417          * NB process_csb() is not under the engine->active.lock and hence
1418          * schedule_out can race with schedule_in meaning that we should
1419          * refrain from doing non-trivial work here.
1420          */
1421
1422         /*
1423          * If we have just completed this context, the engine may now be
1424          * idle and we want to re-enter powersaving.
1425          */
1426         if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1427             i915_request_completed(rq))
1428                 intel_engine_add_retire(engine, ce->timeline);
1429
1430         ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1431         ccid &= GEN12_MAX_CONTEXT_HW_ID;
1432         if (ccid < BITS_PER_LONG) {
1433                 GEM_BUG_ON(ccid == 0);
1434                 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1435                 set_bit(ccid - 1, &engine->context_tag);
1436         }
1437
1438         intel_context_update_runtime(ce);
1439         intel_engine_context_out(engine);
1440         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1441         intel_gt_pm_put_async(engine->gt);
1442
1443         /*
1444          * If this is part of a virtual engine, its next request may
1445          * have been blocked waiting for access to the active context.
1446          * We have to kick all the siblings again in case we need to
1447          * switch (e.g. the next request is not runnable on this
1448          * engine). Hopefully, we will already have submitted the next
1449          * request before the tasklet runs and do not need to rebuild
1450          * each virtual tree and kick everyone again.
1451          */
1452         if (ce->engine != engine)
1453                 kick_siblings(rq, ce);
1454
1455         intel_context_put(ce);
1456 }
1457
1458 static inline void
1459 execlists_schedule_out(struct i915_request *rq)
1460 {
1461         struct intel_context * const ce = rq->context;
1462         struct intel_engine_cs *cur, *old;
1463         u32 ccid;
1464
1465         trace_i915_request_out(rq);
1466
1467         ccid = rq->context->lrc.ccid;
1468         old = READ_ONCE(ce->inflight);
1469         do
1470                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1471         while (!try_cmpxchg(&ce->inflight, &old, cur));
1472         if (!cur)
1473                 __execlists_schedule_out(rq, old, ccid);
1474
1475         i915_request_put(rq);
1476 }
1477
1478 static u64 execlists_update_context(struct i915_request *rq)
1479 {
1480         struct intel_context *ce = rq->context;
1481         u64 desc = ce->lrc.desc;
1482         u32 tail, prev;
1483
1484         /*
1485          * WaIdleLiteRestore:bdw,skl
1486          *
1487          * We should never submit the context with the same RING_TAIL twice
1488          * just in case we submit an empty ring, which confuses the HW.
1489          *
1490          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1491          * the normal request to be able to always advance the RING_TAIL on
1492          * subsequent resubmissions (for lite restore). Should that fail us,
1493          * and we try and submit the same tail again, force the context
1494          * reload.
1495          *
1496          * If we need to return to a preempted context, we need to skip the
1497          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1498          * HW has a tendency to ignore us rewinding the TAIL to the end of
1499          * an earlier request.
1500          */
1501         tail = intel_ring_set_tail(rq->ring, rq->tail);
1502         prev = ce->lrc_reg_state[CTX_RING_TAIL];
1503         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1504                 desc |= CTX_DESC_FORCE_RESTORE;
1505         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1506         rq->tail = rq->wa_tail;
1507
1508         /*
1509          * Make sure the context image is complete before we submit it to HW.
1510          *
1511          * Ostensibly, writes (including the WCB) should be flushed prior to
1512          * an uncached write such as our mmio register access; yet the empirical
1513          * evidence (esp. on Braswell) suggests that the WC write into memory
1514          * may not be visible to the HW prior to the completion of the UC
1515          * register write and that we may begin execution from the context
1516          * before its image is complete leading to invalid PD chasing.
1517          */
1518         wmb();
1519
1520         ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1521         return desc;
1522 }
1523
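/*
 * Write one context descriptor to the hardware: on platforms with the submit
 * queue control register each dword goes into the per-port slot, while older
 * platforms take the descriptor directly on ELSP, upper dword first.
 */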
1524 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1525 {
1526         if (execlists->ctrl_reg) {
1527                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1528                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1529         } else {
1530                 writel(upper_32_bits(desc), execlists->submit_reg);
1531                 writel(lower_32_bits(desc), execlists->submit_reg);
1532         }
1533 }
1534
1535 static __maybe_unused char *
1536 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1537 {
1538         if (!rq)
1539                 return "";
1540
1541         snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1542                  prefix,
1543                  rq->context->lrc.ccid,
1544                  rq->fence.context, rq->fence.seqno,
1545                  i915_request_completed(rq) ? "!" :
1546                  i915_request_started(rq) ? "*" :
1547                  "",
1548                  rq_prio(rq));
1549
1550         return buf;
1551 }
1552
1553 static __maybe_unused void
1554 trace_ports(const struct intel_engine_execlists *execlists,
1555             const char *msg,
1556             struct i915_request * const *ports)
1557 {
1558         const struct intel_engine_cs *engine =
1559                 container_of(execlists, typeof(*engine), execlists);
1560         char __maybe_unused p0[40], p1[40];
1561
1562         if (!ports[0])
1563                 return;
1564
1565         ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1566                      dump_port(p0, sizeof(p0), "", ports[0]),
1567                      dump_port(p1, sizeof(p1), ", ", ports[1]));
1568 }
1569
1570 static inline bool
1571 reset_in_progress(const struct intel_engine_execlists *execlists)
1572 {
1573         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1574 }
1575
1576 static __maybe_unused bool
1577 assert_pending_valid(const struct intel_engine_execlists *execlists,
1578                      const char *msg)
1579 {
1580         struct intel_engine_cs *engine =
1581                 container_of(execlists, typeof(*engine), execlists);
1582         struct i915_request * const *port, *rq;
1583         struct intel_context *ce = NULL;
1584         bool sentinel = false;
1585         u32 ccid = -1;
1586
1587         trace_ports(execlists, msg, execlists->pending);
1588
1589         /* We may be messing around with the lists during reset, lalala */
1590         if (reset_in_progress(execlists))
1591                 return true;
1592
1593         if (!execlists->pending[0]) {
1594                 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1595                               engine->name);
1596                 return false;
1597         }
1598
1599         if (execlists->pending[execlists_num_ports(execlists)]) {
1600                 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1601                               engine->name, execlists_num_ports(execlists));
1602                 return false;
1603         }
1604
1605         for (port = execlists->pending; (rq = *port); port++) {
1606                 unsigned long flags;
1607                 bool ok = true;
1608
1609                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1610                 GEM_BUG_ON(!i915_request_is_active(rq));
1611
1612                 if (ce == rq->context) {
1613                         GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1614                                       engine->name,
1615                                       ce->timeline->fence_context,
1616                                       port - execlists->pending);
1617                         return false;
1618                 }
1619                 ce = rq->context;
1620
1621                 if (ccid == ce->lrc.ccid) {
1622                         GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1623                                       engine->name,
1624                                       ccid, ce->timeline->fence_context,
1625                                       port - execlists->pending);
1626                         return false;
1627                 }
1628                 ccid = ce->lrc.ccid;
1629
1630                 /*
1631                  * Sentinels are supposed to be lonely so they flush the
1632                  * current execution off the HW. Check that they are the
1633                  * only request in the pending submission.
1634                  */
1635                 if (sentinel) {
1636                         GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1637                                       engine->name,
1638                                       ce->timeline->fence_context,
1639                                       port - execlists->pending);
1640                         return false;
1641                 }
1642
1643                 sentinel = i915_request_has_sentinel(rq);
1644                 if (sentinel && port != execlists->pending) {
1645                         GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
1646                                       engine->name,
1647                                       ce->timeline->fence_context,
1648                                       port - execlists->pending);
1649                         return false;
1650                 }
1651
1652                 /* Hold tightly onto the lock to prevent concurrent retires! */
1653                 if (!spin_trylock_irqsave(&rq->lock, flags))
1654                         continue;
1655
1656                 if (i915_request_completed(rq))
1657                         goto unlock;
1658
1659                 if (i915_active_is_idle(&ce->active) &&
1660                     !intel_context_is_barrier(ce)) {
1661                         GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1662                                       engine->name,
1663                                       ce->timeline->fence_context,
1664                                       port - execlists->pending);
1665                         ok = false;
1666                         goto unlock;
1667                 }
1668
1669                 if (!i915_vma_is_pinned(ce->state)) {
1670                         GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1671                                       engine->name,
1672                                       ce->timeline->fence_context,
1673                                       port - execlists->pending);
1674                         ok = false;
1675                         goto unlock;
1676                 }
1677
1678                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1679                         GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1680                                       engine->name,
1681                                       ce->timeline->fence_context,
1682                                       port - execlists->pending);
1683                         ok = false;
1684                         goto unlock;
1685                 }
1686
1687 unlock:
1688                 spin_unlock_irqrestore(&rq->lock, flags);
1689                 if (!ok)
1690                         return false;
1691         }
1692
1693         return ce;
1694 }
1695
1696 static void execlists_submit_ports(struct intel_engine_cs *engine)
1697 {
1698         struct intel_engine_execlists *execlists = &engine->execlists;
1699         unsigned int n;
1700
1701         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1702
1703         /*
1704          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1705          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1706          * not be relinquished until the device is idle (see
1707          * i915_gem_idle_work_handler()). As a precaution, we make sure
1708          * that all ELSP are drained i.e. we have processed the CSB,
1709          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1710          */
1711         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1712
1713         /*
1714          * ELSQ note: the submit queue is not cleared after being submitted
1715          * to the HW so we need to make sure we always clean it up. This is
1716          * currently ensured by the fact that we always write the same number
1717          * of elsq entries, keep this in mind before changing the loop below.
1718          */
1719         for (n = execlists_num_ports(execlists); n--; ) {
1720                 struct i915_request *rq = execlists->pending[n];
1721
1722                 write_desc(execlists,
1723                            rq ? execlists_update_context(rq) : 0,
1724                            n);
1725         }
1726
1727         /* we need to manually load the submit queue */
1728         if (execlists->ctrl_reg)
1729                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1730 }
1731
1732 static bool ctx_single_port_submission(const struct intel_context *ce)
1733 {
1734         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1735                 intel_context_force_single_submission(ce));
1736 }
1737
1738 static bool can_merge_ctx(const struct intel_context *prev,
1739                           const struct intel_context *next)
1740 {
1741         if (prev != next)
1742                 return false;
1743
1744         if (ctx_single_port_submission(prev))
1745                 return false;
1746
1747         return true;
1748 }
1749
1750 static unsigned long i915_request_flags(const struct i915_request *rq)
1751 {
1752         return READ_ONCE(rq->fence.flags);
1753 }
1754
1755 static bool can_merge_rq(const struct i915_request *prev,
1756                          const struct i915_request *next)
1757 {
1758         GEM_BUG_ON(prev == next);
1759         GEM_BUG_ON(!assert_priority_queue(prev, next));
1760
1761         /*
1762          * We do not submit known completed requests. Therefore if the next
1763          * request is already completed, we can pretend to merge it in
1764          * with the previous context (and we will skip updating the ELSP
1765          * and tracking). Thus hopefully keeping the ELSP full with active
1766          * contexts, despite the best efforts of preempt-to-busy to confuse
1767          * us.
1768          */
1769         if (i915_request_completed(next))
1770                 return true;
1771
1772         if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1773                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1774                       BIT(I915_FENCE_FLAG_SENTINEL))))
1775                 return false;
1776
1777         if (!can_merge_ctx(prev->context, next->context))
1778                 return false;
1779
1780         GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1781         return true;
1782 }
1783
1784 static void virtual_update_register_offsets(u32 *regs,
1785                                             struct intel_engine_cs *engine)
1786 {
1787         set_offsets(regs, reg_offsets(engine), engine, false);
1788 }
1789
1790 static bool virtual_matches(const struct virtual_engine *ve,
1791                             const struct i915_request *rq,
1792                             const struct intel_engine_cs *engine)
1793 {
1794         const struct intel_engine_cs *inflight;
1795
1796         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1797                 return false;
1798
1799         /*
1800          * We track when the HW has completed saving the context image
1801          * (i.e. when we have seen the final CS event switching out of
1802          * the context) and must not overwrite the context image before
1803          * then. This restricts us to only using the active engine
1804          * while the previous virtualized request is inflight (so
1805          * we reuse the register offsets). This is a very small
1806          * hysteresis on the greedy selection algorithm.
1807          */
1808         inflight = intel_context_inflight(&ve->context);
1809         if (inflight && inflight != engine)
1810                 return false;
1811
1812         return true;
1813 }
1814
1815 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
1816 {
1817         /*
1818          * All the outstanding signals on ve->siblings[0] must have
1819          * been completed, just pending the interrupt handler. As those
1820          * signals still refer to the old sibling (via rq->engine), we must
1821          * transfer those to the old irq_worker to keep our locking
1822          * consistent.
1823          */
1824         intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
1825 }
1826
1827 #define for_each_waiter(p__, rq__) \
1828         list_for_each_entry_lockless(p__, \
1829                                      &(rq__)->sched.waiters_list, \
1830                                      wait_link)
1831
1832 #define for_each_signaler(p__, rq__) \
1833         list_for_each_entry_rcu(p__, \
1834                                 &(rq__)->sched.signalers_list, \
1835                                 signal_link)
1836
1837 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1838 {
1839         LIST_HEAD(list);
1840
1841         /*
1842          * We want to move the interrupted request to the back of
1843          * the round-robin list (i.e. its priority level), but
1844          * in doing so, we must then move all requests that were in
1845          * flight and were waiting for the interrupted request to
1846          * be run after it again.
1847          */
1848         do {
1849                 struct i915_dependency *p;
1850
1851                 GEM_BUG_ON(i915_request_is_active(rq));
1852                 list_move_tail(&rq->sched.link, pl);
1853
1854                 for_each_waiter(p, rq) {
1855                         struct i915_request *w =
1856                                 container_of(p->waiter, typeof(*w), sched);
1857
1858                         if (p->flags & I915_DEPENDENCY_WEAK)
1859                                 continue;
1860
1861                         /* Leave semaphores spinning on the other engines */
1862                         if (w->engine != rq->engine)
1863                                 continue;
1864
1865                         /* No waiter should start before its signaler */
1866                         GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1867                                    i915_request_started(w) &&
1868                                    !i915_request_completed(rq));
1869
1870                         GEM_BUG_ON(i915_request_is_active(w));
1871                         if (!i915_request_is_ready(w))
1872                                 continue;
1873
1874                         if (rq_prio(w) < rq_prio(rq))
1875                                 continue;
1876
1877                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1878                         list_move_tail(&w->sched.link, &list);
1879                 }
1880
1881                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1882         } while (rq);
1883 }
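/*
 * Illustrative sketch (not part of the driver): defer_request() above walks
 * the waiter graph iteratively, using a local list as a work queue instead of
 * recursing, so arbitrarily deep dependency chains cannot overflow the stack.
 * The toy below shows the same pattern on a generic node with a child array;
 * struct node, visit() and the fixed-size queue are assumptions for the
 * example only.
 */
#include <stddef.h>

struct node {
	struct node *children[4];
	size_t nr_children;
	int deferred;
};

static void defer_tree(struct node *n, void (*visit)(struct node *))
{
	struct node *queue[64];
	size_t head = 0, tail = 0;

	queue[tail++] = n;
	while (head != tail) {
		struct node *cur = queue[head++];
		size_t i;

		visit(cur);		/* e.g. move to the back of its priority list */
		cur->deferred = 1;

		for (i = 0; i < cur->nr_children; i++) {
			struct node *child = cur->children[i];

			/* only requeue children that still need deferring */
			if (child && !child->deferred && tail < 64)
				queue[tail++] = child;
		}
	}
}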
1884
1885 static void defer_active(struct intel_engine_cs *engine)
1886 {
1887         struct i915_request *rq;
1888
1889         rq = __unwind_incomplete_requests(engine);
1890         if (!rq)
1891                 return;
1892
1893         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1894 }
1895
1896 static bool
1897 need_timeslice(const struct intel_engine_cs *engine,
1898                const struct i915_request *rq)
1899 {
1900         int hint;
1901
1902         if (!intel_engine_has_timeslices(engine))
1903                 return false;
1904
1905         hint = engine->execlists.queue_priority_hint;
1906         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1907                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1908
1909         return hint >= effective_prio(rq);
1910 }
1911
1912 static bool
1913 timeslice_yield(const struct intel_engine_execlists *el,
1914                 const struct i915_request *rq)
1915 {
1916         /*
1917          * Once bitten, forever smitten!
1918          *
1919          * If the active context ever busy-waited on a semaphore,
1920          * it will be treated as a hog until the end of its timeslice (i.e.
1921          * until it is scheduled out and replaced by a new submission,
1922          * possibly even its own lite-restore). The HW only sends an interrupt
1923          * on the first miss, and we do not know if that semaphore has been
1924          * signaled, or even if it is now stuck on another semaphore. Play
1925          * safe, yield if it might be stuck -- it will be given a fresh
1926          * timeslice in the near future.
1927          */
1928         return rq->context->lrc.ccid == READ_ONCE(el->yield);
1929 }
1930
1931 static bool
1932 timeslice_expired(const struct intel_engine_execlists *el,
1933                   const struct i915_request *rq)
1934 {
1935         return timer_expired(&el->timer) || timeslice_yield(el, rq);
1936 }
1937
1938 static int
1939 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1940 {
1941         if (list_is_last(&rq->sched.link, &engine->active.requests))
1942                 return INT_MIN;
1943
1944         return rq_prio(list_next_entry(rq, sched.link));
1945 }
1946
1947 static inline unsigned long
1948 timeslice(const struct intel_engine_cs *engine)
1949 {
1950         return READ_ONCE(engine->props.timeslice_duration_ms);
1951 }
1952
1953 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1954 {
1955         const struct intel_engine_execlists *execlists = &engine->execlists;
1956         const struct i915_request *rq = *execlists->active;
1957
1958         if (!rq || i915_request_completed(rq))
1959                 return 0;
1960
1961         if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1962                 return 0;
1963
1964         return timeslice(engine);
1965 }
1966
1967 static void set_timeslice(struct intel_engine_cs *engine)
1968 {
1969         unsigned long duration;
1970
1971         if (!intel_engine_has_timeslices(engine))
1972                 return;
1973
1974         duration = active_timeslice(engine);
1975         ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
1976
1977         set_timer_ms(&engine->execlists.timer, duration);
1978 }
1979
1980 static void start_timeslice(struct intel_engine_cs *engine)
1981 {
1982         struct intel_engine_execlists *execlists = &engine->execlists;
1983         const int prio = queue_prio(execlists);
1984         unsigned long duration;
1985
1986         if (!intel_engine_has_timeslices(engine))
1987                 return;
1988
1989         WRITE_ONCE(execlists->switch_priority_hint, prio);
1990         if (prio == INT_MIN)
1991                 return;
1992
1993         if (timer_pending(&execlists->timer))
1994                 return;
1995
1996         duration = timeslice(engine);
1997         ENGINE_TRACE(engine,
1998                      "start timeslicing, prio:%d, interval:%lu",
1999                      prio, duration);
2000
2001         set_timer_ms(&execlists->timer, duration);
2002 }
2003
2004 static void record_preemption(struct intel_engine_execlists *execlists)
2005 {
2006         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2007 }
2008
2009 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2010                                             const struct i915_request *rq)
2011 {
2012         if (!rq)
2013                 return 0;
2014
2015         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2016         if (unlikely(intel_context_is_banned(rq->context)))
2017                 return 1;
2018
2019         return READ_ONCE(engine->props.preempt_timeout_ms);
2020 }
2021
2022 static void set_preempt_timeout(struct intel_engine_cs *engine,
2023                                 const struct i915_request *rq)
2024 {
2025         if (!intel_engine_has_preempt_reset(engine))
2026                 return;
2027
2028         set_timer_ms(&engine->execlists.preempt,
2029                      active_preempt_timeout(engine, rq));
2030 }
2031
2032 static inline void clear_ports(struct i915_request **ports, int count)
2033 {
2034         memset_p((void **)ports, NULL, count);
2035 }
2036
2037 static void execlists_dequeue(struct intel_engine_cs *engine)
2038 {
2039         struct intel_engine_execlists * const execlists = &engine->execlists;
2040         struct i915_request **port = execlists->pending;
2041         struct i915_request ** const last_port = port + execlists->port_mask;
2042         struct i915_request * const *active;
2043         struct i915_request *last;
2044         struct rb_node *rb;
2045         bool submit = false;
2046
2047         /*
2048          * Hardware submission is through 2 ports. Conceptually each port
2049          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2050          * static for a context, and unique to each, so we only execute
2051          * requests belonging to a single context from each ring. RING_HEAD
2052          * is maintained by the CS in the context image, it marks the place
2053          * where it got up to last time, and through RING_TAIL we tell the CS
2054          * where we want to execute up to this time.
2055          *
2056          * In this list the requests are in order of execution. Consecutive
2057          * requests from the same context are adjacent in the ringbuffer. We
2058          * can combine these requests into a single RING_TAIL update:
2059          *
2060          *              RING_HEAD...req1...req2
2061          *                                    ^- RING_TAIL
2062          * since to execute req2 the CS must first execute req1.
2063          *
2064          * Our goal then is to point each port to the end of a consecutive
2065          * sequence of requests as being the most optimal (fewest wake ups
2066          * and context switches) submission.
2067          */
2068
2069         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2070                 struct virtual_engine *ve =
2071                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2072                 struct i915_request *rq = READ_ONCE(ve->request);
2073
2074                 if (!rq) { /* lazily cleanup after another engine handled rq */
2075                         rb_erase_cached(rb, &execlists->virtual);
2076                         RB_CLEAR_NODE(rb);
2077                         rb = rb_first_cached(&execlists->virtual);
2078                         continue;
2079                 }
2080
2081                 if (!virtual_matches(ve, rq, engine)) {
2082                         rb = rb_next(rb);
2083                         continue;
2084                 }
2085
2086                 break;
2087         }
2088
2089         /*
2090          * If the queue is higher priority than the last
2091          * request in the currently active context, submit afresh.
2092          * We will resubmit again afterwards in case we need to split
2093          * the active context to interject the preemption request,
2094          * i.e. we will retrigger preemption following the ack in case
2095          * of trouble.
2096          */
2097         active = READ_ONCE(execlists->active);
2098
2099         /*
2100          * In theory we can skip over completed contexts that have not
2101          * yet been processed by events (as those events are in flight):
2102          *
2103          * while ((last = *active) && i915_request_completed(last))
2104          *      active++;
2105          *
2106          * However, the GPU cannot handle this as it will ultimately
2107          * find itself trying to jump back into a context it has just
2108          * completed and barf.
2109          */
2110
2111         if ((last = *active)) {
2112                 if (need_preempt(engine, last, rb)) {
2113                         if (i915_request_completed(last)) {
2114                                 tasklet_hi_schedule(&execlists->tasklet);
2115                                 return;
2116                         }
2117
2118                         ENGINE_TRACE(engine,
2119                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2120                                      last->fence.context,
2121                                      last->fence.seqno,
2122                                      last->sched.attr.priority,
2123                                      execlists->queue_priority_hint);
2124                         record_preemption(execlists);
2125
2126                         /*
2127                          * Don't let the RING_HEAD advance past the breadcrumb
2128                          * as we unwind (and until we resubmit) so that we do
2129                          * not accidentally tell it to go backwards.
2130                          */
2131                         ring_set_paused(engine, 1);
2132
2133                         /*
2134                          * Note that we have not stopped the GPU at this point,
2135                          * so we are unwinding the incomplete requests as they
2136                          * remain inflight and so by the time we do complete
2137                          * the preemption, some of the unwound requests may
2138                          * complete!
2139                          */
2140                         __unwind_incomplete_requests(engine);
2141
2142                         last = NULL;
2143                 } else if (need_timeslice(engine, last) &&
2144                            timeslice_expired(execlists, last)) {
2145                         if (i915_request_completed(last)) {
2146                                 tasklet_hi_schedule(&execlists->tasklet);
2147                                 return;
2148                         }
2149
2150                         ENGINE_TRACE(engine,
2151                                      "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2152                                      last->fence.context,
2153                                      last->fence.seqno,
2154                                      last->sched.attr.priority,
2155                                      execlists->queue_priority_hint,
2156                                      yesno(timeslice_yield(execlists, last)));
2157
2158                         ring_set_paused(engine, 1);
2159                         defer_active(engine);
2160
2161                         /*
2162                          * Unlike for preemption, if we rewind and continue
2163                          * executing the same context as previously active,
2164                          * the order of execution will remain the same and
2165                          * the tail will only advance. We do not need to
2166                          * force a full context restore, as a lite-restore
2167                          * is sufficient to resample the monotonic TAIL.
2168                          *
2169                          * If we switch to any other context, similarly we
2170                          * will not rewind TAIL of current context, and
2171                          * normal save/restore will preserve state and allow
2172                          * us to later continue executing the same request.
2173                          */
2174                         last = NULL;
2175                 } else {
2176                         /*
2177                          * Otherwise if we already have a request pending
2178                          * for execution after the current one, we can
2179                          * just wait until the next CS event before
2180                          * queuing more. In either case we will force a
2181                          * lite-restore preemption event, but if we wait
2182                          * we hopefully coalesce several updates into a single
2183                          * submission.
2184                          */
2185                         if (!list_is_last(&last->sched.link,
2186                                           &engine->active.requests)) {
2187                                 /*
2188                                  * Even if ELSP[1] is occupied and not worthy
2189                                  * of timeslices, our queue might be.
2190                                  */
2191                                 start_timeslice(engine);
2192                                 return;
2193                         }
2194                 }
2195         }
2196
2197         while (rb) { /* XXX virtual is always taking precedence */
2198                 struct virtual_engine *ve =
2199                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2200                 struct i915_request *rq;
2201
2202                 spin_lock(&ve->base.active.lock);
2203
2204                 rq = ve->request;
2205                 if (unlikely(!rq)) { /* lost the race to a sibling */
2206                         spin_unlock(&ve->base.active.lock);
2207                         rb_erase_cached(rb, &execlists->virtual);
2208                         RB_CLEAR_NODE(rb);
2209                         rb = rb_first_cached(&execlists->virtual);
2210                         continue;
2211                 }
2212
2213                 GEM_BUG_ON(rq != ve->request);
2214                 GEM_BUG_ON(rq->engine != &ve->base);
2215                 GEM_BUG_ON(rq->context != &ve->context);
2216
2217                 if (rq_prio(rq) >= queue_prio(execlists)) {
2218                         if (!virtual_matches(ve, rq, engine)) {
2219                                 spin_unlock(&ve->base.active.lock);
2220                                 rb = rb_next(rb);
2221                                 continue;
2222                         }
2223
2224                         if (last && !can_merge_rq(last, rq)) {
2225                                 spin_unlock(&ve->base.active.lock);
2226                                 start_timeslice(engine);
2227                                 return; /* leave this for another sibling */
2228                         }
2229
2230                         ENGINE_TRACE(engine,
2231                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
2232                                      rq->fence.context,
2233                                      rq->fence.seqno,
2234                                      i915_request_completed(rq) ? "!" :
2235                                      i915_request_started(rq) ? "*" :
2236                                      "",
2237                                      yesno(engine != ve->siblings[0]));
2238
2239                         WRITE_ONCE(ve->request, NULL);
2240                         WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2241                                    INT_MIN);
2242                         rb_erase_cached(rb, &execlists->virtual);
2243                         RB_CLEAR_NODE(rb);
2244
2245                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2246                         WRITE_ONCE(rq->engine, engine);
2247
2248                         if (engine != ve->siblings[0]) {
2249                                 u32 *regs = ve->context.lrc_reg_state;
2250                                 unsigned int n;
2251
2252                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2253
2254                                 if (!intel_engine_has_relative_mmio(engine))
2255                                         virtual_update_register_offsets(regs,
2256                                                                         engine);
2257
2258                                 if (!list_empty(&ve->context.signals))
2259                                         virtual_xfer_breadcrumbs(ve);
2260
2261                                 /*
2262                                  * Move the bound engine to the top of the list
2263                                  * for future execution. We then kick this
2264                                  * tasklet first before checking others, so that
2265                                  * we preferentially reuse this set of bound
2266                                  * registers.
2267                                  */
2268                                 for (n = 1; n < ve->num_siblings; n++) {
2269                                         if (ve->siblings[n] == engine) {
2270                                                 swap(ve->siblings[n],
2271                                                      ve->siblings[0]);
2272                                                 break;
2273                                         }
2274                                 }
2275
2276                                 GEM_BUG_ON(ve->siblings[0] != engine);
2277                         }
2278
2279                         if (__i915_request_submit(rq)) {
2280                                 submit = true;
2281                                 last = rq;
2282                         }
2283                         i915_request_put(rq);
2284
2285                         /*
2286                          * Hmm, we have a bunch of virtual engine requests,
2287                          * but the first one was already completed (thanks
2288                          * preempt-to-busy!). Keep looking at the veng queue
2289                          * until we have no more relevant requests (i.e.
2290                          * the normal submit queue has higher priority).
2291                          */
2292                         if (!submit) {
2293                                 spin_unlock(&ve->base.active.lock);
2294                                 rb = rb_first_cached(&execlists->virtual);
2295                                 continue;
2296                         }
2297                 }
2298
2299                 spin_unlock(&ve->base.active.lock);
2300                 break;
2301         }
2302
2303         while ((rb = rb_first_cached(&execlists->queue))) {
2304                 struct i915_priolist *p = to_priolist(rb);
2305                 struct i915_request *rq, *rn;
2306                 int i;
2307
2308                 priolist_for_each_request_consume(rq, rn, p, i) {
2309                         bool merge = true;
2310
2311                         /*
2312                          * Can we combine this request with the current port?
2313                          * It has to be the same context/ringbuffer and not
2314                          * have any exceptions (e.g. GVT saying never to
2315                          * combine contexts).
2316                          *
2317                          * If we can combine the requests, we can execute both
2318                          * by updating the RING_TAIL to point to the end of the
2319                          * second request, and so we never need to tell the
2320                          * hardware about the first.
2321                          */
2322                         if (last && !can_merge_rq(last, rq)) {
2323                                 /*
2324                                  * If we are on the second port and cannot
2325                                  * combine this request with the last, then we
2326                                  * are done.
2327                                  */
2328                                 if (port == last_port)
2329                                         goto done;
2330
2331                                 /*
2332                                  * We must not populate both ELSP[] with the
2333                                  * same LRCA, i.e. we must submit 2 different
2334                                  * contexts if we submit 2 ELSP.
2335                                  */
2336                                 if (last->context == rq->context)
2337                                         goto done;
2338
2339                                 if (i915_request_has_sentinel(last))
2340                                         goto done;
2341
2342                                 /*
2343                                  * If GVT overrides us we only ever submit
2344                                  * port[0], leaving port[1] empty. Note that we
2345                                  * also have to be careful that we don't queue
2346                                  * the same context (even though a different
2347                                  * request) to the second port.
2348                                  */
2349                                 if (ctx_single_port_submission(last->context) ||
2350                                     ctx_single_port_submission(rq->context))
2351                                         goto done;
2352
2353                                 merge = false;
2354                         }
2355
2356                         if (__i915_request_submit(rq)) {
2357                                 if (!merge) {
2358                                         *port = execlists_schedule_in(last, port - execlists->pending);
2359                                         port++;
2360                                         last = NULL;
2361                                 }
2362
2363                                 GEM_BUG_ON(last &&
2364                                            !can_merge_ctx(last->context,
2365                                                           rq->context));
2366                                 GEM_BUG_ON(last &&
2367                                            i915_seqno_passed(last->fence.seqno,
2368                                                              rq->fence.seqno));
2369
2370                                 submit = true;
2371                                 last = rq;
2372                         }
2373                 }
2374
2375                 rb_erase_cached(&p->node, &execlists->queue);
2376                 i915_priolist_free(p);
2377         }
2378
2379 done:
2380         /*
2381          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2382          *
2383          * We choose the priority hint such that if we add a request of greater
2384          * priority than this, we kick the submission tasklet to decide on
2385          * the right order of submitting the requests to hardware. We must
2386          * also be prepared to reorder requests as they are in-flight on the
2387          * HW. We derive the priority hint then as the first "hole" in
2388          * the HW submission ports and if there are no available slots,
2389          * the priority of the lowest executing request, i.e. last.
2390          *
2391          * When we do receive a higher priority request ready to run from the
2392          * user, see queue_request(), the priority hint is bumped to that
2393          * request triggering preemption on the next dequeue (or subsequent
2394          * interrupt for secondary ports).
2395          */
2396         execlists->queue_priority_hint = queue_prio(execlists);
2397
2398         if (submit) {
2399                 *port = execlists_schedule_in(last, port - execlists->pending);
2400                 execlists->switch_priority_hint =
2401                         switch_prio(engine, *execlists->pending);
2402
2403                 /*
2404                  * Skip if we ended up with exactly the same set of requests,
2405                  * e.g. trying to timeslice a pair of ordered contexts
2406                  */
2407                 if (!memcmp(active, execlists->pending,
2408                             (port - execlists->pending + 1) * sizeof(*port))) {
2409                         do
2410                                 execlists_schedule_out(fetch_and_zero(port));
2411                         while (port-- != execlists->pending);
2412
2413                         goto skip_submit;
2414                 }
2415                 clear_ports(port + 1, last_port - port);
2416
2417                 WRITE_ONCE(execlists->yield, -1);
2418                 set_preempt_timeout(engine, *active);
2419                 execlists_submit_ports(engine);
2420         } else {
2421 skip_submit:
2422                 ring_set_paused(engine, 0);
2423         }
2424 }
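/*
 * Illustrative sketch (not part of the driver): the heart of
 * execlists_dequeue() above is packing a priority-ordered request list into
 * at most two ELSP ports, merging consecutive requests that share a context
 * so that only the final RING_TAIL of each run is submitted. struct req and
 * NPORTS are assumptions for the example only; the real code also handles
 * sentinels, GVT single-port submission and virtual engines.
 */
#include <stddef.h>

#define NPORTS 2

struct req {
	int ctx_id;		/* stand-in for rq->context */
	struct req *next;	/* priority-ordered queue */
};

/* Returns how many ports were filled; ports[] holds the last request of each run. */
static size_t fill_ports(struct req *queue, struct req *ports[NPORTS])
{
	struct req *last = NULL;
	size_t port = 0;

	for (struct req *rq = queue; rq; rq = rq->next) {
		if (last && rq->ctx_id != last->ctx_id) {
			/* out of ports: leave the rest for the next dequeue */
			if (port == NPORTS - 1)
				break;
			ports[port++] = last;	/* close the current run */
		}
		last = rq;	/* same context: only the final tail matters */
	}

	if (last)
		ports[port++] = last;
	return port;
}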
2425
2426 static void
2427 cancel_port_requests(struct intel_engine_execlists * const execlists)
2428 {
2429         struct i915_request * const *port;
2430
2431         for (port = execlists->pending; *port; port++)
2432                 execlists_schedule_out(*port);
2433         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2434
2435         /* Mark the end of active before we overwrite *active */
2436         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2437                 execlists_schedule_out(*port);
2438         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2439
2440         smp_wmb(); /* complete the seqlock for execlists_active() */
2441         WRITE_ONCE(execlists->active, execlists->inflight);
2442 }
2443
2444 static inline void
2445 invalidate_csb_entries(const u32 *first, const u32 *last)
2446 {
2447         clflush((void *)first);
2448         clflush((void *)last);
2449 }
2450
2451 /*
2452  * Starting with Gen12, the status has a new format:
2453  *
2454  *     bit  0:     switched to new queue
2455  *     bit  1:     reserved
2456  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2457  *                 switch detail is set to "wait on semaphore"
2458  *     bits 3-5:   engine class
2459  *     bits 6-11:  engine instance
2460  *     bits 12-14: reserved
2461  *     bits 15-25: sw context id of the lrc the GT switched to
2462  *     bits 26-31: sw counter of the lrc the GT switched to
2463  *     bits 32-35: context switch detail
2464  *                  - 0: ctx complete
2465  *                  - 1: wait on sync flip
2466  *                  - 2: wait on vblank
2467  *                  - 3: wait on scanline
2468  *                  - 4: wait on semaphore
2469  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2470  *                       WAIT_FOR_EVENT)
2471  *     bit  36:    reserved
2472  *     bits 37-43: wait detail (for switch detail 1 to 4)
2473  *     bits 44-46: reserved
2474  *     bits 47-57: sw context id of the lrc the GT switched away from
2475  *     bits 58-63: sw counter of the lrc the GT switched away from
2476  */
2477 static inline bool
2478 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2479 {
2480         u32 lower_dw = csb[0];
2481         u32 upper_dw = csb[1];
2482         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2483         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2484         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2485
2486         /*
2487          * The context switch detail is not guaranteed to be 5 when a preemption
2488          * occurs, so we can't just check for that. The check below works for
2489          * all the cases we care about, including preemptions of WAIT
2490          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2491          * would require some extra handling, but we don't support that.
2492          */
2493         if (!ctx_away_valid || new_queue) {
2494                 GEM_BUG_ON(!ctx_to_valid);
2495                 return true;
2496         }
2497
2498         /*
2499          * switch detail = 5 is covered by the case above and we do not expect a
2500          * context switch on an unsuccessful wait instruction since we always
2501          * use polling mode.
2502          */
2503         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2504         return false;
2505 }
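/*
 * Illustrative sketch (not part of the driver): pulling the documented fields
 * out of a Gen12 CSB event, treating the two dwords as one 64-bit value laid
 * out as in the comment above. The struct and helper name are assumptions for
 * the example only.
 */
#include <stdbool.h>
#include <stdint.h>

struct csb_fields {
	bool new_queue;		/* bit 0 */
	uint32_t to_ctx_id;	/* bits 15-25 */
	uint32_t to_counter;	/* bits 26-31 */
	uint32_t switch_detail;	/* bits 32-35 */
	uint32_t away_ctx_id;	/* bits 47-57 */
	uint32_t away_counter;	/* bits 58-63 */
};

static struct csb_fields decode_gen12_csb(const uint32_t csb[2])
{
	uint64_t v = (uint64_t)csb[1] << 32 | csb[0];

	return (struct csb_fields){
		.new_queue	= v & 1,
		.to_ctx_id	= (v >> 15) & 0x7ff,
		.to_counter	= (v >> 26) & 0x3f,
		.switch_detail	= (v >> 32) & 0xf,
		.away_ctx_id	= (v >> 47) & 0x7ff,
		.away_counter	= (v >> 58) & 0x3f,
	};
}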
2506
2507 static inline bool
2508 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2509 {
2510         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2511 }
2512
2513 static void process_csb(struct intel_engine_cs *engine)
2514 {
2515         struct intel_engine_execlists * const execlists = &engine->execlists;
2516         const u32 * const buf = execlists->csb_status;
2517         const u8 num_entries = execlists->csb_size;
2518         u8 head, tail;
2519
2520         /*
2521          * As we modify our execlists state tracking we require exclusive
2522          * access. Either we are inside the tasklet, or the tasklet is disabled
2523          * and we assume that is only inside the reset paths and so serialised.
2524          */
2525         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2526                    !reset_in_progress(execlists));
2527         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2528
2529         /*
2530          * Note that csb_write, csb_status may be either in HWSP or mmio.
2531          * When reading from the csb_write mmio register, we have to be
2532          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2533          * the low 4bits. As it happens we know the next 4bits are always
2534          * zero and so we can simply mask off the low u8 of the register
2535          * and treat it identically to reading from the HWSP (without having
2536          * to use explicit shifting and masking, and probably bifurcating
2537          * the code to handle the legacy mmio read).
2538          */
2539         head = execlists->csb_head;
2540         tail = READ_ONCE(*execlists->csb_write);
2541         if (unlikely(head == tail))
2542                 return;
2543
2544         /*
2545          * Hopefully paired with a wmb() in HW!
2546          *
2547          * We must complete the read of the write pointer before any reads
2548          * from the CSB, so that we do not see stale values. Without an rmb
2549          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2550          * we perform the READ_ONCE(*csb_write).
2551          */
2552         rmb();
2553
2554         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2555         do {
2556                 bool promote;
2557
2558                 if (++head == num_entries)
2559                         head = 0;
2560
2561                 /*
2562                  * We are flying near dragons again.
2563                  *
2564                  * We hold a reference to the request in execlist_port[]
2565                  * but no more than that. We are operating in softirq
2566                  * context and so cannot hold any mutex or sleep. That
2567                  * prevents us stopping the requests we are processing
2568                  * in port[] from being retired simultaneously (the
2569                  * breadcrumb will be complete before we see the
2570                  * context-switch). As we only hold the reference to the
2571                  * request, any pointer chasing underneath the request
2572                  * is subject to a potential use-after-free. Thus we
2573                  * store all of the bookkeeping within port[] as
2574                  * required, and avoid using unguarded pointers beneath
2575                  * request itself. The same applies to the atomic
2576                  * status notifier.
2577                  */
2578
2579                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2580                              head, buf[2 * head + 0], buf[2 * head + 1]);
2581
2582                 if (INTEL_GEN(engine->i915) >= 12)
2583                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2584                 else
2585                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2586                 if (promote) {
2587                         struct i915_request * const *old = execlists->active;
2588
2589                         ring_set_paused(engine, 0);
2590
2591                         /* Point active to the new ELSP; prevent overwriting */
2592                         WRITE_ONCE(execlists->active, execlists->pending);
2593                         smp_wmb(); /* notify execlists_active() */
2594
2595                         /* cancel old inflight, prepare for switch */
2596                         trace_ports(execlists, "preempted", old);
2597                         while (*old)
2598                                 execlists_schedule_out(*old++);
2599
2600                         /* switch pending to inflight */
2601                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2602                         memcpy(execlists->inflight,
2603                                execlists->pending,
2604                                execlists_num_ports(execlists) *
2605                                sizeof(*execlists->pending));
2606                         smp_wmb(); /* complete the seqlock */
2607                         WRITE_ONCE(execlists->active, execlists->inflight);
2608
2609                         WRITE_ONCE(execlists->pending[0], NULL);
2610                 } else {
2611                         GEM_BUG_ON(!*execlists->active);
2612
2613                         /* port0 completed, advanced to port1 */
2614                         trace_ports(execlists, "completed", execlists->active);
2615
2616                         /*
2617                          * We rely on the hardware being strongly
2618                          * ordered, that the breadcrumb write is
2619                          * coherent (visible from the CPU) before the
2620                          * that the breadcrumb write, being emitted before the
2621                          * user interrupt and the CS event for the context
2622                          * switch, would therefore be visible before the CS
2623                          * event itself...
2624                          * itself...
2625                          */
2626                         if (GEM_SHOW_DEBUG() &&
2627                             !i915_request_completed(*execlists->active)) {
2628                                 struct i915_request *rq = *execlists->active;
2629                                 const u32 *regs __maybe_unused =
2630                                         rq->context->lrc_reg_state;
2631
2632                                 ENGINE_TRACE(engine,
2633                                              "context completed before request!\n");
2634                                 ENGINE_TRACE(engine,
2635                                              "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2636                                              ENGINE_READ(engine, RING_START),
2637                                              ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2638                                              ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2639                                              ENGINE_READ(engine, RING_CTL),
2640                                              ENGINE_READ(engine, RING_MI_MODE));
2641                                 ENGINE_TRACE(engine,
2642                                              "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2643                                              i915_ggtt_offset(rq->ring->vma),
2644                                              rq->head, rq->tail,
2645                                              rq->fence.context,
2646                                              lower_32_bits(rq->fence.seqno),
2647                                              hwsp_seqno(rq));
2648                                 ENGINE_TRACE(engine,
2649                                              "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2650                                              regs[CTX_RING_START],
2651                                              regs[CTX_RING_HEAD],
2652                                              regs[CTX_RING_TAIL]);
2653                         }
2654
2655                         execlists_schedule_out(*execlists->active++);
2656
2657                         GEM_BUG_ON(execlists->active - execlists->inflight >
2658                                    execlists_num_ports(execlists));
2659                 }
2660         } while (head != tail);
2661
2662         execlists->csb_head = head;
2663         set_timeslice(engine);
2664
2665         /*
2666          * Gen11 has proven to fail wrt global observation point between
2667          * entry and tail update, failing on the ordering and thus
2668          * we see an old entry in the context status buffer.
2669          *
2670          * Forcibly evict out entries for the next gpu csb update,
2671          * to increase the odds that we get fresh entries with non-
2672          * working hardware. The cost for doing so comes out mostly in
2673          * the wash as hardware, working or not, will need to do the
2674          * invalidation before.
2675          */
2676         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2677 }
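/*
 * Illustrative sketch (not part of the driver): the head/tail walk in
 * process_csb() above is a classic single-producer ring consumption: read the
 * producer's write pointer, order that read before reading the entries (the
 * rmb() above), then consume entries modulo the ring size. The C11 model
 * below uses an acquire load in place of the explicit barrier; all names are
 * assumptions for the example only.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

struct csb_ring {
	const uint32_t *entries;	/* 2 dwords per event */
	size_t num_entries;
	_Atomic uint8_t *write;		/* producer-owned write pointer */
	uint8_t head;			/* consumer-owned read pointer */
};

static void consume_csb(struct csb_ring *ring,
			void (*handle)(const uint32_t *event))
{
	/* acquire pairs with the producer's release of *write */
	uint8_t tail = atomic_load_explicit(ring->write, memory_order_acquire);
	uint8_t head = ring->head;

	while (head != tail) {
		/* the write pointer names the last entry written, so advance first */
		if (++head == ring->num_entries)
			head = 0;
		handle(&ring->entries[2 * head]);
	}

	ring->head = head;
}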
2678
2679 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2680 {
2681         lockdep_assert_held(&engine->active.lock);
2682         if (!READ_ONCE(engine->execlists.pending[0])) {
2683                 rcu_read_lock(); /* protect peeking at execlists->active */
2684                 execlists_dequeue(engine);
2685                 rcu_read_unlock();
2686         }
2687 }
2688
2689 static void __execlists_hold(struct i915_request *rq)
2690 {
2691         LIST_HEAD(list);
2692
2693         do {
2694                 struct i915_dependency *p;
2695
2696                 if (i915_request_is_active(rq))
2697                         __i915_request_unsubmit(rq);
2698
2699                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2700                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2701                 i915_request_set_hold(rq);
2702                 RQ_TRACE(rq, "on hold\n");
2703
2704                 for_each_waiter(p, rq) {
2705                         struct i915_request *w =
2706                                 container_of(p->waiter, typeof(*w), sched);
2707
2708                         /* Leave semaphores spinning on the other engines */
2709                         if (w->engine != rq->engine)
2710                                 continue;
2711
2712                         if (!i915_request_is_ready(w))
2713                                 continue;
2714
2715                         if (i915_request_completed(w))
2716                                 continue;
2717
2718                         if (i915_request_on_hold(w))
2719                                 continue;
2720
2721                         list_move_tail(&w->sched.link, &list);
2722                 }
2723
2724                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2725         } while (rq);
2726 }
2727
2728 static bool execlists_hold(struct intel_engine_cs *engine,
2729                            struct i915_request *rq)
2730 {
2731         spin_lock_irq(&engine->active.lock);
2732
2733         if (i915_request_completed(rq)) { /* too late! */
2734                 rq = NULL;
2735                 goto unlock;
2736         }
2737
2738         if (rq->engine != engine) { /* preempted virtual engine */
2739                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2740
2741                 /*
2742                  * intel_context_inflight() is only protected by virtue
2743                  * of process_csb() being called only by the tasklet (or
2744                  * directly from inside reset while the tasklet is suspended).
2745                  * Assert that neither of those are allowed to run while we
2746                  * poke at the request queues.
2747                  */
2748                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2749
2750                 /*
2751                  * An unsubmitted request along a virtual engine will
2752                  * remain on the active (this) engine until we are able
2753                  * to process the context switch away (and so mark the
2754                  * context as no longer in flight). That cannot have happened
2755                  * yet, otherwise we would not be hanging!
2756                  */
2757                 spin_lock(&ve->base.active.lock);
2758                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2759                 GEM_BUG_ON(ve->request != rq);
2760                 ve->request = NULL;
2761                 spin_unlock(&ve->base.active.lock);
2762                 i915_request_put(rq);
2763
2764                 rq->engine = engine;
2765         }
2766
2767         /*
2768          * Transfer this request onto the hold queue to prevent it
2769          * being resubmitted to HW (and potentially completed) before we have
2770          * released it. Since we may have already submitted following
2771          * requests, we need to remove those as well.
2772          */
2773         GEM_BUG_ON(i915_request_on_hold(rq));
2774         GEM_BUG_ON(rq->engine != engine);
2775         __execlists_hold(rq);
2776         GEM_BUG_ON(list_empty(&engine->active.hold));
2777
2778 unlock:
2779         spin_unlock_irq(&engine->active.lock);
2780         return rq;
2781 }
2782
2783 static bool hold_request(const struct i915_request *rq)
2784 {
2785         struct i915_dependency *p;
2786         bool result = false;
2787
2788         /*
2789          * If one of our ancestors is on hold, we must also be on hold,
2790          * otherwise we will bypass it and execute before it.
2791          */
2792         rcu_read_lock();
2793         for_each_signaler(p, rq) {
2794                 const struct i915_request *s =
2795                         container_of(p->signaler, typeof(*s), sched);
2796
2797                 if (s->engine != rq->engine)
2798                         continue;
2799
2800                 result = i915_request_on_hold(s);
2801                 if (result)
2802                         break;
2803         }
2804         rcu_read_unlock();
2805
2806         return result;
2807 }
2808
2809 static void __execlists_unhold(struct i915_request *rq)
2810 {
2811         LIST_HEAD(list);
2812
2813         do {
2814                 struct i915_dependency *p;
2815
2816                 RQ_TRACE(rq, "hold release\n");
2817
2818                 GEM_BUG_ON(!i915_request_on_hold(rq));
2819                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2820
2821                 i915_request_clear_hold(rq);
2822                 list_move_tail(&rq->sched.link,
2823                                i915_sched_lookup_priolist(rq->engine,
2824                                                           rq_prio(rq)));
2825                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2826
2827                 /* Also release any children on this engine that are ready */
2828                 for_each_waiter(p, rq) {
2829                         struct i915_request *w =
2830                                 container_of(p->waiter, typeof(*w), sched);
2831
2832                         /* Propagate any change in error status */
2833                         if (rq->fence.error)
2834                                 i915_request_set_error_once(w, rq->fence.error);
2835
2836                         if (w->engine != rq->engine)
2837                                 continue;
2838
2839                         if (!i915_request_on_hold(w))
2840                                 continue;
2841
2842                         /* Check that no other parents are also on hold */
2843                         if (hold_request(w))
2844                                 continue;
2845
2846                         list_move_tail(&w->sched.link, &list);
2847                 }
2848
2849                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2850         } while (rq);
2851 }
2852
2853 static void execlists_unhold(struct intel_engine_cs *engine,
2854                              struct i915_request *rq)
2855 {
2856         spin_lock_irq(&engine->active.lock);
2857
2858         /*
2859          * Move this request back to the priority queue, and all of its
2860          * children and grandchildren that were suspended along with it.
2861          */
2862         __execlists_unhold(rq);
2863
2864         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2865                 engine->execlists.queue_priority_hint = rq_prio(rq);
2866                 tasklet_hi_schedule(&engine->execlists.tasklet);
2867         }
2868
2869         spin_unlock_irq(&engine->active.lock);
2870 }
2871
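/*
 * Error capture is deferred to a worker: the tasklet only stashes the
 * guilty request and a preallocated coredump here; the slow compression
 * of the attached objects happens later in execlists_capture_work().
 */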
2872 struct execlists_capture {
2873         struct work_struct work;
2874         struct i915_request *rq;
2875         struct i915_gpu_coredump *error;
2876 };
2877
2878 static void execlists_capture_work(struct work_struct *work)
2879 {
2880         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2881         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2882         struct intel_engine_cs *engine = cap->rq->engine;
2883         struct intel_gt_coredump *gt = cap->error->gt;
2884         struct intel_engine_capture_vma *vma;
2885
2886         /* Compress all the objects attached to the request, slow! */
2887         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2888         if (vma) {
2889                 struct i915_vma_compress *compress =
2890                         i915_vma_capture_prepare(gt);
2891
2892                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2893                 i915_vma_capture_finish(gt, compress);
2894         }
2895
2896         gt->simulated = gt->engine->simulated;
2897         cap->error->simulated = gt->simulated;
2898
2899         /* Publish the error state, and announce it to the world */
2900         i915_error_state_store(cap->error);
2901         i915_gpu_coredump_put(cap->error);
2902
2903         /* Return this request and all that depend upon it for signaling */
2904         execlists_unhold(engine, cap->rq);
2905         i915_request_put(cap->rq);
2906
2907         kfree(cap);
2908 }
2909
2910 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2911 {
2912         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2913         struct execlists_capture *cap;
2914
2915         cap = kmalloc(sizeof(*cap), gfp);
2916         if (!cap)
2917                 return NULL;
2918
2919         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2920         if (!cap->error)
2921                 goto err_cap;
2922
2923         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2924         if (!cap->error->gt)
2925                 goto err_gpu;
2926
2927         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2928         if (!cap->error->gt->engine)
2929                 goto err_gt;
2930
2931         return cap;
2932
2933 err_gt:
2934         kfree(cap->error->gt);
2935 err_gpu:
2936         kfree(cap->error);
2937 err_cap:
2938         kfree(cap);
2939         return NULL;
2940 }
2941
2942 static struct i915_request *
2943 active_context(struct intel_engine_cs *engine, u32 ccid)
2944 {
2945         const struct intel_engine_execlists * const el = &engine->execlists;
2946         struct i915_request * const *port, *rq;
2947
2948         /*
2949          * Use the most recent result from process_csb(), but just in case
2950          * we trigger an error (via interrupt) before the first CS event has
2951          * been written, peek at the next submission.
2952          */
2953
2954         for (port = el->active; (rq = *port); port++) {
2955                 if (rq->context->lrc.ccid == ccid) {
2956                         ENGINE_TRACE(engine,
2957                                      "ccid found at active:%zd\n",
2958                                      port - el->active);
2959                         return rq;
2960                 }
2961         }
2962
2963         for (port = el->pending; (rq = *port); port++) {
2964                 if (rq->context->lrc.ccid == ccid) {
2965                         ENGINE_TRACE(engine,
2966                                      "ccid found at pending:%zd\n",
2967                                      port - el->pending);
2968                         return rq;
2969                 }
2970         }
2971
2972         ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
2973         return NULL;
2974 }
2975
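/*
 * The upper dword of the EXECLIST_STATUS register reports the CCID of
 * the context currently active on the engine; we match it against
 * rq->context->lrc.ccid in active_context() above.
 */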
2976 static u32 active_ccid(struct intel_engine_cs *engine)
2977 {
2978         return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
2979 }
2980
2981 static bool execlists_capture(struct intel_engine_cs *engine)
2982 {
2983         struct execlists_capture *cap;
2984
2985         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2986                 return true;
2987
2988         /*
2989          * We need to _quickly_ capture the engine state before we reset.
2990          * We are inside an atomic section (softirq) here and we are delaying
2991          * the forced preemption event.
2992          */
2993         cap = capture_regs(engine);
2994         if (!cap)
2995                 return true;
2996
2997         spin_lock_irq(&engine->active.lock);
2998         cap->rq = active_context(engine, active_ccid(engine));
2999         if (cap->rq) {
3000                 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3001                 cap->rq = i915_request_get_rcu(cap->rq);
3002         }
3003         spin_unlock_irq(&engine->active.lock);
3004         if (!cap->rq)
3005                 goto err_free;
3006
3007         /*
3008          * Remove the request from the execlists queue, and take ownership
3009          * of the request. We pass it to our worker who will _slowly_ compress
3010          * all the pages the _user_ requested for debugging their batch, after
3011          * which we return it to the queue for signaling.
3012          *
3013          * By removing them from the execlists queue, we also remove the
3014          * requests from being processed by __unwind_incomplete_requests()
3015          * during the intel_engine_reset(), and so they will *not* be replayed
3016          * afterwards.
3017          *
3018          * Note that because we have not yet reset the engine at this point,
3019          * it is possible that the request we have identified as guilty
3020          * did in fact complete, and that we will then hit an arbitration
3021          * point allowing the outstanding preemption to succeed. The likelihood
3022          * of that is very low (as capturing of the engine registers should be
3023          * fast enough to run inside an irq-off atomic section!), so we will
3024          * simply hold that request accountable for being non-preemptible
3025          * long enough to force the reset.
3026          */
3027         if (!execlists_hold(engine, cap->rq))
3028                 goto err_rq;
3029
3030         INIT_WORK(&cap->work, execlists_capture_work);
3031         schedule_work(&cap->work);
3032         return true;
3033
3034 err_rq:
3035         i915_request_put(cap->rq);
3036 err_free:
3037         i915_gpu_coredump_put(cap->error);
3038         kfree(cap);
3039         return false;
3040 }
3041
3042 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3043 {
3044         const unsigned int bit = I915_RESET_ENGINE + engine->id;
3045         unsigned long *lock = &engine->gt->reset.flags;
3046
3047         if (!intel_has_reset_engine(engine->gt))
3048                 return;
3049
3050         if (test_and_set_bit(bit, lock))
3051                 return;
3052
3053         ENGINE_TRACE(engine, "reset for %s\n", msg);
3054
3055         /* Mark this tasklet as disabled to avoid waiting for it to complete */
3056         tasklet_disable_nosync(&engine->execlists.tasklet);
3057
3058         ring_set_paused(engine, 1); /* Freeze the current request in place */
3059         if (execlists_capture(engine))
3060                 intel_engine_reset(engine, msg);
3061         else
3062                 ring_set_paused(engine, 0);
3063
3064         tasklet_enable(&engine->execlists.tasklet);
3065         clear_and_wake_up_bit(bit, lock);
3066 }
3067
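/*
 * A forced preemption is due if the preempt timer has expired while a
 * submission is still outstanding in ELSP (pending[0]).
 */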
3068 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3069 {
3070         const struct timer_list *t = &engine->execlists.preempt;
3071
3072         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3073                 return false;
3074
3075         if (!timer_expired(t))
3076                 return false;
3077
3078         return READ_ONCE(engine->execlists.pending[0]);
3079 }
3080
3081 /*
3082  * Check the unread Context Status Buffers and manage the submission of new
3083  * contexts to the ELSP accordingly.
3084  */
3085 static void execlists_submission_tasklet(unsigned long data)
3086 {
3087         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3088         bool timeout = preempt_timeout(engine);
3089
3090         process_csb(engine);
3091
3092         if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3093                 engine->execlists.error_interrupt = 0;
3094                 if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
3095                         execlists_reset(engine, "CS error");
3096         }
3097
3098         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3099                 unsigned long flags;
3100
3101                 spin_lock_irqsave(&engine->active.lock, flags);
3102                 __execlists_submission_tasklet(engine);
3103                 spin_unlock_irqrestore(&engine->active.lock, flags);
3104
3105                 /* Recheck after serialising with direct-submission */
3106                 if (unlikely(timeout && preempt_timeout(engine)))
3107                         execlists_reset(engine, "preemption time out");
3108         }
3109 }
3110
3111 static void __execlists_kick(struct intel_engine_execlists *execlists)
3112 {
3113         /* Kick the tasklet for some interrupt coalescing and reset handling */
3114         tasklet_hi_schedule(&execlists->tasklet);
3115 }
3116
3117 #define execlists_kick(t, member) \
3118         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
3119
3120 static void execlists_timeslice(struct timer_list *timer)
3121 {
3122         execlists_kick(timer, timer);
3123 }
3124
3125 static void execlists_preempt(struct timer_list *timer)
3126 {
3127         execlists_kick(timer, preempt);
3128 }
3129
3130 static void queue_request(struct intel_engine_cs *engine,
3131                           struct i915_request *rq)
3132 {
3133         GEM_BUG_ON(!list_empty(&rq->sched.link));
3134         list_add_tail(&rq->sched.link,
3135                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
3136         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3137 }
3138
3139 static void __submit_queue_imm(struct intel_engine_cs *engine)
3140 {
3141         struct intel_engine_execlists * const execlists = &engine->execlists;
3142
3143         if (reset_in_progress(execlists))
3144                 return; /* defer until we restart the engine following reset */
3145
3146         /* Hopefully we clear execlists->pending[] to let us through */
3147         if (READ_ONCE(execlists->pending[0]) &&
3148             tasklet_trylock(&execlists->tasklet)) {
3149                 process_csb(engine);
3150                 tasklet_unlock(&execlists->tasklet);
3151         }
3152
3153         __execlists_submission_tasklet(engine);
3154 }
3155
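/*
 * Only kick the ELSP directly if this request outranks everything already
 * queued; queue_priority_hint is an (optimistic) upper bound used as a hint
 * for the priority of the queue.
 */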
3156 static void submit_queue(struct intel_engine_cs *engine,
3157                          const struct i915_request *rq)
3158 {
3159         struct intel_engine_execlists *execlists = &engine->execlists;
3160
3161         if (rq_prio(rq) <= execlists->queue_priority_hint)
3162                 return;
3163
3164         execlists->queue_priority_hint = rq_prio(rq);
3165         __submit_queue_imm(engine);
3166 }
3167
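/*
 * Cheap check on submission: only walk the signalers (hold_request()) if
 * something is actually parked on this engine's hold list.
 */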
3168 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3169                              const struct i915_request *rq)
3170 {
3171         GEM_BUG_ON(i915_request_on_hold(rq));
3172         return !list_empty(&engine->active.hold) && hold_request(rq);
3173 }
3174
3175 static void execlists_submit_request(struct i915_request *request)
3176 {
3177         struct intel_engine_cs *engine = request->engine;
3178         unsigned long flags;
3179
3180         /* Will be called from irq-context when using foreign fences. */
3181         spin_lock_irqsave(&engine->active.lock, flags);
3182
3183         if (unlikely(ancestor_on_hold(engine, request))) {
3184                 RQ_TRACE(request, "ancestor on hold\n");
3185                 list_add_tail(&request->sched.link, &engine->active.hold);
3186                 i915_request_set_hold(request);
3187         } else {
3188                 queue_request(engine, request);
3189
3190                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3191                 GEM_BUG_ON(list_empty(&request->sched.link));
3192
3193                 submit_queue(engine, request);
3194         }
3195
3196         spin_unlock_irqrestore(&engine->active.lock, flags);
3197 }
3198
3199 static void __execlists_context_fini(struct intel_context *ce)
3200 {
3201         intel_ring_put(ce->ring);
3202         i915_vma_put(ce->state);
3203 }
3204
3205 static void execlists_context_destroy(struct kref *kref)
3206 {
3207         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3208
3209         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3210         GEM_BUG_ON(intel_context_is_pinned(ce));
3211
3212         if (ce->state)
3213                 __execlists_context_fini(ce);
3214
3215         intel_context_fini(ce);
3216         intel_context_free(ce);
3217 }
3218
3219 static void
3220 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3221 {
3222         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3223                 return;
3224
3225         vaddr += engine->context_size;
3226
3227         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3228 }
3229
3230 static void
3231 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3232 {
3233         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3234                 return;
3235
3236         vaddr += engine->context_size;
3237
3238         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3239                 drm_err_once(&engine->i915->drm,
3240                              "%s context redzone overwritten!\n",
3241                              engine->name);
3242 }
3243
3244 static void execlists_context_unpin(struct intel_context *ce)
3245 {
3246         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3247                       ce->engine);
3248
3249         i915_gem_object_unpin_map(ce->state->obj);
3250 }
3251
3252 static u32 *
3253 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3254 {
3255         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3256                 MI_SRM_LRM_GLOBAL_GTT |
3257                 MI_LRI_LRM_CS_MMIO;
3258         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3259         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3260                 CTX_TIMESTAMP * sizeof(u32);
3261         *cs++ = 0;
3262
3263         *cs++ = MI_LOAD_REGISTER_REG |
3264                 MI_LRR_SOURCE_CS_MMIO |
3265                 MI_LRI_LRM_CS_MMIO;
3266         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3267         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3268
3269         *cs++ = MI_LOAD_REGISTER_REG |
3270                 MI_LRR_SOURCE_CS_MMIO |
3271                 MI_LRI_LRM_CS_MMIO;
3272         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3273         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3274
3275         return cs;
3276 }
3277
3278 static u32 *
3279 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3280 {
3281         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3282
3283         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3284                 MI_SRM_LRM_GLOBAL_GTT |
3285                 MI_LRI_LRM_CS_MMIO;
3286         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3287         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3288                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3289         *cs++ = 0;
3290
3291         return cs;
3292 }
3293
3294 static u32 *
3295 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3296 {
3297         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3298
3299         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3300                 MI_SRM_LRM_GLOBAL_GTT |
3301                 MI_LRI_LRM_CS_MMIO;
3302         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3303         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3304                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3305         *cs++ = 0;
3306
3307         *cs++ = MI_LOAD_REGISTER_REG |
3308                 MI_LRR_SOURCE_CS_MMIO |
3309                 MI_LRI_LRM_CS_MMIO;
3310         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3311         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3312
3313         return cs;
3314 }
3315
3316 static u32 *
3317 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3318 {
3319         cs = gen12_emit_timestamp_wa(ce, cs);
3320         cs = gen12_emit_cmd_buf_wa(ce, cs);
3321         cs = gen12_emit_restore_scratch(ce, cs);
3322
3323         return cs;
3324 }
3325
3326 static u32 *
3327 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3328 {
3329         cs = gen12_emit_timestamp_wa(ce, cs);
3330         cs = gen12_emit_restore_scratch(ce, cs);
3331
3332         return cs;
3333 }
3334
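/*
 * The per-context indirect workaround batch lives inside the context image
 * itself, ce->wa_bb_page pages from its start, roughly:
 *
 *   [ PPHWSP | logical ring state ... | wa_bb page ]
 *   ^ start of context image          ^ PAGE_SIZE * ce->wa_bb_page
 */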
3335 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3336 {
3337         return PAGE_SIZE * ce->wa_bb_page;
3338 }
3339
3340 static u32 *context_indirect_bb(const struct intel_context *ce)
3341 {
3342         void *ptr;
3343
3344         GEM_BUG_ON(!ce->wa_bb_page);
3345
3346         ptr = ce->lrc_reg_state;
3347         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3348         ptr += context_wa_bb_offset(ce);
3349
3350         return ptr;
3351 }
3352
3353 static void
3354 setup_indirect_ctx_bb(const struct intel_context *ce,
3355                       const struct intel_engine_cs *engine,
3356                       u32 *(*emit)(const struct intel_context *, u32 *))
3357 {
3358         u32 * const start = context_indirect_bb(ce);
3359         u32 *cs;
3360
3361         cs = emit(ce, start);
3362         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3363         while ((unsigned long)cs % CACHELINE_BYTES)
3364                 *cs++ = MI_NOOP;
3365
3366         lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3367                                     i915_ggtt_offset(ce->state) +
3368                                     context_wa_bb_offset(ce),
3369                                     (cs - start) * sizeof(*cs));
3370 }
3371
3372 static void
3373 __execlists_update_reg_state(const struct intel_context *ce,
3374                              const struct intel_engine_cs *engine,
3375                              u32 head)
3376 {
3377         struct intel_ring *ring = ce->ring;
3378         u32 *regs = ce->lrc_reg_state;
3379
3380         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3381         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3382
3383         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3384         regs[CTX_RING_HEAD] = head;
3385         regs[CTX_RING_TAIL] = ring->tail;
3386         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3387
3388         /* RPCS */
3389         if (engine->class == RENDER_CLASS) {
3390                 regs[CTX_R_PWR_CLK_STATE] =
3391                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
3392
3393                 i915_oa_init_reg_state(ce, engine);
3394         }
3395
3396         if (ce->wa_bb_page) {
3397                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3398
3399                 fn = gen12_emit_indirect_ctx_xcs;
3400                 if (ce->engine->class == RENDER_CLASS)
3401                         fn = gen12_emit_indirect_ctx_rcs;
3402
3403                 /* Mutually exclusive wrt the global indirect bb */
3404                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3405                 setup_indirect_ctx_bb(ce, engine, fn);
3406         }
3407 }
3408
3409 static int
3410 __execlists_context_pin(struct intel_context *ce,
3411                         struct intel_engine_cs *engine)
3412 {
3413         void *vaddr;
3414
3415         GEM_BUG_ON(!ce->state);
3416         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3417
3418         vaddr = i915_gem_object_pin_map(ce->state->obj,
3419                                         i915_coherent_map_type(engine->i915) |
3420                                         I915_MAP_OVERRIDE);
3421         if (IS_ERR(vaddr))
3422                 return PTR_ERR(vaddr);
3423
3424         ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3425         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3426         __execlists_update_reg_state(ce, engine, ce->ring->tail);
3427
3428         return 0;
3429 }
3430
3431 static int execlists_context_pin(struct intel_context *ce)
3432 {
3433         return __execlists_context_pin(ce, ce->engine);
3434 }
3435
3436 static int execlists_context_alloc(struct intel_context *ce)
3437 {
3438         return __execlists_context_alloc(ce, ce->engine);
3439 }
3440
3441 static void execlists_context_reset(struct intel_context *ce)
3442 {
3443         CE_TRACE(ce, "reset\n");
3444         GEM_BUG_ON(!intel_context_is_pinned(ce));
3445
3446         intel_ring_reset(ce->ring, ce->ring->emit);
3447
3448         /* Scrub away the garbage */
3449         execlists_init_reg_state(ce->lrc_reg_state,
3450                                  ce, ce->engine, ce->ring, true);
3451         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3452
3453         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3454 }
3455
3456 static const struct intel_context_ops execlists_context_ops = {
3457         .alloc = execlists_context_alloc,
3458
3459         .pin = execlists_context_pin,
3460         .unpin = execlists_context_unpin,
3461
3462         .enter = intel_context_enter_engine,
3463         .exit = intel_context_exit_engine,
3464
3465         .reset = execlists_context_reset,
3466         .destroy = execlists_context_destroy,
3467 };
3468
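/*
 * Emit the initial breadcrumb: an arbitration point followed by a write of
 * seqno-1 to the timeline HWSP, after which i915_request_started() reports
 * true for this request.
 */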
3469 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3470 {
3471         u32 *cs;
3472
3473         GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3474         if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3475                 return 0;
3476
3477         cs = intel_ring_begin(rq, 6);
3478         if (IS_ERR(cs))
3479                 return PTR_ERR(cs);
3480
3481         /*
3482          * Check if we have been preempted before we even get started.
3483          *
3484          * After this point i915_request_started() reports true, even if
3485          * we get preempted and so are no longer running.
3486          */
3487         *cs++ = MI_ARB_CHECK;
3488         *cs++ = MI_NOOP;
3489
3490         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3491         *cs++ = i915_request_timeline(rq)->hwsp_offset;
3492         *cs++ = 0;
3493         *cs++ = rq->fence.seqno - 1;
3494
3495         intel_ring_advance(rq, cs);
3496
3497         /* Record the updated position of the request's payload */
3498         rq->infix = intel_ring_offset(rq, cs);
3499
3500         __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3501
3502         return 0;
3503 }
3504
3505 static int emit_pdps(struct i915_request *rq)
3506 {
3507         const struct intel_engine_cs * const engine = rq->engine;
3508         struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3509         int err, i;
3510         u32 *cs;
3511
3512         GEM_BUG_ON(intel_vgpu_active(rq->i915));
3513
3514         /*
3515          * Beware ye of the dragons, this sequence is magic!
3516          *
3517          * Small changes to this sequence can cause anything from
3518          * GPU hangs to forcewake errors and machine lockups!
3519          */
3520
3521         /* Flush any residual operations from the context load */
3522         err = engine->emit_flush(rq, EMIT_FLUSH);
3523         if (err)
3524                 return err;
3525
3526         /* Magic required to prevent forcewake errors! */
3527         err = engine->emit_flush(rq, EMIT_INVALIDATE);
3528         if (err)
3529                 return err;
3530
3531         cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3532         if (IS_ERR(cs))
3533                 return PTR_ERR(cs);
3534
3535         /* Ensure the LRI have landed before we invalidate & continue */
3536         *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3537         for (i = GEN8_3LVL_PDPES; i--; ) {
3538                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3539                 u32 base = engine->mmio_base;
3540
3541                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3542                 *cs++ = upper_32_bits(pd_daddr);
3543                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3544                 *cs++ = lower_32_bits(pd_daddr);
3545         }
3546         *cs++ = MI_NOOP;
3547
3548         intel_ring_advance(rq, cs);
3549
3550         return 0;
3551 }
3552
3553 static int execlists_request_alloc(struct i915_request *request)
3554 {
3555         int ret;
3556
3557         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3558
3559         /*
3560          * Flush enough space to reduce the likelihood of waiting after
3561          * we start building the request - in which case we will just
3562          * have to repeat work.
3563          */
3564         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3565
3566         /*
3567          * Note that after this point, we have committed to using
3568          * this request as it is being used to both track the
3569          * state of engine initialisation and liveness of the
3570          * golden renderstate above. Think twice before you try
3571          * to cancel/unwind this request now.
3572          */
3573
3574         if (!i915_vm_is_4lvl(request->context->vm)) {
3575                 ret = emit_pdps(request);
3576                 if (ret)
3577                         return ret;
3578         }
3579
3580         /* Unconditionally invalidate GPU caches and TLBs. */
3581         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3582         if (ret)
3583                 return ret;
3584
3585         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3586         return 0;
3587 }
3588
3589 /*
3590  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3591  * the PIPE_CONTROL instruction. This is required for the flush to happen
3592  * correctly, but there is a slight complication: as this is applied in a WA
3593  * batch where the values are only initialized once, we cannot read the
3594  * register at the beginning and reuse that value later; hence we save its
3595  * value to memory, upload a constant value with bit21 set and then restore the saved value.
3596  * To simplify the WA, a constant value is formed by using the default value
3597  * of this register. This shouldn't be a problem because we are only modifying
3598  * it for a short period and this batch is non-preemptible. We can of course
3599  * use additional instructions that read the actual value of the register
3600  * at that time and set our bit of interest but it makes the WA complicated.
3601  *
3602  * This WA is also required for Gen9 so extracting as a function avoids
3603  * code duplication.
3604  */
3605 static u32 *
3606 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3607 {
3608         /* NB no one else is allowed to scribble over scratch + 256! */
3609         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3610         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3611         *batch++ = intel_gt_scratch_offset(engine->gt,
3612                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3613         *batch++ = 0;
3614
3615         *batch++ = MI_LOAD_REGISTER_IMM(1);
3616         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3617         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3618
3619         batch = gen8_emit_pipe_control(batch,
3620                                        PIPE_CONTROL_CS_STALL |
3621                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3622                                        0);
3623
3624         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3625         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3626         *batch++ = intel_gt_scratch_offset(engine->gt,
3627                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3628         *batch++ = 0;
3629
3630         return batch;
3631 }
3632
3633 /*
3634  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3635  * initialized at the beginning and shared across all contexts but this field
3636  * helps us to have multiple batches at different offsets and select them based
3637  * on some criteria. At the moment this batch always starts at the beginning of
3638  * the page and at this point we don't have multiple wa_ctx batch buffers.
3639  *
3640  * The number of WAs applied is not known at the beginning; we use this field
3641  * to return the number of DWORDS written.
3642  *
3643  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3644  * so it adds NOOPs as padding to make it cacheline aligned.
3645  * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
3646  * together make a complete batch buffer.
3647  */
3648 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3649 {
3650         /* WaDisableCtxRestoreArbitration:bdw,chv */
3651         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3652
3653         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3654         if (IS_BROADWELL(engine->i915))
3655                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3656
3657         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3658         /* Actual scratch location is at 128 bytes offset */
3659         batch = gen8_emit_pipe_control(batch,
3660                                        PIPE_CONTROL_FLUSH_L3 |
3661                                        PIPE_CONTROL_STORE_DATA_INDEX |
3662                                        PIPE_CONTROL_CS_STALL |
3663                                        PIPE_CONTROL_QW_WRITE,
3664                                        LRC_PPHWSP_SCRATCH_ADDR);
3665
3666         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3667
3668         /* Pad to end of cacheline */
3669         while ((unsigned long)batch % CACHELINE_BYTES)
3670                 *batch++ = MI_NOOP;
3671
3672         /*
3673          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3674          * execution depends on the length specified in terms of cache lines
3675          * in the register CTX_RCS_INDIRECT_CTX
3676          */
3677
3678         return batch;
3679 }
3680
3681 struct lri {
3682         i915_reg_t reg;
3683         u32 value;
3684 };
3685
3686 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3687 {
3688         GEM_BUG_ON(!count || count > 63);
3689
3690         *batch++ = MI_LOAD_REGISTER_IMM(count);
3691         do {
3692                 *batch++ = i915_mmio_reg_offset(lri->reg);
3693                 *batch++ = lri->value;
3694         } while (lri++, --count);
3695         *batch++ = MI_NOOP;
3696
3697         return batch;
3698 }
3699
3700 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3701 {
3702         static const struct lri lri[] = {
3703                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3704                 {
3705                         COMMON_SLICE_CHICKEN2,
3706                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3707                                        0),
3708                 },
3709
3710                 /* BSpec: 11391 */
3711                 {
3712                         FF_SLICE_CHICKEN,
3713                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3714                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3715                 },
3716
3717                 /* BSpec: 11299 */
3718                 {
3719                         _3D_CHICKEN3,
3720                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3721                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3722                 }
3723         };
3724
3725         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3726
3727         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3728         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3729
3730         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3731         batch = gen8_emit_pipe_control(batch,
3732                                        PIPE_CONTROL_FLUSH_L3 |
3733                                        PIPE_CONTROL_STORE_DATA_INDEX |
3734                                        PIPE_CONTROL_CS_STALL |
3735                                        PIPE_CONTROL_QW_WRITE,
3736                                        LRC_PPHWSP_SCRATCH_ADDR);
3737
3738         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3739
3740         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3741         if (HAS_POOLED_EU(engine->i915)) {
3742                 /*
3743                  * EU pool configuration is setup along with golden context
3744                  * during context initialization. This value depends on
3745                  * device type (2x6 or 3x6) and needs to be updated based
3746                  * on which subslice is disabled especially for 2x6
3747                  * devices, however it is safe to load default
3748                  * configuration of 3x6 device instead of masking off
3749                  * corresponding bits because HW ignores bits of a disabled
3750                  * subslice and drops down to appropriate config. Please
3751                  * see render_state_setup() in i915_gem_render_state.c for
3752                  * possible configurations, to avoid duplication they are
3753                  * not shown here again.
3754                  */
3755                 *batch++ = GEN9_MEDIA_POOL_STATE;
3756                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3757                 *batch++ = 0x00777000;
3758                 *batch++ = 0;
3759                 *batch++ = 0;
3760                 *batch++ = 0;
3761         }
3762
3763         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3764
3765         /* Pad to end of cacheline */
3766         while ((unsigned long)batch % CACHELINE_BYTES)
3767                 *batch++ = MI_NOOP;
3768
3769         return batch;
3770 }
3771
3772 static u32 *
3773 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3774 {
3775         int i;
3776
3777         /*
3778          * WaPipeControlBefore3DStateSamplePattern: cnl
3779          *
3780          * Ensure the engine is idle prior to programming a
3781          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3782          */
3783         batch = gen8_emit_pipe_control(batch,
3784                                        PIPE_CONTROL_CS_STALL,
3785                                        0);
3786         /*
3787          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3788          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3789          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3790          * confusing. Since gen8_emit_pipe_control() already advances the
3791          * batch by 6 dwords, we advance the other 10 here, completing a
3792          * cacheline. It's not clear if the workaround requires this padding
3793          * before other commands, or if it's just the regular padding we would
3794          * already have for the workaround bb, so leave it here for now.
3795          */
3796         for (i = 0; i < 10; i++)
3797                 *batch++ = MI_NOOP;
3798
3799         /* Pad to end of cacheline */
3800         while ((unsigned long)batch % CACHELINE_BYTES)
3801                 *batch++ = MI_NOOP;
3802
3803         return batch;
3804 }
3805
3806 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3807
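/*
 * Allocate a single page to hold the engine's workaround batch buffers and
 * pin it high in the global GTT so the context can reference it.
 */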
3808 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3809 {
3810         struct drm_i915_gem_object *obj;
3811         struct i915_vma *vma;
3812         int err;
3813
3814         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3815         if (IS_ERR(obj))
3816                 return PTR_ERR(obj);
3817
3818         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3819         if (IS_ERR(vma)) {
3820                 err = PTR_ERR(vma);
3821                 goto err;
3822         }
3823
3824         err = i915_ggtt_pin(vma, 0, PIN_HIGH);
3825         if (err)
3826                 goto err;
3827
3828         engine->wa_ctx.vma = vma;
3829         return 0;
3830
3831 err:
3832         i915_gem_object_put(obj);
3833         return err;
3834 }
3835
3836 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3837 {
3838         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3839 }
3840
3841 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3842
3843 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3844 {
3845         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3846         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3847                                             &wa_ctx->per_ctx };
3848         wa_bb_func_t wa_bb_fn[2];
3849         struct page *page;
3850         void *batch, *batch_ptr;
3851         unsigned int i;
3852         int ret;
3853
3854         if (engine->class != RENDER_CLASS)
3855                 return 0;
3856
3857         switch (INTEL_GEN(engine->i915)) {
3858         case 12:
3859         case 11:
3860                 return 0;
3861         case 10:
3862                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3863                 wa_bb_fn[1] = NULL;
3864                 break;
3865         case 9:
3866                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3867                 wa_bb_fn[1] = NULL;
3868                 break;
3869         case 8:
3870                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3871                 wa_bb_fn[1] = NULL;
3872                 break;
3873         default:
3874                 MISSING_CASE(INTEL_GEN(engine->i915));
3875                 return 0;
3876         }
3877
3878         ret = lrc_setup_wa_ctx(engine);
3879         if (ret) {
3880                 drm_dbg(&engine->i915->drm,
3881                         "Failed to setup context WA page: %d\n", ret);
3882                 return ret;
3883         }
3884
3885         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3886         batch = batch_ptr = kmap_atomic(page);
3887
3888         /*
3889          * Emit the two workaround batch buffers, recording the offset from the
3890          * start of the workaround batch buffer object for each and their
3891          * respective sizes.
3892          */
3893         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3894                 wa_bb[i]->offset = batch_ptr - batch;
3895                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3896                                                   CACHELINE_BYTES))) {
3897                         ret = -EINVAL;
3898                         break;
3899                 }
3900                 if (wa_bb_fn[i])
3901                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3902                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3903         }
3904
3905         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3906
3907         kunmap_atomic(batch);
3908         if (ret)
3909                 lrc_destroy_wa_ctx(engine);
3910
3911         return ret;
3912 }
3913
3914 static void reset_csb_pointers(struct intel_engine_cs *engine)
3915 {
3916         struct intel_engine_execlists * const execlists = &engine->execlists;
3917         const unsigned int reset_value = execlists->csb_size - 1;
3918
3919         ring_set_paused(engine, 0);
3920
3921         /*
3922          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3923          * Bludgeon them with a mmio update to be sure.
3924          */
3925         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3926                      0xffff << 16 | reset_value << 8 | reset_value);
3927         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3928
3929         /*
3930          * After a reset, the HW starts writing into CSB entry [0]. We
3931          * therefore have to set our HEAD pointer back one entry so that
3932          * the *first* entry we check is entry 0. To complicate this further,
3933          * as we don't wait for the first interrupt after reset, we have to
3934          * fake the HW write to point back to the last entry so that our
3935          * inline comparison of our cached head position against the last HW
3936          * write works even before the first interrupt.
3937          */
3938         execlists->csb_head = reset_value;
3939         WRITE_ONCE(*execlists->csb_write, reset_value);
3940         wmb(); /* Make sure this is visible to HW (paranoia?) */
3941
3942         invalidate_csb_entries(&execlists->csb_status[0],
3943                                &execlists->csb_status[reset_value]);
3944
3945         /* Once more for luck and our trusty paranoia */
3946         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3947                      0xffff << 16 | reset_value << 8 | reset_value);
3948         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3949
3950         GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
3951 }
3952
3953 static void execlists_sanitize(struct intel_engine_cs *engine)
3954 {
3955         /*
3956          * Poison residual state on resume, in case the suspend didn't!
3957          *
3958          * We have to assume that across suspend/resume (or other loss
3959          * of control) that the contents of our pinned buffers has been
3960          * lost, replaced by garbage. Since this doesn't always happen,
3961          * let's poison such state so that we more quickly spot when
3962          * we falsely assume it has been preserved.
3963          */
3964         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3965                 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3966
3967         reset_csb_pointers(engine);
3968
3969         /*
3970          * The kernel_context HWSP is stored in the status_page. As above,
3971          * that may be lost on resume/initialisation, and so we need to
3972          * reset the value in the HWSP.
3973          */
3974         intel_timeline_reset_seqno(engine->kernel_context->timeline);
3975
3976         /* And scrub the dirty cachelines for the HWSP */
3977         clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
3978 }
3979
3980 static void enable_error_interrupt(struct intel_engine_cs *engine)
3981 {
3982         u32 status;
3983
3984         engine->execlists.error_interrupt = 0;
3985         ENGINE_WRITE(engine, RING_EMR, ~0u);
3986         ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
3987
3988         status = ENGINE_READ(engine, RING_ESR);
3989         if (unlikely(status)) {
3990                 drm_err(&engine->i915->drm,
3991                         "engine '%s' resumed still in error: %08x\n",
3992                         engine->name, status);
3993                 __intel_gt_reset(engine->gt, engine->mask);
3994         }
3995
3996         /*
3997          * On current gen8+, we have 2 signals to play with
3998          *
3999          * - I915_ERROR_INSTRUCTION (bit 0)
4000          *
4001          *    Generate an error if the command parser encounters an invalid
4002          *    instruction
4003          *
4004          *    This is a fatal error.
4005          *
4006          * - CP_PRIV (bit 2)
4007          *
4008          *    Generate an error on privilege violation (where the CP replaces
4009          *    the instruction with a no-op). This also fires for writes into
4010          *    read-only scratch pages.
4011          *
4012          *    This is a non-fatal error, parsing continues.
4013          *
4014          * * there are a few others defined for odd HW that we do not use
4015          *
4016          * Since CP_PRIV fires for cases where we have chosen to ignore the
4017          * error (as the HW is validating and suppressing the mistakes), we
4018          * only unmask the instruction error bit.
4019          */
4020         ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4021 }
4022
4023 static void enable_execlists(struct intel_engine_cs *engine)
4024 {
4025         u32 mode;
4026
4027         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4028
4029         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4030
4031         if (INTEL_GEN(engine->i915) >= 11)
4032                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4033         else
4034                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4035         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4036
4037         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4038
4039         ENGINE_WRITE_FW(engine,
4040                         RING_HWS_PGA,
4041                         i915_ggtt_offset(engine->status_page.vma));
4042         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4043
4044         enable_error_interrupt(engine);
4045
4046         engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4047 }
4048
4049 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4050 {
4051         bool unexpected = false;
4052
4053         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4054                 drm_dbg(&engine->i915->drm,
4055                         "STOP_RING still set in RING_MI_MODE\n");
4056                 unexpected = true;
4057         }
4058
4059         return unexpected;
4060 }
4061
4062 static int execlists_resume(struct intel_engine_cs *engine)
4063 {
4064         intel_mocs_init_engine(engine);
4065
4066         intel_engine_reset_breadcrumbs(engine);
4067
4068         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4069                 struct drm_printer p = drm_debug_printer(__func__);
4070
4071                 intel_engine_dump(engine, &p, NULL);
4072         }
4073
4074         enable_execlists(engine);
4075
4076         return 0;
4077 }
4078
4079 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4080 {
4081         struct intel_engine_execlists * const execlists = &engine->execlists;
4082         unsigned long flags;
4083
4084         ENGINE_TRACE(engine, "depth<-%d\n",
4085                      atomic_read(&execlists->tasklet.count));
4086
4087         /*
4088          * Prevent request submission to the hardware until we have
4089          * completed the reset in i915_gem_reset_finish(). If a request
4090          * is completed by one engine, it may then queue a request
4091          * to a second via its execlists->tasklet *just* as we are
4092          * calling engine->resume() and also writing the ELSP.
4093          * Turning off the execlists->tasklet until the reset is over
4094          * prevents the race.
4095          */
4096         __tasklet_disable_sync_once(&execlists->tasklet);
4097         GEM_BUG_ON(!reset_in_progress(execlists));
4098
4099         /* And flush any current direct submission. */
4100         spin_lock_irqsave(&engine->active.lock, flags);
4101         spin_unlock_irqrestore(&engine->active.lock, flags);
4102
4103         /*
4104          * We stop engines, otherwise we might get a failed reset and a
4105          * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
4106          * from a system hang if a batchbuffer is progressing when
4107          * the reset is issued, regardless of the READY_TO_RESET ack.
4108          * Thus assume it is best to stop engines on all gens
4109          * where we have a gpu reset.
4110          *
4111          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4112          *
4113          * FIXME: Wa for more modern gens needs to be validated
4114          */
4115         ring_set_paused(engine, 1);
4116         intel_engine_stop_cs(engine);
4117
4118         engine->execlists.reset_ccid = active_ccid(engine);
4119 }
4120
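/*
 * Clear STOP_RING in the context image's RING_MI_MODE. The upper 16 bits of
 * this masked register are the write-enable mask, so STOP_RING << 16 must
 * also be set for the clear to take effect on context restore.
 */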
4121 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4122 {
4123         int x;
4124
4125         x = lrc_ring_mi_mode(engine);
4126         if (x != -1) {
4127                 regs[x + 1] &= ~STOP_RING;
4128                 regs[x + 1] |= STOP_RING << 16;
4129         }
4130 }
4131
4132 static void __execlists_reset_reg_state(const struct intel_context *ce,
4133                                         const struct intel_engine_cs *engine)
4134 {
4135         u32 *regs = ce->lrc_reg_state;
4136
4137         __reset_stop_ring(regs, engine);
4138 }
4139
4140 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4141 {
4142         struct intel_engine_execlists * const execlists = &engine->execlists;
4143         struct intel_context *ce;
4144         struct i915_request *rq;
4145         u32 head;
4146
4147         mb(); /* paranoia: read the CSB pointers from after the reset */
4148         clflush(execlists->csb_write);
4149         mb();
4150
4151         process_csb(engine); /* drain preemption events */
4152
4153         /* Following the reset, we need to reload the CSB read/write pointers */
4154         reset_csb_pointers(engine);
4155
4156         /*
4157          * Save the currently executing context, even if we completed
4158          * its request, it was still running at the time of the
4159          * reset and will have been clobbered.
4160          */
4161         rq = active_context(engine, engine->execlists.reset_ccid);
4162         if (!rq)
4163                 goto unwind;
4164
4165         ce = rq->context;
4166         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4167
4168         if (i915_request_completed(rq)) {
4169                 /* Idle context; tidy up the ring so we can restart afresh */
4170                 head = intel_ring_wrap(ce->ring, rq->tail);
4171                 goto out_replay;
4172         }
4173
4174         /* We still have requests in-flight; the engine should be active */
4175         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4176
4177         /* Context has requests still in-flight; it should not be idle! */
4178         GEM_BUG_ON(i915_active_is_idle(&ce->active));
4179
4180         rq = active_request(ce->timeline, rq);
4181         head = intel_ring_wrap(ce->ring, rq->head);
4182         GEM_BUG_ON(head == ce->ring->tail);
4183
4184         /*
4185          * If this request hasn't started yet, e.g. it is waiting on a
4186          * semaphore, we need to avoid skipping the request or else we
4187          * break the signaling chain. However, if the context is corrupt
4188          * the request will not restart and we will be stuck with a wedged
4189          * device. It is quite often the case that if we issue a reset
4190          * while the GPU is loading the context image, that the context
4191          * image becomes corrupt.
4192          *
4193          * Otherwise, if we have not started yet, the request should replay
4194          * perfectly and we do not need to flag the result as being erroneous.
4195          */
4196         if (!i915_request_started(rq))
4197                 goto out_replay;
4198
4199         /*
4200          * If the request was innocent, we leave the request in the ELSP
4201          * and will try to replay it on restarting. The context image may
4202          * have been corrupted by the reset, in which case we may have
4203          * to service a new GPU hang, but more likely we can continue on
4204          * without impact.
4205          *
4206          * If the request was guilty, we presume the context is corrupt
4207          * and have to at least restore the RING register in the context
4208          * image back to the expected values to skip over the guilty request.
4209          */
4210         __i915_request_reset(rq, stalled);
4211
4212         /*
4213          * We want a simple context + ring to execute the breadcrumb update.
4214          * We cannot rely on the context being intact across the GPU hang,
4215          * so clear it and rebuild just what we need for the breadcrumb.
4216          * All pending requests for this context will be zapped, and any
4217          * future request will be after userspace has had the opportunity
4218          * to recreate its own state.
4219          */
4220 out_replay:
4221         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4222                      head, ce->ring->tail);
4223         __execlists_reset_reg_state(ce, engine);
4224         __execlists_update_reg_state(ce, engine, head);
4225         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4226
4227 unwind:
4228         /* Push back any incomplete requests for replay after the reset. */
4229         cancel_port_requests(execlists);
4230         __unwind_incomplete_requests(engine);
4231 }
4232
4233 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4234 {
4235         unsigned long flags;
4236
4237         ENGINE_TRACE(engine, "\n");
4238
4239         spin_lock_irqsave(&engine->active.lock, flags);
4240
4241         __execlists_reset(engine, stalled);
4242
4243         spin_unlock_irqrestore(&engine->active.lock, flags);
4244 }
4245
4246 static void nop_submission_tasklet(unsigned long data)
4247 {
4248         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4249
4250         /* The driver is wedged; don't process any more events. */
4251         WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4252 }
4253
4254 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4255 {
4256         struct intel_engine_execlists * const execlists = &engine->execlists;
4257         struct i915_request *rq, *rn;
4258         struct rb_node *rb;
4259         unsigned long flags;
4260
4261         ENGINE_TRACE(engine, "\n");
4262
4263         /*
4264          * Before we call engine->cancel_requests(), we should have exclusive
4265          * access to the submission state. This is arranged for us by the
4266          * caller disabling the interrupt generation, the tasklet and other
4267          * threads that may then access the same state, giving us a free hand
4268          * to reset state. However, we still need to let lockdep be aware that
4269          * we know this state may be accessed in hardirq context, so we
4270          * disable the irq around this manipulation and we want to keep
4271          * the spinlock focused on its duties and not accidentally conflate
4272          * coverage to the submission's irq state. (Similarly, although we
4273          * shouldn't need to disable irq around the manipulation of the
4274          * submission's irq state, we also wish to remind ourselves that
4275          * it is irq state.)
4276          */
4277         spin_lock_irqsave(&engine->active.lock, flags);
4278
4279         __execlists_reset(engine, true);
4280
4281         /* Mark all executing requests as skipped. */
4282         list_for_each_entry(rq, &engine->active.requests, sched.link)
4283                 mark_eio(rq);
4284
4285         /* Flush the queued requests to the timeline list (for retiring). */
4286         while ((rb = rb_first_cached(&execlists->queue))) {
4287                 struct i915_priolist *p = to_priolist(rb);
4288                 int i;
4289
4290                 priolist_for_each_request_consume(rq, rn, p, i) {
4291                         mark_eio(rq);
4292                         __i915_request_submit(rq);
4293                 }
4294
4295                 rb_erase_cached(&p->node, &execlists->queue);
4296                 i915_priolist_free(p);
4297         }
4298
4299         /* On-hold requests will be flushed to the timeline upon their release */
4300         list_for_each_entry(rq, &engine->active.hold, sched.link)
4301                 mark_eio(rq);
4302
4303         /* Cancel all attached virtual engines */
4304         while ((rb = rb_first_cached(&execlists->virtual))) {
4305                 struct virtual_engine *ve =
4306                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4307
4308                 rb_erase_cached(rb, &execlists->virtual);
4309                 RB_CLEAR_NODE(rb);
4310
4311                 spin_lock(&ve->base.active.lock);
4312                 rq = fetch_and_zero(&ve->request);
4313                 if (rq) {
4314                         mark_eio(rq);
4315
4316                         rq->engine = engine;
4317                         __i915_request_submit(rq);
4318                         i915_request_put(rq);
4319
4320                         ve->base.execlists.queue_priority_hint = INT_MIN;
4321                 }
4322                 spin_unlock(&ve->base.active.lock);
4323         }
4324
4325         /* Remaining _unready_ requests will be nop'ed when submitted */
4326
4327         execlists->queue_priority_hint = INT_MIN;
4328         execlists->queue = RB_ROOT_CACHED;
4329
4330         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4331         execlists->tasklet.func = nop_submission_tasklet;
4332
4333         spin_unlock_irqrestore(&engine->active.lock, flags);
4334 }
4335
4336 static void execlists_reset_finish(struct intel_engine_cs *engine)
4337 {
4338         struct intel_engine_execlists * const execlists = &engine->execlists;
4339
4340         /*
4341          * After a GPU reset, we may have requests to replay. Do so now while
4342          * we still have the forcewake to be sure that the GPU is not allowed
4343          * to sleep before we restart and reload a context.
4344          */
4345         GEM_BUG_ON(!reset_in_progress(execlists));
4346         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4347                 execlists->tasklet.func(execlists->tasklet.data);
4348
4349         if (__tasklet_enable(&execlists->tasklet))
4350                 /* And kick in case we missed a new request submission. */
4351                 tasklet_hi_schedule(&execlists->tasklet);
4352         ENGINE_TRACE(engine, "depth->%d\n",
4353                      atomic_read(&execlists->tasklet.count));
4354 }
4355
4356 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4357                                     u64 offset, u32 len,
4358                                     const unsigned int flags)
4359 {
4360         u32 *cs;
4361
4362         cs = intel_ring_begin(rq, 4);
4363         if (IS_ERR(cs))
4364                 return PTR_ERR(cs);
4365
4366         /*
4367          * WaDisableCtxRestoreArbitration:bdw,chv
4368          *
4369          * We don't need to perform MI_ARB_ENABLE as often as we do (in
4370          * particular all the gen that do not need the w/a at all!), if we
4371          * took care to make sure that on every switch into this context
4372          * (both ordinary and for preemption) arbitration was enabled
4373          * we would be fine.  However, for gen8 there is another w/a that
4374          * requires us to not preempt inside GPGPU execution, so we keep
4375          * arbitration disabled for gen8 batches. Arbitration will be
4376          * re-enabled before we close the request
4377          * (engine->emit_fini_breadcrumb).
4378          */
4379         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4380
4381         /* FIXME(BDW+): Address space and security selectors. */
4382         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4383                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4384         *cs++ = lower_32_bits(offset);
4385         *cs++ = upper_32_bits(offset);
4386
4387         intel_ring_advance(rq, cs);
4388
4389         return 0;
4390 }
4391
4392 static int gen8_emit_bb_start(struct i915_request *rq,
4393                               u64 offset, u32 len,
4394                               const unsigned int flags)
4395 {
4396         u32 *cs;
4397
4398         cs = intel_ring_begin(rq, 6);
4399         if (IS_ERR(cs))
4400                 return PTR_ERR(cs);
4401
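             /*
              * In contrast to the _noarb variant above, briefly enable
              * arbitration around the batch so that it may be preempted,
              * then disable it again; arbitration is enabled once more
              * when the request is closed (engine->emit_fini_breadcrumb).
              */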
4402         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4403
4404         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4405                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4406         *cs++ = lower_32_bits(offset);
4407         *cs++ = upper_32_bits(offset);
4408
4409         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4410         *cs++ = MI_NOOP;
4411
4412         intel_ring_advance(rq, cs);
4413
4414         return 0;
4415 }
4416
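     /*
      * irq_enable unmasks both the user-interrupt bit and the bits in
      * irq_keep_mask (context-switch, CS-error and semaphore-wait events
      * the driver always wants); irq_disable re-masks only the user
      * interrupt and leaves the keep_mask bits unmasked.
      */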
4417 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4418 {
4419         ENGINE_WRITE(engine, RING_IMR,
4420                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
4421         ENGINE_POSTING_READ(engine, RING_IMR);
4422 }
4423
4424 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4425 {
4426         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4427 }
4428
4429 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4430 {
4431         u32 cmd, *cs;
4432
4433         cs = intel_ring_begin(request, 4);
4434         if (IS_ERR(cs))
4435                 return PTR_ERR(cs);
4436
4437         cmd = MI_FLUSH_DW + 1;
4438
4439         /* We always require a command barrier so that subsequent
4440          * commands, such as breadcrumb interrupts, are strictly ordered
4441          * wrt the contents of the write cache being flushed to memory
4442          * (and thus being coherent from the CPU).
4443          */
4444         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4445
4446         if (mode & EMIT_INVALIDATE) {
4447                 cmd |= MI_INVALIDATE_TLB;
4448                 if (request->engine->class == VIDEO_DECODE_CLASS)
4449                         cmd |= MI_INVALIDATE_BSD;
4450         }
4451
4452         *cs++ = cmd;
4453         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4454         *cs++ = 0; /* upper addr */
4455         *cs++ = 0; /* value */
4456         intel_ring_advance(request, cs);
4457
4458         return 0;
4459 }
4460
4461 static int gen8_emit_flush_render(struct i915_request *request,
4462                                   u32 mode)
4463 {
4464         bool vf_flush_wa = false, dc_flush_wa = false;
4465         u32 *cs, flags = 0;
4466         int len;
4467
4468         flags |= PIPE_CONTROL_CS_STALL;
4469
4470         if (mode & EMIT_FLUSH) {
4471                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4472                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4473                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4474                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4475         }
4476
4477         if (mode & EMIT_INVALIDATE) {
4478                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4479                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4480                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4481                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4482                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4483                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4484                 flags |= PIPE_CONTROL_QW_WRITE;
4485                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4486
4487                 /*
4488                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4489                  * pipe control.
4490                  */
4491                 if (IS_GEN(request->i915, 9))
4492                         vf_flush_wa = true;
4493
4494                 /* WaForGAMHang:kbl */
4495                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
4496                         dc_flush_wa = true;
4497         }
4498
4499         len = 6;
4500
4501         if (vf_flush_wa)
4502                 len += 6;
4503
4504         if (dc_flush_wa)
4505                 len += 12;
4506
4507         cs = intel_ring_begin(request, len);
4508         if (IS_ERR(cs))
4509                 return PTR_ERR(cs);
4510
4511         if (vf_flush_wa)
4512                 cs = gen8_emit_pipe_control(cs, 0, 0);
4513
4514         if (dc_flush_wa)
4515                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4516                                             0);
4517
4518         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4519
4520         if (dc_flush_wa)
4521                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4522
4523         intel_ring_advance(request, cs);
4524
4525         return 0;
4526 }
4527
4528 static int gen11_emit_flush_render(struct i915_request *request,
4529                                    u32 mode)
4530 {
4531         if (mode & EMIT_FLUSH) {
4532                 u32 *cs;
4533                 u32 flags = 0;
4534
4535                 flags |= PIPE_CONTROL_CS_STALL;
4536
4537                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4538                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4539                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4540                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4541                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4542                 flags |= PIPE_CONTROL_QW_WRITE;
4543                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4544
4545                 cs = intel_ring_begin(request, 6);
4546                 if (IS_ERR(cs))
4547                         return PTR_ERR(cs);
4548
4549                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4550                 intel_ring_advance(request, cs);
4551         }
4552
4553         if (mode & EMIT_INVALIDATE) {
4554                 u32 *cs;
4555                 u32 flags = 0;
4556
4557                 flags |= PIPE_CONTROL_CS_STALL;
4558
4559                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4560                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4561                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4562                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4563                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4564                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4565                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4566                 flags |= PIPE_CONTROL_QW_WRITE;
4567                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4568
4569                 cs = intel_ring_begin(request, 6);
4570                 if (IS_ERR(cs))
4571                         return PTR_ERR(cs);
4572
4573                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4574                 intel_ring_advance(request, cs);
4575         }
4576
4577         return 0;
4578 }
4579
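     /*
      * Build a gen12 MI_ARB_CHECK that toggles the command streamer
      * pre-parser: bit 8 is the write-enable for the pre-fetch disable
      * state carried in bit 0. Used to bracket the invalidations below so
      * that stale data is not pre-fetched past them.
      */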
4580 static u32 preparser_disable(bool state)
4581 {
4582         return MI_ARB_CHECK | 1 << 8 | state;
4583 }
4584
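     /*
      * Look up the AUX table invalidation register for a video decode or
      * video enhancement engine instance (hsdes: 1809175790).
      */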
4585 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4586 {
4587         static const i915_reg_t vd[] = {
4588                 GEN12_VD0_AUX_NV,
4589                 GEN12_VD1_AUX_NV,
4590                 GEN12_VD2_AUX_NV,
4591                 GEN12_VD3_AUX_NV,
4592         };
4593
4594         static const i915_reg_t ve[] = {
4595                 GEN12_VE0_AUX_NV,
4596                 GEN12_VE1_AUX_NV,
4597         };
4598
4599         if (engine->class == VIDEO_DECODE_CLASS)
4600                 return vd[engine->instance];
4601
4602         if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4603                 return ve[engine->instance];
4604
4605         GEM_BUG_ON("unknown aux_inv_reg\n");
4606
4607         return INVALID_MMIO_REG;
4608 }
4609
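     /*
      * Emit an MI_LOAD_REGISTER_IMM that writes AUX_INV into the given
      * invalidation register, forcing the engine to invalidate its AUX
      * table.
      */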
4610 static u32 *
4611 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4612 {
4613         *cs++ = MI_LOAD_REGISTER_IMM(1);
4614         *cs++ = i915_mmio_reg_offset(inv_reg);
4615         *cs++ = AUX_INV;
4616         *cs++ = MI_NOOP;
4617
4618         return cs;
4619 }
4620
4621 static int gen12_emit_flush_render(struct i915_request *request,
4622                                    u32 mode)
4623 {
4624         if (mode & EMIT_FLUSH) {
4625                 u32 flags = 0;
4626                 u32 *cs;
4627
4628                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4629                 flags |= PIPE_CONTROL_FLUSH_L3;
4630                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4631                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4632                 /* Wa_1409600907:tgl */
4633                 flags |= PIPE_CONTROL_DEPTH_STALL;
4634                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4635                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4636
4637                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4638                 flags |= PIPE_CONTROL_QW_WRITE;
4639
4640                 flags |= PIPE_CONTROL_CS_STALL;
4641
4642                 cs = intel_ring_begin(request, 6);
4643                 if (IS_ERR(cs))
4644                         return PTR_ERR(cs);
4645
4646                 cs = gen12_emit_pipe_control(cs,
4647                                              PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4648                                              flags, LRC_PPHWSP_SCRATCH_ADDR);
4649                 intel_ring_advance(request, cs);
4650         }
4651
4652         if (mode & EMIT_INVALIDATE) {
4653                 u32 flags = 0;
4654                 u32 *cs;
4655
4656                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4657                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4658                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4659                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4660                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4661                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4662                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4663
4664                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4665                 flags |= PIPE_CONTROL_QW_WRITE;
4666
4667                 flags |= PIPE_CONTROL_CS_STALL;
4668
4669                 cs = intel_ring_begin(request, 8 + 4);
4670                 if (IS_ERR(cs))
4671                         return PTR_ERR(cs);
4672
4673                 /*
4674                  * Prevent the pre-parser from skipping past the TLB
4675                  * invalidate and loading a stale page for the batch
4676                  * buffer / request payload.
4677                  */
4678                 *cs++ = preparser_disable(true);
4679
4680                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4681
4682                 /* hsdes: 1809175790 */
4683                 cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4684
4685                 *cs++ = preparser_disable(false);
4686                 intel_ring_advance(request, cs);
4687         }
4688
4689         return 0;
4690 }
4691
4692 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4693 {
4694         intel_engine_mask_t aux_inv = 0;
4695         u32 cmd, *cs;
4696
4697         if (mode & EMIT_INVALIDATE)
4698                 aux_inv = request->engine->mask & ~BIT(BCS0);
4699
4700         cs = intel_ring_begin(request,
4701                               4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
4702         if (IS_ERR(cs))
4703                 return PTR_ERR(cs);
4704
4705         cmd = MI_FLUSH_DW + 1;
4706
4707         /* We always require a command barrier so that subsequent
4708          * commands, such as breadcrumb interrupts, are strictly ordered
4709          * wrt the contents of the write cache being flushed to memory
4710          * (and thus being coherent from the CPU).
4711          */
4712         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4713
4714         if (mode & EMIT_INVALIDATE) {
4715                 cmd |= MI_INVALIDATE_TLB;
4716                 if (request->engine->class == VIDEO_DECODE_CLASS)
4717                         cmd |= MI_INVALIDATE_BSD;
4718         }
4719
4720         *cs++ = cmd;
4721         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4722         *cs++ = 0; /* upper addr */
4723         *cs++ = 0; /* value */
4724
4725         if (aux_inv) { /* hsdes: 1809175790 */
4726                 struct intel_engine_cs *engine;
4727                 unsigned int tmp;
4728
4729                 *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4730                 for_each_engine_masked(engine, request->engine->gt,
4731                                        aux_inv, tmp) {
4732                         *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4733                         *cs++ = AUX_INV;
4734                 }
4735                 *cs++ = MI_NOOP;
4736         }
4737         intel_ring_advance(request, cs);
4738
4739         return 0;
4740 }
4741
4742 /*
4743  * Reserve space for 2 NOOPs at the end of each request to be
4744  * used as a workaround for not being allowed to do lite
4745  * restore with HEAD==TAIL (WaIdleLiteRestore).
4746  */
4747 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4748 {
4749         /* Ensure there's always at least one preemption point per-request. */
4750         *cs++ = MI_ARB_CHECK;
4751         *cs++ = MI_NOOP;
4752         request->wa_tail = intel_ring_offset(request, cs);
4753
4754         return cs;
4755 }
4756
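     /*
      * Emit a polling MI_SEMAPHORE_WAIT on the preempt slot of the HWSP.
      * The wait only completes while that slot reads zero; when the ring
      * is paused for preempt-to-busy the slot is set non-zero, so a
      * request finishing its breadcrumb spins here rather than running on
      * into the next context.
      */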
4757 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4758 {
4759         *cs++ = MI_SEMAPHORE_WAIT |
4760                 MI_SEMAPHORE_GLOBAL_GTT |
4761                 MI_SEMAPHORE_POLL |
4762                 MI_SEMAPHORE_SAD_EQ_SDD;
4763         *cs++ = 0;
4764         *cs++ = intel_hws_preempt_address(request->engine);
4765         *cs++ = 0;
4766
4767         return cs;
4768 }
4769
4770 static __always_inline u32 *
4771 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4772 {
4773         *cs++ = MI_USER_INTERRUPT;
4774
4775         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4776         if (intel_engine_has_semaphores(request->engine))
4777                 cs = emit_preempt_busywait(request, cs);
4778
4779         request->tail = intel_ring_offset(request, cs);
4780         assert_ring_tail_valid(request->ring, request->tail);
4781
4782         return gen8_emit_wa_tail(request, cs);
4783 }
4784
4785 static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4786 {
4787         u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4788
4789         return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4790 }
4791
4792 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4793 {
4794         return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4795 }
4796
4797 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4798 {
4799         cs = gen8_emit_pipe_control(cs,
4800                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4801                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4802                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4803                                     0);
4804
4805         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4806         cs = gen8_emit_ggtt_write_rcs(cs,
4807                                       request->fence.seqno,
4808                                       i915_request_active_timeline(request)->hwsp_offset,
4809                                       PIPE_CONTROL_FLUSH_ENABLE |
4810                                       PIPE_CONTROL_CS_STALL);
4811
4812         return gen8_emit_fini_breadcrumb_tail(request, cs);
4813 }
4814
4815 static u32 *
4816 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4817 {
4818         cs = gen8_emit_ggtt_write_rcs(cs,
4819                                       request->fence.seqno,
4820                                       i915_request_active_timeline(request)->hwsp_offset,
4821                                       PIPE_CONTROL_CS_STALL |
4822                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4823                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4824                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4825                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4826                                       PIPE_CONTROL_FLUSH_ENABLE);
4827
4828         return gen8_emit_fini_breadcrumb_tail(request, cs);
4829 }
4830
4831 /*
4832  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4833  * flush and will continue pre-fetching the instructions after it before the
4834  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4835  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4836  * of the next request before the memory has been flushed, we're guaranteed that
4837  * we won't access the batch itself too early.
4838  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4839  * so, if the current request is modifying an instruction in the next request on
4840  * the same intel_context, we might pre-fetch and then execute the pre-update
4841  * instruction. To avoid this, the users of self-modifying code should either
4842  * disable the parser around the code emitting the memory writes, via a new flag
4843  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4844  * the in-kernel use-cases we've opted to use a separate context, see
4845  * reloc_gpu() as an example.
4846  * All the above applies only to the instructions themselves. Non-inline data
4847  * used by the instructions is not pre-fetched.
4848  */
4849
4850 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4851 {
4852         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4853                 MI_SEMAPHORE_GLOBAL_GTT |
4854                 MI_SEMAPHORE_POLL |
4855                 MI_SEMAPHORE_SAD_EQ_SDD;
4856         *cs++ = 0;
4857         *cs++ = intel_hws_preempt_address(request->engine);
4858         *cs++ = 0;
4859         *cs++ = 0;
4860         *cs++ = MI_NOOP;
4861
4862         return cs;
4863 }
4864
4865 static __always_inline u32 *
4866 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4867 {
4868         *cs++ = MI_USER_INTERRUPT;
4869
4870         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4871         if (intel_engine_has_semaphores(request->engine))
4872                 cs = gen12_emit_preempt_busywait(request, cs);
4873
4874         request->tail = intel_ring_offset(request, cs);
4875         assert_ring_tail_valid(request->ring, request->tail);
4876
4877         return gen8_emit_wa_tail(request, cs);
4878 }
4879
4880 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4881 {
4882         return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4883 }
4884
4885 static u32 *
4886 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4887 {
4888         cs = gen12_emit_ggtt_write_rcs(cs,
4889                                        request->fence.seqno,
4890                                        i915_request_active_timeline(request)->hwsp_offset,
4891                                        PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4892                                        PIPE_CONTROL_CS_STALL |
4893                                        PIPE_CONTROL_TILE_CACHE_FLUSH |
4894                                        PIPE_CONTROL_FLUSH_L3 |
4895                                        PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4896                                        PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4897                                        /* Wa_1409600907:tgl */
4898                                        PIPE_CONTROL_DEPTH_STALL |
4899                                        PIPE_CONTROL_DC_FLUSH_ENABLE |
4900                                        PIPE_CONTROL_FLUSH_ENABLE);
4901
4902         return gen12_emit_fini_breadcrumb_tail(request, cs);
4903 }
4904
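     /*
      * The engine is being parked (idled): cancel the timeslice and
      * preemption timers so that neither fires after the engine has gone
      * to sleep.
      */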
4905 static void execlists_park(struct intel_engine_cs *engine)
4906 {
4907         cancel_timer(&engine->execlists.timer);
4908         cancel_timer(&engine->execlists.preempt);
4909 }
4910
4911 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4912 {
4913         engine->submit_request = execlists_submit_request;
4914         engine->schedule = i915_schedule;
4915         engine->execlists.tasklet.func = execlists_submission_tasklet;
4916
4917         engine->reset.prepare = execlists_reset_prepare;
4918         engine->reset.rewind = execlists_reset_rewind;
4919         engine->reset.cancel = execlists_reset_cancel;
4920         engine->reset.finish = execlists_reset_finish;
4921
4922         engine->park = execlists_park;
4923         engine->unpark = NULL;
4924
4925         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4926         if (!intel_vgpu_active(engine->i915)) {
4927                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4928                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
4929                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4930                         if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
4931                                 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4932                 }
4933         }
4934
4935         if (INTEL_GEN(engine->i915) >= 12)
4936                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4937
4938         if (intel_engine_has_preemption(engine))
4939                 engine->emit_bb_start = gen8_emit_bb_start;
4940         else
4941                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4942 }
4943
4944 static void execlists_shutdown(struct intel_engine_cs *engine)
4945 {
4946         /* Synchronise with residual timers and any softirq they raise */
4947         del_timer_sync(&engine->execlists.timer);
4948         del_timer_sync(&engine->execlists.preempt);
4949         tasklet_kill(&engine->execlists.tasklet);
4950 }
4951
4952 static void execlists_release(struct intel_engine_cs *engine)
4953 {
4954         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4955
4956         execlists_shutdown(engine);
4957
4958         intel_engine_cleanup_common(engine);
4959         lrc_destroy_wa_ctx(engine);
4960 }
4961
4962 static void
4963 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4964 {
4965         /* Default vfuncs which can be overridden by each engine. */
4966
4967         engine->resume = execlists_resume;
4968
4969         engine->cops = &execlists_context_ops;
4970         engine->request_alloc = execlists_request_alloc;
4971
4972         engine->emit_flush = gen8_emit_flush;
4973         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4974         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4975         if (INTEL_GEN(engine->i915) >= 12) {
4976                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4977                 engine->emit_flush = gen12_emit_flush;
4978         }
4979         engine->set_default_submission = intel_execlists_set_default_submission;
4980
4981         if (INTEL_GEN(engine->i915) < 11) {
4982                 engine->irq_enable = gen8_logical_ring_enable_irq;
4983                 engine->irq_disable = gen8_logical_ring_disable_irq;
4984         } else {
4985                 /*
4986                  * TODO: On Gen11 interrupt masks need to be clear
4987                  * to allow C6 entry. Keep interrupts enabled
4988                  * and take the hit of generating extra interrupts
4989                  * until a more refined solution exists.
4990                  */
4991         }
4992 }
4993
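     /*
      * On gen8-10 all engines share the GT interrupt registers, so each
      * engine's bits live at a per-engine shift; gen11+ uses the newer
      * per-engine interrupt layout and needs no shift.
      */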
4994 static inline void
4995 logical_ring_default_irqs(struct intel_engine_cs *engine)
4996 {
4997         unsigned int shift = 0;
4998
4999         if (INTEL_GEN(engine->i915) < 11) {
5000                 const u8 irq_shifts[] = {
5001                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
5002                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
5003                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5004                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5005                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
5006                 };
5007
5008                 shift = irq_shifts[engine->id];
5009         }
5010
5011         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5012         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5013         engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5014         engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5015 }
5016
5017 static void rcs_submission_override(struct intel_engine_cs *engine)
5018 {
5019         switch (INTEL_GEN(engine->i915)) {
5020         case 12:
5021                 engine->emit_flush = gen12_emit_flush_render;
5022                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5023                 break;
5024         case 11:
5025                 engine->emit_flush = gen11_emit_flush_render;
5026                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5027                 break;
5028         default:
5029                 engine->emit_flush = gen8_emit_flush_render;
5030                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5031                 break;
5032         }
5033 }
5034
5035 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5036 {
5037         struct intel_engine_execlists * const execlists = &engine->execlists;
5038         struct drm_i915_private *i915 = engine->i915;
5039         struct intel_uncore *uncore = engine->uncore;
5040         u32 base = engine->mmio_base;
5041
5042         tasklet_init(&engine->execlists.tasklet,
5043                      execlists_submission_tasklet, (unsigned long)engine);
5044         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5045         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5046
5047         logical_ring_default_vfuncs(engine);
5048         logical_ring_default_irqs(engine);
5049
5050         if (engine->class == RENDER_CLASS)
5051                 rcs_submission_override(engine);
5052
5053         if (intel_init_workaround_bb(engine))
5054                 /*
5055                  * We continue even if we fail to initialize the WA batch
5056                  * because we only expect rare glitches, nothing critical
5057                  * enough to prevent us from using the GPU.
5058                  */
5059                 drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5060
5061         if (HAS_LOGICAL_RING_ELSQ(i915)) {
5062                 execlists->submit_reg = uncore->regs +
5063                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5064                 execlists->ctrl_reg = uncore->regs +
5065                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5066         } else {
5067                 execlists->submit_reg = uncore->regs +
5068                         i915_mmio_reg_offset(RING_ELSP(base));
5069         }
5070
5071         execlists->csb_status =
5072                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5073
5074         execlists->csb_write =
5075                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5076
5077         if (INTEL_GEN(i915) < 11)
5078                 execlists->csb_size = GEN8_CSB_ENTRIES;
5079         else
5080                 execlists->csb_size = GEN11_CSB_ENTRIES;
5081
5082         if (INTEL_GEN(engine->i915) >= 11) {
5083                 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5084                 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5085         }
5086
5087         /* Finally, take ownership and responsibility for cleanup! */
5088         engine->sanitize = execlists_sanitize;
5089         engine->release = execlists_release;
5090
5091         return 0;
5092 }
5093
5094 static void init_common_reg_state(u32 * const regs,
5095                                   const struct intel_engine_cs *engine,
5096                                   const struct intel_ring *ring,
5097                                   bool inhibit)
5098 {
5099         u32 ctl;
5100
5101         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5102         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5103         if (inhibit)
5104                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5105         if (INTEL_GEN(engine->i915) < 11)
5106                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5107                                            CTX_CTRL_RS_CTX_ENABLE);
5108         regs[CTX_CONTEXT_CONTROL] = ctl;
5109
5110         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5111         regs[CTX_TIMESTAMP] = 0;
5112 }
5113
5114 static void init_wa_bb_reg_state(u32 * const regs,
5115                                  const struct intel_engine_cs *engine)
5116 {
5117         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5118
5119         if (wa_ctx->per_ctx.size) {
5120                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5121
5122                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5123                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5124                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5125         }
5126
5127         if (wa_ctx->indirect_ctx.size) {
5128                 lrc_ring_setup_indirect_ctx(regs, engine,
5129                                             i915_ggtt_offset(wa_ctx->vma) +
5130                                             wa_ctx->indirect_ctx.offset,
5131                                             wa_ctx->indirect_ctx.size);
5132         }
5133 }
5134
5135 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5136 {
5137         if (i915_vm_is_4lvl(&ppgtt->vm)) {
5138                 /* 64b PPGTT (48bit canonical)
5139                  * PDP0_DESCRIPTOR contains the base address to PML4 and
5140                  * other PDP Descriptors are ignored.
5141                  */
5142                 ASSIGN_CTX_PML4(ppgtt, regs);
5143         } else {
5144                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
5145                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
5146                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
5147                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
5148         }
5149 }
5150
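     /*
      * Resolve the ppgtt backing an address space: the aliasing ppgtt
      * when handed the GGTT, otherwise the address space's own ppgtt.
      */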
5151 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5152 {
5153         if (i915_is_ggtt(vm))
5154                 return i915_vm_to_ggtt(vm)->alias;
5155         else
5156                 return i915_vm_to_ppgtt(vm);
5157 }
5158
5159 static void execlists_init_reg_state(u32 *regs,
5160                                      const struct intel_context *ce,
5161                                      const struct intel_engine_cs *engine,
5162                                      const struct intel_ring *ring,
5163                                      bool inhibit)
5164 {
5165         /*
5166          * A context is actually a big batch buffer with several
5167          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5168          * values we are setting here are only for the first context restore:
5169          * on a subsequent save, the GPU will recreate this batchbuffer with new
5170          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5171          * we are not initializing here).
5172          *
5173          * Must keep consistent with virtual_update_register_offsets().
5174          */
5175         set_offsets(regs, reg_offsets(engine), engine, inhibit);
5176
5177         init_common_reg_state(regs, engine, ring, inhibit);
5178         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5179
5180         init_wa_bb_reg_state(regs, engine);
5181
5182         __reset_stop_ring(regs, engine);
5183 }
5184
5185 static int
5186 populate_lr_context(struct intel_context *ce,
5187                     struct drm_i915_gem_object *ctx_obj,
5188                     struct intel_engine_cs *engine,
5189                     struct intel_ring *ring)
5190 {
5191         bool inhibit = true;
5192         void *vaddr;
5193
5194         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5195         if (IS_ERR(vaddr)) {
5196                 drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5197                 return PTR_ERR(vaddr);
5198         }
5199
5200         set_redzone(vaddr, engine);
5201
5202         if (engine->default_state) {
5203                 shmem_read(engine->default_state, 0,
5204                            vaddr, engine->context_size);
5205                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5206                 inhibit = false;
5207         }
5208
5209         /* Clear the ppHWSP (inc. per-context counters) */
5210         memset(vaddr, 0, PAGE_SIZE);
5211
5212         /*
5213          * The second page of the context object contains some registers which
5214          * must be set up prior to the first execution.
5215          */
5216         execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5217                                  ce, engine, ring, inhibit);
5218
5219         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5220         i915_gem_object_unpin_map(ctx_obj);
5221         return 0;
5222 }
5223
5224 static int __execlists_context_alloc(struct intel_context *ce,
5225                                      struct intel_engine_cs *engine)
5226 {
5227         struct drm_i915_gem_object *ctx_obj;
5228         struct intel_ring *ring;
5229         struct i915_vma *vma;
5230         u32 context_size;
5231         int ret;
5232
5233         GEM_BUG_ON(ce->state);
5234         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5235
5236         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5237                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5238
5239         if (INTEL_GEN(engine->i915) == 12) {
5240                 ce->wa_bb_page = context_size / PAGE_SIZE;
5241                 context_size += PAGE_SIZE;
5242         }
5243
5244         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5245         if (IS_ERR(ctx_obj))
5246                 return PTR_ERR(ctx_obj);
5247
5248         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5249         if (IS_ERR(vma)) {
5250                 ret = PTR_ERR(vma);
5251                 goto error_deref_obj;
5252         }
5253
5254         if (!ce->timeline) {
5255                 struct intel_timeline *tl;
5256                 struct i915_vma *hwsp;
5257
5258                 /*
5259                  * Use the static global HWSP for the kernel context, and
5260                  * a dynamically allocated cacheline for everyone else.
5261                  */
5262                 hwsp = NULL;
5263                 if (unlikely(intel_context_is_barrier(ce)))
5264                         hwsp = engine->status_page.vma;
5265
5266                 tl = intel_timeline_create(engine->gt, hwsp);
5267                 if (IS_ERR(tl)) {
5268                         ret = PTR_ERR(tl);
5269                         goto error_deref_obj;
5270                 }
5271
5272                 ce->timeline = tl;
5273         }
5274
5275         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5276         if (IS_ERR(ring)) {
5277                 ret = PTR_ERR(ring);
5278                 goto error_deref_obj;
5279         }
5280
5281         ret = populate_lr_context(ce, ctx_obj, engine, ring);
5282         if (ret) {
5283                 drm_dbg(&engine->i915->drm,
5284                         "Failed to populate LRC: %d\n", ret);
5285                 goto error_ring_free;
5286         }
5287
5288         ce->ring = ring;
5289         ce->state = vma;
5290
5291         return 0;
5292
5293 error_ring_free:
5294         intel_ring_put(ring);
5295 error_deref_obj:
5296         i915_gem_object_put(ctx_obj);
5297         return ret;
5298 }
5299
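     /*
      * A virtual engine carries at most one ready request at a time; it
      * sits on this local list until one of the siblings' execlists
      * tasklets claims it for submission.
      */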
5300 static struct list_head *virtual_queue(struct virtual_engine *ve)
5301 {
5302         return &ve->base.execlists.default_priolist.requests[0];
5303 }
5304
5305 static void virtual_context_destroy(struct kref *kref)
5306 {
5307         struct virtual_engine *ve =
5308                 container_of(kref, typeof(*ve), context.ref);
5309         unsigned int n;
5310
5311         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5312         GEM_BUG_ON(ve->request);
5313         GEM_BUG_ON(ve->context.inflight);
5314
5315         for (n = 0; n < ve->num_siblings; n++) {
5316                 struct intel_engine_cs *sibling = ve->siblings[n];
5317                 struct rb_node *node = &ve->nodes[sibling->id].rb;
5318                 unsigned long flags;
5319
5320                 if (RB_EMPTY_NODE(node))
5321                         continue;
5322
5323                 spin_lock_irqsave(&sibling->active.lock, flags);
5324
5325                 /* Detachment is lazily performed in the execlists tasklet */
5326                 if (!RB_EMPTY_NODE(node))
5327                         rb_erase_cached(node, &sibling->execlists.virtual);
5328
5329                 spin_unlock_irqrestore(&sibling->active.lock, flags);
5330         }
5331         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5332
5333         if (ve->context.state)
5334                 __execlists_context_fini(&ve->context);
5335         intel_context_fini(&ve->context);
5336
5337         intel_engine_free_request_pool(&ve->base);
5338
5339         kfree(ve->bonds);
5340         kfree(ve);
5341 }
5342
5343 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5344 {
5345         int swp;
5346
5347         /*
5348          * Pick a random sibling on starting to help spread the load around.
5349          *
5350          * New contexts are typically created with exactly the same order
5351          * of siblings, and often started in batches. Due to the way we iterate
5352          * the array of siblings when submitting requests, sibling[0] is
5353          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5354          * randomised across the system, we also help spread the load by the
5355          * first engine we inspect being different each time.
5356          *
5357          * NB This does not force us to execute on this engine, it will just
5358          * typically be the first we inspect for submission.
5359          */
5360         swp = prandom_u32_max(ve->num_siblings);
5361         if (!swp)
5362                 return;
5363
5364         swap(ve->siblings[swp], ve->siblings[0]);
5365         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
5366                 virtual_update_register_offsets(ve->context.lrc_reg_state,
5367                                                 ve->siblings[0]);
5368 }
5369
5370 static int virtual_context_alloc(struct intel_context *ce)
5371 {
5372         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5373
5374         return __execlists_context_alloc(ce, ve->siblings[0]);
5375 }
5376
5377 static int virtual_context_pin(struct intel_context *ce)
5378 {
5379         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5380         int err;
5381
5382         /* Note: we must use a real engine class for setting up reg state */
5383         err = __execlists_context_pin(ce, ve->siblings[0]);
5384         if (err)
5385                 return err;
5386
5387         virtual_engine_initial_hint(ve);
5388         return 0;
5389 }
5390
5391 static void virtual_context_enter(struct intel_context *ce)
5392 {
5393         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5394         unsigned int n;
5395
5396         for (n = 0; n < ve->num_siblings; n++)
5397                 intel_engine_pm_get(ve->siblings[n]);
5398
5399         intel_timeline_enter(ce->timeline);
5400 }
5401
5402 static void virtual_context_exit(struct intel_context *ce)
5403 {
5404         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5405         unsigned int n;
5406
5407         intel_timeline_exit(ce->timeline);
5408
5409         for (n = 0; n < ve->num_siblings; n++)
5410                 intel_engine_pm_put(ve->siblings[n]);
5411 }
5412
5413 static const struct intel_context_ops virtual_context_ops = {
5414         .alloc = virtual_context_alloc,
5415
5416         .pin = virtual_context_pin,
5417         .unpin = execlists_context_unpin,
5418
5419         .enter = virtual_context_enter,
5420         .exit = virtual_context_exit,
5421
5422         .destroy = virtual_context_destroy,
5423 };
5424
5425 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5426 {
5427         struct i915_request *rq;
5428         intel_engine_mask_t mask;
5429
5430         rq = READ_ONCE(ve->request);
5431         if (!rq)
5432                 return 0;
5433
5434         /* The rq is ready for submission; rq->execution_mask is now stable. */
5435         mask = rq->execution_mask;
5436         if (unlikely(!mask)) {
5437                 /* Invalid selection, submit to a random engine in error */
5438                 i915_request_set_error_once(rq, -ENODEV);
5439                 mask = ve->siblings[0]->mask;
5440         }
5441
5442         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5443                      rq->fence.context, rq->fence.seqno,
5444                      mask, ve->base.execlists.queue_priority_hint);
5445
5446         return mask;
5447 }
5448
5449 static void virtual_submission_tasklet(unsigned long data)
5450 {
5451         struct virtual_engine * const ve = (struct virtual_engine *)data;
5452         const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5453         intel_engine_mask_t mask;
5454         unsigned int n;
5455
5456         rcu_read_lock();
5457         mask = virtual_submission_mask(ve);
5458         rcu_read_unlock();
5459         if (unlikely(!mask))
5460                 return;
5461
5462         local_irq_disable();
5463         for (n = 0; n < ve->num_siblings; n++) {
5464                 struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5465                 struct ve_node * const node = &ve->nodes[sibling->id];
5466                 struct rb_node **parent, *rb;
5467                 bool first;
5468
5469                 if (!READ_ONCE(ve->request))
5470                         break; /* already handled by a sibling's tasklet */
5471
5472                 if (unlikely(!(mask & sibling->mask))) {
5473                         if (!RB_EMPTY_NODE(&node->rb)) {
5474                                 spin_lock(&sibling->active.lock);
5475                                 rb_erase_cached(&node->rb,
5476                                                 &sibling->execlists.virtual);
5477                                 RB_CLEAR_NODE(&node->rb);
5478                                 spin_unlock(&sibling->active.lock);
5479                         }
5480                         continue;
5481                 }
5482
5483                 spin_lock(&sibling->active.lock);
5484
5485                 if (!RB_EMPTY_NODE(&node->rb)) {
5486                         /*
5487                          * Cheat and avoid rebalancing the tree if we can
5488                          * reuse this node in situ.
5489                          */
5490                         first = rb_first_cached(&sibling->execlists.virtual) ==
5491                                 &node->rb;
5492                         if (prio == node->prio || (prio > node->prio && first))
5493                                 goto submit_engine;
5494
5495                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5496                 }
5497
5498                 rb = NULL;
5499                 first = true;
5500                 parent = &sibling->execlists.virtual.rb_root.rb_node;
5501                 while (*parent) {
5502                         struct ve_node *other;
5503
5504                         rb = *parent;
5505                         other = rb_entry(rb, typeof(*other), rb);
5506                         if (prio > other->prio) {
5507                                 parent = &rb->rb_left;
5508                         } else {
5509                                 parent = &rb->rb_right;
5510                                 first = false;
5511                         }
5512                 }
5513
5514                 rb_link_node(&node->rb, rb, parent);
5515                 rb_insert_color_cached(&node->rb,
5516                                        &sibling->execlists.virtual,
5517                                        first);
5518
5519 submit_engine:
5520                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5521                 node->prio = prio;
5522                 if (first && prio > sibling->execlists.queue_priority_hint)
5523                         tasklet_hi_schedule(&sibling->execlists.tasklet);
5524
5525                 spin_unlock(&sibling->active.lock);
5526         }
5527         local_irq_enable();
5528 }
5529
5530 static void virtual_submit_request(struct i915_request *rq)
5531 {
5532         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5533         struct i915_request *old;
5534         unsigned long flags;
5535
5536         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5537                      rq->fence.context,
5538                      rq->fence.seqno);
5539
5540         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5541
5542         spin_lock_irqsave(&ve->base.active.lock, flags);
5543
5544         old = ve->request;
5545         if (old) { /* background completion event from preempt-to-busy */
5546                 GEM_BUG_ON(!i915_request_completed(old));
5547                 __i915_request_submit(old);
5548                 i915_request_put(old);
5549         }
5550
5551         if (i915_request_completed(rq)) {
5552                 __i915_request_submit(rq);
5553
5554                 ve->base.execlists.queue_priority_hint = INT_MIN;
5555                 ve->request = NULL;
5556         } else {
5557                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
5558                 ve->request = i915_request_get(rq);
5559
5560                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5561                 list_move_tail(&rq->sched.link, virtual_queue(ve));
5562
5563                 tasklet_hi_schedule(&ve->base.execlists.tasklet);
5564         }
5565
5566         spin_unlock_irqrestore(&ve->base.active.lock, flags);
5567 }
5568
5569 static struct ve_bond *
5570 virtual_find_bond(struct virtual_engine *ve,
5571                   const struct intel_engine_cs *master)
5572 {
5573         int i;
5574
5575         for (i = 0; i < ve->num_bonds; i++) {
5576                 if (ve->bonds[i].master == master)
5577                         return &ve->bonds[i];
5578         }
5579
5580         return NULL;
5581 }
5582
5583 static void
5584 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5585 {
5586         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5587         intel_engine_mask_t allowed, exec;
5588         struct ve_bond *bond;
5589
5590         allowed = ~to_request(signal)->engine->mask;
5591
5592         bond = virtual_find_bond(ve, to_request(signal)->engine);
5593         if (bond)
5594                 allowed &= bond->sibling_mask;
5595
5596         /* Restrict the bonded request to run on only the available engines */
5597         exec = READ_ONCE(rq->execution_mask);
5598         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5599                 ;
5600
5601         /* Prevent the master from being re-run on the bonded engines */
5602         to_request(signal)->execution_mask &= ~allowed;
5603 }
5604
5605 struct intel_context *
5606 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5607                                unsigned int count)
5608 {
5609         struct virtual_engine *ve;
5610         unsigned int n;
5611         int err;
5612
5613         if (count == 0)
5614                 return ERR_PTR(-EINVAL);
5615
5616         if (count == 1)
5617                 return intel_context_create(siblings[0]);
5618
5619         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5620         if (!ve)
5621                 return ERR_PTR(-ENOMEM);
5622
5623         ve->base.i915 = siblings[0]->i915;
5624         ve->base.gt = siblings[0]->gt;
5625         ve->base.uncore = siblings[0]->uncore;
5626         ve->base.id = -1;
5627
5628         ve->base.class = OTHER_CLASS;
5629         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5630         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5631         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5632
5633         /*
5634          * The decision on whether to submit a request using semaphores
5635          * depends on the saturated state of the engine. We only compute
5636          * this during HW submission of the request, and we need this
5637          * state to be globally applied to all requests being submitted
5638          * to this engine. Virtual engines encompass more than one physical
5639          * engine and so we cannot accurately tell in advance if one of those
5640          * engines is already saturated and so cannot afford to use a semaphore
5641          * and be pessimized in priority for doing so -- if we are the only
5642          * context using semaphores after all other clients have stopped, we
5643          * will be starved on the saturated system. Such a global switch for
5644          * semaphores is less than ideal, but alas is the current compromise.
5645          */
5646         ve->base.saturated = ALL_ENGINES;
5647
5648         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5649
5650         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5651         intel_engine_init_breadcrumbs(&ve->base);
5652         intel_engine_init_execlists(&ve->base);
5653
5654         ve->base.cops = &virtual_context_ops;
5655         ve->base.request_alloc = execlists_request_alloc;
5656
5657         ve->base.schedule = i915_schedule;
5658         ve->base.submit_request = virtual_submit_request;
5659         ve->base.bond_execute = virtual_bond_execute;
5660
5661         INIT_LIST_HEAD(virtual_queue(ve));
5662         ve->base.execlists.queue_priority_hint = INT_MIN;
5663         tasklet_init(&ve->base.execlists.tasklet,
5664                      virtual_submission_tasklet,
5665                      (unsigned long)ve);
5666
5667         intel_context_init(&ve->context, &ve->base);
5668
5669         for (n = 0; n < count; n++) {
5670                 struct intel_engine_cs *sibling = siblings[n];
5671
5672                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5673                 if (sibling->mask & ve->base.mask) {
5674                         DRM_DEBUG("duplicate %s entry in load balancer\n",
5675                                   sibling->name);
5676                         err = -EINVAL;
5677                         goto err_put;
5678                 }
5679
5680                 /*
5681                  * The virtual engine implementation is tightly coupled to
5682                  * the execlists backend -- we push requests directly into
5683                  * a tree inside each physical engine. We could support
5684                  * layering if we handled cloning of the requests and
5685                  * submitted a copy into each backend.
5686                  */
5687                 if (sibling->execlists.tasklet.func !=
5688                     execlists_submission_tasklet) {
5689                         err = -ENODEV;
5690                         goto err_put;
5691                 }
5692
5693                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5694                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5695
5696                 ve->siblings[ve->num_siblings++] = sibling;
5697                 ve->base.mask |= sibling->mask;
5698
5699                 /*
5700                  * All physical engines must have compatible emission functions
5701                  * (as we build the instructions during request construction and
5702                  * do not alter them before submission on the physical engine).
5703                  * We use the engine class as a guide here, although that could
5704                  * be refined.
5705                  */
5706                 if (ve->base.class != OTHER_CLASS) {
5707                         if (ve->base.class != sibling->class) {
5708                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5709                                           sibling->class, ve->base.class);
5710                                 err = -EINVAL;
5711                                 goto err_put;
5712                         }
5713                         continue;
5714                 }
5715
5716                 ve->base.class = sibling->class;
5717                 ve->base.uabi_class = sibling->uabi_class;
5718                 snprintf(ve->base.name, sizeof(ve->base.name),
5719                          "v%dx%d", ve->base.class, count);
5720                 ve->base.context_size = sibling->context_size;
5721
5722                 ve->base.emit_bb_start = sibling->emit_bb_start;
5723                 ve->base.emit_flush = sibling->emit_flush;
5724                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5725                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5726                 ve->base.emit_fini_breadcrumb_dw =
5727                         sibling->emit_fini_breadcrumb_dw;
5728
5729                 ve->base.flags = sibling->flags;
5730         }
5731
5732         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5733
5734         return &ve->context;
5735
5736 err_put:
5737         intel_context_put(&ve->context);
5738         return ERR_PTR(err);
5739 }
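
/*
 * Illustrative usage sketch (not lifted from the in-tree callers; the
 * vcs0/vcs1 engine pointers are assumptions): create a virtual engine over
 * two physical siblings and submit one load-balanced request to it.
 *
 *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
 *	struct intel_context *ce;
 *	struct i915_request *rq;
 *
 *	ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 *
 *	rq = intel_context_create_request(ce);
 *	if (!IS_ERR(rq))
 *		i915_request_add(rq);    (may execute on either vcs0 or vcs1)
 *
 *	intel_context_put(ce);
 */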
5740
5741 struct intel_context *
5742 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5743 {
5744         struct virtual_engine *se = to_virtual_engine(src);
5745         struct intel_context *dst;
5746
5747         dst = intel_execlists_create_virtual(se->siblings,
5748                                              se->num_siblings);
5749         if (IS_ERR(dst))
5750                 return dst;
5751
5752         if (se->num_bonds) {
5753                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5754
5755                 de->bonds = kmemdup(se->bonds,
5756                                     sizeof(*se->bonds) * se->num_bonds,
5757                                     GFP_KERNEL);
5758                 if (!de->bonds) {
5759                         intel_context_put(dst);
5760                         return ERR_PTR(-ENOMEM);
5761                 }
5762
5763                 de->num_bonds = se->num_bonds;
5764         }
5765
5766         return dst;
5767 }
5768
5769 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5770                                      const struct intel_engine_cs *master,
5771                                      const struct intel_engine_cs *sibling)
5772 {
5773         struct virtual_engine *ve = to_virtual_engine(engine);
5774         struct ve_bond *bond;
5775         int n;
5776
5777         /* Sanity check the sibling is part of the virtual engine */
5778         for (n = 0; n < ve->num_siblings; n++)
5779                 if (sibling == ve->siblings[n])
5780                         break;
5781         if (n == ve->num_siblings)
5782                 return -EINVAL;
5783
5784         bond = virtual_find_bond(ve, master);
5785         if (bond) {
5786                 bond->sibling_mask |= sibling->mask;
5787                 return 0;
5788         }
5789
5790         bond = krealloc(ve->bonds,
5791                         sizeof(*bond) * (ve->num_bonds + 1),
5792                         GFP_KERNEL);
5793         if (!bond)
5794                 return -ENOMEM;
5795
5796         bond[ve->num_bonds].master = master;
5797         bond[ve->num_bonds].sibling_mask = sibling->mask;
5798
5799         ve->bonds = bond;
5800         ve->num_bonds++;
5801
5802         return 0;
5803 }
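
/*
 * Illustrative sketch of setting up a bond (ve_ctx, rcs0 and vcs1 are
 * assumptions, not names from this file): after building ve_ctx from
 * { vcs0, vcs1 } as above, requests on ve_ctx that wait on a submit fence
 * from a master request running on rcs0 will be narrowed to vcs1 by
 * virtual_bond_execute().
 *
 *	err = intel_virtual_engine_attach_bond(ve_ctx->engine, rcs0, vcs1);
 *	if (err)
 *		return err;
 */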
5804
5805 struct intel_engine_cs *
5806 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5807                                  unsigned int sibling)
5808 {
5809         struct virtual_engine *ve = to_virtual_engine(engine);
5810
5811         if (sibling >= ve->num_siblings)
5812                 return NULL;
5813
5814         return ve->siblings[sibling];
5815 }
5816
5817 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5818                                    struct drm_printer *m,
5819                                    void (*show_request)(struct drm_printer *m,
5820                                                         struct i915_request *rq,
5821                                                         const char *prefix),
5822                                    unsigned int max)
5823 {
5824         const struct intel_engine_execlists *execlists = &engine->execlists;
5825         struct i915_request *rq, *last;
5826         unsigned long flags;
5827         unsigned int count;
5828         struct rb_node *rb;
5829
5830         spin_lock_irqsave(&engine->active.lock, flags);
5831
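        /* Requests already submitted to HW: walk the engine->active list */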
5832         last = NULL;
5833         count = 0;
5834         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5835                 if (count++ < max - 1)
5836                         show_request(m, rq, "\t\tE ");
5837                 else
5838                         last = rq;
5839         }
5840         if (last) {
5841                 if (count > max) {
5842                         drm_printf(m,
5843                                    "\t\t...skipping %d executing requests...\n",
5844                                    count - max);
5845                 }
5846                 show_request(m, last, "\t\tE ");
5847         }
5848
5849         if (execlists->switch_priority_hint != INT_MIN)
5850                 drm_printf(m, "\t\tSwitch priority hint: %d\n",
5851                            READ_ONCE(execlists->switch_priority_hint));
5852         if (execlists->queue_priority_hint != INT_MIN)
5853                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5854                            READ_ONCE(execlists->queue_priority_hint));
5855
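        /* Requests still waiting in the priority queue (execlists->queue) */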
5856         last = NULL;
5857         count = 0;
5858         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5859                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5860                 int i;
5861
5862                 priolist_for_each_request(rq, p, i) {
5863                         if (count++ < max - 1)
5864                                 show_request(m, rq, "\t\tQ ");
5865                         else
5866                                 last = rq;
5867                 }
5868         }
5869         if (last) {
5870                 if (count > max) {
5871                         drm_printf(m,
5872                                    "\t\t...skipping %d queued requests...\n",
5873                                    count - max);
5874                 }
5875                 show_request(m, last, "\t\tQ ");
5876         }
5877
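        /* Requests pending on virtual engines bound to this physical engine */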
5878         last = NULL;
5879         count = 0;
5880         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5881                 struct virtual_engine *ve =
5882                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5883                 struct i915_request *rq = READ_ONCE(ve->request);
5884
5885                 if (rq) {
5886                         if (count++ < max - 1)
5887                                 show_request(m, rq, "\t\tV ");
5888                         else
5889                                 last = rq;
5890                 }
5891         }
5892         if (last) {
5893                 if (count > max) {
5894                         drm_printf(m,
5895                                    "\t\t...skipping %d virtual requests...\n",
5896                                    count - max);
5897                 }
5898                 show_request(m, last, "\t\tV ");
5899         }
5900
5901         spin_unlock_irqrestore(&engine->active.lock, flags);
5902 }
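
/*
 * Illustrative sketch of a show_request() callback a caller could supply
 * (the in-tree users provide their own formatting; print_request here is
 * a hypothetical helper, not one defined in this file):
 *
 *	static void print_request(struct drm_printer *m,
 *				  struct i915_request *rq,
 *				  const char *prefix)
 *	{
 *		drm_printf(m, "%s%llx:%lld, prio=%d\n", prefix,
 *			   rq->fence.context, rq->fence.seqno,
 *			   rq_prio(rq));
 *	}
 *
 *	intel_execlists_show_requests(engine, m, print_request, 8);
 */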
5903
5904 void intel_lr_context_reset(struct intel_engine_cs *engine,
5905                             struct intel_context *ce,
5906                             u32 head,
5907                             bool scrub)
5908 {
5909         GEM_BUG_ON(!intel_context_is_pinned(ce));
5910
5911         /*
5912          * We want a simple context + ring to execute the breadcrumb update.
5913          * We cannot rely on the context being intact across the GPU hang,
5914          * so clear it and rebuild just what we need for the breadcrumb.
5915          * All pending requests for this context will be zapped, and any
5916          * future request will be submitted only after userspace has had
5917          * the opportunity to recreate its own state.
5918          */
5919         if (scrub)
5920                 restore_default_state(ce, engine);
5921
5922         /* Rerun the request; its payload has been neutered (if guilty). */
5923         __execlists_update_reg_state(ce, engine, head);
5924 }
5925
5926 bool
5927 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5928 {
5929         return engine->set_default_submission ==
5930                intel_execlists_set_default_submission;
5931 }
5932
5933 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5934 #include "selftest_lrc.c"
5935 #endif