drivers/gpu/drm/i915/gt/intel_lrc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences with the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But, what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before) we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder, that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
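/*
 * Illustrative sketch (not built): a minimal model of the ELSP pairing
 * rule described above, using hypothetical types and names purely for
 * demonstration. The real logic lives in execlists_dequeue() below and
 * additionally handles priorities, preemption and virtual engines.
 */
#if 0
struct sketch_request {
	struct sketch_request *next;	/* submission queue, head first */
	unsigned int ctx_id;		/* globally unique context ID */
};

/*
 * Fill the two ELSP slots from the head of the queue: requests for the
 * same context are folded into slot 0 (only the most recent tail
 * matters), and the first request for a different context, if any,
 * becomes slot 1. A context must not appear twice in one execution
 * list, so a lone context is paired with NULL.
 */
static void sketch_fill_elsp(struct sketch_request *head,
			     struct sketch_request *elsp[2])
{
	elsp[0] = head;

	while (head && head->ctx_id == elsp[0]->ctx_id) {
		elsp[0] = head;		/* coalesce same-context requests */
		head = head->next;
	}

	elsp[1] = head;			/* different context, or NULL */
}
#endif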
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_breadcrumbs.h"
141 #include "intel_context.h"
142 #include "intel_engine_pm.h"
143 #include "intel_gt.h"
144 #include "intel_gt_pm.h"
145 #include "intel_gt_requests.h"
146 #include "intel_lrc_reg.h"
147 #include "intel_mocs.h"
148 #include "intel_reset.h"
149 #include "intel_ring.h"
150 #include "intel_workarounds.h"
151 #include "shmem_utils.h"
152
153 #define RING_EXECLIST_QFULL             (1 << 0x2)
154 #define RING_EXECLIST1_VALID            (1 << 0x3)
155 #define RING_EXECLIST0_VALID            (1 << 0x4)
156 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
157 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
158 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
159
160 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
161 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
162 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
163 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
164 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
165 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
166
167 #define GEN8_CTX_STATUS_COMPLETED_MASK \
168          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
169
170 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
171
172 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
173 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
174 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
175 #define GEN12_IDLE_CTX_ID               0x7FF
176 #define GEN12_CSB_CTX_VALID(csb_dw) \
177         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
178
179 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
180 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
181
182 struct virtual_engine {
183         struct intel_engine_cs base;
184         struct intel_context context;
185
186         /*
187          * We allow only a single request through the virtual engine at a time
188          * (each request in the timeline waits for the completion fence of
189          * the previous before being submitted). By restricting ourselves to
190          * only submitting a single request, each request is placed on to a
191          * physical engine to maximise load spreading (by virtue of the late greedy
192          * scheduling -- each real engine takes the next available request
193          * upon idling).
194          */
195         struct i915_request *request;
196
197         /*
198          * We keep a rbtree of available virtual engines inside each physical
199          * engine, sorted by priority. Here we preallocate the nodes we need
200          * for the virtual engine, indexed by physical_engine->id.
201          */
202         struct ve_node {
203                 struct rb_node rb;
204                 int prio;
205         } nodes[I915_NUM_ENGINES];
206
207         /*
208          * Keep track of bonded pairs -- restrictions upon our selection
209          * of physical engines any particular request may be submitted to.
210          * If we receive a submit-fence from a master engine, we will only
211          * use one of the physical engines in sibling_mask.
212          */
213         struct ve_bond {
214                 const struct intel_engine_cs *master;
215                 intel_engine_mask_t sibling_mask;
216         } *bonds;
217         unsigned int num_bonds;
218
219         /* And finally, which physical engines this virtual engine maps onto. */
220         unsigned int num_siblings;
221         struct intel_engine_cs *siblings[];
222 };
223
224 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
225 {
226         GEM_BUG_ON(!intel_engine_is_virtual(engine));
227         return container_of(engine, struct virtual_engine, base);
228 }
229
230 static int __execlists_context_alloc(struct intel_context *ce,
231                                      struct intel_engine_cs *engine);
232
233 static void execlists_init_reg_state(u32 *reg_state,
234                                      const struct intel_context *ce,
235                                      const struct intel_engine_cs *engine,
236                                      const struct intel_ring *ring,
237                                      bool close);
238 static void
239 __execlists_update_reg_state(const struct intel_context *ce,
240                              const struct intel_engine_cs *engine,
241                              u32 head);
242
243 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
244 {
245         if (INTEL_GEN(engine->i915) >= 12)
246                 return 0x60;
247         else if (INTEL_GEN(engine->i915) >= 9)
248                 return 0x54;
249         else if (engine->class == RENDER_CLASS)
250                 return 0x58;
251         else
252                 return -1;
253 }
254
255 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
256 {
257         if (INTEL_GEN(engine->i915) >= 12)
258                 return 0x74;
259         else if (INTEL_GEN(engine->i915) >= 9)
260                 return 0x68;
261         else if (engine->class == RENDER_CLASS)
262                 return 0xd8;
263         else
264                 return -1;
265 }
266
267 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
268 {
269         if (INTEL_GEN(engine->i915) >= 12)
270                 return 0x12;
271         else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
272                 return 0x18;
273         else
274                 return -1;
275 }
276
277 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
278 {
279         int x;
280
281         x = lrc_ring_wa_bb_per_ctx(engine);
282         if (x < 0)
283                 return x;
284
285         return x + 2;
286 }
287
288 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
289 {
290         int x;
291
292         x = lrc_ring_indirect_ptr(engine);
293         if (x < 0)
294                 return x;
295
296         return x + 2;
297 }
298
299 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
300 {
301         if (engine->class != RENDER_CLASS)
302                 return -1;
303
304         if (INTEL_GEN(engine->i915) >= 12)
305                 return 0xb6;
306         else if (INTEL_GEN(engine->i915) >= 11)
307                 return 0xaa;
308         else
309                 return -1;
310 }
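/*
 * The lrc_ring_*() helpers above return the dword index of a register's
 * MI_LOAD_REGISTER_IMM slot within the context image (or -1 if that
 * register is not part of the image for this engine/gen). The value
 * itself lives in the next dword, e.g. regs[lrc_ring_mi_mode(engine) + 1]
 * is the RING_MI_MODE payload inspected by execlists_check_context().
 */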
311
312 static u32
313 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
314 {
315         switch (INTEL_GEN(engine->i915)) {
316         default:
317                 MISSING_CASE(INTEL_GEN(engine->i915));
318                 fallthrough;
319         case 12:
320                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
321         case 11:
322                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
323         case 10:
324                 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
325         case 9:
326                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
327         case 8:
328                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
329         }
330 }
331
332 static void
333 lrc_ring_setup_indirect_ctx(u32 *regs,
334                             const struct intel_engine_cs *engine,
335                             u32 ctx_bb_ggtt_addr,
336                             u32 size)
337 {
338         GEM_BUG_ON(!size);
339         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
340         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
341         regs[lrc_ring_indirect_ptr(engine) + 1] =
342                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
343
344         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
345         regs[lrc_ring_indirect_offset(engine) + 1] =
346                 lrc_ring_indirect_offset_default(engine) << 6;
347 }
348
349 static u32 intel_context_get_runtime(const struct intel_context *ce)
350 {
351         /*
352          * We can use either ppHWSP[16] which is recorded before the context
353          * switch (and so excludes the cost of context switches) or use the
354          * value from the context image itself, which is saved/restored earlier
355          * and so includes the cost of the save.
356          */
357         return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
358 }
359
360 static void mark_eio(struct i915_request *rq)
361 {
362         if (i915_request_completed(rq))
363                 return;
364
365         GEM_BUG_ON(i915_request_signaled(rq));
366
367         i915_request_set_error_once(rq, -EIO);
368         i915_request_mark_complete(rq);
369 }
370
371 static struct i915_request *
372 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
373 {
374         struct i915_request *active = rq;
375
376         rcu_read_lock();
377         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
378                 if (i915_request_completed(rq))
379                         break;
380
381                 active = rq;
382         }
383         rcu_read_unlock();
384
385         return active;
386 }
387
388 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
389 {
390         return (i915_ggtt_offset(engine->status_page.vma) +
391                 I915_GEM_HWS_PREEMPT_ADDR);
392 }
393
394 static inline void
395 ring_set_paused(const struct intel_engine_cs *engine, int state)
396 {
397         /*
398          * We inspect HWS_PREEMPT with a semaphore inside
399          * engine->emit_fini_breadcrumb. If the dword is true,
400          * the ring is paused as the semaphore will busywait
401          * until the dword is false.
402          */
403         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
404         if (state)
405                 wmb();
406 }
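/*
 * For reference: the matching busywait in each request's fini breadcrumb
 * is (roughly) an MI_SEMAPHORE_WAIT in polling mode on the
 * I915_GEM_HWS_PREEMPT dword (see intel_hws_preempt_address()), which is
 * released once the dword reads back as zero, i.e. after
 * ring_set_paused(engine, 0).
 */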
407
408 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
409 {
410         return rb_entry(rb, struct i915_priolist, node);
411 }
412
413 static inline int rq_prio(const struct i915_request *rq)
414 {
415         return READ_ONCE(rq->sched.attr.priority);
416 }
417
418 static int effective_prio(const struct i915_request *rq)
419 {
420         int prio = rq_prio(rq);
421
422         /*
423          * If this request is special and must not be interrupted at any
424          * cost, so be it. Note we are only checking the most recent request
425          * in the context and so may be masking an earlier vip request. It
426          * is hoped that under the conditions where nopreempt is used, this
427          * will not matter (i.e. all requests to that context will be
428          * nopreempt for as long as desired).
429          */
430         if (i915_request_has_nopreempt(rq))
431                 prio = I915_PRIORITY_UNPREEMPTABLE;
432
433         return prio;
434 }
435
436 static int queue_prio(const struct intel_engine_execlists *execlists)
437 {
438         struct i915_priolist *p;
439         struct rb_node *rb;
440
441         rb = rb_first_cached(&execlists->queue);
442         if (!rb)
443                 return INT_MIN;
444
445         /*
446          * As the priolist[] are inverted, with the highest priority in [0],
447          * we have to flip the index value to become priority.
448          */
449         p = to_priolist(rb);
450         if (!I915_USER_PRIORITY_SHIFT)
451                 return p->priority;
452
453         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
454 }
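/*
 * Worked example, purely for illustration and assuming
 * I915_USER_PRIORITY_SHIFT == 2: for a priolist with p->priority == 0 and
 * only the highest sub-level occupied (p->used == BIT(0), ffs() == 1),
 * queue_prio() yields ((0 + 1) << 2) - 1 == 3; with only the lowest
 * sub-level occupied (p->used == BIT(3), ffs() == 4) it yields 0.
 */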
455
456 static inline bool need_preempt(const struct intel_engine_cs *engine,
457                                 const struct i915_request *rq,
458                                 struct rb_node *rb)
459 {
460         int last_prio;
461
462         if (!intel_engine_has_semaphores(engine))
463                 return false;
464
465         /*
466          * Check if the current priority hint merits a preemption attempt.
467          *
468          * We record the highest value priority we saw during rescheduling
469          * prior to this dequeue, therefore we know that if it is strictly
470  * less than the current tail of ELSP[0], we do not need to force
471          * a preempt-to-idle cycle.
472          *
473          * However, the priority hint is a mere hint that we may need to
474          * preempt. If that hint is stale or we may be trying to preempt
475          * ourselves, ignore the request.
476          *
477          * More naturally we would write
478          *      prio >= max(0, last);
479          * except that we wish to prevent triggering preemption at the same
480          * priority level: the task that is running should remain running
481          * to preserve FIFO ordering of dependencies.
482          */
483         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
484         if (engine->execlists.queue_priority_hint <= last_prio)
485                 return false;
486
487         /*
488          * Check against the first request in ELSP[1], it will, thanks to the
489          * power of PI, be the highest priority of that context.
490          */
491         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
492             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
493                 return true;
494
495         if (rb) {
496                 struct virtual_engine *ve =
497                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
498                 bool preempt = false;
499
500                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
501                         struct i915_request *next;
502
503                         rcu_read_lock();
504                         next = READ_ONCE(ve->request);
505                         if (next)
506                                 preempt = rq_prio(next) > last_prio;
507                         rcu_read_unlock();
508                 }
509
510                 if (preempt)
511                         return preempt;
512         }
513
514         /*
515          * If the inflight context did not trigger the preemption, then maybe
516          * it was the set of queued requests? Pick the highest priority in
517          * the queue (the first active priolist) and see if it deserves to be
518          * running instead of ELSP[0].
519          *
520  * The highest priority request in the queue cannot be either
521  * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
522  * context, its priority would not exceed ELSP[0] aka last_prio.
523          */
524         return queue_prio(&engine->execlists) > last_prio;
525 }
526
527 __maybe_unused static inline bool
528 assert_priority_queue(const struct i915_request *prev,
529                       const struct i915_request *next)
530 {
531         /*
532          * Without preemption, the prev may refer to the still active element
533          * which we refuse to let go.
534          *
535          * Even with preemption, there are times when we think it is better not
536          * to preempt and leave an ostensibly lower priority request in flight.
537          */
538         if (i915_request_is_active(prev))
539                 return true;
540
541         return rq_prio(prev) >= rq_prio(next);
542 }
543
544 /*
545  * The context descriptor encodes various attributes of a context,
546  * including its GTT address and some flags. Because it's fairly
547  * expensive to calculate, we'll just do it once and cache the result,
548  * which remains valid until the context is unpinned.
549  *
550  * This is what a descriptor looks like, from LSB to MSB::
551  *
552  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
553  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
554  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
555  *      bits 53-54:    mbz, reserved for use by hardware
556  *      bits 55-63:    group ID, currently unused and set to 0
557  *
558  * Starting from Gen11, the upper dword of the descriptor has a new format:
559  *
560  *      bits 32-36:    reserved
561  *      bits 37-47:    SW context ID
562  * bits 48-53:    engine instance
563  *      bit 54:        mbz, reserved for use by hardware
564  *      bits 55-60:    SW counter
565  *      bits 61-63:    engine class
566  *
567  * engine info, SW context ID and SW counter need to form a unique number
568  * (Context ID) per lrc.
569  */
570 static u32
571 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
572 {
573         u32 desc;
574
575         desc = INTEL_LEGACY_32B_CONTEXT;
576         if (i915_vm_is_4lvl(ce->vm))
577                 desc = INTEL_LEGACY_64B_CONTEXT;
578         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
579
580         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
581         if (IS_GEN(engine->i915, 8))
582                 desc |= GEN8_CTX_L3LLC_COHERENT;
583
584         return i915_ggtt_offset(ce->state) | desc;
585 }
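/*
 * Illustrative sketch (not built): packing the gen11+ upper descriptor
 * dword from the bitfield layout documented above. The helper name and
 * the use of FIELD_PREP() here are for demonstration only -- the driver
 * composes these bits via ce->lrc.ccid in __execlists_schedule_in().
 */
#if 0
static u32 sketch_gen11_ctx_id(u32 sw_ctx_id, u32 engine_instance,
			       u32 sw_counter, u32 engine_class)
{
	u64 dw = 0;

	dw |= FIELD_PREP(GENMASK_ULL(47, 37), sw_ctx_id);
	dw |= FIELD_PREP(GENMASK_ULL(53, 48), engine_instance);
	dw |= FIELD_PREP(GENMASK_ULL(60, 55), sw_counter);
	dw |= FIELD_PREP(GENMASK_ULL(63, 61), engine_class);

	return upper_32_bits(dw);
}
#endif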
586
587 static inline unsigned int dword_in_page(void *addr)
588 {
589         return offset_in_page(addr) / sizeof(u32);
590 }
591
592 static void set_offsets(u32 *regs,
593                         const u8 *data,
594                         const struct intel_engine_cs *engine,
595                         bool clear)
596 #define NOP(x) (BIT(7) | (x))
597 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
598 #define POSTED BIT(0)
599 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
600 #define REG16(x) \
601         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
602         (((x) >> 2) & 0x7f)
603 #define END(total_state_size) 0, (total_state_size)
604 {
605         const u32 base = engine->mmio_base;
606
607         while (*data) {
608                 u8 count, flags;
609
610                 if (*data & BIT(7)) { /* skip */
611                         count = *data++ & ~BIT(7);
612                         if (clear)
613                                 memset32(regs, MI_NOOP, count);
614                         regs += count;
615                         continue;
616                 }
617
618                 count = *data & 0x3f;
619                 flags = *data >> 6;
620                 data++;
621
622                 *regs = MI_LOAD_REGISTER_IMM(count);
623                 if (flags & POSTED)
624                         *regs |= MI_LRI_FORCE_POSTED;
625                 if (INTEL_GEN(engine->i915) >= 11)
626                         *regs |= MI_LRI_LRM_CS_MMIO;
627                 regs++;
628
629                 GEM_BUG_ON(!count);
630                 do {
631                         u32 offset = 0;
632                         u8 v;
633
634                         do {
635                                 v = *data++;
636                                 offset <<= 7;
637                                 offset |= v & ~BIT(7);
638                         } while (v & BIT(7));
639
640                         regs[0] = base + (offset << 2);
641                         if (clear)
642                                 regs[1] = 0;
643                         regs += 2;
644                 } while (--count);
645         }
646
647         if (clear) {
648                 u8 count = *++data;
649
650                 /* Clear past the tail for HW access */
651                 GEM_BUG_ON(dword_in_page(regs) > count);
652                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
653
654                 /* Close the batch; used mainly by live_lrc_layout() */
655                 *regs = MI_BATCH_BUFFER_END;
656                 if (INTEL_GEN(engine->i915) >= 10)
657                         *regs |= BIT(0);
658         }
659 }
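/*
 * Worked example of the encoding consumed by set_offsets(): REG16(0x244)
 * expands to the two bytes 0x81, 0x11 (0x244 >> 9 == 0x01 with BIT(7) set
 * as a continuation marker, then (0x244 >> 2) & 0x7f == 0x11). The decoder
 * above rebuilds offset = (0x01 << 7) | 0x11 == 0x91 and emits
 * regs[0] = engine->mmio_base + (0x91 << 2), i.e. mmio_base + 0x244.
 */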
660
661 static const u8 gen8_xcs_offsets[] = {
662         NOP(1),
663         LRI(11, 0),
664         REG16(0x244),
665         REG(0x034),
666         REG(0x030),
667         REG(0x038),
668         REG(0x03c),
669         REG(0x168),
670         REG(0x140),
671         REG(0x110),
672         REG(0x11c),
673         REG(0x114),
674         REG(0x118),
675
676         NOP(9),
677         LRI(9, 0),
678         REG16(0x3a8),
679         REG16(0x28c),
680         REG16(0x288),
681         REG16(0x284),
682         REG16(0x280),
683         REG16(0x27c),
684         REG16(0x278),
685         REG16(0x274),
686         REG16(0x270),
687
688         NOP(13),
689         LRI(2, 0),
690         REG16(0x200),
691         REG(0x028),
692
693         END(80)
694 };
695
696 static const u8 gen9_xcs_offsets[] = {
697         NOP(1),
698         LRI(14, POSTED),
699         REG16(0x244),
700         REG(0x034),
701         REG(0x030),
702         REG(0x038),
703         REG(0x03c),
704         REG(0x168),
705         REG(0x140),
706         REG(0x110),
707         REG(0x11c),
708         REG(0x114),
709         REG(0x118),
710         REG(0x1c0),
711         REG(0x1c4),
712         REG(0x1c8),
713
714         NOP(3),
715         LRI(9, POSTED),
716         REG16(0x3a8),
717         REG16(0x28c),
718         REG16(0x288),
719         REG16(0x284),
720         REG16(0x280),
721         REG16(0x27c),
722         REG16(0x278),
723         REG16(0x274),
724         REG16(0x270),
725
726         NOP(13),
727         LRI(1, POSTED),
728         REG16(0x200),
729
730         NOP(13),
731         LRI(44, POSTED),
732         REG(0x028),
733         REG(0x09c),
734         REG(0x0c0),
735         REG(0x178),
736         REG(0x17c),
737         REG16(0x358),
738         REG(0x170),
739         REG(0x150),
740         REG(0x154),
741         REG(0x158),
742         REG16(0x41c),
743         REG16(0x600),
744         REG16(0x604),
745         REG16(0x608),
746         REG16(0x60c),
747         REG16(0x610),
748         REG16(0x614),
749         REG16(0x618),
750         REG16(0x61c),
751         REG16(0x620),
752         REG16(0x624),
753         REG16(0x628),
754         REG16(0x62c),
755         REG16(0x630),
756         REG16(0x634),
757         REG16(0x638),
758         REG16(0x63c),
759         REG16(0x640),
760         REG16(0x644),
761         REG16(0x648),
762         REG16(0x64c),
763         REG16(0x650),
764         REG16(0x654),
765         REG16(0x658),
766         REG16(0x65c),
767         REG16(0x660),
768         REG16(0x664),
769         REG16(0x668),
770         REG16(0x66c),
771         REG16(0x670),
772         REG16(0x674),
773         REG16(0x678),
774         REG16(0x67c),
775         REG(0x068),
776
777         END(176)
778 };
779
780 static const u8 gen12_xcs_offsets[] = {
781         NOP(1),
782         LRI(13, POSTED),
783         REG16(0x244),
784         REG(0x034),
785         REG(0x030),
786         REG(0x038),
787         REG(0x03c),
788         REG(0x168),
789         REG(0x140),
790         REG(0x110),
791         REG(0x1c0),
792         REG(0x1c4),
793         REG(0x1c8),
794         REG(0x180),
795         REG16(0x2b4),
796
797         NOP(5),
798         LRI(9, POSTED),
799         REG16(0x3a8),
800         REG16(0x28c),
801         REG16(0x288),
802         REG16(0x284),
803         REG16(0x280),
804         REG16(0x27c),
805         REG16(0x278),
806         REG16(0x274),
807         REG16(0x270),
808
809         END(80)
810 };
811
812 static const u8 gen8_rcs_offsets[] = {
813         NOP(1),
814         LRI(14, POSTED),
815         REG16(0x244),
816         REG(0x034),
817         REG(0x030),
818         REG(0x038),
819         REG(0x03c),
820         REG(0x168),
821         REG(0x140),
822         REG(0x110),
823         REG(0x11c),
824         REG(0x114),
825         REG(0x118),
826         REG(0x1c0),
827         REG(0x1c4),
828         REG(0x1c8),
829
830         NOP(3),
831         LRI(9, POSTED),
832         REG16(0x3a8),
833         REG16(0x28c),
834         REG16(0x288),
835         REG16(0x284),
836         REG16(0x280),
837         REG16(0x27c),
838         REG16(0x278),
839         REG16(0x274),
840         REG16(0x270),
841
842         NOP(13),
843         LRI(1, 0),
844         REG(0x0c8),
845
846         END(80)
847 };
848
849 static const u8 gen9_rcs_offsets[] = {
850         NOP(1),
851         LRI(14, POSTED),
852         REG16(0x244),
853         REG(0x34),
854         REG(0x30),
855         REG(0x38),
856         REG(0x3c),
857         REG(0x168),
858         REG(0x140),
859         REG(0x110),
860         REG(0x11c),
861         REG(0x114),
862         REG(0x118),
863         REG(0x1c0),
864         REG(0x1c4),
865         REG(0x1c8),
866
867         NOP(3),
868         LRI(9, POSTED),
869         REG16(0x3a8),
870         REG16(0x28c),
871         REG16(0x288),
872         REG16(0x284),
873         REG16(0x280),
874         REG16(0x27c),
875         REG16(0x278),
876         REG16(0x274),
877         REG16(0x270),
878
879         NOP(13),
880         LRI(1, 0),
881         REG(0xc8),
882
883         NOP(13),
884         LRI(44, POSTED),
885         REG(0x28),
886         REG(0x9c),
887         REG(0xc0),
888         REG(0x178),
889         REG(0x17c),
890         REG16(0x358),
891         REG(0x170),
892         REG(0x150),
893         REG(0x154),
894         REG(0x158),
895         REG16(0x41c),
896         REG16(0x600),
897         REG16(0x604),
898         REG16(0x608),
899         REG16(0x60c),
900         REG16(0x610),
901         REG16(0x614),
902         REG16(0x618),
903         REG16(0x61c),
904         REG16(0x620),
905         REG16(0x624),
906         REG16(0x628),
907         REG16(0x62c),
908         REG16(0x630),
909         REG16(0x634),
910         REG16(0x638),
911         REG16(0x63c),
912         REG16(0x640),
913         REG16(0x644),
914         REG16(0x648),
915         REG16(0x64c),
916         REG16(0x650),
917         REG16(0x654),
918         REG16(0x658),
919         REG16(0x65c),
920         REG16(0x660),
921         REG16(0x664),
922         REG16(0x668),
923         REG16(0x66c),
924         REG16(0x670),
925         REG16(0x674),
926         REG16(0x678),
927         REG16(0x67c),
928         REG(0x68),
929
930         END(176)
931 };
932
933 static const u8 gen11_rcs_offsets[] = {
934         NOP(1),
935         LRI(15, POSTED),
936         REG16(0x244),
937         REG(0x034),
938         REG(0x030),
939         REG(0x038),
940         REG(0x03c),
941         REG(0x168),
942         REG(0x140),
943         REG(0x110),
944         REG(0x11c),
945         REG(0x114),
946         REG(0x118),
947         REG(0x1c0),
948         REG(0x1c4),
949         REG(0x1c8),
950         REG(0x180),
951
952         NOP(1),
953         LRI(9, POSTED),
954         REG16(0x3a8),
955         REG16(0x28c),
956         REG16(0x288),
957         REG16(0x284),
958         REG16(0x280),
959         REG16(0x27c),
960         REG16(0x278),
961         REG16(0x274),
962         REG16(0x270),
963
964         LRI(1, POSTED),
965         REG(0x1b0),
966
967         NOP(10),
968         LRI(1, 0),
969         REG(0x0c8),
970
971         END(80)
972 };
973
974 static const u8 gen12_rcs_offsets[] = {
975         NOP(1),
976         LRI(13, POSTED),
977         REG16(0x244),
978         REG(0x034),
979         REG(0x030),
980         REG(0x038),
981         REG(0x03c),
982         REG(0x168),
983         REG(0x140),
984         REG(0x110),
985         REG(0x1c0),
986         REG(0x1c4),
987         REG(0x1c8),
988         REG(0x180),
989         REG16(0x2b4),
990
991         NOP(5),
992         LRI(9, POSTED),
993         REG16(0x3a8),
994         REG16(0x28c),
995         REG16(0x288),
996         REG16(0x284),
997         REG16(0x280),
998         REG16(0x27c),
999         REG16(0x278),
1000         REG16(0x274),
1001         REG16(0x270),
1002
1003         LRI(3, POSTED),
1004         REG(0x1b0),
1005         REG16(0x5a8),
1006         REG16(0x5ac),
1007
1008         NOP(6),
1009         LRI(1, 0),
1010         REG(0x0c8),
1011         NOP(3 + 9 + 1),
1012
1013         LRI(51, POSTED),
1014         REG16(0x588),
1015         REG16(0x588),
1016         REG16(0x588),
1017         REG16(0x588),
1018         REG16(0x588),
1019         REG16(0x588),
1020         REG(0x028),
1021         REG(0x09c),
1022         REG(0x0c0),
1023         REG(0x178),
1024         REG(0x17c),
1025         REG16(0x358),
1026         REG(0x170),
1027         REG(0x150),
1028         REG(0x154),
1029         REG(0x158),
1030         REG16(0x41c),
1031         REG16(0x600),
1032         REG16(0x604),
1033         REG16(0x608),
1034         REG16(0x60c),
1035         REG16(0x610),
1036         REG16(0x614),
1037         REG16(0x618),
1038         REG16(0x61c),
1039         REG16(0x620),
1040         REG16(0x624),
1041         REG16(0x628),
1042         REG16(0x62c),
1043         REG16(0x630),
1044         REG16(0x634),
1045         REG16(0x638),
1046         REG16(0x63c),
1047         REG16(0x640),
1048         REG16(0x644),
1049         REG16(0x648),
1050         REG16(0x64c),
1051         REG16(0x650),
1052         REG16(0x654),
1053         REG16(0x658),
1054         REG16(0x65c),
1055         REG16(0x660),
1056         REG16(0x664),
1057         REG16(0x668),
1058         REG16(0x66c),
1059         REG16(0x670),
1060         REG16(0x674),
1061         REG16(0x678),
1062         REG16(0x67c),
1063         REG(0x068),
1064         REG(0x084),
1065         NOP(1),
1066
1067         END(192)
1068 };
1069
1070 #undef END
1071 #undef REG16
1072 #undef REG
1073 #undef LRI
1074 #undef NOP
1075
1076 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1077 {
1078         /*
1079          * The gen12+ lists only have the registers we program in the basic
1080          * default state. We rely on the context image using relative
1081  * addressing to automatically fix up the register state between the
1082  * physical engines for the virtual engine.
1083          */
1084         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1085                    !intel_engine_has_relative_mmio(engine));
1086
1087         if (engine->class == RENDER_CLASS) {
1088                 if (INTEL_GEN(engine->i915) >= 12)
1089                         return gen12_rcs_offsets;
1090                 else if (INTEL_GEN(engine->i915) >= 11)
1091                         return gen11_rcs_offsets;
1092                 else if (INTEL_GEN(engine->i915) >= 9)
1093                         return gen9_rcs_offsets;
1094                 else
1095                         return gen8_rcs_offsets;
1096         } else {
1097                 if (INTEL_GEN(engine->i915) >= 12)
1098                         return gen12_xcs_offsets;
1099                 else if (INTEL_GEN(engine->i915) >= 9)
1100                         return gen9_xcs_offsets;
1101                 else
1102                         return gen8_xcs_offsets;
1103         }
1104 }
1105
1106 static struct i915_request *
1107 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1108 {
1109         struct i915_request *rq, *rn, *active = NULL;
1110         struct list_head *pl;
1111         int prio = I915_PRIORITY_INVALID;
1112
1113         lockdep_assert_held(&engine->active.lock);
1114
1115         list_for_each_entry_safe_reverse(rq, rn,
1116                                          &engine->active.requests,
1117                                          sched.link) {
1118                 if (i915_request_completed(rq))
1119                         continue; /* XXX */
1120
1121                 __i915_request_unsubmit(rq);
1122
1123                 /*
1124                  * Push the request back into the queue for later resubmission.
1125                  * If this request is not native to this physical engine (i.e.
1126                  * it came from a virtual source), push it back onto the virtual
1127                  * engine so that it can be moved across onto another physical
1128                  * engine as load dictates.
1129                  */
1130                 if (likely(rq->execution_mask == engine->mask)) {
1131                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1132                         if (rq_prio(rq) != prio) {
1133                                 prio = rq_prio(rq);
1134                                 pl = i915_sched_lookup_priolist(engine, prio);
1135                         }
1136                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1137
1138                         list_move(&rq->sched.link, pl);
1139                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1140
1141                         /* Check in case we roll back so far that we wrap [size/2] */
1142                         if (intel_ring_direction(rq->ring,
1143                                                  rq->tail,
1144                                                  rq->ring->tail + 8) > 0)
1145                                 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1146
1147                         active = rq;
1148                 } else {
1149                         struct intel_engine_cs *owner = rq->context->engine;
1150
1151                         WRITE_ONCE(rq->engine, owner);
1152                         owner->submit_request(rq);
1153                         active = NULL;
1154                 }
1155         }
1156
1157         return active;
1158 }
1159
1160 struct i915_request *
1161 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1162 {
1163         struct intel_engine_cs *engine =
1164                 container_of(execlists, typeof(*engine), execlists);
1165
1166         return __unwind_incomplete_requests(engine);
1167 }
1168
1169 static inline void
1170 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1171 {
1172         /*
1173          * Only used when GVT-g is enabled now. When GVT-g is disabled,
1174          * the compiler should eliminate this function as dead code.
1175          */
1176         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1177                 return;
1178
1179         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1180                                    status, rq);
1181 }
1182
1183 static void intel_engine_context_in(struct intel_engine_cs *engine)
1184 {
1185         unsigned long flags;
1186
1187         if (atomic_add_unless(&engine->stats.active, 1, 0))
1188                 return;
1189
1190         write_seqlock_irqsave(&engine->stats.lock, flags);
1191         if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1192                 engine->stats.start = ktime_get();
1193                 atomic_inc(&engine->stats.active);
1194         }
1195         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1196 }
1197
1198 static void intel_engine_context_out(struct intel_engine_cs *engine)
1199 {
1200         unsigned long flags;
1201
1202         GEM_BUG_ON(!atomic_read(&engine->stats.active));
1203
1204         if (atomic_add_unless(&engine->stats.active, -1, 1))
1205                 return;
1206
1207         write_seqlock_irqsave(&engine->stats.lock, flags);
1208         if (atomic_dec_and_test(&engine->stats.active)) {
1209                 engine->stats.total =
1210                         ktime_add(engine->stats.total,
1211                                   ktime_sub(ktime_get(), engine->stats.start));
1212         }
1213         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1214 }
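/*
 * Illustrative sketch (not built): sampling the busyness accumulated by
 * the context_in/out hooks above under the stats seqlock. The helper name
 * is hypothetical; this is roughly what the engine-busyness query used by
 * the i915 PMU does.
 */
#if 0
static ktime_t sketch_engine_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqbegin(&engine->stats.lock);

		total = engine->stats.total;
		if (atomic_read(&engine->stats.active))
			total = ktime_add(total,
					  ktime_sub(ktime_get(),
						    engine->stats.start));
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}
#endif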
1215
1216 static void
1217 execlists_check_context(const struct intel_context *ce,
1218                         const struct intel_engine_cs *engine)
1219 {
1220         const struct intel_ring *ring = ce->ring;
1221         u32 *regs = ce->lrc_reg_state;
1222         bool valid = true;
1223         int x;
1224
1225         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1226                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1227                        engine->name,
1228                        regs[CTX_RING_START],
1229                        i915_ggtt_offset(ring->vma));
1230                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1231                 valid = false;
1232         }
1233
1234         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1235             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1236                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1237                        engine->name,
1238                        regs[CTX_RING_CTL],
1239                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1240                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1241                 valid = false;
1242         }
1243
1244         x = lrc_ring_mi_mode(engine);
1245         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1246                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1247                        engine->name, regs[x + 1]);
1248                 regs[x + 1] &= ~STOP_RING;
1249                 regs[x + 1] |= STOP_RING << 16;
1250                 valid = false;
1251         }
1252
1253         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1254 }
1255
1256 static void restore_default_state(struct intel_context *ce,
1257                                   struct intel_engine_cs *engine)
1258 {
1259         u32 *regs;
1260
1261         regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1262         execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1263
1264         ce->runtime.last = intel_context_get_runtime(ce);
1265 }
1266
1267 static void reset_active(struct i915_request *rq,
1268                          struct intel_engine_cs *engine)
1269 {
1270         struct intel_context * const ce = rq->context;
1271         u32 head;
1272
1273         /*
1274          * The executing context has been cancelled. We want to prevent
1275          * further execution along this context and propagate the error on
1276          * to anything depending on its results.
1277          *
1278          * In __i915_request_submit(), we apply the -EIO and remove the
1279          * requests' payloads for any banned requests. But first, we must
1280          * rewind the context back to the start of the incomplete request so
1281          * that we do not jump back into the middle of the batch.
1282          *
1283          * We preserve the breadcrumbs and semaphores of the incomplete
1284          * requests so that inter-timeline dependencies (i.e other timelines)
1285          * remain correctly ordered. And we defer to __i915_request_submit()
1286          * so that all asynchronous waits are correctly handled.
1287          */
1288         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1289                      rq->fence.context, rq->fence.seqno);
1290
1291         /* On resubmission of the active request, payload will be scrubbed */
1292         if (i915_request_completed(rq))
1293                 head = rq->tail;
1294         else
1295                 head = active_request(ce->timeline, rq)->head;
1296         head = intel_ring_wrap(ce->ring, head);
1297
1298         /* Scrub the context image to prevent replaying the previous batch */
1299         restore_default_state(ce, engine);
1300         __execlists_update_reg_state(ce, engine, head);
1301
1302         /* We've switched away, so this should be a no-op, but intent matters */
1303         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1304 }
1305
1306 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1307 {
1308 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1309         ce->runtime.num_underflow += dt < 0;
1310         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1311 #endif
1312 }
1313
1314 static void intel_context_update_runtime(struct intel_context *ce)
1315 {
1316         u32 old;
1317         s32 dt;
1318
1319         if (intel_context_is_barrier(ce))
1320                 return;
1321
1322         old = ce->runtime.last;
1323         ce->runtime.last = intel_context_get_runtime(ce);
1324         dt = ce->runtime.last - old;
1325
1326         if (unlikely(dt <= 0)) {
1327                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1328                          old, ce->runtime.last, dt);
1329                 st_update_runtime_underflow(ce, dt);
1330                 return;
1331         }
1332
1333         ewma_runtime_add(&ce->runtime.avg, dt);
1334         ce->runtime.total += dt;
1335 }
1336
1337 static inline struct intel_engine_cs *
1338 __execlists_schedule_in(struct i915_request *rq)
1339 {
1340         struct intel_engine_cs * const engine = rq->engine;
1341         struct intel_context * const ce = rq->context;
1342
1343         intel_context_get(ce);
1344
1345         if (unlikely(intel_context_is_banned(ce)))
1346                 reset_active(rq, engine);
1347
1348         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1349                 execlists_check_context(ce, engine);
1350
1351         if (ce->tag) {
1352                 /* Use a fixed tag for OA and friends */
1353                 GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1354                 ce->lrc.ccid = ce->tag;
1355         } else {
1356                 /* We don't need a strict matching tag, just different values */
1357                 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1358
1359                 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1360                 clear_bit(tag - 1, &engine->context_tag);
1361                 ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1362
1363                 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1364         }
1365
1366         ce->lrc.ccid |= engine->execlists.ccid;
1367
1368         __intel_gt_pm_get(engine->gt);
1369         if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1370                 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1371         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1372         intel_engine_context_in(engine);
1373
1374         return engine;
1375 }
1376
1377 static inline struct i915_request *
1378 execlists_schedule_in(struct i915_request *rq, int idx)
1379 {
1380         struct intel_context * const ce = rq->context;
1381         struct intel_engine_cs *old;
1382
1383         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1384         trace_i915_request_in(rq, idx);
1385
1386         old = READ_ONCE(ce->inflight);
1387         do {
1388                 if (!old) {
1389                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1390                         break;
1391                 }
1392         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1393
1394         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1395         return i915_request_get(rq);
1396 }
1397
1398 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1399 {
1400         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1401         struct i915_request *next = READ_ONCE(ve->request);
1402
1403         if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1404                 tasklet_hi_schedule(&ve->base.execlists.tasklet);
1405 }
1406
1407 static inline void
1408 __execlists_schedule_out(struct i915_request *rq,
1409                          struct intel_engine_cs * const engine,
1410                          unsigned int ccid)
1411 {
1412         struct intel_context * const ce = rq->context;
1413
1414         /*
1415          * NB process_csb() is not under the engine->active.lock and hence
1416          * schedule_out can race with schedule_in meaning that we should
1417          * refrain from doing non-trivial work here.
1418          */
1419
1420         /*
1421          * If we have just completed this context, the engine may now be
1422          * idle and we want to re-enter powersaving.
1423          */
1424         if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1425             i915_request_completed(rq))
1426                 intel_engine_add_retire(engine, ce->timeline);
1427
1428         ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1429         ccid &= GEN12_MAX_CONTEXT_HW_ID;
1430         if (ccid < BITS_PER_LONG) {
1431                 GEM_BUG_ON(ccid == 0);
1432                 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1433                 set_bit(ccid - 1, &engine->context_tag);
1434         }
1435
1436         intel_context_update_runtime(ce);
1437         intel_engine_context_out(engine);
1438         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1439         if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1440                 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1441         intel_gt_pm_put_async(engine->gt);
1442
1443         /*
1444          * If this is part of a virtual engine, its next request may
1445          * have been blocked waiting for access to the active context.
1446          * We have to kick all the siblings again in case we need to
1447          * switch (e.g. the next request is not runnable on this
1448          * engine). Hopefully, we will already have submitted the next
1449          * request before the tasklet runs and do not need to rebuild
1450          * each virtual tree and kick everyone again.
1451          */
1452         if (ce->engine != engine)
1453                 kick_siblings(rq, ce);
1454
1455         intel_context_put(ce);
1456 }
1457
1458 static inline void
1459 execlists_schedule_out(struct i915_request *rq)
1460 {
1461         struct intel_context * const ce = rq->context;
1462         struct intel_engine_cs *cur, *old;
1463         u32 ccid;
1464
1465         trace_i915_request_out(rq);
1466
1467         ccid = rq->context->lrc.ccid;
1468         old = READ_ONCE(ce->inflight);
1469         do
1470                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1471         while (!try_cmpxchg(&ce->inflight, &old, cur));
1472         if (!cur)
1473                 __execlists_schedule_out(rq, old, ccid);
1474
1475         i915_request_put(rq);
1476 }
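/*
 * Note on ce->inflight as manipulated above: it packs the engine pointer
 * together with a small count in the low pointer bits (ptr_inc()/ptr_dec()
 * and ptr_unmask_bits(.., 2)). execlists_schedule_in() installs the engine
 * on first use and bumps the count when the context is scheduled in again
 * while still inflight; execlists_schedule_out() decrements it, and only
 * the final schedule_out (low bits already zero) clears the pointer and
 * calls __execlists_schedule_out().
 */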
1477
1478 static u64 execlists_update_context(struct i915_request *rq)
1479 {
1480         struct intel_context *ce = rq->context;
1481         u64 desc = ce->lrc.desc;
1482         u32 tail, prev;
1483
1484         /*
1485          * WaIdleLiteRestore:bdw,skl
1486          *
1487          * We should never submit the context with the same RING_TAIL twice
1488          * just in case we submit an empty ring, which confuses the HW.
1489          *
1490          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1491          * the normal request to be able to always advance the RING_TAIL on
1492          * subsequent resubmissions (for lite restore). Should that fail us,
1493          * and we try and submit the same tail again, force the context
1494          * reload.
1495          *
1496          * If we need to return to a preempted context, we need to skip the
1497          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1498          * HW has a tendency to ignore us rewinding the TAIL to the end of
1499          * an earlier request.
1500          */
1501         GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1502         prev = rq->ring->tail;
1503         tail = intel_ring_set_tail(rq->ring, rq->tail);
1504         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1505                 desc |= CTX_DESC_FORCE_RESTORE;
1506         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1507         rq->tail = rq->wa_tail;
1508
1509         /*
1510          * Make sure the context image is complete before we submit it to HW.
1511          *
1512          * Ostensibly, writes (including the WCB) should be flushed prior to
1513          * an uncached write such as our mmio register access, but the empirical
1514          * evidence (esp. on Braswell) suggests that the WC write into memory
1515          * may not be visible to the HW prior to the completion of the UC
1516          * register write and that we may begin execution from the context
1517          * before its image is complete, leading to invalid PD chasing.
1518          */
1519         wmb();
1520
1521         ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1522         return desc;
1523 }
1524
1525 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1526 {
1527         if (execlists->ctrl_reg) {
1528                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1529                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1530         } else {
1531                 writel(upper_32_bits(desc), execlists->submit_reg);
1532                 writel(lower_32_bits(desc), execlists->submit_reg);
1533         }
1534 }
1535
1536 static __maybe_unused char *
1537 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1538 {
1539         if (!rq)
1540                 return "";
1541
1542         snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1543                  prefix,
1544                  rq->context->lrc.ccid,
1545                  rq->fence.context, rq->fence.seqno,
1546                  i915_request_completed(rq) ? "!" :
1547                  i915_request_started(rq) ? "*" :
1548                  "",
1549                  rq_prio(rq));
1550
1551         return buf;
1552 }
1553
1554 static __maybe_unused void
1555 trace_ports(const struct intel_engine_execlists *execlists,
1556             const char *msg,
1557             struct i915_request * const *ports)
1558 {
1559         const struct intel_engine_cs *engine =
1560                 container_of(execlists, typeof(*engine), execlists);
1561         char __maybe_unused p0[40], p1[40];
1562
1563         if (!ports[0])
1564                 return;
1565
1566         ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1567                      dump_port(p0, sizeof(p0), "", ports[0]),
1568                      dump_port(p1, sizeof(p1), ", ", ports[1]));
1569 }
1570
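/*
 * The submission tasklet is disabled for the duration of a reset, so a
 * disabled tasklet doubles up as our reset-in-progress marker (see the
 * assertion in process_csb()).
 */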
1571 static inline bool
1572 reset_in_progress(const struct intel_engine_execlists *execlists)
1573 {
1574         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1575 }
1576
1577 static __maybe_unused bool
1578 assert_pending_valid(const struct intel_engine_execlists *execlists,
1579                      const char *msg)
1580 {
1581         struct intel_engine_cs *engine =
1582                 container_of(execlists, typeof(*engine), execlists);
1583         struct i915_request * const *port, *rq;
1584         struct intel_context *ce = NULL;
1585         bool sentinel = false;
1586         u32 ccid = -1;
1587
1588         trace_ports(execlists, msg, execlists->pending);
1589
1590         /* We may be messing around with the lists during reset, lalala */
1591         if (reset_in_progress(execlists))
1592                 return true;
1593
1594         if (!execlists->pending[0]) {
1595                 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1596                               engine->name);
1597                 return false;
1598         }
1599
1600         if (execlists->pending[execlists_num_ports(execlists)]) {
1601                 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1602                               engine->name, execlists_num_ports(execlists));
1603                 return false;
1604         }
1605
1606         for (port = execlists->pending; (rq = *port); port++) {
1607                 unsigned long flags;
1608                 bool ok = true;
1609
1610                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1611                 GEM_BUG_ON(!i915_request_is_active(rq));
1612
1613                 if (ce == rq->context) {
1614                         GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1615                                       engine->name,
1616                                       ce->timeline->fence_context,
1617                                       port - execlists->pending);
1618                         return false;
1619                 }
1620                 ce = rq->context;
1621
1622                 if (ccid == ce->lrc.ccid) {
1623                         GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1624                                       engine->name,
1625                                       ccid, ce->timeline->fence_context,
1626                                       port - execlists->pending);
1627                         return false;
1628                 }
1629                 ccid = ce->lrc.ccid;
1630
1631                 /*
1632                  * Sentinels are supposed to be the last request so they flush
1633                  * the current execution off the HW. Check that a sentinel is
1634                  * the only request in the pending submission.
1635                  */
1636                 if (sentinel) {
1637                         GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1638                                       engine->name,
1639                                       ce->timeline->fence_context,
1640                                       port - execlists->pending);
1641                         return false;
1642                 }
1643                 sentinel = i915_request_has_sentinel(rq);
1644
1645                 /* Hold tightly onto the lock to prevent concurrent retires! */
1646                 if (!spin_trylock_irqsave(&rq->lock, flags))
1647                         continue;
1648
1649                 if (i915_request_completed(rq))
1650                         goto unlock;
1651
1652                 if (i915_active_is_idle(&ce->active) &&
1653                     !intel_context_is_barrier(ce)) {
1654                         GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1655                                       engine->name,
1656                                       ce->timeline->fence_context,
1657                                       port - execlists->pending);
1658                         ok = false;
1659                         goto unlock;
1660                 }
1661
1662                 if (!i915_vma_is_pinned(ce->state)) {
1663                         GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1664                                       engine->name,
1665                                       ce->timeline->fence_context,
1666                                       port - execlists->pending);
1667                         ok = false;
1668                         goto unlock;
1669                 }
1670
1671                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1672                         GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1673                                       engine->name,
1674                                       ce->timeline->fence_context,
1675                                       port - execlists->pending);
1676                         ok = false;
1677                         goto unlock;
1678                 }
1679
1680 unlock:
1681                 spin_unlock_irqrestore(&rq->lock, flags);
1682                 if (!ok)
1683                         return false;
1684         }
1685
1686         return ce;
1687 }
1688
1689 static void execlists_submit_ports(struct intel_engine_cs *engine)
1690 {
1691         struct intel_engine_execlists *execlists = &engine->execlists;
1692         unsigned int n;
1693
1694         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1695
1696         /*
1697          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1698          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1699          * not be relinquished until the device is idle (see
1700          * i915_gem_idle_work_handler()). As a precaution, we make sure
1701          * that all ELSP are drained i.e. we have processed the CSB,
1702          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1703          */
1704         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1705
1706         /*
1707          * ELSQ note: the submit queue is not cleared after being submitted
1708          * to the HW so we need to make sure we always clean it up. This is
1709          * currently ensured by the fact that we always write the same number
1710          * of elsq entries, keep this in mind before changing the loop below.
1711          */
1712         for (n = execlists_num_ports(execlists); n--; ) {
1713                 struct i915_request *rq = execlists->pending[n];
1714
1715                 write_desc(execlists,
1716                            rq ? execlists_update_context(rq) : 0,
1717                            n);
1718         }
1719
1720         /* we need to manually load the submit queue */
1721         if (execlists->ctrl_reg)
1722                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1723 }
1724
1725 static bool ctx_single_port_submission(const struct intel_context *ce)
1726 {
1727         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1728                 intel_context_force_single_submission(ce));
1729 }
1730
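/*
 * Consecutive requests from the same context can share a single ELSP
 * port (one RING_TAIL update covers them all), unless GVT has forced the
 * context into single-port submission.
 */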
1731 static bool can_merge_ctx(const struct intel_context *prev,
1732                           const struct intel_context *next)
1733 {
1734         if (prev != next)
1735                 return false;
1736
1737         if (ctx_single_port_submission(prev))
1738                 return false;
1739
1740         return true;
1741 }
1742
1743 static unsigned long i915_request_flags(const struct i915_request *rq)
1744 {
1745         return READ_ONCE(rq->fence.flags);
1746 }
1747
1748 static bool can_merge_rq(const struct i915_request *prev,
1749                          const struct i915_request *next)
1750 {
1751         GEM_BUG_ON(prev == next);
1752         GEM_BUG_ON(!assert_priority_queue(prev, next));
1753
1754         /*
1755          * We do not submit known completed requests. Therefore if the next
1756          * request is already completed, we can pretend to merge it in
1757          * with the previous context (and we will skip updating the ELSP
1758          * and tracking). Thus hopefully keeping the ELSP full with active
1759          * contexts, despite the best efforts of preempt-to-busy to confuse
1760          * us.
1761          */
1762         if (i915_request_completed(next))
1763                 return true;
1764
1765         if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1766                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1767                       BIT(I915_FENCE_FLAG_SENTINEL))))
1768                 return false;
1769
1770         if (!can_merge_ctx(prev->context, next->context))
1771                 return false;
1772
1773         GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1774         return true;
1775 }
1776
1777 static void virtual_update_register_offsets(u32 *regs,
1778                                             struct intel_engine_cs *engine)
1779 {
1780         set_offsets(regs, reg_offsets(engine), engine, false);
1781 }
1782
1783 static bool virtual_matches(const struct virtual_engine *ve,
1784                             const struct i915_request *rq,
1785                             const struct intel_engine_cs *engine)
1786 {
1787         const struct intel_engine_cs *inflight;
1788
1789         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1790                 return false;
1791
1792         /*
1793          * We track when the HW has completed saving the context image
1794          * (i.e. when we have seen the final CS event switching out of
1795          * the context) and must not overwrite the context image before
1796          * then. This restricts us to only using the active engine
1797          * while the previous virtualized request is inflight (so
1798          * we reuse the register offsets). This is a very small
1799          * hysteresis on the greedy selection algorithm.
1800          */
1801         inflight = intel_context_inflight(&ve->context);
1802         if (inflight && inflight != engine)
1803                 return false;
1804
1805         return true;
1806 }
1807
1808 static void virtual_xfer_context(struct virtual_engine *ve,
1809                                  struct intel_engine_cs *engine)
1810 {
1811         unsigned int n;
1812
1813         if (likely(engine == ve->siblings[0]))
1814                 return;
1815
1816         GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1817         if (!intel_engine_has_relative_mmio(engine))
1818                 virtual_update_register_offsets(ve->context.lrc_reg_state,
1819                                                 engine);
1820
1821         /*
1822          * Move the bound engine to the top of the list for
1823          * future execution. We then kick this tasklet first
1824          * before checking others, so that we preferentially
1825          * reuse this set of bound registers.
1826          */
1827         for (n = 1; n < ve->num_siblings; n++) {
1828                 if (ve->siblings[n] == engine) {
1829                         swap(ve->siblings[n], ve->siblings[0]);
1830                         break;
1831                 }
1832         }
1833 }
1834
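/*
 * Iterators over the scheduler dependency lists: for_each_waiter() uses
 * the lockless list walker and for_each_signaler() RCU, so the caller is
 * responsible for keeping the requests alive for the duration of the
 * walk.
 */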
1835 #define for_each_waiter(p__, rq__) \
1836         list_for_each_entry_lockless(p__, \
1837                                      &(rq__)->sched.waiters_list, \
1838                                      wait_link)
1839
1840 #define for_each_signaler(p__, rq__) \
1841         list_for_each_entry_rcu(p__, \
1842                                 &(rq__)->sched.signalers_list, \
1843                                 signal_link)
1844
1845 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1846 {
1847         LIST_HEAD(list);
1848
1849         /*
1850          * We want to move the interrupted request to the back of
1851          * the round-robin list (i.e. its priority level), but
1852          * in doing so, we must then move all requests that were in
1853          * flight and were waiting for the interrupted request to
1854          * be run after it again.
1855          */
1856         do {
1857                 struct i915_dependency *p;
1858
1859                 GEM_BUG_ON(i915_request_is_active(rq));
1860                 list_move_tail(&rq->sched.link, pl);
1861
1862                 for_each_waiter(p, rq) {
1863                         struct i915_request *w =
1864                                 container_of(p->waiter, typeof(*w), sched);
1865
1866                         if (p->flags & I915_DEPENDENCY_WEAK)
1867                                 continue;
1868
1869                         /* Leave semaphores spinning on the other engines */
1870                         if (w->engine != rq->engine)
1871                                 continue;
1872
1873                         /* No waiter should start before its signaler */
1874                         GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1875                                    i915_request_started(w) &&
1876                                    !i915_request_completed(rq));
1877
1878                         GEM_BUG_ON(i915_request_is_active(w));
1879                         if (!i915_request_is_ready(w))
1880                                 continue;
1881
1882                         if (rq_prio(w) < rq_prio(rq))
1883                                 continue;
1884
1885                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1886                         list_move_tail(&w->sched.link, &list);
1887                 }
1888
1889                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1890         } while (rq);
1891 }
1892
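/*
 * On timeslice expiry: unwind the incomplete requests from the HW and,
 * via defer_request(), push the first of them (together with the waiters
 * that must run after it) to the back of its priority level so that a
 * different context can be dequeued.
 */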
1893 static void defer_active(struct intel_engine_cs *engine)
1894 {
1895         struct i915_request *rq;
1896
1897         rq = __unwind_incomplete_requests(engine);
1898         if (!rq)
1899                 return;
1900
1901         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1902 }
1903
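/*
 * A timeslice is only worth starting if there is another runnable
 * request -- the next request on the active list, the top of the
 * priority queue, or a matching virtual engine request -- of at least
 * the same effective priority as @rq.
 */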
1904 static bool
1905 need_timeslice(const struct intel_engine_cs *engine,
1906                const struct i915_request *rq,
1907                const struct rb_node *rb)
1908 {
1909         int hint;
1910
1911         if (!intel_engine_has_timeslices(engine))
1912                 return false;
1913
1914         hint = engine->execlists.queue_priority_hint;
1915
1916         if (rb) {
1917                 const struct virtual_engine *ve =
1918                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1919                 const struct intel_engine_cs *inflight =
1920                         intel_context_inflight(&ve->context);
1921
1922                 if (!inflight || inflight == engine) {
1923                         struct i915_request *next;
1924
1925                         rcu_read_lock();
1926                         next = READ_ONCE(ve->request);
1927                         if (next)
1928                                 hint = max(hint, rq_prio(next));
1929                         rcu_read_unlock();
1930                 }
1931         }
1932
1933         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1934                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1935
1936         GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1937         return hint >= effective_prio(rq);
1938 }
1939
1940 static bool
1941 timeslice_yield(const struct intel_engine_execlists *el,
1942                 const struct i915_request *rq)
1943 {
1944         /*
1945          * Once bitten, forever smitten!
1946          *
1947          * If the active context ever busy-waited on a semaphore,
1948          * it will be treated as a hog until the end of its timeslice (i.e.
1949          * until it is scheduled out and replaced by a new submission,
1950          * possibly even its own lite-restore). The HW only sends an interrupt
1951          * on the first miss, and we do not know if that semaphore has been
1952          * signaled, or even if it is now stuck on another semaphore. Play
1953          * safe, yield if it might be stuck -- it will be given a fresh
1954          * timeslice in the near future.
1955          */
1956         return rq->context->lrc.ccid == READ_ONCE(el->yield);
1957 }
1958
1959 static bool
1960 timeslice_expired(const struct intel_engine_execlists *el,
1961                   const struct i915_request *rq)
1962 {
1963         return timer_expired(&el->timer) || timeslice_yield(el, rq);
1964 }
1965
1966 static int
1967 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1968 {
1969         if (list_is_last(&rq->sched.link, &engine->active.requests))
1970                 return engine->execlists.queue_priority_hint;
1971
1972         return rq_prio(list_next_entry(rq, sched.link));
1973 }
1974
1975 static inline unsigned long
1976 timeslice(const struct intel_engine_cs *engine)
1977 {
1978         return READ_ONCE(engine->props.timeslice_duration_ms);
1979 }
1980
1981 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1982 {
1983         const struct intel_engine_execlists *execlists = &engine->execlists;
1984         const struct i915_request *rq = *execlists->active;
1985
1986         if (!rq || i915_request_completed(rq))
1987                 return 0;
1988
1989         if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1990                 return 0;
1991
1992         return timeslice(engine);
1993 }
1994
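/*
 * (Re)arm the timeslice timer for the request currently on the HW. A
 * zero duration (no active request, request already completed, or a
 * pending higher-priority switch) is used to stop the timer.
 */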
1995 static void set_timeslice(struct intel_engine_cs *engine)
1996 {
1997         unsigned long duration;
1998
1999         if (!intel_engine_has_timeslices(engine))
2000                 return;
2001
2002         duration = active_timeslice(engine);
2003         ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2004
2005         set_timer_ms(&engine->execlists.timer, duration);
2006 }
2007
2008 static void start_timeslice(struct intel_engine_cs *engine, int prio)
2009 {
2010         struct intel_engine_execlists *execlists = &engine->execlists;
2011         unsigned long duration;
2012
2013         if (!intel_engine_has_timeslices(engine))
2014                 return;
2015
2016         WRITE_ONCE(execlists->switch_priority_hint, prio);
2017         if (prio == INT_MIN)
2018                 return;
2019
2020         if (timer_pending(&execlists->timer))
2021                 return;
2022
2023         duration = timeslice(engine);
2024         ENGINE_TRACE(engine,
2025                      "start timeslicing, prio:%d, interval:%lu",
2026                      prio, duration);
2027
2028         set_timer_ms(&execlists->timer, duration);
2029 }
2030
2031 static void record_preemption(struct intel_engine_execlists *execlists)
2032 {
2033         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2034 }
2035
2036 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2037                                             const struct i915_request *rq)
2038 {
2039         if (!rq)
2040                 return 0;
2041
2042         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2043         if (unlikely(intel_context_is_banned(rq->context)))
2044                 return 1;
2045
2046         return READ_ONCE(engine->props.preempt_timeout_ms);
2047 }
2048
2049 static void set_preempt_timeout(struct intel_engine_cs *engine,
2050                                 const struct i915_request *rq)
2051 {
2052         if (!intel_engine_has_preempt_reset(engine))
2053                 return;
2054
2055         set_timer_ms(&engine->execlists.preempt,
2056                      active_preempt_timeout(engine, rq));
2057 }
2058
2059 static inline void clear_ports(struct i915_request **ports, int count)
2060 {
2061         memset_p((void **)ports, NULL, count);
2062 }
2063
2064 static inline void
2065 copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2066 {
2067         /* A memcpy_p() would be very useful here! */
2068         while (count--)
2069                 WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
2070 }
2071
2072 static void execlists_dequeue(struct intel_engine_cs *engine)
2073 {
2074         struct intel_engine_execlists * const execlists = &engine->execlists;
2075         struct i915_request **port = execlists->pending;
2076         struct i915_request ** const last_port = port + execlists->port_mask;
2077         struct i915_request * const *active;
2078         struct i915_request *last;
2079         struct rb_node *rb;
2080         bool submit = false;
2081
2082         /*
2083          * Hardware submission is through 2 ports. Conceptually each port
2084          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2085          * static for a context, and unique to each, so we only execute
2086          * requests belonging to a single context from each ring. RING_HEAD
2087          * is maintained by the CS in the context image, it marks the place
2088          * where it got up to last time, and through RING_TAIL we tell the CS
2089          * where we want to execute up to this time.
2090          *
2091          * In this list the requests are in order of execution. Consecutive
2092          * requests from the same context are adjacent in the ringbuffer. We
2093          * can combine these requests into a single RING_TAIL update:
2094          *
2095          *              RING_HEAD...req1...req2
2096          *                                    ^- RING_TAIL
2097          * since to execute req2 the CS must first execute req1.
2098          *
2099          * Our goal then is to point each port to the end of a consecutive
2100          * sequence of requests as being the most optimal (fewest wake ups
2101          * and context switches) submission.
2102          */
2103
2104         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2105                 struct virtual_engine *ve =
2106                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2107                 struct i915_request *rq = READ_ONCE(ve->request);
2108
2109                 if (!rq) { /* lazily cleanup after another engine handled rq */
2110                         rb_erase_cached(rb, &execlists->virtual);
2111                         RB_CLEAR_NODE(rb);
2112                         rb = rb_first_cached(&execlists->virtual);
2113                         continue;
2114                 }
2115
2116                 if (!virtual_matches(ve, rq, engine)) {
2117                         rb = rb_next(rb);
2118                         continue;
2119                 }
2120
2121                 break;
2122         }
2123
2124         /*
2125          * If the queue is higher priority than the last
2126          * request in the currently active context, submit afresh.
2127          * We will resubmit again afterwards in case we need to split
2128          * the active context to interject the preemption request,
2129          * i.e. we will retrigger preemption following the ack in case
2130          * of trouble.
2131          */
2132         active = READ_ONCE(execlists->active);
2133
2134         /*
2135          * In theory we can skip over completed contexts that have not
2136          * yet been processed by events (as those events are in flight):
2137          *
2138          * while ((last = *active) && i915_request_completed(last))
2139          *      active++;
2140          *
2141          * However, the GPU cannot handle this as it will ultimately
2142          * find itself trying to jump back into a context it has just
2143          * completed and barf.
2144          */
2145
2146         if ((last = *active)) {
2147                 if (need_preempt(engine, last, rb)) {
2148                         if (i915_request_completed(last)) {
2149                                 tasklet_hi_schedule(&execlists->tasklet);
2150                                 return;
2151                         }
2152
2153                         ENGINE_TRACE(engine,
2154                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2155                                      last->fence.context,
2156                                      last->fence.seqno,
2157                                      last->sched.attr.priority,
2158                                      execlists->queue_priority_hint);
2159                         record_preemption(execlists);
2160
2161                         /*
2162                          * Don't let the RING_HEAD advance past the breadcrumb
2163                          * as we unwind (and until we resubmit) so that we do
2164                          * not accidentally tell it to go backwards.
2165                          */
2166                         ring_set_paused(engine, 1);
2167
2168                         /*
2169                          * Note that we have not stopped the GPU at this point,
2170                          * so we are unwinding the incomplete requests as they
2171                          * remain inflight and so by the time we do complete
2172                          * the preemption, some of the unwound requests may
2173                          * complete!
2174                          */
2175                         __unwind_incomplete_requests(engine);
2176
2177                         last = NULL;
2178                 } else if (need_timeslice(engine, last, rb) &&
2179                            timeslice_expired(execlists, last)) {
2180                         if (i915_request_completed(last)) {
2181                                 tasklet_hi_schedule(&execlists->tasklet);
2182                                 return;
2183                         }
2184
2185                         ENGINE_TRACE(engine,
2186                                      "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2187                                      last->fence.context,
2188                                      last->fence.seqno,
2189                                      last->sched.attr.priority,
2190                                      execlists->queue_priority_hint,
2191                                      yesno(timeslice_yield(execlists, last)));
2192
2193                         ring_set_paused(engine, 1);
2194                         defer_active(engine);
2195
2196                         /*
2197                          * Unlike for preemption, if we rewind and continue
2198                          * executing the same context as previously active,
2199                          * the order of execution will remain the same and
2200                          * the tail will only advance. We do not need to
2201                          * force a full context restore, as a lite-restore
2202                          * is sufficient to resample the monotonic TAIL.
2203                          *
2204                          * If we switch to any other context, similarly we
2205                          * will not rewind TAIL of current context, and
2206                          * normal save/restore will preserve state and allow
2207                          * us to later continue executing the same request.
2208                          */
2209                         last = NULL;
2210                 } else {
2211                         /*
2212                          * Otherwise if we already have a request pending
2213                          * for execution after the current one, we can
2214                          * just wait until the next CS event before
2215                          * queuing more. In either case we will force a
2216                          * lite-restore preemption event, but if we wait
2217                          * we hopefully coalesce several updates into a single
2218                          * submission.
2219                          */
2220                         if (!list_is_last(&last->sched.link,
2221                                           &engine->active.requests)) {
2222                                 /*
2223                                  * Even if ELSP[1] is occupied and not worthy
2224                                  * of timeslices, our queue might be.
2225                                  */
2226                                 start_timeslice(engine, queue_prio(execlists));
2227                                 return;
2228                         }
2229                 }
2230         }
2231
2232         while (rb) { /* XXX virtual is always taking precedence */
2233                 struct virtual_engine *ve =
2234                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2235                 struct i915_request *rq;
2236
2237                 spin_lock(&ve->base.active.lock);
2238
2239                 rq = ve->request;
2240                 if (unlikely(!rq)) { /* lost the race to a sibling */
2241                         spin_unlock(&ve->base.active.lock);
2242                         rb_erase_cached(rb, &execlists->virtual);
2243                         RB_CLEAR_NODE(rb);
2244                         rb = rb_first_cached(&execlists->virtual);
2245                         continue;
2246                 }
2247
2248                 GEM_BUG_ON(rq != ve->request);
2249                 GEM_BUG_ON(rq->engine != &ve->base);
2250                 GEM_BUG_ON(rq->context != &ve->context);
2251
2252                 if (rq_prio(rq) >= queue_prio(execlists)) {
2253                         if (!virtual_matches(ve, rq, engine)) {
2254                                 spin_unlock(&ve->base.active.lock);
2255                                 rb = rb_next(rb);
2256                                 continue;
2257                         }
2258
2259                         if (last && !can_merge_rq(last, rq)) {
2260                                 spin_unlock(&ve->base.active.lock);
2261                                 start_timeslice(engine, rq_prio(rq));
2262                                 return; /* leave this for another sibling */
2263                         }
2264
2265                         ENGINE_TRACE(engine,
2266                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
2267                                      rq->fence.context,
2268                                      rq->fence.seqno,
2269                                      i915_request_completed(rq) ? "!" :
2270                                      i915_request_started(rq) ? "*" :
2271                                      "",
2272                                      yesno(engine != ve->siblings[0]));
2273
2274                         WRITE_ONCE(ve->request, NULL);
2275                         WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2276                                    INT_MIN);
2277                         rb_erase_cached(rb, &execlists->virtual);
2278                         RB_CLEAR_NODE(rb);
2279
2280                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2281                         WRITE_ONCE(rq->engine, engine);
2282
2283                         if (__i915_request_submit(rq)) {
2284                                 /*
2285                                  * Only after we confirm that we will submit
2286                                  * this request (i.e. it has not already
2287                                  * completed), do we want to update the context.
2288                                  *
2289                                  * This serves two purposes. It avoids
2290                                  * unnecessary work if we are resubmitting an
2291                                  * already completed request after timeslicing.
2292                                  * But more importantly, it prevents us altering
2293                                  * ve->siblings[] on an idle context, where
2294                                  * we may be using ve->siblings[] in
2295                                  * virtual_context_enter / virtual_context_exit.
2296                                  */
2297                                 virtual_xfer_context(ve, engine);
2298                                 GEM_BUG_ON(ve->siblings[0] != engine);
2299
2300                                 submit = true;
2301                                 last = rq;
2302                         }
2303                         i915_request_put(rq);
2304
2305                         /*
2306                          * Hmm, we have a bunch of virtual engine requests,
2307                          * but the first one was already completed (thanks
2308                          * preempt-to-busy!). Keep looking at the veng queue
2309                          * until we have no more relevant requests (i.e.
2310                          * the normal submit queue has higher priority).
2311                          */
2312                         if (!submit) {
2313                                 spin_unlock(&ve->base.active.lock);
2314                                 rb = rb_first_cached(&execlists->virtual);
2315                                 continue;
2316                         }
2317                 }
2318
2319                 spin_unlock(&ve->base.active.lock);
2320                 break;
2321         }
2322
2323         while ((rb = rb_first_cached(&execlists->queue))) {
2324                 struct i915_priolist *p = to_priolist(rb);
2325                 struct i915_request *rq, *rn;
2326                 int i;
2327
2328                 priolist_for_each_request_consume(rq, rn, p, i) {
2329                         bool merge = true;
2330
2331                         /*
2332                          * Can we combine this request with the current port?
2333                          * It has to be the same context/ringbuffer and not
2334                          * have any exceptions (e.g. GVT saying never to
2335                          * combine contexts).
2336                          *
2337                          * If we can combine the requests, we can execute both
2338                          * by updating the RING_TAIL to point to the end of the
2339                          * second request, and so we never need to tell the
2340                          * hardware about the first.
2341                          */
2342                         if (last && !can_merge_rq(last, rq)) {
2343                                 /*
2344                                  * If we are on the second port and cannot
2345                                  * combine this request with the last, then we
2346                                  * are done.
2347                                  */
2348                                 if (port == last_port)
2349                                         goto done;
2350
2351                                 /*
2352                                  * We must not populate both ELSP[] with the
2353                                  * same LRCA, i.e. we must submit 2 different
2354                                  * contexts if we submit 2 ELSP.
2355                                  */
2356                                 if (last->context == rq->context)
2357                                         goto done;
2358
2359                                 if (i915_request_has_sentinel(last))
2360                                         goto done;
2361
2362                                 /*
2363                                  * If GVT overrides us we only ever submit
2364                                  * port[0], leaving port[1] empty. Note that we
2365                                  * also have to be careful that we don't queue
2366                                  * the same context (even though a different
2367                                  * request) to the second port.
2368                                  */
2369                                 if (ctx_single_port_submission(last->context) ||
2370                                     ctx_single_port_submission(rq->context))
2371                                         goto done;
2372
2373                                 merge = false;
2374                         }
2375
2376                         if (__i915_request_submit(rq)) {
2377                                 if (!merge) {
2378                                         *port = execlists_schedule_in(last, port - execlists->pending);
2379                                         port++;
2380                                         last = NULL;
2381                                 }
2382
2383                                 GEM_BUG_ON(last &&
2384                                            !can_merge_ctx(last->context,
2385                                                           rq->context));
2386                                 GEM_BUG_ON(last &&
2387                                            i915_seqno_passed(last->fence.seqno,
2388                                                              rq->fence.seqno));
2389
2390                                 submit = true;
2391                                 last = rq;
2392                         }
2393                 }
2394
2395                 rb_erase_cached(&p->node, &execlists->queue);
2396                 i915_priolist_free(p);
2397         }
2398
2399 done:
2400         /*
2401          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2402          *
2403          * We choose the priority hint such that if we add a request of greater
2404          * priority than this, we kick the submission tasklet to decide on
2405          * the right order of submitting the requests to hardware. We must
2406          * also be prepared to reorder requests as they are in-flight on the
2407          * HW. We derive the priority hint then as the first "hole" in
2408          * the HW submission ports and if there are no available slots,
2409          * the priority of the lowest executing request, i.e. last.
2410          *
2411          * When we do receive a higher priority request ready to run from the
2412          * user, see queue_request(), the priority hint is bumped to that
2413          * request triggering preemption on the next dequeue (or subsequent
2414          * interrupt for secondary ports).
2415          */
2416         execlists->queue_priority_hint = queue_prio(execlists);
2417
2418         if (submit) {
2419                 *port = execlists_schedule_in(last, port - execlists->pending);
2420                 execlists->switch_priority_hint =
2421                         switch_prio(engine, *execlists->pending);
2422
2423                 /*
2424                  * Skip if we ended up with exactly the same set of requests,
2425                  * e.g. trying to timeslice a pair of ordered contexts
2426                  */
2427                 if (!memcmp(active, execlists->pending,
2428                             (port - execlists->pending + 1) * sizeof(*port))) {
2429                         do
2430                                 execlists_schedule_out(fetch_and_zero(port));
2431                         while (port-- != execlists->pending);
2432
2433                         goto skip_submit;
2434                 }
2435                 clear_ports(port + 1, last_port - port);
2436
2437                 WRITE_ONCE(execlists->yield, -1);
2438                 set_preempt_timeout(engine, *active);
2439                 execlists_submit_ports(engine);
2440         } else {
2441                 start_timeslice(engine, execlists->queue_priority_hint);
2442 skip_submit:
2443                 ring_set_paused(engine, 0);
2444         }
2445 }
2446
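/*
 * Reset path: schedule out everything we had queued in pending[] and
 * inflight[], then point execlists->active back at the cleared inflight
 * array. The smp_wmb() completes the seqlock-style protocol observed by
 * execlists_active() readers.
 */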
2447 static void
2448 cancel_port_requests(struct intel_engine_execlists * const execlists)
2449 {
2450         struct i915_request * const *port;
2451
2452         for (port = execlists->pending; *port; port++)
2453                 execlists_schedule_out(*port);
2454         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2455
2456         /* Mark the end of active before we overwrite *active */
2457         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2458                 execlists_schedule_out(*port);
2459         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2460
2461         smp_wmb(); /* complete the seqlock for execlists_active() */
2462         WRITE_ONCE(execlists->active, execlists->inflight);
2463 }
2464
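/*
 * Evict the cachelines holding the first and last CSB entries so that
 * the next update from the GPU is read fresh from memory rather than a
 * stale cacheline; see the Gen11 workaround note at the end of
 * process_csb().
 */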
2465 static inline void
2466 invalidate_csb_entries(const u64 *first, const u64 *last)
2467 {
2468         clflush((void *)first);
2469         clflush((void *)last);
2470 }
2471
2472 /*
2473  * Starting with Gen12, the status has a new format:
2474  *
2475  *     bit  0:     switched to new queue
2476  *     bit  1:     reserved
2477  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2478  *                 switch detail is set to "wait on semaphore"
2479  *     bits 3-5:   engine class
2480  *     bits 6-11:  engine instance
2481  *     bits 12-14: reserved
2482  *     bits 15-25: sw context id of the lrc the GT switched to
2483  *     bits 26-31: sw counter of the lrc the GT switched to
2484  *     bits 32-35: context switch detail
2485  *                  - 0: ctx complete
2486  *                  - 1: wait on sync flip
2487  *                  - 2: wait on vblank
2488  *                  - 3: wait on scanline
2489  *                  - 4: wait on semaphore
2490  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2491  *                       WAIT_FOR_EVENT)
2492  *     bit  36:    reserved
2493  *     bits 37-43: wait detail (for switch detail 1 to 4)
2494  *     bits 44-46: reserved
2495  *     bits 47-57: sw context id of the lrc the GT switched away from
2496  *     bits 58-63: sw counter of the lrc the GT switched away from
2497  */
2498 static inline bool gen12_csb_parse(const u64 *csb)
2499 {
2500         bool ctx_away_valid;
2501         bool new_queue;
2502         u64 entry;
2503
2504         /* HSD#22011248461 */
2505         entry = READ_ONCE(*csb);
2506         if (unlikely(entry == -1)) {
2507                 preempt_disable();
2508                 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
2509                         GEM_WARN_ON("50us CSB timeout");
2510                 preempt_enable();
2511         }
2512         WRITE_ONCE(*(u64 *)csb, -1);
2513
2514         ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
2515         new_queue =
2516                 lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2517
2518         /*
2519          * The context switch detail is not guaranteed to be 5 when a preemption
2520          * occurs, so we can't just check for that. The check below works for
2521          * all the cases we care about, including preemptions of WAIT
2522          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2523          * would require some extra handling, but we don't support that.
2524          */
2525         if (!ctx_away_valid || new_queue) {
2526                 GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
2527                 return true;
2528         }
2529
2530         /*
2531          * switch detail = 5 is covered by the case above and we do not expect a
2532          * context switch on an unsuccessful wait instruction since we always
2533          * use polling mode.
2534          */
2535         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
2536         return false;
2537 }
2538
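/*
 * Before Gen12, promotion of pending[] to inflight is signalled by a
 * simple status bitmask: either an idle->active transition or a
 * preemption event.
 */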
2539 static inline bool gen8_csb_parse(const u64 *csb)
2540 {
2541         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2542 }
2543
2544 static void process_csb(struct intel_engine_cs *engine)
2545 {
2546         struct intel_engine_execlists * const execlists = &engine->execlists;
2547         const u64 * const buf = execlists->csb_status;
2548         const u8 num_entries = execlists->csb_size;
2549         u8 head, tail;
2550
2551         /*
2552          * As we modify our execlists state tracking we require exclusive
2553          * access. Either we are inside the tasklet, or the tasklet is disabled
2554          * and we assume that is only inside the reset paths and so serialised.
2555          */
2556         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2557                    !reset_in_progress(execlists));
2558         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2559
2560         /*
2561          * Note that csb_write, csb_status may be either in HWSP or mmio.
2562          * When reading from the csb_write mmio register, we have to be
2563          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2564          * the low 4bits. As it happens we know the next 4bits are always
2565          * zero and so we can simply mask off the low u8 of the register
2566          * and treat it identically to reading from the HWSP (without having
2567          * to use explicit shifting and masking, and probably bifurcating
2568          * the code to handle the legacy mmio read).
2569          */
2570         head = execlists->csb_head;
2571         tail = READ_ONCE(*execlists->csb_write);
2572         if (unlikely(head == tail))
2573                 return;
2574
2575         /*
2576          * We will consume all events from HW, or at least pretend to.
2577          *
2578          * The sequence of events from the HW is deterministic, and derived
2579          * from our writes to the ELSP, with a smidgen of variability for
2580          * the arrival of the asynchronous requests wrt the inflight
2581          * execution. If the HW sends an event that does not correspond with
2582          * the one we are expecting, we have to abandon all hope as we lose
2583          * all tracking of what the engine is actually executing. We will
2584          * only detect we are out of sequence with the HW when we get an
2585          * 'impossible' event because we have already drained our own
2586          * preemption/promotion queue. If this occurs, we know that we likely
2587          * lost track of execution earlier and must unwind and restart, the
2588          * simplest way is to stop processing the event queue and force the
2589          * engine to reset.
2590          */
2591         execlists->csb_head = tail;
2592         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2593
2594         /*
2595          * Hopefully paired with a wmb() in HW!
2596          *
2597          * We must complete the read of the write pointer before any reads
2598          * from the CSB, so that we do not see stale values. Without an rmb
2599          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2600          * we perform the READ_ONCE(*csb_write).
2601          */
2602         rmb();
2603         do {
2604                 bool promote;
2605
2606                 if (++head == num_entries)
2607                         head = 0;
2608
2609                 /*
2610                  * We are flying near dragons again.
2611                  *
2612                  * We hold a reference to the request in execlist_port[]
2613                  * but no more than that. We are operating in softirq
2614                  * context and so cannot hold any mutex or sleep. That
2615                  * prevents us stopping the requests we are processing
2616                  * in port[] from being retired simultaneously (the
2617                  * breadcrumb will be complete before we see the
2618                  * context-switch). As we only hold the reference to the
2619                  * request, any pointer chasing underneath the request
2620                  * is subject to a potential use-after-free. Thus we
2621                  * store all of the bookkeeping within port[] as
2622                  * required, and avoid using unguarded pointers beneath
2623                  * request itself. The same applies to the atomic
2624                  * status notifier.
2625                  */
2626
2627                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2628                              head,
2629                              upper_32_bits(buf[head]),
2630                              lower_32_bits(buf[head]));
2631
2632                 if (INTEL_GEN(engine->i915) >= 12)
2633                         promote = gen12_csb_parse(buf + head);
2634                 else
2635                         promote = gen8_csb_parse(buf + head);
2636                 if (promote) {
2637                         struct i915_request * const *old = execlists->active;
2638
2639                         if (GEM_WARN_ON(!*execlists->pending)) {
2640                                 execlists->error_interrupt |= ERROR_CSB;
2641                                 break;
2642                         }
2643
2644                         ring_set_paused(engine, 0);
2645
2646                         /* Point active to the new ELSP; prevent overwriting */
2647                         WRITE_ONCE(execlists->active, execlists->pending);
2648                         smp_wmb(); /* notify execlists_active() */
2649
2650                         /* cancel old inflight, prepare for switch */
2651                         trace_ports(execlists, "preempted", old);
2652                         while (*old)
2653                                 execlists_schedule_out(*old++);
2654
2655                         /* switch pending to inflight */
2656                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2657                         copy_ports(execlists->inflight,
2658                                    execlists->pending,
2659                                    execlists_num_ports(execlists));
2660                         smp_wmb(); /* complete the seqlock */
2661                         WRITE_ONCE(execlists->active, execlists->inflight);
2662
2663                         /* XXX Magic delay for tgl */
2664                         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2665
2666                         WRITE_ONCE(execlists->pending[0], NULL);
2667                 } else {
2668                         if (GEM_WARN_ON(!*execlists->active)) {
2669                                 execlists->error_interrupt |= ERROR_CSB;
2670                                 break;
2671                         }
2672
2673                         /* port0 completed, advanced to port1 */
2674                         trace_ports(execlists, "completed", execlists->active);
2675
2676                         /*
2677                          * We rely on the hardware being strongly
2678                          * ordered, that the breadcrumb write is
2679                          * coherent (visible from the CPU) before the
2680                          * user interrupt is processed. One might assume
2681                          * that the breadcrumb write being before the
2682                          * user interrupt and the CS event for the context
2683                          * switch would therefore be before the CS event
2684                          * itself...
2685                          */
2686                         if (GEM_SHOW_DEBUG() &&
2687                             !i915_request_completed(*execlists->active)) {
2688                                 struct i915_request *rq = *execlists->active;
2689                                 const u32 *regs __maybe_unused =
2690                                         rq->context->lrc_reg_state;
2691
2692                                 ENGINE_TRACE(engine,
2693                                              "context completed before request!\n");
2694                                 ENGINE_TRACE(engine,
2695                                              "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2696                                              ENGINE_READ(engine, RING_START),
2697                                              ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2698                                              ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2699                                              ENGINE_READ(engine, RING_CTL),
2700                                              ENGINE_READ(engine, RING_MI_MODE));
2701                                 ENGINE_TRACE(engine,
2702                                              "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2703                                              i915_ggtt_offset(rq->ring->vma),
2704                                              rq->head, rq->tail,
2705                                              rq->fence.context,
2706                                              lower_32_bits(rq->fence.seqno),
2707                                              hwsp_seqno(rq));
2708                                 ENGINE_TRACE(engine,
2709                                              "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2710                                              regs[CTX_RING_START],
2711                                              regs[CTX_RING_HEAD],
2712                                              regs[CTX_RING_TAIL]);
2713                         }
2714
2715                         execlists_schedule_out(*execlists->active++);
2716
2717                         GEM_BUG_ON(execlists->active - execlists->inflight >
2718                                    execlists_num_ports(execlists));
2719                 }
2720         } while (head != tail);
2721
2722         set_timeslice(engine);
2723
2724         /*
2725          * Gen11 has proven to fail wrt global observation point between
2726          * entry and tail update, failing on the ordering and thus
2727          * we see an old entry in the context status buffer.
2728          *
2729          * Forcibly evict out entries for the next gpu csb update,
2730          * to increase the odds that we get fresh entries even with non-
2731          * working hardware. The cost of doing so comes out mostly in
2732          * the wash, as hardware, working or not, will need to do the
2733          * invalidation beforehand anyway.
2734          */
2735         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2736 }
2737
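/*
 * Only dequeue further requests once the previous ELSP write has been
 * acknowledged by the HW, i.e. once process_csb() has consumed
 * pending[0].
 */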
2738 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2739 {
2740         lockdep_assert_held(&engine->active.lock);
2741         if (!READ_ONCE(engine->execlists.pending[0])) {
2742                 rcu_read_lock(); /* protect peeking at execlists->active */
2743                 execlists_dequeue(engine);
2744                 rcu_read_unlock();
2745         }
2746 }
2747
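/*
 * Take @rq (and every ready waiter of it on this engine) off the
 * execution queues and park them on engine->active.hold. The dependency
 * graph is walked iteratively via a local list rather than by recursion.
 */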
2748 static void __execlists_hold(struct i915_request *rq)
2749 {
2750         LIST_HEAD(list);
2751
2752         do {
2753                 struct i915_dependency *p;
2754
2755                 if (i915_request_is_active(rq))
2756                         __i915_request_unsubmit(rq);
2757
2758                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2759                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2760                 i915_request_set_hold(rq);
2761                 RQ_TRACE(rq, "on hold\n");
2762
2763                 for_each_waiter(p, rq) {
2764                         struct i915_request *w =
2765                                 container_of(p->waiter, typeof(*w), sched);
2766
2767                         /* Leave semaphores spinning on the other engines */
2768                         if (w->engine != rq->engine)
2769                                 continue;
2770
2771                         if (!i915_request_is_ready(w))
2772                                 continue;
2773
2774                         if (i915_request_completed(w))
2775                                 continue;
2776
2777                         if (i915_request_on_hold(w))
2778                                 continue;
2779
2780                         list_move_tail(&w->sched.link, &list);
2781                 }
2782
2783                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2784         } while (rq);
2785 }
2786
2787 static bool execlists_hold(struct intel_engine_cs *engine,
2788                            struct i915_request *rq)
2789 {
2790         spin_lock_irq(&engine->active.lock);
2791
2792         if (i915_request_completed(rq)) { /* too late! */
2793                 rq = NULL;
2794                 goto unlock;
2795         }
2796
2797         if (rq->engine != engine) { /* preempted virtual engine */
2798                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2799
2800                 /*
2801                  * intel_context_inflight() is only protected by virtue
2802                  * of process_csb() being called only by the tasklet (or
2803                  * directly from inside reset while the tasklet is suspended).
2804                  * Assert that neither of those are allowed to run while we
2805                  * poke at the request queues.
2806                  */
2807                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2808
2809                 /*
2810                  * An unsubmitted request along a virtual engine will
2811                  * remain on the active (this) engine until we are able
2812                  * to process the context switch away (and so mark the
2813                  * context as no longer in flight). That cannot have happened
2814                  * yet, otherwise we would not be hanging!
2815                  */
2816                 spin_lock(&ve->base.active.lock);
2817                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2818                 GEM_BUG_ON(ve->request != rq);
2819                 ve->request = NULL;
2820                 spin_unlock(&ve->base.active.lock);
2821                 i915_request_put(rq);
2822
2823                 rq->engine = engine;
2824         }
2825
2826         /*
2827          * Transfer this request onto the hold queue to prevent it
2828          * being resubmitted to HW (and potentially completed) before we have
2829          * released it. Since we may have already submitted following
2830          * requests, we need to remove those as well.
2831          */
2832         GEM_BUG_ON(i915_request_on_hold(rq));
2833         GEM_BUG_ON(rq->engine != engine);
2834         __execlists_hold(rq);
2835         GEM_BUG_ON(list_empty(&engine->active.hold));
2836
2837 unlock:
2838         spin_unlock_irq(&engine->active.lock);
2839         return rq;
2840 }
2841
2842 static bool hold_request(const struct i915_request *rq)
2843 {
2844         struct i915_dependency *p;
2845         bool result = false;
2846
2847         /*
2848          * If one of our ancestors is on hold, we must also be on hold,
2849          * otherwise we will bypass it and execute before it.
2850          */
2851         rcu_read_lock();
2852         for_each_signaler(p, rq) {
2853                 const struct i915_request *s =
2854                         container_of(p->signaler, typeof(*s), sched);
2855
2856                 if (s->engine != rq->engine)
2857                         continue;
2858
2859                 result = i915_request_on_hold(s);
2860                 if (result)
2861                         break;
2862         }
2863         rcu_read_unlock();
2864
2865         return result;
2866 }
2867
2868 static void __execlists_unhold(struct i915_request *rq)
2869 {
2870         LIST_HEAD(list);
2871
2872         do {
2873                 struct i915_dependency *p;
2874
2875                 RQ_TRACE(rq, "hold release\n");
2876
2877                 GEM_BUG_ON(!i915_request_on_hold(rq));
2878                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2879
2880                 i915_request_clear_hold(rq);
2881                 list_move_tail(&rq->sched.link,
2882                                i915_sched_lookup_priolist(rq->engine,
2883                                                           rq_prio(rq)));
2884                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2885
2886                 /* Also release any children on this engine that are ready */
2887                 for_each_waiter(p, rq) {
2888                         struct i915_request *w =
2889                                 container_of(p->waiter, typeof(*w), sched);
2890
2891                         /* Propagate any change in error status */
2892                         if (rq->fence.error)
2893                                 i915_request_set_error_once(w, rq->fence.error);
2894
2895                         if (w->engine != rq->engine)
2896                                 continue;
2897
2898                         if (!i915_request_on_hold(w))
2899                                 continue;
2900
2901                         /* Check that no other parents are also on hold */
2902                         if (hold_request(w))
2903                                 continue;
2904
2905                         list_move_tail(&w->sched.link, &list);
2906                 }
2907
2908                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2909         } while (rq);
2910 }
2911
2912 static void execlists_unhold(struct intel_engine_cs *engine,
2913                              struct i915_request *rq)
2914 {
2915         spin_lock_irq(&engine->active.lock);
2916
2917         /*
2918          * Move this request back to the priority queue, and all of its
2919          * children and grandchildren that were suspended along with it.
2920          */
2921         __execlists_unhold(rq);
2922
2923         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2924                 engine->execlists.queue_priority_hint = rq_prio(rq);
2925                 tasklet_hi_schedule(&engine->execlists.tasklet);
2926         }
2927
2928         spin_unlock_irq(&engine->active.lock);
2929 }
2930
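     /*
      * State handed from the preempt-timeout path to a worker: a reference
      * on the offending request plus a preallocated coredump for the worker
      * to fill in (slowly) outside of the atomic reset context.
      */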
2931 struct execlists_capture {
2932         struct work_struct work;
2933         struct i915_request *rq;
2934         struct i915_gpu_coredump *error;
2935 };
2936
2937 static void execlists_capture_work(struct work_struct *work)
2938 {
2939         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2940         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2941         struct intel_engine_cs *engine = cap->rq->engine;
2942         struct intel_gt_coredump *gt = cap->error->gt;
2943         struct intel_engine_capture_vma *vma;
2944
2945         /* Compress all the objects attached to the request, slow! */
2946         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2947         if (vma) {
2948                 struct i915_vma_compress *compress =
2949                         i915_vma_capture_prepare(gt);
2950
2951                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2952                 i915_vma_capture_finish(gt, compress);
2953         }
2954
2955         gt->simulated = gt->engine->simulated;
2956         cap->error->simulated = gt->simulated;
2957
2958         /* Publish the error state, and announce it to the world */
2959         i915_error_state_store(cap->error);
2960         i915_gpu_coredump_put(cap->error);
2961
2962         /* Return this request and all that depend upon it for signaling */
2963         execlists_unhold(engine, cap->rq);
2964         i915_request_put(cap->rq);
2965
2966         kfree(cap);
2967 }
2968
2969 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2970 {
2971         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2972         struct execlists_capture *cap;
2973
2974         cap = kmalloc(sizeof(*cap), gfp);
2975         if (!cap)
2976                 return NULL;
2977
2978         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2979         if (!cap->error)
2980                 goto err_cap;
2981
2982         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2983         if (!cap->error->gt)
2984                 goto err_gpu;
2985
2986         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2987         if (!cap->error->gt->engine)
2988                 goto err_gt;
2989
2990         return cap;
2991
2992 err_gt:
2993         kfree(cap->error->gt);
2994 err_gpu:
2995         kfree(cap->error);
2996 err_cap:
2997         kfree(cap);
2998         return NULL;
2999 }
3000
3001 static struct i915_request *
3002 active_context(struct intel_engine_cs *engine, u32 ccid)
3003 {
3004         const struct intel_engine_execlists * const el = &engine->execlists;
3005         struct i915_request * const *port, *rq;
3006
3007         /*
3008          * Use the most recent result from process_csb(), but just in case
3009          * we trigger an error (via interrupt) before the first CS event has
3010          * been written, peek at the next submission.
3011          */
3012
3013         for (port = el->active; (rq = *port); port++) {
3014                 if (rq->context->lrc.ccid == ccid) {
3015                         ENGINE_TRACE(engine,
3016                                      "ccid found at active:%zd\n",
3017                                      port - el->active);
3018                         return rq;
3019                 }
3020         }
3021
3022         for (port = el->pending; (rq = *port); port++) {
3023                 if (rq->context->lrc.ccid == ccid) {
3024                         ENGINE_TRACE(engine,
3025                                      "ccid found at pending:%zd\n",
3026                                      port - el->pending);
3027                         return rq;
3028                 }
3029         }
3030
3031         ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3032         return NULL;
3033 }
3034
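     /*
      * The upper dword of the execlist status register carries the context
      * ID (CCID) of the context currently active on the engine; see
      * active_context() above for how it is matched against our requests.
      */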
3035 static u32 active_ccid(struct intel_engine_cs *engine)
3036 {
3037         return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3038 }
3039
3040 static void execlists_capture(struct intel_engine_cs *engine)
3041 {
3042         struct execlists_capture *cap;
3043
3044         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3045                 return;
3046
3047         /*
3048          * We need to _quickly_ capture the engine state before we reset.
3049          * We are inside an atomic section (softirq) here and we are delaying
3050          * the forced preemption event.
3051          */
3052         cap = capture_regs(engine);
3053         if (!cap)
3054                 return;
3055
3056         spin_lock_irq(&engine->active.lock);
3057         cap->rq = active_context(engine, active_ccid(engine));
3058         if (cap->rq) {
3059                 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3060                 cap->rq = i915_request_get_rcu(cap->rq);
3061         }
3062         spin_unlock_irq(&engine->active.lock);
3063         if (!cap->rq)
3064                 goto err_free;
3065
3066         /*
3067          * Remove the request from the execlists queue, and take ownership
3068          * of the request. We pass it to our worker who will _slowly_ compress
3069          * all the pages the _user_ requested for debugging their batch, after
3070          * which we return it to the queue for signaling.
3071          *
3072          * By removing them from the execlists queue, we also remove the
3073          * requests from being processed by __unwind_incomplete_requests()
3074          * during the intel_engine_reset(), and so they will *not* be replayed
3075          * afterwards.
3076          *
3077          * Note that because we have not yet reset the engine at this point,
3078          * it is possible that the request we have identified as being
3079          * guilty did in fact complete and we will then hit an arbitration
3080          * point allowing the outstanding preemption to succeed. The likelihood
3081          * of that is very low (as capturing of the engine registers should be
3082          * fast enough to run inside an irq-off atomic section!), so we will
3083          * simply hold that request accountable for being non-preemptible
3084          * long enough to force the reset.
3085          */
3086         if (!execlists_hold(engine, cap->rq))
3087                 goto err_rq;
3088
3089         INIT_WORK(&cap->work, execlists_capture_work);
3090         schedule_work(&cap->work);
3091         return;
3092
3093 err_rq:
3094         i915_request_put(cap->rq);
3095 err_free:
3096         i915_gpu_coredump_put(cap->error);
3097         kfree(cap);
3098 }
3099
3100 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3101 {
3102         const unsigned int bit = I915_RESET_ENGINE + engine->id;
3103         unsigned long *lock = &engine->gt->reset.flags;
3104
3105         if (!intel_has_reset_engine(engine->gt))
3106                 return;
3107
3108         if (test_and_set_bit(bit, lock))
3109                 return;
3110
3111         ENGINE_TRACE(engine, "reset for %s\n", msg);
3112
3113         /* Mark this tasklet as disabled to avoid waiting for it to complete */
3114         tasklet_disable_nosync(&engine->execlists.tasklet);
3115
3116         ring_set_paused(engine, 1); /* Freeze the current request in place */
3117         execlists_capture(engine);
3118         intel_engine_reset(engine, msg);
3119
3120         tasklet_enable(&engine->execlists.tasklet);
3121         clear_and_wake_up_bit(bit, lock);
3122 }
3123
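     /*
      * A preemption timeout has occurred if the preempt timer expired while
      * an ELSP submission is still pending acknowledgement from the HW.
      */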
3124 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3125 {
3126         const struct timer_list *t = &engine->execlists.preempt;
3127
3128         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3129                 return false;
3130
3131         if (!timer_expired(t))
3132                 return false;
3133
3134         return READ_ONCE(engine->execlists.pending[0]);
3135 }
3136
3137 /*
3138  * Check the unread Context Status Buffers and manage the submission of new
3139  * contexts to the ELSP accordingly.
3140  */
3141 static void execlists_submission_tasklet(unsigned long data)
3142 {
3143         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3144         bool timeout = preempt_timeout(engine);
3145
3146         process_csb(engine);
3147
3148         if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3149                 const char *msg;
3150
3151                 /* Generate the error message in priority wrt the user! */
3152                 if (engine->execlists.error_interrupt & GENMASK(15, 0))
3153                         msg = "CS error"; /* thrown by a user payload */
3154                 else if (engine->execlists.error_interrupt & ERROR_CSB)
3155                         msg = "invalid CSB event";
3156                 else
3157                         msg = "internal error";
3158
3159                 engine->execlists.error_interrupt = 0;
3160                 execlists_reset(engine, msg);
3161         }
3162
3163         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3164                 unsigned long flags;
3165
3166                 spin_lock_irqsave(&engine->active.lock, flags);
3167                 __execlists_submission_tasklet(engine);
3168                 spin_unlock_irqrestore(&engine->active.lock, flags);
3169
3170                 /* Recheck after serialising with direct-submission */
3171                 if (unlikely(timeout && preempt_timeout(engine)))
3172                         execlists_reset(engine, "preemption time out");
3173         }
3174 }
3175
3176 static void __execlists_kick(struct intel_engine_execlists *execlists)
3177 {
3178         /* Kick the tasklet for some interrupt coalescing and reset handling */
3179         tasklet_hi_schedule(&execlists->tasklet);
3180 }
3181
3182 #define execlists_kick(t, member) \
3183         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
3184
3185 static void execlists_timeslice(struct timer_list *timer)
3186 {
3187         execlists_kick(timer, timer);
3188 }
3189
3190 static void execlists_preempt(struct timer_list *timer)
3191 {
3192         execlists_kick(timer, preempt);
3193 }
3194
3195 static void queue_request(struct intel_engine_cs *engine,
3196                           struct i915_request *rq)
3197 {
3198         GEM_BUG_ON(!list_empty(&rq->sched.link));
3199         list_add_tail(&rq->sched.link,
3200                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
3201         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3202 }
3203
3204 static void __submit_queue_imm(struct intel_engine_cs *engine)
3205 {
3206         struct intel_engine_execlists * const execlists = &engine->execlists;
3207
3208         if (reset_in_progress(execlists))
3209                 return; /* defer until we restart the engine following reset */
3210
3211         __execlists_submission_tasklet(engine);
3212 }
3213
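     /*
      * Only kick submission if the new request outranks the best work we
      * already expect to run next (queue_priority_hint); otherwise it will
      * be picked up on the next natural dequeue.
      */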
3214 static void submit_queue(struct intel_engine_cs *engine,
3215                          const struct i915_request *rq)
3216 {
3217         struct intel_engine_execlists *execlists = &engine->execlists;
3218
3219         if (rq_prio(rq) <= execlists->queue_priority_hint)
3220                 return;
3221
3222         execlists->queue_priority_hint = rq_prio(rq);
3223         __submit_queue_imm(engine);
3224 }
3225
3226 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3227                              const struct i915_request *rq)
3228 {
3229         GEM_BUG_ON(i915_request_on_hold(rq));
3230         return !list_empty(&engine->active.hold) && hold_request(rq);
3231 }
3232
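     /*
      * Opportunistically drain any outstanding CSB events so that
      * execlists->pending[] may be cleared before we try to submit. The
      * trylock avoids clashing with the tasklet (or an ongoing reset).
      */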
3233 static void flush_csb(struct intel_engine_cs *engine)
3234 {
3235         struct intel_engine_execlists *el = &engine->execlists;
3236
3237         if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3238                 if (!reset_in_progress(el))
3239                         process_csb(engine);
3240                 tasklet_unlock(&el->tasklet);
3241         }
3242 }
3243
3244 static void execlists_submit_request(struct i915_request *request)
3245 {
3246         struct intel_engine_cs *engine = request->engine;
3247         unsigned long flags;
3248
3249         /* Hopefully we clear execlists->pending[] to let us through */
3250         flush_csb(engine);
3251
3252         /* Will be called from irq-context when using foreign fences. */
3253         spin_lock_irqsave(&engine->active.lock, flags);
3254
3255         if (unlikely(ancestor_on_hold(engine, request))) {
3256                 RQ_TRACE(request, "ancestor on hold\n");
3257                 list_add_tail(&request->sched.link, &engine->active.hold);
3258                 i915_request_set_hold(request);
3259         } else {
3260                 queue_request(engine, request);
3261
3262                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3263                 GEM_BUG_ON(list_empty(&request->sched.link));
3264
3265                 submit_queue(engine, request);
3266         }
3267
3268         spin_unlock_irqrestore(&engine->active.lock, flags);
3269 }
3270
3271 static void __execlists_context_fini(struct intel_context *ce)
3272 {
3273         intel_ring_put(ce->ring);
3274         i915_vma_put(ce->state);
3275 }
3276
3277 static void execlists_context_destroy(struct kref *kref)
3278 {
3279         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3280
3281         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3282         GEM_BUG_ON(intel_context_is_pinned(ce));
3283
3284         if (ce->state)
3285                 __execlists_context_fini(ce);
3286
3287         intel_context_fini(ce);
3288         intel_context_free(ce);
3289 }
3290
3291 static void
3292 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3293 {
3294         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3295                 return;
3296
3297         vaddr += engine->context_size;
3298
3299         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3300 }
3301
3302 static void
3303 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3304 {
3305         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3306                 return;
3307
3308         vaddr += engine->context_size;
3309
3310         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3311                 drm_err_once(&engine->i915->drm,
3312                              "%s context redzone overwritten!\n",
3313                              engine->name);
3314 }
3315
3316 static void execlists_context_unpin(struct intel_context *ce)
3317 {
3318         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3319                       ce->engine);
3320 }
3321
3322 static void execlists_context_post_unpin(struct intel_context *ce)
3323 {
3324         i915_gem_object_unpin_map(ce->state->obj);
3325 }
3326
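     /*
      * Reload the CTX_TIMESTAMP saved in the context image back into the
      * ring's CTX_TIMESTAMP register, bounced through CS_GPR0 (the final
      * register-to-register load is issued twice as part of the w/a).
      */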
3327 static u32 *
3328 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3329 {
3330         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3331                 MI_SRM_LRM_GLOBAL_GTT |
3332                 MI_LRI_LRM_CS_MMIO;
3333         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3334         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3335                 CTX_TIMESTAMP * sizeof(u32);
3336         *cs++ = 0;
3337
3338         *cs++ = MI_LOAD_REGISTER_REG |
3339                 MI_LRR_SOURCE_CS_MMIO |
3340                 MI_LRI_LRM_CS_MMIO;
3341         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3342         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3343
3344         *cs++ = MI_LOAD_REGISTER_REG |
3345                 MI_LRR_SOURCE_CS_MMIO |
3346                 MI_LRI_LRM_CS_MMIO;
3347         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3348         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3349
3350         return cs;
3351 }
3352
3353 static u32 *
3354 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3355 {
3356         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3357
3358         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3359                 MI_SRM_LRM_GLOBAL_GTT |
3360                 MI_LRI_LRM_CS_MMIO;
3361         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3362         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3363                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3364         *cs++ = 0;
3365
3366         return cs;
3367 }
3368
3369 static u32 *
3370 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3371 {
3372         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3373
3374         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3375                 MI_SRM_LRM_GLOBAL_GTT |
3376                 MI_LRI_LRM_CS_MMIO;
3377         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3378         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3379                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3380         *cs++ = 0;
3381
3382         *cs++ = MI_LOAD_REGISTER_REG |
3383                 MI_LRR_SOURCE_CS_MMIO |
3384                 MI_LRI_LRM_CS_MMIO;
3385         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3386         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3387
3388         return cs;
3389 }
3390
3391 static u32 *
3392 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3393 {
3394         cs = gen12_emit_timestamp_wa(ce, cs);
3395         cs = gen12_emit_cmd_buf_wa(ce, cs);
3396         cs = gen12_emit_restore_scratch(ce, cs);
3397
3398         return cs;
3399 }
3400
3401 static u32 *
3402 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3403 {
3404         cs = gen12_emit_timestamp_wa(ce, cs);
3405         cs = gen12_emit_restore_scratch(ce, cs);
3406
3407         return cs;
3408 }
3409
3410 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3411 {
3412         return PAGE_SIZE * ce->wa_bb_page;
3413 }
3414
3415 static u32 *context_indirect_bb(const struct intel_context *ce)
3416 {
3417         void *ptr;
3418
3419         GEM_BUG_ON(!ce->wa_bb_page);
3420
3421         ptr = ce->lrc_reg_state;
3422         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3423         ptr += context_wa_bb_offset(ce);
3424
3425         return ptr;
3426 }
3427
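     /*
      * Emit the indirect context batch into the spare wa_bb page of the
      * context image, pad it out to a cacheline with NOOPs, and point the
      * INDIRECT_CTX registers in the saved register state at it.
      */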
3428 static void
3429 setup_indirect_ctx_bb(const struct intel_context *ce,
3430                       const struct intel_engine_cs *engine,
3431                       u32 *(*emit)(const struct intel_context *, u32 *))
3432 {
3433         u32 * const start = context_indirect_bb(ce);
3434         u32 *cs;
3435
3436         cs = emit(ce, start);
3437         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3438         while ((unsigned long)cs % CACHELINE_BYTES)
3439                 *cs++ = MI_NOOP;
3440
3441         lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3442                                     i915_ggtt_offset(ce->state) +
3443                                     context_wa_bb_offset(ce),
3444                                     (cs - start) * sizeof(*cs));
3445 }
3446
3447 static void
3448 __execlists_update_reg_state(const struct intel_context *ce,
3449                              const struct intel_engine_cs *engine,
3450                              u32 head)
3451 {
3452         struct intel_ring *ring = ce->ring;
3453         u32 *regs = ce->lrc_reg_state;
3454
3455         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3456         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3457
3458         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3459         regs[CTX_RING_HEAD] = head;
3460         regs[CTX_RING_TAIL] = ring->tail;
3461         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3462
3463         /* RPCS */
3464         if (engine->class == RENDER_CLASS) {
3465                 regs[CTX_R_PWR_CLK_STATE] =
3466                         intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3467
3468                 i915_oa_init_reg_state(ce, engine);
3469         }
3470
3471         if (ce->wa_bb_page) {
3472                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3473
3474                 fn = gen12_emit_indirect_ctx_xcs;
3475                 if (ce->engine->class == RENDER_CLASS)
3476                         fn = gen12_emit_indirect_ctx_rcs;
3477
3478                 /* Mutually exclusive wrt to global indirect bb */
3479                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3480                 setup_indirect_ctx_bb(ce, engine, fn);
3481         }
3482 }
3483
3484 static int
3485 execlists_context_pre_pin(struct intel_context *ce,
3486                           struct i915_gem_ww_ctx *ww, void **vaddr)
3487 {
3488         GEM_BUG_ON(!ce->state);
3489         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3490
3491         *vaddr = i915_gem_object_pin_map(ce->state->obj,
3492                                         i915_coherent_map_type(ce->engine->i915) |
3493                                         I915_MAP_OVERRIDE);
3494
3495         return PTR_ERR_OR_ZERO(*vaddr);
3496 }
3497
3498 static int
3499 __execlists_context_pin(struct intel_context *ce,
3500                         struct intel_engine_cs *engine,
3501                         void *vaddr)
3502 {
3503         ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3504         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3505         __execlists_update_reg_state(ce, engine, ce->ring->tail);
3506
3507         return 0;
3508 }
3509
3510 static int execlists_context_pin(struct intel_context *ce, void *vaddr)
3511 {
3512         return __execlists_context_pin(ce, ce->engine, vaddr);
3513 }
3514
3515 static int execlists_context_alloc(struct intel_context *ce)
3516 {
3517         return __execlists_context_alloc(ce, ce->engine);
3518 }
3519
3520 static void execlists_context_reset(struct intel_context *ce)
3521 {
3522         CE_TRACE(ce, "reset\n");
3523         GEM_BUG_ON(!intel_context_is_pinned(ce));
3524
3525         intel_ring_reset(ce->ring, ce->ring->emit);
3526
3527         /* Scrub away the garbage */
3528         execlists_init_reg_state(ce->lrc_reg_state,
3529                                  ce, ce->engine, ce->ring, true);
3530         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3531
3532         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3533 }
3534
3535 static const struct intel_context_ops execlists_context_ops = {
3536         .alloc = execlists_context_alloc,
3537
3538         .pre_pin = execlists_context_pre_pin,
3539         .pin = execlists_context_pin,
3540         .unpin = execlists_context_unpin,
3541         .post_unpin = execlists_context_post_unpin,
3542
3543         .enter = intel_context_enter_engine,
3544         .exit = intel_context_exit_engine,
3545
3546         .reset = execlists_context_reset,
3547         .destroy = execlists_context_destroy,
3548 };
3549
3550 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3551 {
3552         u32 *cs;
3553
3554         GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3555         if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3556                 return 0;
3557
3558         cs = intel_ring_begin(rq, 6);
3559         if (IS_ERR(cs))
3560                 return PTR_ERR(cs);
3561
3562         /*
3563          * Check if we have been preempted before we even get started.
3564          *
3565          * After this point i915_request_started() reports true, even if
3566          * we get preempted and so are no longer running.
3567          */
3568         *cs++ = MI_ARB_CHECK;
3569         *cs++ = MI_NOOP;
3570
3571         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3572         *cs++ = i915_request_timeline(rq)->hwsp_offset;
3573         *cs++ = 0;
3574         *cs++ = rq->fence.seqno - 1;
3575
3576         intel_ring_advance(rq, cs);
3577
3578         /* Record the updated position of the request's payload */
3579         rq->infix = intel_ring_offset(rq, cs);
3580
3581         __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3582
3583         return 0;
3584 }
3585
3586 static int emit_pdps(struct i915_request *rq)
3587 {
3588         const struct intel_engine_cs * const engine = rq->engine;
3589         struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3590         int err, i;
3591         u32 *cs;
3592
3593         GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3594
3595         /*
3596          * Beware ye of the dragons, this sequence is magic!
3597          *
3598          * Small changes to this sequence can cause anything from
3599          * GPU hangs to forcewake errors and machine lockups!
3600          */
3601
3602         /* Flush any residual operations from the context load */
3603         err = engine->emit_flush(rq, EMIT_FLUSH);
3604         if (err)
3605                 return err;
3606
3607         /* Magic required to prevent forcewake errors! */
3608         err = engine->emit_flush(rq, EMIT_INVALIDATE);
3609         if (err)
3610                 return err;
3611
3612         cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3613         if (IS_ERR(cs))
3614                 return PTR_ERR(cs);
3615
3616         /* Ensure the LRI have landed before we invalidate & continue */
3617         *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3618         for (i = GEN8_3LVL_PDPES; i--; ) {
3619                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3620                 u32 base = engine->mmio_base;
3621
3622                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3623                 *cs++ = upper_32_bits(pd_daddr);
3624                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3625                 *cs++ = lower_32_bits(pd_daddr);
3626         }
3627         *cs++ = MI_NOOP;
3628
3629         intel_ring_advance(rq, cs);
3630
3631         return 0;
3632 }
3633
3634 static int execlists_request_alloc(struct i915_request *request)
3635 {
3636         int ret;
3637
3638         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3639
3640         /*
3641          * Flush enough space to reduce the likelihood of waiting after
3642          * we start building the request - in which case we will just
3643          * have to repeat work.
3644          */
3645         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3646
3647         /*
3648          * Note that after this point, we have committed to using
3649          * this request as it is being used to both track the
3650          * state of engine initialisation and liveness of the
3651          * golden renderstate above. Think twice before you try
3652          * to cancel/unwind this request now.
3653          */
3654
3655         if (!i915_vm_is_4lvl(request->context->vm)) {
3656                 ret = emit_pdps(request);
3657                 if (ret)
3658                         return ret;
3659         }
3660
3661         /* Unconditionally invalidate GPU caches and TLBs. */
3662         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3663         if (ret)
3664                 return ret;
3665
3666         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3667         return 0;
3668 }
3669
3670 /*
3671  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3672  * the PIPE_CONTROL instruction. This is required for the flush to happen
3673  * correctly, but there is a slight complication: this is applied in a WA batch
3674  * whose values are only initialized once, so we cannot read the register value
3675  * at the beginning and reuse it later; hence we save its value to memory,
3676  * upload a constant value with bit 21 set and then restore the saved value.
3677  * To simplify the WA, the constant value is formed from the default value
3678  * of this register. This shouldn't be a problem because we are only modifying
3679  * it for a short period and this batch is non-preemptible. We could of course
3680  * use additional instructions that read the actual value of the register
3681  * at that time and set our bit of interest but it makes the WA complicated.
3682  *
3683  * This WA is also required for Gen9 so extracting as a function avoids
3684  * code duplication.
3685  */
3686 static u32 *
3687 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3688 {
3689         /* NB no one else is allowed to scribble over scratch + 256! */
3690         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3691         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3692         *batch++ = intel_gt_scratch_offset(engine->gt,
3693                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3694         *batch++ = 0;
3695
3696         *batch++ = MI_LOAD_REGISTER_IMM(1);
3697         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3698         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3699
3700         batch = gen8_emit_pipe_control(batch,
3701                                        PIPE_CONTROL_CS_STALL |
3702                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3703                                        0);
3704
3705         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3706         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3707         *batch++ = intel_gt_scratch_offset(engine->gt,
3708                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3709         *batch++ = 0;
3710
3711         return batch;
3712 }
3713
3714 /*
3715  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3716  * initialized at the beginning and shared across all contexts, but this field
3717  * helps us to have multiple batches at different offsets and select them based
3718  * on some criteria. At the moment this batch always starts at the beginning of
3719  * the page and we don't have multiple wa_ctx batch buffers.
3720  *
3721  * The number of WAs applied is not known at the beginning; we use this field
3722  * to return the number of DWORDs written.
3723  *
3724  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END,
3725  * so it adds NOOPs as padding to make it cacheline aligned.
3726  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and the two together
3727  * make a complete batch buffer.
3728  */
3729 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3730 {
3731         /* WaDisableCtxRestoreArbitration:bdw,chv */
3732         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3733
3734         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3735         if (IS_BROADWELL(engine->i915))
3736                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3737
3738         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3739         /* Actual scratch location is at 128 bytes offset */
3740         batch = gen8_emit_pipe_control(batch,
3741                                        PIPE_CONTROL_FLUSH_L3 |
3742                                        PIPE_CONTROL_STORE_DATA_INDEX |
3743                                        PIPE_CONTROL_CS_STALL |
3744                                        PIPE_CONTROL_QW_WRITE,
3745                                        LRC_PPHWSP_SCRATCH_ADDR);
3746
3747         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3748
3749         /* Pad to end of cacheline */
3750         while ((unsigned long)batch % CACHELINE_BYTES)
3751                 *batch++ = MI_NOOP;
3752
3753         /*
3754          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3755          * execution depends on the length specified in terms of cache lines
3756          * in the register CTX_RCS_INDIRECT_CTX
3757          */
3758
3759         return batch;
3760 }
3761
3762 struct lri {
3763         i915_reg_t reg;
3764         u32 value;
3765 };
3766
3767 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3768 {
3769         GEM_BUG_ON(!count || count > 63);
3770
3771         *batch++ = MI_LOAD_REGISTER_IMM(count);
3772         do {
3773                 *batch++ = i915_mmio_reg_offset(lri->reg);
3774                 *batch++ = lri->value;
3775         } while (lri++, --count);
3776         *batch++ = MI_NOOP;
3777
3778         return batch;
3779 }
3780
3781 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3782 {
3783         static const struct lri lri[] = {
3784                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3785                 {
3786                         COMMON_SLICE_CHICKEN2,
3787                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3788                                        0),
3789                 },
3790
3791                 /* BSpec: 11391 */
3792                 {
3793                         FF_SLICE_CHICKEN,
3794                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3795                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3796                 },
3797
3798                 /* BSpec: 11299 */
3799                 {
3800                         _3D_CHICKEN3,
3801                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3802                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3803                 }
3804         };
3805
3806         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3807
3808         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3809         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3810
3811         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3812         batch = gen8_emit_pipe_control(batch,
3813                                        PIPE_CONTROL_FLUSH_L3 |
3814                                        PIPE_CONTROL_STORE_DATA_INDEX |
3815                                        PIPE_CONTROL_CS_STALL |
3816                                        PIPE_CONTROL_QW_WRITE,
3817                                        LRC_PPHWSP_SCRATCH_ADDR);
3818
3819         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3820
3821         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3822         if (HAS_POOLED_EU(engine->i915)) {
3823                 /*
3824                  * EU pool configuration is setup along with golden context
3825                  * during context initialization. This value depends on
3826                  * device type (2x6 or 3x6) and needs to be updated based
3827                  * on which subslice is disabled especially for 2x6
3828                  * devices. However, it is safe to load the default
3829                  * configuration of a 3x6 device instead of masking off the
3830                  * corresponding bits, because HW ignores bits of a disabled
3831                  * subslice and drops down to the appropriate config. Please
3832                  * see render_state_setup() in i915_gem_render_state.c for
3833                  * possible configurations, to avoid duplication they are
3834                  * not shown here again.
3835                  */
3836                 *batch++ = GEN9_MEDIA_POOL_STATE;
3837                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3838                 *batch++ = 0x00777000;
3839                 *batch++ = 0;
3840                 *batch++ = 0;
3841                 *batch++ = 0;
3842         }
3843
3844         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3845
3846         /* Pad to end of cacheline */
3847         while ((unsigned long)batch % CACHELINE_BYTES)
3848                 *batch++ = MI_NOOP;
3849
3850         return batch;
3851 }
3852
3853 static u32 *
3854 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3855 {
3856         int i;
3857
3858         /*
3859          * WaPipeControlBefore3DStateSamplePattern: cnl
3860          *
3861          * Ensure the engine is idle prior to programming a
3862          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3863          */
3864         batch = gen8_emit_pipe_control(batch,
3865                                        PIPE_CONTROL_CS_STALL,
3866                                        0);
3867         /*
3868          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3869          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3870          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3871          * confusing. Since gen8_emit_pipe_control() already advances the
3872          * batch by 6 dwords, we advance the other 10 here, completing a
3873          * cacheline. It's not clear if the workaround requires this padding
3874          * before other commands, or if it's just the regular padding we would
3875          * already have for the workaround bb, so leave it here for now.
3876          */
3877         for (i = 0; i < 10; i++)
3878                 *batch++ = MI_NOOP;
3879
3880         /* Pad to end of cacheline */
3881         while ((unsigned long)batch % CACHELINE_BYTES)
3882                 *batch++ = MI_NOOP;
3883
3884         return batch;
3885 }
3886
3887 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3888
3889 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3890 {
3891         struct drm_i915_gem_object *obj;
3892         struct i915_vma *vma;
3893         int err;
3894
3895         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3896         if (IS_ERR(obj))
3897                 return PTR_ERR(obj);
3898
3899         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3900         if (IS_ERR(vma)) {
3901                 err = PTR_ERR(vma);
3902                 goto err;
3903         }
3904
3905         err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
3906         if (err)
3907                 goto err;
3908
3909         engine->wa_ctx.vma = vma;
3910         return 0;
3911
3912 err:
3913         i915_gem_object_put(obj);
3914         return err;
3915 }
3916
3917 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3918 {
3919         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3920 }
3921
3922 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3923
3924 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3925 {
3926         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3927         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3928                                             &wa_ctx->per_ctx };
3929         wa_bb_func_t wa_bb_fn[2];
3930         void *batch, *batch_ptr;
3931         unsigned int i;
3932         int ret;
3933
3934         if (engine->class != RENDER_CLASS)
3935                 return 0;
3936
3937         switch (INTEL_GEN(engine->i915)) {
3938         case 12:
3939         case 11:
3940                 return 0;
3941         case 10:
3942                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3943                 wa_bb_fn[1] = NULL;
3944                 break;
3945         case 9:
3946                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3947                 wa_bb_fn[1] = NULL;
3948                 break;
3949         case 8:
3950                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3951                 wa_bb_fn[1] = NULL;
3952                 break;
3953         default:
3954                 MISSING_CASE(INTEL_GEN(engine->i915));
3955                 return 0;
3956         }
3957
3958         ret = lrc_setup_wa_ctx(engine);
3959         if (ret) {
3960                 drm_dbg(&engine->i915->drm,
3961                         "Failed to setup context WA page: %d\n", ret);
3962                 return ret;
3963         }
3964
3965         batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
3966
3967         /*
3968          * Emit the two workaround batch buffers, recording the offset from the
3969          * start of the workaround batch buffer object for each and their
3970          * respective sizes.
3971          */
3972         batch_ptr = batch;
3973         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3974                 wa_bb[i]->offset = batch_ptr - batch;
3975                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3976                                                   CACHELINE_BYTES))) {
3977                         ret = -EINVAL;
3978                         break;
3979                 }
3980                 if (wa_bb_fn[i])
3981                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3982                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3983         }
3984         GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3985
3986         __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
3987         __i915_gem_object_release_map(wa_ctx->vma->obj);
3988         if (ret)
3989                 lrc_destroy_wa_ctx(engine);
3990
3991         return ret;
3992 }
3993
3994 static void reset_csb_pointers(struct intel_engine_cs *engine)
3995 {
3996         struct intel_engine_execlists * const execlists = &engine->execlists;
3997         const unsigned int reset_value = execlists->csb_size - 1;
3998
3999         ring_set_paused(engine, 0);
4000
4001         /*
4002          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
4003          * Bludgeon them with an mmio update to be sure.
4004          */
4005         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4006                      0xffff << 16 | reset_value << 8 | reset_value);
4007         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4008
4009         /*
4010          * After a reset, the HW starts writing into CSB entry [0]. We
4011          * therefore have to set our HEAD pointer back one entry so that
4012          * the *first* entry we check is entry 0. To complicate this further,
4013          * as we don't wait for the first interrupt after reset, we have to
4014          * fake the HW write to point back to the last entry so that our
4015          * inline comparison of our cached head position against the last HW
4016          * write works even before the first interrupt.
4017          */
4018         execlists->csb_head = reset_value;
4019         WRITE_ONCE(*execlists->csb_write, reset_value);
4020         wmb(); /* Make sure this is visible to HW (paranoia?) */
4021
4022         /* Check that the GPU does indeed update the CSB entries! */
4023         memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
4024         invalidate_csb_entries(&execlists->csb_status[0],
4025                                &execlists->csb_status[reset_value]);
4026
4027         /* Once more for luck and our trusty paranoia */
4028         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4029                      0xffff << 16 | reset_value << 8 | reset_value);
4030         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4031
4032         GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4033 }
4034
4035 static void execlists_sanitize(struct intel_engine_cs *engine)
4036 {
4037         /*
4038          * Poison residual state on resume, in case the suspend didn't!
4039          *
4040          * We have to assume that across suspend/resume (or other loss
4041          * of control) the contents of our pinned buffers have been
4042          * lost, replaced by garbage. Since this doesn't always happen,
4043          * let's poison such state so that we more quickly spot when
4044          * we falsely assume it has been preserved.
4045          */
4046         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4047                 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4048
4049         reset_csb_pointers(engine);
4050
4051         /*
4052          * The kernel_context HWSP is stored in the status_page. As above,
4053          * that may be lost on resume/initialisation, and so we need to
4054          * reset the value in the HWSP.
4055          */
4056         intel_timeline_reset_seqno(engine->kernel_context->timeline);
4057
4058         /* And scrub the dirty cachelines for the HWSP */
4059         clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4060 }
4061
4062 static void enable_error_interrupt(struct intel_engine_cs *engine)
4063 {
4064         u32 status;
4065
4066         engine->execlists.error_interrupt = 0;
4067         ENGINE_WRITE(engine, RING_EMR, ~0u);
4068         ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4069
4070         status = ENGINE_READ(engine, RING_ESR);
4071         if (unlikely(status)) {
4072                 drm_err(&engine->i915->drm,
4073                         "engine '%s' resumed still in error: %08x\n",
4074                         engine->name, status);
4075                 __intel_gt_reset(engine->gt, engine->mask);
4076         }
4077
4078         /*
4079          * On current gen8+, we have 2 signals to play with
4080          *
4081          * - I915_ERROR_INSTRUCTION (bit 0)
4082          *
4083          *    Generate an error if the command parser encounters an invalid
4084          *    instruction
4085          *
4086          *    This is a fatal error.
4087          *
4088          * - CP_PRIV (bit 2)
4089          *
4090          *    Generate an error on privilege violation (where the CP replaces
4091          *    the instruction with a no-op). This also fires for writes into
4092          *    read-only scratch pages.
4093          *
4094          *    This is a non-fatal error, parsing continues.
4095          *
4096          * * there are a few others defined for odd HW that we do not use
4097          *
4098          * Since CP_PRIV fires for cases where we have chosen to ignore the
4099          * error (as the HW is validating and suppressing the mistakes), we
4100          * only unmask the instruction error bit.
4101          */
4102         ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4103 }
4104
4105 static void enable_execlists(struct intel_engine_cs *engine)
4106 {
4107         u32 mode;
4108
4109         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4110
4111         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4112
4113         if (INTEL_GEN(engine->i915) >= 11)
4114                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4115         else
4116                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4117         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4118
4119         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4120
4121         ENGINE_WRITE_FW(engine,
4122                         RING_HWS_PGA,
4123                         i915_ggtt_offset(engine->status_page.vma));
4124         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4125
4126         enable_error_interrupt(engine);
4127
4128         engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4129 }
4130
4131 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4132 {
4133         bool unexpected = false;
4134
4135         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4136                 drm_dbg(&engine->i915->drm,
4137                         "STOP_RING still set in RING_MI_MODE\n");
4138                 unexpected = true;
4139         }
4140
4141         return unexpected;
4142 }
4143
4144 static int execlists_resume(struct intel_engine_cs *engine)
4145 {
4146         intel_mocs_init_engine(engine);
4147
4148         intel_breadcrumbs_reset(engine->breadcrumbs);
4149
4150         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4151                 struct drm_printer p = drm_debug_printer(__func__);
4152
4153                 intel_engine_dump(engine, &p, NULL);
4154         }
4155
4156         enable_execlists(engine);
4157
4158         return 0;
4159 }
4160
4161 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4162 {
4163         struct intel_engine_execlists * const execlists = &engine->execlists;
4164         unsigned long flags;
4165
4166         ENGINE_TRACE(engine, "depth<-%d\n",
4167                      atomic_read(&execlists->tasklet.count));
4168
4169         /*
4170          * Prevent request submission to the hardware until we have
4171          * completed the reset in i915_gem_reset_finish(). If a request
4172          * is completed by one engine, it may then queue a request
4173          * to a second via its execlists->tasklet *just* as we are
4174          * calling engine->resume() and also writing the ELSP.
4175          * Turning off the execlists->tasklet until the reset is over
4176          * prevents the race.
4177          */
4178         __tasklet_disable_sync_once(&execlists->tasklet);
4179         GEM_BUG_ON(!reset_in_progress(execlists));
4180
4181         /* And flush any current direct submission. */
4182         spin_lock_irqsave(&engine->active.lock, flags);
4183         spin_unlock_irqrestore(&engine->active.lock, flags);
4184
4185         /*
4186          * We stop the engines, otherwise we might get a failed reset and
4187          * a dead gpu (on elk). Even a gpu as modern as kbl can suffer a
4188          * system hang if a batchbuffer is progressing when the reset is
4189          * issued, regardless of the READY_TO_RESET ack.
4190          * Thus we assume it is best to stop the engines on all gens
4191          * where we have a gpu reset.
4192          *
4193          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4194          *
4195          * FIXME: Wa for more modern gens needs to be validated
4196          */
4197         ring_set_paused(engine, 1);
4198         intel_engine_stop_cs(engine);
4199
4200         engine->execlists.reset_ccid = active_ccid(engine);
4201 }
4202
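     /*
      * RING_MI_MODE is saved in the context image as a masked register
      * write; clear STOP_RING (with its mask bit set) so the engine resumes
      * execution when this context is restored after the reset.
      */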
4203 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4204 {
4205         int x;
4206
4207         x = lrc_ring_mi_mode(engine);
4208         if (x != -1) {
4209                 regs[x + 1] &= ~STOP_RING;
4210                 regs[x + 1] |= STOP_RING << 16;
4211         }
4212 }
4213
4214 static void __execlists_reset_reg_state(const struct intel_context *ce,
4215                                         const struct intel_engine_cs *engine)
4216 {
4217         u32 *regs = ce->lrc_reg_state;
4218
4219         __reset_stop_ring(regs, engine);
4220 }
4221
4222 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4223 {
4224         struct intel_engine_execlists * const execlists = &engine->execlists;
4225         struct intel_context *ce;
4226         struct i915_request *rq;
4227         u32 head;
4228
4229         mb(); /* paranoia: read the CSB pointers from after the reset */
4230         clflush(execlists->csb_write);
4231         mb();
4232
4233         process_csb(engine); /* drain preemption events */
4234
4235         /* Following the reset, we need to reload the CSB read/write pointers */
4236         reset_csb_pointers(engine);
4237
4238         /*
4239          * Save the currently executing context, even if we completed
4240          * its request, it was still running at the time of the
4241          * reset and will have been clobbered.
4242          */
4243         rq = active_context(engine, engine->execlists.reset_ccid);
4244         if (!rq)
4245                 goto unwind;
4246
4247         ce = rq->context;
4248         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4249
4250         if (i915_request_completed(rq)) {
4251                 /* Idle context; tidy up the ring so we can restart afresh */
4252                 head = intel_ring_wrap(ce->ring, rq->tail);
4253                 goto out_replay;
4254         }
4255
4256         /* We still have requests in-flight; the engine should be active */
4257         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4258
4259         /* Context has requests still in-flight; it should not be idle! */
4260         GEM_BUG_ON(i915_active_is_idle(&ce->active));
4261
4262         rq = active_request(ce->timeline, rq);
4263         head = intel_ring_wrap(ce->ring, rq->head);
4264         GEM_BUG_ON(head == ce->ring->tail);
4265
4266         /*
4267          * If this request hasn't started yet, e.g. it is waiting on a
4268          * semaphore, we need to avoid skipping the request or else we
4269          * break the signaling chain. However, if the context is corrupt
4270          * the request will not restart and we will be stuck with a wedged
4271          * device. It is quite often the case that a reset issued while the
4272          * GPU is loading the context image leaves that context image
4273          * corrupt.
4274          *
4275          * Otherwise, if the request has not started yet, it should replay
4276          * perfectly and we do not need to flag the result as erroneous.
4277          */
4278         if (!i915_request_started(rq))
4279                 goto out_replay;
4280
4281         /*
4282          * If the request was innocent, we leave the request in the ELSP
4283          * and will try to replay it on restarting. The context image may
4284          * have been corrupted by the reset, in which case we may have
4285          * to service a new GPU hang, but more likely we can continue on
4286          * without impact.
4287          *
4288          * If the request was guilty, we presume the context is corrupt
4289          * and have to at least restore the RING register in the context
4290          * image back to the expected values to skip over the guilty request.
4291          */
4292         __i915_request_reset(rq, stalled);
4293
4294         /*
4295          * We want a simple context + ring to execute the breadcrumb update.
4296          * We cannot rely on the context being intact across the GPU hang,
4297          * so clear it and rebuild just what we need for the breadcrumb.
4298          * All pending requests for this context will be zapped, and any
4299          * future request will be after userspace has had the opportunity
4300          * to recreate its own state.
4301          */
4302 out_replay:
4303         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4304                      head, ce->ring->tail);
4305         __execlists_reset_reg_state(ce, engine);
4306         __execlists_update_reg_state(ce, engine, head);
4307         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4308
4309 unwind:
4310         /* Push back any incomplete requests for replay after the reset. */
4311         cancel_port_requests(execlists);
4312         __unwind_incomplete_requests(engine);
4313 }
4314
4315 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4316 {
4317         unsigned long flags;
4318
4319         ENGINE_TRACE(engine, "\n");
4320
4321         spin_lock_irqsave(&engine->active.lock, flags);
4322
4323         __execlists_reset(engine, stalled);
4324
4325         spin_unlock_irqrestore(&engine->active.lock, flags);
4326 }
4327
4328 static void nop_submission_tasklet(unsigned long data)
4329 {
4330         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4331
4332         /* The driver is wedged; don't process any more events. */
4333         WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4334 }
4335
4336 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4337 {
4338         struct intel_engine_execlists * const execlists = &engine->execlists;
4339         struct i915_request *rq, *rn;
4340         struct rb_node *rb;
4341         unsigned long flags;
4342
4343         ENGINE_TRACE(engine, "\n");
4344
4345         /*
4346          * Before we call engine->cancel_requests(), we should have exclusive
4347          * access to the submission state. This is arranged for us by the
4348          * caller disabling the interrupt generation, the tasklet and other
4349          * threads that may then access the same state, giving us a free hand
4350          * to reset state. However, we still need to let lockdep know that
4351          * this state may be accessed in hardirq context, so we disable
4352          * irqs around this manipulation. We also want to keep the spinlock
4353          * focused on its duties and not accidentally conflate its coverage
4354          * with the submission's irq state. (Similarly, although we
4355          * shouldn't need to disable irq around the manipulation of the
4356          * submission's irq state, we also wish to remind ourselves that
4357          * it is irq state.)
4358          */
4359         spin_lock_irqsave(&engine->active.lock, flags);
4360
4361         __execlists_reset(engine, true);
4362
4363         /* Mark all executing requests as skipped. */
4364         list_for_each_entry(rq, &engine->active.requests, sched.link)
4365                 mark_eio(rq);
4366
4367         /* Flush the queued requests to the timeline list (for retiring). */
4368         while ((rb = rb_first_cached(&execlists->queue))) {
4369                 struct i915_priolist *p = to_priolist(rb);
4370                 int i;
4371
4372                 priolist_for_each_request_consume(rq, rn, p, i) {
4373                         mark_eio(rq);
4374                         __i915_request_submit(rq);
4375                 }
4376
4377                 rb_erase_cached(&p->node, &execlists->queue);
4378                 i915_priolist_free(p);
4379         }
4380
4381         /* On-hold requests will be flushed to timeline upon their release */
4382         list_for_each_entry(rq, &engine->active.hold, sched.link)
4383                 mark_eio(rq);
4384
4385         /* Cancel all attached virtual engines */
4386         while ((rb = rb_first_cached(&execlists->virtual))) {
4387                 struct virtual_engine *ve =
4388                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4389
4390                 rb_erase_cached(rb, &execlists->virtual);
4391                 RB_CLEAR_NODE(rb);
4392
4393                 spin_lock(&ve->base.active.lock);
4394                 rq = fetch_and_zero(&ve->request);
4395                 if (rq) {
4396                         mark_eio(rq);
4397
4398                         rq->engine = engine;
4399                         __i915_request_submit(rq);
4400                         i915_request_put(rq);
4401
4402                         ve->base.execlists.queue_priority_hint = INT_MIN;
4403                 }
4404                 spin_unlock(&ve->base.active.lock);
4405         }
4406
4407         /* Remaining _unready_ requests will be nop'ed when submitted */
4408
4409         execlists->queue_priority_hint = INT_MIN;
4410         execlists->queue = RB_ROOT_CACHED;
4411
4412         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4413         execlists->tasklet.func = nop_submission_tasklet;
4414
4415         spin_unlock_irqrestore(&engine->active.lock, flags);
4416 }
4417
4418 static void execlists_reset_finish(struct intel_engine_cs *engine)
4419 {
4420         struct intel_engine_execlists * const execlists = &engine->execlists;
4421
4422         /*
4423          * After a GPU reset, we may have requests to replay. Do so now while
4424          * we still have the forcewake to be sure that the GPU is not allowed
4425          * to sleep before we restart and reload a context.
4426          */
4427         GEM_BUG_ON(!reset_in_progress(execlists));
4428         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4429                 execlists->tasklet.func(execlists->tasklet.data);
4430
4431         if (__tasklet_enable(&execlists->tasklet))
4432                 /* And kick in case we missed a new request submission. */
4433                 tasklet_hi_schedule(&execlists->tasklet);
4434         ENGINE_TRACE(engine, "depth->%d\n",
4435                      atomic_read(&execlists->tasklet.count));
4436 }
4437
4438 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4439                                     u64 offset, u32 len,
4440                                     const unsigned int flags)
4441 {
4442         u32 *cs;
4443
4444         cs = intel_ring_begin(rq, 4);
4445         if (IS_ERR(cs))
4446                 return PTR_ERR(cs);
4447
4448         /*
4449          * WaDisableCtxRestoreArbitration:bdw,chv
4450          *
4451          * We don't need to perform MI_ARB_ENABLE as often as we do (in
4452          * particular on all the gens that do not need the w/a at all!); if we
4453          * took care to make sure that on every switch into this context
4454          * (both ordinary and for preemption) arbitration was enabled,
4455          * we would be fine.  However, for gen8 there is another w/a that
4456          * requires us to not preempt inside GPGPU execution, so we keep
4457          * arbitration disabled for gen8 batches. Arbitration will be
4458          * re-enabled before we close the request
4459          * (engine->emit_fini_breadcrumb).
4460          */
4461         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4462
4463         /* FIXME(BDW+): Address space and security selectors. */
4464         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4465                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4466         *cs++ = lower_32_bits(offset);
4467         *cs++ = upper_32_bits(offset);
4468
4469         intel_ring_advance(rq, cs);
4470
4471         return 0;
4472 }
4473
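/*
 * Batch start used when the engine supports preemption: arbitration is
 * explicitly enabled before the jump into the batch and disabled again
 * afterwards (it is re-enabled by the fini breadcrumb).
 */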
4474 static int gen8_emit_bb_start(struct i915_request *rq,
4475                               u64 offset, u32 len,
4476                               const unsigned int flags)
4477 {
4478         u32 *cs;
4479
4480         cs = intel_ring_begin(rq, 6);
4481         if (IS_ERR(cs))
4482                 return PTR_ERR(cs);
4483
4484         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4485
4486         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4487                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4488         *cs++ = lower_32_bits(offset);
4489         *cs++ = upper_32_bits(offset);
4490
4491         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4492         *cs++ = MI_NOOP;
4493
4494         intel_ring_advance(rq, cs);
4495
4496         return 0;
4497 }
4498
4499 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4500 {
4501         ENGINE_WRITE(engine, RING_IMR,
4502                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
4503         ENGINE_POSTING_READ(engine, RING_IMR);
4504 }
4505
4506 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4507 {
4508         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4509 }
4510
4511 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4512 {
4513         u32 cmd, *cs;
4514
4515         cs = intel_ring_begin(request, 4);
4516         if (IS_ERR(cs))
4517                 return PTR_ERR(cs);
4518
4519         cmd = MI_FLUSH_DW + 1;
4520
4521         /* We always require a command barrier so that subsequent
4522          * commands, such as breadcrumb interrupts, are strictly ordered
4523          * wrt the contents of the write cache being flushed to memory
4524          * (and thus being coherent from the CPU).
4525          */
4526         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4527
4528         if (mode & EMIT_INVALIDATE) {
4529                 cmd |= MI_INVALIDATE_TLB;
4530                 if (request->engine->class == VIDEO_DECODE_CLASS)
4531                         cmd |= MI_INVALIDATE_BSD;
4532         }
4533
4534         *cs++ = cmd;
4535         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4536         *cs++ = 0; /* upper addr */
4537         *cs++ = 0; /* value */
4538         intel_ring_advance(request, cs);
4539
4540         return 0;
4541 }
4542
4543 static int gen8_emit_flush_render(struct i915_request *request,
4544                                   u32 mode)
4545 {
4546         bool vf_flush_wa = false, dc_flush_wa = false;
4547         u32 *cs, flags = 0;
4548         int len;
4549
4550         flags |= PIPE_CONTROL_CS_STALL;
4551
4552         if (mode & EMIT_FLUSH) {
4553                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4554                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4555                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4556                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4557         }
4558
4559         if (mode & EMIT_INVALIDATE) {
4560                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4561                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4562                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4563                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4564                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4565                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4566                 flags |= PIPE_CONTROL_QW_WRITE;
4567                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4568
4569                 /*
4570                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4571                  * pipe control.
4572                  */
4573                 if (IS_GEN(request->engine->i915, 9))
4574                         vf_flush_wa = true;
4575
4576                 /* WaForGAMHang:kbl */
4577                 if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
4578                         dc_flush_wa = true;
4579         }
4580
4581         len = 6;
4582
4583         if (vf_flush_wa)
4584                 len += 6;
4585
4586         if (dc_flush_wa)
4587                 len += 12;
4588
4589         cs = intel_ring_begin(request, len);
4590         if (IS_ERR(cs))
4591                 return PTR_ERR(cs);
4592
4593         if (vf_flush_wa)
4594                 cs = gen8_emit_pipe_control(cs, 0, 0);
4595
4596         if (dc_flush_wa)
4597                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4598                                             0);
4599
4600         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4601
4602         if (dc_flush_wa)
4603                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4604
4605         intel_ring_advance(request, cs);
4606
4607         return 0;
4608 }
4609
4610 static int gen11_emit_flush_render(struct i915_request *request,
4611                                    u32 mode)
4612 {
4613         if (mode & EMIT_FLUSH) {
4614                 u32 *cs;
4615                 u32 flags = 0;
4616
4617                 flags |= PIPE_CONTROL_CS_STALL;
4618
4619                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4620                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4621                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4622                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4623                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4624                 flags |= PIPE_CONTROL_QW_WRITE;
4625                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4626
4627                 cs = intel_ring_begin(request, 6);
4628                 if (IS_ERR(cs))
4629                         return PTR_ERR(cs);
4630
4631                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4632                 intel_ring_advance(request, cs);
4633         }
4634
4635         if (mode & EMIT_INVALIDATE) {
4636                 u32 *cs;
4637                 u32 flags = 0;
4638
4639                 flags |= PIPE_CONTROL_CS_STALL;
4640
4641                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4642                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4643                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4644                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4645                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4646                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4647                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4648                 flags |= PIPE_CONTROL_QW_WRITE;
4649                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4650
4651                 cs = intel_ring_begin(request, 6);
4652                 if (IS_ERR(cs))
4653                         return PTR_ERR(cs);
4654
4655                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4656                 intel_ring_advance(request, cs);
4657         }
4658
4659         return 0;
4660 }
4661
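/*
 * Toggle the gen12 command-streamer pre-parser via MI_ARB_CHECK (the
 * requested state goes in bit 0, qualified by bit 8); callers bracket
 * TLB/AUX invalidations with this so the pre-parser cannot fetch stale
 * data past the invalidate.
 */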
4662 static u32 preparser_disable(bool state)
4663 {
4664         return MI_ARB_CHECK | 1 << 8 | state;
4665 }
4666
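/* Look up the per-instance AUX invalidation register for a VD/VE engine. */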
4667 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4668 {
4669         static const i915_reg_t vd[] = {
4670                 GEN12_VD0_AUX_NV,
4671                 GEN12_VD1_AUX_NV,
4672                 GEN12_VD2_AUX_NV,
4673                 GEN12_VD3_AUX_NV,
4674         };
4675
4676         static const i915_reg_t ve[] = {
4677                 GEN12_VE0_AUX_NV,
4678                 GEN12_VE1_AUX_NV,
4679         };
4680
4681         if (engine->class == VIDEO_DECODE_CLASS)
4682                 return vd[engine->instance];
4683
4684         if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4685                 return ve[engine->instance];
4686
4687         GEM_BUG_ON("unknown aux_inv_reg\n");
4688
4689         return INVALID_MMIO_REG;
4690 }
4691
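/*
 * Emit an MI_LOAD_REGISTER_IMM that writes AUX_INV into the given register
 * to trigger an AUX table invalidation (hsdes: 1809175790).
 */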
4692 static u32 *
4693 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4694 {
4695         *cs++ = MI_LOAD_REGISTER_IMM(1);
4696         *cs++ = i915_mmio_reg_offset(inv_reg);
4697         *cs++ = AUX_INV;
4698         *cs++ = MI_NOOP;
4699
4700         return cs;
4701 }
4702
4703 static int gen12_emit_flush_render(struct i915_request *request,
4704                                    u32 mode)
4705 {
4706         if (mode & EMIT_FLUSH) {
4707                 u32 flags = 0;
4708                 u32 *cs;
4709
4710                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4711                 flags |= PIPE_CONTROL_FLUSH_L3;
4712                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4713                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4714                 /* Wa_1409600907:tgl */
4715                 flags |= PIPE_CONTROL_DEPTH_STALL;
4716                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4717                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4718
4719                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4720                 flags |= PIPE_CONTROL_QW_WRITE;
4721
4722                 flags |= PIPE_CONTROL_CS_STALL;
4723
4724                 cs = intel_ring_begin(request, 6);
4725                 if (IS_ERR(cs))
4726                         return PTR_ERR(cs);
4727
4728                 cs = gen12_emit_pipe_control(cs,
4729                                              PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4730                                              flags, LRC_PPHWSP_SCRATCH_ADDR);
4731                 intel_ring_advance(request, cs);
4732         }
4733
4734         if (mode & EMIT_INVALIDATE) {
4735                 u32 flags = 0;
4736                 u32 *cs;
4737
4738                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4739                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4740                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4741                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4742                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4743                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4744                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4745
4746                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4747                 flags |= PIPE_CONTROL_QW_WRITE;
4748
4749                 flags |= PIPE_CONTROL_CS_STALL;
4750
4751                 cs = intel_ring_begin(request, 8 + 4);
4752                 if (IS_ERR(cs))
4753                         return PTR_ERR(cs);
4754
4755                 /*
4756                  * Prevent the pre-parser from skipping past the TLB
4757                  * invalidate and loading a stale page for the batch
4758                  * buffer / request payload.
4759                  */
4760                 *cs++ = preparser_disable(true);
4761
4762                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4763
4764                 /* hsdes: 1809175790 */
4765                 cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4766
4767                 *cs++ = preparser_disable(false);
4768                 intel_ring_advance(request, cs);
4769         }
4770
4771         return 0;
4772 }
4773
4774 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4775 {
4776         intel_engine_mask_t aux_inv = 0;
4777         u32 cmd, *cs;
4778
4779         cmd = 4;
4780         if (mode & EMIT_INVALIDATE)
4781                 cmd += 2;
4782         if (mode & EMIT_INVALIDATE)
4783                 aux_inv = request->engine->mask & ~BIT(BCS0);
4784         if (aux_inv)
4785                 cmd += 2 * hweight8(aux_inv) + 2;
4786
4787         cs = intel_ring_begin(request, cmd);
4788         if (IS_ERR(cs))
4789                 return PTR_ERR(cs);
4790
4791         if (mode & EMIT_INVALIDATE)
4792                 *cs++ = preparser_disable(true);
4793
4794         cmd = MI_FLUSH_DW + 1;
4795
4796         /* We always require a command barrier so that subsequent
4797          * commands, such as breadcrumb interrupts, are strictly ordered
4798          * wrt the contents of the write cache being flushed to memory
4799          * (and thus being coherent from the CPU).
4800          */
4801         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4802
4803         if (mode & EMIT_INVALIDATE) {
4804                 cmd |= MI_INVALIDATE_TLB;
4805                 if (request->engine->class == VIDEO_DECODE_CLASS)
4806                         cmd |= MI_INVALIDATE_BSD;
4807         }
4808
4809         *cs++ = cmd;
4810         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4811         *cs++ = 0; /* upper addr */
4812         *cs++ = 0; /* value */
4813
4814         if (aux_inv) { /* hsdes: 1809175790 */
4815                 struct intel_engine_cs *engine;
4816                 unsigned int tmp;
4817
4818                 *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4819                 for_each_engine_masked(engine, request->engine->gt,
4820                                        aux_inv, tmp) {
4821                         *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4822                         *cs++ = AUX_INV;
4823                 }
4824                 *cs++ = MI_NOOP;
4825         }
4826
4827         if (mode & EMIT_INVALIDATE)
4828                 *cs++ = preparser_disable(false);
4829
4830         intel_ring_advance(request, cs);
4831
4832         return 0;
4833 }
4834
4835 static void assert_request_valid(struct i915_request *rq)
4836 {
4837         struct intel_ring *ring __maybe_unused = rq->ring;
4838
4839         /* Can we unwind this request without appearing to go forwards? */
4840         GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4841 }
4842
4843 /*
4844  * Reserve space for 2 NOOPs at the end of each request to be
4845  * used as a workaround for not being allowed to do lite
4846  * restore with HEAD==TAIL (WaIdleLiteRestore).
4847  */
4848 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4849 {
4850         /* Ensure there's always at least one preemption point per-request. */
4851         *cs++ = MI_ARB_CHECK;
4852         *cs++ = MI_NOOP;
4853         request->wa_tail = intel_ring_offset(request, cs);
4854
4855         /* Check that entire request is less than half the ring */
4856         assert_request_valid(request);
4857
4858         return cs;
4859 }
4860
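/*
 * Busy-wait on the engine's preempt semaphore in the HWSP: the CS stalls
 * here while the semaphore is non-zero (see ring_set_paused), giving us a
 * point at the end of every request where it can be preempted
 * (preempt-to-busy).
 */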
4861 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4862 {
4863         *cs++ = MI_SEMAPHORE_WAIT |
4864                 MI_SEMAPHORE_GLOBAL_GTT |
4865                 MI_SEMAPHORE_POLL |
4866                 MI_SEMAPHORE_SAD_EQ_SDD;
4867         *cs++ = 0;
4868         *cs++ = intel_hws_preempt_address(request->engine);
4869         *cs++ = 0;
4870
4871         return cs;
4872 }
4873
4874 static __always_inline u32 *
4875 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4876 {
4877         *cs++ = MI_USER_INTERRUPT;
4878
4879         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4880         if (intel_engine_has_semaphores(request->engine))
4881                 cs = emit_preempt_busywait(request, cs);
4882
4883         request->tail = intel_ring_offset(request, cs);
4884         assert_ring_tail_valid(request->ring, request->tail);
4885
4886         return gen8_emit_wa_tail(request, cs);
4887 }
4888
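/* Post the request's seqno to its timeline's HWSP slot via a GGTT write. */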
4889 static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4890 {
4891         u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4892
4893         return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4894 }
4895
4896 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4897 {
4898         return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4899 }
4900
4901 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4902 {
4903         cs = gen8_emit_pipe_control(cs,
4904                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4905                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4906                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4907                                     0);
4908
4909         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4910         cs = gen8_emit_ggtt_write_rcs(cs,
4911                                       request->fence.seqno,
4912                                       i915_request_active_timeline(request)->hwsp_offset,
4913                                       PIPE_CONTROL_FLUSH_ENABLE |
4914                                       PIPE_CONTROL_CS_STALL);
4915
4916         return gen8_emit_fini_breadcrumb_tail(request, cs);
4917 }
4918
4919 static u32 *
4920 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4921 {
4922         cs = gen8_emit_ggtt_write_rcs(cs,
4923                                       request->fence.seqno,
4924                                       i915_request_active_timeline(request)->hwsp_offset,
4925                                       PIPE_CONTROL_CS_STALL |
4926                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4927                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4928                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4929                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4930                                       PIPE_CONTROL_FLUSH_ENABLE);
4931
4932         return gen8_emit_fini_breadcrumb_tail(request, cs);
4933 }
4934
4935 /*
4936  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4937  * flush and will continue pre-fetching the instructions after it before the
4938  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4939  * BB_START/END instructions, so even though we might pre-fetch the preamble
4940  * of the next request before the memory has been flushed, we're guaranteed that
4941  * we won't access the batch itself too early.
4942  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4943  * so, if the current request is modifying an instruction in the next request on
4944  * the same intel_context, we might pre-fetch and then execute the pre-update
4945  * instruction. To avoid this, the users of self-modifying code should either
4946  * disable the parser around the code emitting the memory writes, via a new flag
4947  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4948  * the in-kernel use-cases we've opted to use a separate context, see
4949  * reloc_gpu() as an example.
4950  * All the above applies only to the instructions themselves. Non-inline data
4951  * used by the instructions is not pre-fetched.
4952  */
4953
4954 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4955 {
4956         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4957                 MI_SEMAPHORE_GLOBAL_GTT |
4958                 MI_SEMAPHORE_POLL |
4959                 MI_SEMAPHORE_SAD_EQ_SDD;
4960         *cs++ = 0;
4961         *cs++ = intel_hws_preempt_address(request->engine);
4962         *cs++ = 0;
4963         *cs++ = 0;
4964         *cs++ = MI_NOOP;
4965
4966         return cs;
4967 }
4968
4969 static __always_inline u32 *
4970 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4971 {
4972         *cs++ = MI_USER_INTERRUPT;
4973
4974         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4975         if (intel_engine_has_semaphores(request->engine))
4976                 cs = gen12_emit_preempt_busywait(request, cs);
4977
4978         request->tail = intel_ring_offset(request, cs);
4979         assert_ring_tail_valid(request->ring, request->tail);
4980
4981         return gen8_emit_wa_tail(request, cs);
4982 }
4983
4984 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4985 {
4986         return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4987 }
4988
4989 static u32 *
4990 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4991 {
4992         cs = gen12_emit_ggtt_write_rcs(cs,
4993                                        request->fence.seqno,
4994                                        i915_request_active_timeline(request)->hwsp_offset,
4995                                        PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4996                                        PIPE_CONTROL_CS_STALL |
4997                                        PIPE_CONTROL_TILE_CACHE_FLUSH |
4998                                        PIPE_CONTROL_FLUSH_L3 |
4999                                        PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
5000                                        PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5001                                        /* Wa_1409600907:tgl */
5002                                        PIPE_CONTROL_DEPTH_STALL |
5003                                        PIPE_CONTROL_DC_FLUSH_ENABLE |
5004                                        PIPE_CONTROL_FLUSH_ENABLE);
5005
5006         return gen12_emit_fini_breadcrumb_tail(request, cs);
5007 }
5008
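/* The engine is about to idle: stop the timeslice and preemption-timeout timers. */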
5009 static void execlists_park(struct intel_engine_cs *engine)
5010 {
5011         cancel_timer(&engine->execlists.timer);
5012         cancel_timer(&engine->execlists.preempt);
5013 }
5014
5015 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
5016 {
5017         engine->submit_request = execlists_submit_request;
5018         engine->schedule = i915_schedule;
5019         engine->execlists.tasklet.func = execlists_submission_tasklet;
5020
5021         engine->reset.prepare = execlists_reset_prepare;
5022         engine->reset.rewind = execlists_reset_rewind;
5023         engine->reset.cancel = execlists_reset_cancel;
5024         engine->reset.finish = execlists_reset_finish;
5025
5026         engine->park = execlists_park;
5027         engine->unpark = NULL;
5028
5029         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5030         if (!intel_vgpu_active(engine->i915)) {
5031                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5032                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5033                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5034                         if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5035                                 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5036                 }
5037         }
5038
5039         if (INTEL_GEN(engine->i915) >= 12)
5040                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5041
5042         if (intel_engine_has_preemption(engine))
5043                 engine->emit_bb_start = gen8_emit_bb_start;
5044         else
5045                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
5046 }
5047
5048 static void execlists_shutdown(struct intel_engine_cs *engine)
5049 {
5050         /* Synchronise with residual timers and any softirq they raise */
5051         del_timer_sync(&engine->execlists.timer);
5052         del_timer_sync(&engine->execlists.preempt);
5053         tasklet_kill(&engine->execlists.tasklet);
5054 }
5055
5056 static void execlists_release(struct intel_engine_cs *engine)
5057 {
5058         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5059
5060         execlists_shutdown(engine);
5061
5062         intel_engine_cleanup_common(engine);
5063         lrc_destroy_wa_ctx(engine);
5064 }
5065
5066 static void
5067 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5068 {
5069         /* Default vfuncs which can be overridden by each engine. */
5070
5071         engine->resume = execlists_resume;
5072
5073         engine->cops = &execlists_context_ops;
5074         engine->request_alloc = execlists_request_alloc;
5075
5076         engine->emit_flush = gen8_emit_flush;
5077         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5078         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5079         if (INTEL_GEN(engine->i915) >= 12) {
5080                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5081                 engine->emit_flush = gen12_emit_flush;
5082         }
5083         engine->set_default_submission = intel_execlists_set_default_submission;
5084
5085         if (INTEL_GEN(engine->i915) < 11) {
5086                 engine->irq_enable = gen8_logical_ring_enable_irq;
5087                 engine->irq_disable = gen8_logical_ring_disable_irq;
5088         } else {
5089                 /*
5090                  * TODO: On Gen11 interrupt masks need to be clear
5091                  * to allow C6 entry. Keep interrupts enabled
5092                  * and take the hit of generating extra interrupts
5093                  * until a more refined solution exists.
5094                  */
5095         }
5096 }
5097
5098 static inline void
5099 logical_ring_default_irqs(struct intel_engine_cs *engine)
5100 {
5101         unsigned int shift = 0;
5102
5103         if (INTEL_GEN(engine->i915) < 11) {
5104                 const u8 irq_shifts[] = {
5105                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
5106                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
5107                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5108                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5109                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
5110                 };
5111
5112                 shift = irq_shifts[engine->id];
5113         }
5114
5115         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5116         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5117         engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5118         engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5119 }
5120
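/*
 * The render engine needs gen-specific flush and fini-breadcrumb emitters
 * on top of the defaults installed by logical_ring_default_vfuncs().
 */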
5121 static void rcs_submission_override(struct intel_engine_cs *engine)
5122 {
5123         switch (INTEL_GEN(engine->i915)) {
5124         case 12:
5125                 engine->emit_flush = gen12_emit_flush_render;
5126                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5127                 break;
5128         case 11:
5129                 engine->emit_flush = gen11_emit_flush_render;
5130                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5131                 break;
5132         default:
5133                 engine->emit_flush = gen8_emit_flush_render;
5134                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5135                 break;
5136         }
5137 }
5138
5139 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5140 {
5141         struct intel_engine_execlists * const execlists = &engine->execlists;
5142         struct drm_i915_private *i915 = engine->i915;
5143         struct intel_uncore *uncore = engine->uncore;
5144         u32 base = engine->mmio_base;
5145
5146         tasklet_init(&engine->execlists.tasklet,
5147                      execlists_submission_tasklet, (unsigned long)engine);
5148         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5149         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5150
5151         logical_ring_default_vfuncs(engine);
5152         logical_ring_default_irqs(engine);
5153
5154         if (engine->class == RENDER_CLASS)
5155                 rcs_submission_override(engine);
5156
5157         if (intel_init_workaround_bb(engine))
5158                 /*
5159                  * We continue even if we fail to initialize the WA batch
5160                  * because we only expect rare glitches, nothing critical
5161                  * enough to prevent us from using the GPU.
5162                  */
5163                 drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5164
5165         if (HAS_LOGICAL_RING_ELSQ(i915)) {
5166                 execlists->submit_reg = uncore->regs +
5167                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5168                 execlists->ctrl_reg = uncore->regs +
5169                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5170         } else {
5171                 execlists->submit_reg = uncore->regs +
5172                         i915_mmio_reg_offset(RING_ELSP(base));
5173         }
5174
5175         execlists->csb_status =
5176                 (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5177
5178         execlists->csb_write =
5179                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5180
5181         if (INTEL_GEN(i915) < 11)
5182                 execlists->csb_size = GEN8_CSB_ENTRIES;
5183         else
5184                 execlists->csb_size = GEN11_CSB_ENTRIES;
5185
5186         if (INTEL_GEN(engine->i915) >= 11) {
5187                 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5188                 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5189         }
5190
5191         /* Finally, take ownership and responsibility for cleanup! */
5192         engine->sanitize = execlists_sanitize;
5193         engine->release = execlists_release;
5194
5195         return 0;
5196 }
5197
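/*
 * Program the context-control, ring-control and timestamp registers that
 * are common across gens; 'inhibit' sets CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT
 * for images with no valid state to restore yet.
 */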
5198 static void init_common_reg_state(u32 * const regs,
5199                                   const struct intel_engine_cs *engine,
5200                                   const struct intel_ring *ring,
5201                                   bool inhibit)
5202 {
5203         u32 ctl;
5204
5205         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5206         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5207         if (inhibit)
5208                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5209         if (INTEL_GEN(engine->i915) < 11)
5210                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5211                                            CTX_CTRL_RS_CTX_ENABLE);
5212         regs[CTX_CONTEXT_CONTROL] = ctl;
5213
5214         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5215         regs[CTX_TIMESTAMP] = 0;
5216 }
5217
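/*
 * Point the context image at the per-context and indirect workaround batch
 * buffers, if this engine has them.
 */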
5218 static void init_wa_bb_reg_state(u32 * const regs,
5219                                  const struct intel_engine_cs *engine)
5220 {
5221         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5222
5223         if (wa_ctx->per_ctx.size) {
5224                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5225
5226                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5227                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5228                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5229         }
5230
5231         if (wa_ctx->indirect_ctx.size) {
5232                 lrc_ring_setup_indirect_ctx(regs, engine,
5233                                             i915_ggtt_offset(wa_ctx->vma) +
5234                                             wa_ctx->indirect_ctx.offset,
5235                                             wa_ctx->indirect_ctx.size);
5236         }
5237 }
5238
5239 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5240 {
5241         if (i915_vm_is_4lvl(&ppgtt->vm)) {
5242                 /* 64b PPGTT (48 bit canonical):
5243                  * PDP0_DESCRIPTOR contains the base address of the PML4;
5244                  * the other PDP descriptors are ignored.
5245                  */
5246                 ASSIGN_CTX_PML4(ppgtt, regs);
5247         } else {
5248                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
5249                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
5250                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
5251                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
5252         }
5253 }
5254
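/*
 * The GGTT itself has no ppgtt; contexts bound to it use its aliasing
 * ppgtt for their page tables instead.
 */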
5255 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5256 {
5257         if (i915_is_ggtt(vm))
5258                 return i915_vm_to_ggtt(vm)->alias;
5259         else
5260                 return i915_vm_to_ppgtt(vm);
5261 }
5262
5263 static void execlists_init_reg_state(u32 *regs,
5264                                      const struct intel_context *ce,
5265                                      const struct intel_engine_cs *engine,
5266                                      const struct intel_ring *ring,
5267                                      bool inhibit)
5268 {
5269         /*
5270          * A context is actually a big batch buffer with several
5271          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5272          * values we are setting here are only for the first context restore:
5273          * on a subsequent save, the GPU will recreate this batchbuffer with new
5274          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5275          * we are not initializing here).
5276          *
5277          * Must keep consistent with virtual_update_register_offsets().
5278          */
5279         set_offsets(regs, reg_offsets(engine), engine, inhibit);
5280
5281         init_common_reg_state(regs, engine, ring, inhibit);
5282         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5283
5284         init_wa_bb_reg_state(regs, engine);
5285
5286         __reset_stop_ring(regs, engine);
5287 }
5288
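/*
 * Initialise a freshly allocated context image: copy the engine's default
 * state if available (otherwise the first restore is inhibited), clear the
 * per-process HWSP and write the register state required before the first
 * execution.
 */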
5289 static int
5290 populate_lr_context(struct intel_context *ce,
5291                     struct drm_i915_gem_object *ctx_obj,
5292                     struct intel_engine_cs *engine,
5293                     struct intel_ring *ring)
5294 {
5295         bool inhibit = true;
5296         void *vaddr;
5297
5298         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5299         if (IS_ERR(vaddr)) {
5300                 drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5301                 return PTR_ERR(vaddr);
5302         }
5303
5304         set_redzone(vaddr, engine);
5305
5306         if (engine->default_state) {
5307                 shmem_read(engine->default_state, 0,
5308                            vaddr, engine->context_size);
5309                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5310                 inhibit = false;
5311         }
5312
5313         /* Clear the ppHWSP (inc. per-context counters) */
5314         memset(vaddr, 0, PAGE_SIZE);
5315
5316         /*
5317          * The second page of the context object contains some registers which
5318          * must be set up prior to the first execution.
5319          */
5320         execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5321                                  ce, engine, ring, inhibit);
5322
5323         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5324         i915_gem_object_unpin_map(ctx_obj);
5325         return 0;
5326 }
5327
5328 static struct intel_timeline *pinned_timeline(struct intel_context *ce)
5329 {
5330         struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
5331
5332         return intel_timeline_create_from_engine(ce->engine,
5333                                                  page_unmask_bits(tl));
5334 }
5335
5336 static int __execlists_context_alloc(struct intel_context *ce,
5337                                      struct intel_engine_cs *engine)
5338 {
5339         struct drm_i915_gem_object *ctx_obj;
5340         struct intel_ring *ring;
5341         struct i915_vma *vma;
5342         u32 context_size;
5343         int ret;
5344
5345         GEM_BUG_ON(ce->state);
5346         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5347
5348         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5349                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5350
5351         if (INTEL_GEN(engine->i915) == 12) {
5352                 ce->wa_bb_page = context_size / PAGE_SIZE;
5353                 context_size += PAGE_SIZE;
5354         }
5355
5356         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5357         if (IS_ERR(ctx_obj))
5358                 return PTR_ERR(ctx_obj);
5359
5360         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5361         if (IS_ERR(vma)) {
5362                 ret = PTR_ERR(vma);
5363                 goto error_deref_obj;
5364         }
5365
5366         if (!page_mask_bits(ce->timeline)) {
5367                 struct intel_timeline *tl;
5368
5369                 /*
5370                  * Use the static global HWSP for the kernel context, and
5371                  * a dynamically allocated cacheline for everyone else.
5372                  */
5373                 if (unlikely(ce->timeline))
5374                         tl = pinned_timeline(ce);
5375                 else
5376                         tl = intel_timeline_create(engine->gt);
5377                 if (IS_ERR(tl)) {
5378                         ret = PTR_ERR(tl);
5379                         goto error_deref_obj;
5380                 }
5381
5382                 ce->timeline = tl;
5383         }
5384
5385         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5386         if (IS_ERR(ring)) {
5387                 ret = PTR_ERR(ring);
5388                 goto error_deref_obj;
5389         }
5390
5391         ret = populate_lr_context(ce, ctx_obj, engine, ring);
5392         if (ret) {
5393                 drm_dbg(&engine->i915->drm,
5394                         "Failed to populate LRC: %d\n", ret);
5395                 goto error_ring_free;
5396         }
5397
5398         ce->ring = ring;
5399         ce->state = vma;
5400
5401         return 0;
5402
5403 error_ring_free:
5404         intel_ring_put(ring);
5405 error_deref_obj:
5406         i915_gem_object_put(ctx_obj);
5407         return ret;
5408 }
5409
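/*
 * A virtual engine carries at most one ready request at a time; it is
 * parked on the default priolist, which doubles as the virtual queue.
 */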
5410 static struct list_head *virtual_queue(struct virtual_engine *ve)
5411 {
5412         return &ve->base.execlists.default_priolist.requests[0];
5413 }
5414
5415 static void virtual_context_destroy(struct kref *kref)
5416 {
5417         struct virtual_engine *ve =
5418                 container_of(kref, typeof(*ve), context.ref);
5419         unsigned int n;
5420
5421         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5422         GEM_BUG_ON(ve->request);
5423         GEM_BUG_ON(ve->context.inflight);
5424
5425         for (n = 0; n < ve->num_siblings; n++) {
5426                 struct intel_engine_cs *sibling = ve->siblings[n];
5427                 struct rb_node *node = &ve->nodes[sibling->id].rb;
5428                 unsigned long flags;
5429
5430                 if (RB_EMPTY_NODE(node))
5431                         continue;
5432
5433                 spin_lock_irqsave(&sibling->active.lock, flags);
5434
5435                 /* Detachment is lazily performed in the execlists tasklet */
5436                 if (!RB_EMPTY_NODE(node))
5437                         rb_erase_cached(node, &sibling->execlists.virtual);
5438
5439                 spin_unlock_irqrestore(&sibling->active.lock, flags);
5440         }
5441         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5442
5443         if (ve->context.state)
5444                 __execlists_context_fini(&ve->context);
5445         intel_context_fini(&ve->context);
5446
5447         intel_engine_free_request_pool(&ve->base);
5448
5449         kfree(ve->bonds);
5450         kfree(ve);
5451 }
5452
5453 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5454 {
5455         int swp;
5456
5457         /*
5458          * Pick a random sibling on starting to help spread the load around.
5459          *
5460          * New contexts are typically created with exactly the same order
5461          * of siblings, and often started in batches. Due to the way we iterate
5462          * the array of siblings when submitting requests, sibling[0] is
5463          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5464          * randomised across the system, we also help spread the load by the
5465          * first engine we inspect being different each time.
5466          *
5467          * NB This does not force us to execute on this engine, it will just
5468          * typically be the first we inspect for submission.
5469          */
5470         swp = prandom_u32_max(ve->num_siblings);
5471         if (swp)
5472                 swap(ve->siblings[swp], ve->siblings[0]);
5473 }
5474
5475 static int virtual_context_alloc(struct intel_context *ce)
5476 {
5477         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5478
5479         return __execlists_context_alloc(ce, ve->siblings[0]);
5480 }
5481
5482 static int virtual_context_pin(struct intel_context *ce, void *vaddr)
5483 {
5484         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5485
5486         /* Note: we must use a real engine class for setting up reg state */
5487         return __execlists_context_pin(ce, ve->siblings[0], vaddr);
5488 }
5489
5490 static void virtual_context_enter(struct intel_context *ce)
5491 {
5492         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5493         unsigned int n;
5494
5495         for (n = 0; n < ve->num_siblings; n++)
5496                 intel_engine_pm_get(ve->siblings[n]);
5497
5498         intel_timeline_enter(ce->timeline);
5499 }
5500
5501 static void virtual_context_exit(struct intel_context *ce)
5502 {
5503         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5504         unsigned int n;
5505
5506         intel_timeline_exit(ce->timeline);
5507
5508         for (n = 0; n < ve->num_siblings; n++)
5509                 intel_engine_pm_put(ve->siblings[n]);
5510 }
5511
5512 static const struct intel_context_ops virtual_context_ops = {
5513         .alloc = virtual_context_alloc,
5514
5515         .pre_pin = execlists_context_pre_pin,
5516         .pin = virtual_context_pin,
5517         .unpin = execlists_context_unpin,
5518         .post_unpin = execlists_context_post_unpin,
5519
5520         .enter = virtual_context_enter,
5521         .exit = virtual_context_exit,
5522
5523         .destroy = virtual_context_destroy,
5524 };
5525
5526 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5527 {
5528         struct i915_request *rq;
5529         intel_engine_mask_t mask;
5530
5531         rq = READ_ONCE(ve->request);
5532         if (!rq)
5533                 return 0;
5534
5535         /* The rq is ready for submission; rq->execution_mask is now stable. */
5536         mask = rq->execution_mask;
5537         if (unlikely(!mask)) {
5538                 /* Invalid selection, submit to a random engine in error */
5539                 i915_request_set_error_once(rq, -ENODEV);
5540                 mask = ve->siblings[0]->mask;
5541         }
5542
5543         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5544                      rq->fence.context, rq->fence.seqno,
5545                      mask, ve->base.execlists.queue_priority_hint);
5546
5547         return mask;
5548 }
5549
static void virtual_submission_tasklet(unsigned long data)
{
	struct virtual_engine * const ve = (struct virtual_engine *)data;
	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
	intel_engine_mask_t mask;
	unsigned int n;

	rcu_read_lock();
	mask = virtual_submission_mask(ve);
	rcu_read_unlock();
	if (unlikely(!mask))
		return;

	local_irq_disable();
	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
		struct ve_node * const node = &ve->nodes[sibling->id];
		struct rb_node **parent, *rb;
		bool first;

		if (!READ_ONCE(ve->request))
			break; /* already handled by a sibling's tasklet */

		if (unlikely(!(mask & sibling->mask))) {
			if (!RB_EMPTY_NODE(&node->rb)) {
				spin_lock(&sibling->active.lock);
				rb_erase_cached(&node->rb,
						&sibling->execlists.virtual);
				RB_CLEAR_NODE(&node->rb);
				spin_unlock(&sibling->active.lock);
			}
			continue;
		}

		spin_lock(&sibling->active.lock);

		if (!RB_EMPTY_NODE(&node->rb)) {
			/*
			 * Cheat and avoid rebalancing the tree if we can
			 * reuse this node in situ.
			 */
			first = rb_first_cached(&sibling->execlists.virtual) ==
				&node->rb;
			if (prio == node->prio || (prio > node->prio && first))
				goto submit_engine;

			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
		}

		rb = NULL;
		first = true;
		parent = &sibling->execlists.virtual.rb_root.rb_node;
		while (*parent) {
			struct ve_node *other;

			rb = *parent;
			other = rb_entry(rb, typeof(*other), rb);
			if (prio > other->prio) {
				parent = &rb->rb_left;
			} else {
				parent = &rb->rb_right;
				first = false;
			}
		}

		rb_link_node(&node->rb, rb, parent);
		rb_insert_color_cached(&node->rb,
				       &sibling->execlists.virtual,
				       first);

submit_engine:
		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
		node->prio = prio;
		if (first && prio > sibling->execlists.queue_priority_hint)
			tasklet_hi_schedule(&sibling->execlists.tasklet);

		spin_unlock(&sibling->active.lock);
	}
	local_irq_enable();
}

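/*
 * The virtual engine carries only one request in flight at a time
 * (ve->request). Flush out any stale, already-completed request left behind
 * by a preempt-to-busy completion, then either submit the new request
 * directly if it too has already completed, or make it the pending request
 * and schedule the submission tasklet to offer it to the physical siblings.
 */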
static void virtual_submit_request(struct i915_request *rq)
{
	struct virtual_engine *ve = to_virtual_engine(rq->engine);
	struct i915_request *old;
	unsigned long flags;

	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
		     rq->fence.context,
		     rq->fence.seqno);

	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

	spin_lock_irqsave(&ve->base.active.lock, flags);

	old = ve->request;
	if (old) { /* background completion event from preempt-to-busy */
		GEM_BUG_ON(!i915_request_completed(old));
		__i915_request_submit(old);
		i915_request_put(old);
	}

	if (i915_request_completed(rq)) {
		__i915_request_submit(rq);

		ve->base.execlists.queue_priority_hint = INT_MIN;
		ve->request = NULL;
	} else {
		ve->base.execlists.queue_priority_hint = rq_prio(rq);
		ve->request = i915_request_get(rq);

		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
		list_move_tail(&rq->sched.link, virtual_queue(ve));

		tasklet_hi_schedule(&ve->base.execlists.tasklet);
	}

	spin_unlock_irqrestore(&ve->base.active.lock, flags);
}

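/* Look up the bond recorded for @master, or NULL if none has been attached. */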
static struct ve_bond *
virtual_find_bond(struct virtual_engine *ve,
		  const struct intel_engine_cs *master)
{
	int i;

	for (i = 0; i < ve->num_bonds; i++) {
		if (ve->bonds[i].master == master)
			return &ve->bonds[i];
	}

	return NULL;
}

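/*
 * Invoked via the bond_execute hook when the master request (the signaler)
 * is submitted for execution. Narrow the bonded request's execution_mask to
 * the siblings recorded for that master (always excluding the master's own
 * engine), and remove those engines from the master's execution_mask so the
 * master cannot later be re-run on an engine reserved for its bonded pair.
 */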
static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
	struct virtual_engine *ve = to_virtual_engine(rq->engine);
	intel_engine_mask_t allowed, exec;
	struct ve_bond *bond;

	allowed = ~to_request(signal)->engine->mask;

	bond = virtual_find_bond(ve, to_request(signal)->engine);
	if (bond)
		allowed &= bond->sibling_mask;

	/* Restrict the bonded request to run on only the available engines */
	exec = READ_ONCE(rq->execution_mask);
	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
		;

	/* Prevent the master from being re-run on the bonded engines */
	to_request(signal)->execution_mask &= ~allowed;
}

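/*
 * Create a virtual engine (load balancer) spanning @count physical
 * @siblings. A single sibling degenerates into an ordinary context on that
 * engine, and zero siblings is rejected. All siblings must share an engine
 * class and use the execlists submission backend.
 *
 * A minimal usage sketch (the engine pointers are illustrative only, e.g.
 * two video engines previously looked up by the caller):
 *
 *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
 *	struct intel_context *ce;
 *
 *	ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 */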
struct intel_context *
intel_execlists_create_virtual(struct intel_engine_cs **siblings,
			       unsigned int count)
{
	struct virtual_engine *ve;
	unsigned int n;
	int err;

	if (count == 0)
		return ERR_PTR(-EINVAL);

	if (count == 1)
		return intel_context_create(siblings[0]);

	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.class = OTHER_CLASS;
	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;

	/*
	 * The decision on whether to submit a request using semaphores
	 * depends on the saturated state of the engine. We only compute
	 * this during HW submission of the request, and we need this
	 * state to be applied globally to all requests being submitted
	 * to this engine. Virtual engines encompass more than one physical
	 * engine, so we cannot accurately tell in advance whether one of
	 * those engines is already saturated and thus cannot afford to use
	 * a semaphore without being pessimized in priority for doing so --
	 * if we are the only context using semaphores after all other
	 * clients have stopped, we will be starved on the saturated system.
	 * Such a global switch for semaphores is less than ideal, but alas
	 * is the current compromise.
	 */
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
	intel_engine_init_execlists(&ve->base);

	ve->base.cops = &virtual_context_ops;
	ve->base.request_alloc = execlists_request_alloc;

	ve->base.schedule = i915_schedule;
	ve->base.submit_request = virtual_submit_request;
	ve->base.bond_execute = virtual_bond_execute;

	INIT_LIST_HEAD(virtual_queue(ve));
	ve->base.execlists.queue_priority_hint = INT_MIN;
	tasklet_init(&ve->base.execlists.tasklet,
		     virtual_submission_tasklet,
		     (unsigned long)ve);

	intel_context_init(&ve->context, &ve->base);

	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
	if (!ve->base.breadcrumbs) {
		err = -ENOMEM;
		goto err_put;
	}

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		/*
		 * The virtual engine implementation is tightly coupled to
		 * the execlists backend -- we push requests directly
		 * into a tree inside each physical engine. We could support
		 * layering if we handled cloning of the requests and
		 * submitted a copy into each backend.
		 */
		if (sibling->execlists.tasklet.func !=
		    execlists_submission_tasklet) {
			err = -ENODEV;
			goto err_put;
		}

		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);

		ve->siblings[ve->num_siblings++] = sibling;
		ve->base.mask |= sibling->mask;

		/*
		 * All physical engines must be compatible for their emission
		 * functions (as we build the instructions during request
		 * construction and do not alter them before submission
		 * on the physical engine). We use the engine class as a guide
		 * here, although that could be refined.
		 */
		if (ve->base.class != OTHER_CLASS) {
			if (ve->base.class != sibling->class) {
				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
					  sibling->class, ve->base.class);
				err = -EINVAL;
				goto err_put;
			}
			continue;
		}

		ve->base.class = sibling->class;
		ve->base.uabi_class = sibling->uabi_class;
		snprintf(ve->base.name, sizeof(ve->base.name),
			 "v%dx%d", ve->base.class, count);
		ve->base.context_size = sibling->context_size;

		ve->base.emit_bb_start = sibling->emit_bb_start;
		ve->base.emit_flush = sibling->emit_flush;
		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
		ve->base.emit_fini_breadcrumb_dw =
			sibling->emit_fini_breadcrumb_dw;

		ve->base.flags = sibling->flags;
	}

	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;

	virtual_engine_initial_hint(ve);
	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}

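/*
 * Create a new virtual context backed by the same physical siblings as @src,
 * duplicating any bonds that have been attached to the source.
 */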
struct intel_context *
intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
	struct virtual_engine *se = to_virtual_engine(src);
	struct intel_context *dst;

	dst = intel_execlists_create_virtual(se->siblings,
					     se->num_siblings);
	if (IS_ERR(dst))
		return dst;

	if (se->num_bonds) {
		struct virtual_engine *de = to_virtual_engine(dst->engine);

		de->bonds = kmemdup(se->bonds,
				    sizeof(*se->bonds) * se->num_bonds,
				    GFP_KERNEL);
		if (!de->bonds) {
			intel_context_put(dst);
			return ERR_PTR(-ENOMEM);
		}

		de->num_bonds = se->num_bonds;
	}

	return dst;
}

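/*
 * Record a submit-fence bond: once a request on @master is submitted, the
 * paired request on the virtual @engine is restricted to run on @sibling.
 * Repeated calls for the same master accumulate into a single sibling mask.
 */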
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
				     const struct intel_engine_cs *master,
				     const struct intel_engine_cs *sibling)
{
	struct virtual_engine *ve = to_virtual_engine(engine);
	struct ve_bond *bond;
	int n;

	/* Sanity check the sibling is part of the virtual engine */
	for (n = 0; n < ve->num_siblings; n++)
		if (sibling == ve->siblings[n])
			break;
	if (n == ve->num_siblings)
		return -EINVAL;

	bond = virtual_find_bond(ve, master);
	if (bond) {
		bond->sibling_mask |= sibling->mask;
		return 0;
	}

	bond = krealloc(ve->bonds,
			sizeof(*bond) * (ve->num_bonds + 1),
			GFP_KERNEL);
	if (!bond)
		return -ENOMEM;

	bond[ve->num_bonds].master = master;
	bond[ve->num_bonds].sibling_mask = sibling->mask;

	ve->bonds = bond;
	ve->num_bonds++;

	return 0;
}

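/* Return the nth physical sibling backing the virtual @engine, or NULL. */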
struct intel_engine_cs *
intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
				 unsigned int sibling)
{
	struct virtual_engine *ve = to_virtual_engine(engine);

	if (sibling >= ve->num_siblings)
		return NULL;

	return ve->siblings[sibling];
}

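/*
 * Debug dump of the engine's execlists state: print up to @max of the
 * executing, queued and virtual requests via @show_request, eliding the
 * middle of each list (while still showing its final entry) when the list
 * is longer than @max.
 */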
void intel_execlists_show_requests(struct intel_engine_cs *engine,
				   struct drm_printer *m,
				   void (*show_request)(struct drm_printer *m,
							struct i915_request *rq,
							const char *prefix),
				   unsigned int max)
{
	const struct intel_engine_execlists *execlists = &engine->execlists;
	struct i915_request *rq, *last;
	unsigned long flags;
	unsigned int count;
	struct rb_node *rb;

	spin_lock_irqsave(&engine->active.lock, flags);

	last = NULL;
	count = 0;
	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (count++ < max - 1)
			show_request(m, rq, "\t\tE ");
		else
			last = rq;
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d executing requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\tE ");
	}

	if (execlists->switch_priority_hint != INT_MIN)
		drm_printf(m, "\t\tSwitch priority hint: %d\n",
			   READ_ONCE(execlists->switch_priority_hint));
	if (execlists->queue_priority_hint != INT_MIN)
		drm_printf(m, "\t\tQueue priority hint: %d\n",
			   READ_ONCE(execlists->queue_priority_hint));

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
		int i;

		priolist_for_each_request(rq, p, i) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\tQ ");
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d queued requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\tQ ");
	}

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		if (rq) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\tV ");
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d virtual requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\tV ");
	}

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

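/*
 * Reset the logical ring context after a GPU hang: optionally scrub the
 * context image back to the default state and rewind the ring to @head so
 * that only the breadcrumb needs to be replayed.
 */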
void intel_lr_context_reset(struct intel_engine_cs *engine,
			    struct intel_context *ce,
			    u32 head,
			    bool scrub)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	/*
	 * We want a simple context + ring to execute the breadcrumb update.
	 * We cannot rely on the context being intact across the GPU hang,
	 * so clear it and rebuild just what we need for the breadcrumb.
	 * All pending requests for this context will be zapped, and any
	 * future request will be after userspace has had the opportunity
	 * to recreate its own state.
	 */
	if (scrub)
		restore_default_state(ce, engine);

	/* Rerun the request; its payload has been neutered (if guilty). */
	__execlists_update_reg_state(ce, engine, head);
}

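/*
 * Report whether @engine currently uses the execlists submission backend,
 * identified by its set_default_submission vfunc.
 */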
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
{
	return engine->set_default_submission ==
	       intel_execlists_set_default_submission;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif