Merge tag 'drm-fixes-5.4-2019-10-16' of git://people.freedesktop.org/~agd5f/linux...
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 82b7ace..06a506c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
 #include "gem/i915_gem_context.h"
 
 #include "i915_drv.h"
-#include "i915_gem_render_state.h"
+#include "i915_perf.h"
+#include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
         (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw)        ((csb_dw) & 0xF) /* upper csb dword */
+#define GEN12_CSB_SW_CTX_ID_MASK               GENMASK(25, 15)
+#define GEN12_IDLE_CTX_ID              0x7FF
+#define GEN12_CSB_CTX_VALID(csb_dw) \
+       (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -214,13 +226,41 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
        return container_of(engine, struct virtual_engine, base);
 }
 
-static int execlists_context_deferred_alloc(struct intel_context *ce,
-                                           struct intel_engine_cs *engine);
+static int __execlists_context_alloc(struct intel_context *ce,
+                                    struct intel_engine_cs *engine);
+
 static void execlists_init_reg_state(u32 *reg_state,
                                     struct intel_context *ce,
                                     struct intel_engine_cs *engine,
                                     struct intel_ring *ring);
 
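+/*
+ * Mark the request as complete, first setting -EIO on its fence unless it
+ * has already been signaled.
+ */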
+static void mark_eio(struct i915_request *rq)
+{
+       if (!i915_request_signaled(rq))
+               dma_fence_set_error(&rq->fence, -EIO);
+       i915_request_mark_complete(rq);
+}
+
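+/* GGTT address of the HWS_PREEMPT dword used as the ring-pause semaphore. */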
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_PREEMPT_ADDR);
+}
+
+static inline void
+ring_set_paused(const struct intel_engine_cs *engine, int state)
+{
+       /*
+        * We inspect HWS_PREEMPT with a semaphore inside
+        * engine->emit_fini_breadcrumb. If the dword is true,
+        * the ring is paused as the semaphore will busywait
+        * until the dword is false.
+        */
+       engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
+       if (state)
+               wmb();
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
        return rb_entry(rb, struct i915_priolist, node);
@@ -235,6 +275,17 @@ static int effective_prio(const struct i915_request *rq)
 {
        int prio = rq_prio(rq);
 
+       /*
+        * If this request is special and must not be interrupted at any
+        * cost, so be it. Note we are only checking the most recent request
+        * in the context and so may be masking an earlier vip request. It
+        * is hoped that under the conditions where nopreempt is used, this
+        * will not matter (i.e. all requests to that context will be
+        * nopreempt for as long as desired).
+        */
+       if (i915_request_has_nopreempt(rq))
+               prio = I915_PRIORITY_UNPREEMPTABLE;
+
        /*
         * On unwinding the active request, we give it a priority bump
         * if it has completed waiting on any semaphore. If we know that
@@ -245,6 +296,7 @@ static int effective_prio(const struct i915_request *rq)
                prio |= I915_PRIORITY_NOSEMAPHORE;
 
        /* Restrict mere WAIT boosts from triggering preemption */
+       BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
        return prio | __NO_PREEMPTION;
 }
 
@@ -271,10 +323,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 {
        int last_prio;
 
-       if (!engine->preempt_context)
-               return false;
-
-       if (i915_request_completed(rq))
+       if (!intel_engine_has_semaphores(engine))
                return false;
 
        /*
@@ -338,9 +387,6 @@ __maybe_unused static inline bool
 assert_priority_queue(const struct i915_request *prev,
                      const struct i915_request *next)
 {
-       const struct intel_engine_execlists *execlists =
-               &prev->engine->execlists;
-
        /*
         * Without preemption, the prev may refer to the still active element
         * which we refuse to let go.
@@ -348,7 +394,7 @@ assert_priority_queue(const struct i915_request *prev,
         * Even with preemption, there are times when we think it is better not
         * to preempt and leave an ostensibly lower priority request in flight.
         */
-       if (port_request(execlists->port) == prev)
+       if (i915_request_is_active(prev))
                return true;
 
        return rq_prio(prev) >= rq_prio(next);
@@ -389,13 +435,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
        BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
 
-       desc = ctx->desc_template;                              /* bits  0-11 */
-       GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
+       desc = INTEL_LEGACY_32B_CONTEXT;
+       if (i915_vm_is_4lvl(ce->vm))
+               desc = INTEL_LEGACY_64B_CONTEXT;
+       desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+       desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+       if (IS_GEN(engine->i915, 8))
+               desc |= GEN8_CTX_L3LLC_COHERENT;
 
        desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
                                                                /* bits 12-31 */
-       GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
-
        /*
         * The following 32bits are copied into the OA reports (dword 2).
         * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
@@ -442,13 +492,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
                struct intel_engine_cs *owner;
 
                if (i915_request_completed(rq))
-                       break;
+                       continue; /* XXX */
 
                __i915_request_unsubmit(rq);
                unwind_wa_tail(rq);
 
-               GEM_BUG_ON(rq->hw_context->inflight);
-
                /*
                 * Push the request back into the queue for later resubmission.
                 * If this request is not native to this physical engine (i.e.
@@ -468,6 +516,19 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
                        list_move(&rq->sched.link, pl);
                        active = rq;
                } else {
+                       /*
+                        * Decouple the virtual breadcrumb before moving it
+                        * back to the virtual engine -- we don't want the
+                        * request to complete in the background and try
+                        * and cancel the breadcrumb on the virtual engine
+                        * (instead of the old engine where it is linked)!
+                        */
+                       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+                                    &rq->fence.flags)) {
+                               spin_lock(&rq->lock);
+                               i915_request_cancel_breadcrumb(rq);
+                               spin_unlock(&rq->lock);
+                       }
                        rq->engine = owner;
                        owner->submit_request(rq);
                        active = NULL;
@@ -500,32 +561,45 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
                                   status, rq);
 }
 
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
-                    const struct execlist_port *port)
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
 {
-       execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+       struct intel_engine_cs * const engine = rq->engine;
+       struct intel_context * const ce = rq->hw_context;
 
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
-       execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+       intel_context_get(ce);
+
+       intel_gt_pm_get(engine->gt);
+       execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+       intel_engine_context_in(engine);
+
+       return engine;
 }
 
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
 {
-       GEM_BUG_ON(rq->hw_context->inflight);
+       struct intel_context * const ce = rq->hw_context;
+       struct intel_engine_cs *old;
 
-       execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-       intel_engine_context_in(rq->engine);
-       rq->hw_context->inflight = rq->engine;
+       GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
+       trace_i915_request_in(rq, idx);
+
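+       /*
+        * ce->inflight encodes the engine that owns this context together
+        * with a submission count packed into the low pointer bits (see
+        * ptr_inc/ptr_unmask_bits). The first submission claims the
+        * context for this engine; repeat ELSP submissions merely bump
+        * the count.
+        */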
+       old = READ_ONCE(ce->inflight);
+       do {
+               if (!old) {
+                       WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+                       break;
+               }
+       } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
+
+       GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+       return i915_request_get(rq);
 }
 
-static void kick_siblings(struct i915_request *rq)
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
-       struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+       struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
        struct i915_request *next = READ_ONCE(ve->request);
 
        if (next && next->execution_mask & ~rq->execution_mask)
@@ -533,29 +607,52 @@ static void kick_siblings(struct i915_request *rq)
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+__execlists_schedule_out(struct i915_request *rq,
+                        struct intel_engine_cs * const engine)
 {
-       rq->hw_context->inflight = NULL;
-       intel_engine_context_out(rq->engine);
-       execlists_context_status_change(rq, status);
-       trace_i915_request_out(rq);
+       struct intel_context * const ce = rq->hw_context;
+
+       intel_engine_context_out(engine);
+       execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+       intel_gt_pm_put(engine->gt);
 
        /*
-        * If this is part of a virtual engine, its next request may have
-        * been blocked waiting for access to the active context. We have
-        * to kick all the siblings again in case we need to switch (e.g.
-        * the next request is not runnable on this engine). Hopefully,
-        * we will already have submitted the next request before the
-        * tasklet runs and do not need to rebuild each virtual tree
-        * and kick everyone again.
+        * If this is part of a virtual engine, its next request may
+        * have been blocked waiting for access to the active context.
+        * We have to kick all the siblings again in case we need to
+        * switch (e.g. the next request is not runnable on this
+        * engine). Hopefully, we will already have submitted the next
+        * request before the tasklet runs and do not need to rebuild
+        * each virtual tree and kick everyone again.
         */
-       if (rq->engine != rq->hw_context->engine)
-               kick_siblings(rq);
+       if (ce->engine != engine)
+               kick_siblings(rq, ce);
+
+       intel_context_put(ce);
+}
+
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+       struct intel_context * const ce = rq->hw_context;
+       struct intel_engine_cs *cur, *old;
+
+       trace_i915_request_out(rq);
+
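+       /*
+        * Release one submission reference on ce->inflight. While the
+        * count packed into the low pointer bits is non-zero we only
+        * decrement it; the final release clears the owner and performs
+        * the real schedule-out.
+        */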
+       old = READ_ONCE(ce->inflight);
+       do
+               cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+       while (!try_cmpxchg(&ce->inflight, &old, cur));
+       if (!cur)
+               __execlists_schedule_out(rq, old);
+
+       i915_request_put(rq);
 }
 
-static u64 execlists_update_context(struct i915_request *rq)
+static u64 execlists_update_context(const struct i915_request *rq)
 {
        struct intel_context *ce = rq->hw_context;
+       u64 desc;
 
        ce->lrc_reg_state[CTX_RING_TAIL + 1] =
                intel_ring_set_tail(rq->ring, rq->tail);
@@ -576,7 +673,11 @@ static u64 execlists_update_context(struct i915_request *rq)
         * wmb).
         */
        mb();
-       return ce->lrc_desc;
+
+       desc = ce->lrc_desc;
+       ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+
+       return desc;
 }
 
 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -590,12 +691,65 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
        }
 }
 
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+           const char *msg,
+           struct i915_request * const *ports)
+{
+       const struct intel_engine_cs *engine =
+               container_of(execlists, typeof(*engine), execlists);
+
+       GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+                 engine->name, msg,
+                 ports[0]->fence.context,
+                 ports[0]->fence.seqno,
+                 i915_request_completed(ports[0]) ? "!" :
+                 i915_request_started(ports[0]) ? "*" :
+                 "",
+                 ports[1] ? ports[1]->fence.context : 0,
+                 ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+                    const char *msg)
+{
+       struct i915_request * const *port, *rq;
+       struct intel_context *ce = NULL;
+
+       trace_ports(execlists, msg, execlists->pending);
+
+       if (!execlists->pending[0])
+               return false;
+
+       if (execlists->pending[execlists_num_ports(execlists)])
+               return false;
+
+       for (port = execlists->pending; (rq = *port); port++) {
+               if (ce == rq->hw_context)
+                       return false;
+
+               ce = rq->hw_context;
+               if (i915_request_completed(rq))
+                       continue;
+
+               if (i915_active_is_idle(&ce->active))
+                       return false;
+
+               if (!i915_vma_is_pinned(ce->state))
+                       return false;
+       }
+
+       return ce;
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
        unsigned int n;
 
+       GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
        /*
         * We can skip acquiring intel_runtime_pm_get() here as it was taken
         * on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -604,7 +758,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
         * that all ELSP are drained i.e. we have processed the CSB,
         * before allowing ourselves to idle and calling intel_runtime_pm_put().
         */
-       GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref));
+       GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 
        /*
         * ELSQ note: the submit queue is not cleared after being submitted
@@ -613,38 +767,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
         * of elsq entries, keep this in mind before changing the loop below.
         */
        for (n = execlists_num_ports(execlists); n--; ) {
-               struct i915_request *rq;
-               unsigned int count;
-               u64 desc;
-
-               rq = port_unpack(&port[n], &count);
-               if (rq) {
-                       GEM_BUG_ON(count > !n);
-                       if (!count++)
-                               execlists_context_schedule_in(rq);
-                       port_set(&port[n], port_pack(rq, count));
-                       desc = execlists_update_context(rq);
-                       GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
-                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-                                 engine->name, n,
-                                 port[n].context_id, count,
-                                 rq->fence.context, rq->fence.seqno,
-                                 hwsp_seqno(rq),
-                                 rq_prio(rq));
-               } else {
-                       GEM_BUG_ON(!n);
-                       desc = 0;
-               }
+               struct i915_request *rq = execlists->pending[n];
 
-               write_desc(execlists, desc, n);
+               write_desc(execlists,
+                          rq ? execlists_update_context(rq) : 0,
+                          n);
        }
 
        /* we need to manually load the submit queue */
        if (execlists->ctrl_reg)
                writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -668,64 +800,24 @@ static bool can_merge_ctx(const struct intel_context *prev,
 static bool can_merge_rq(const struct i915_request *prev,
                         const struct i915_request *next)
 {
+       GEM_BUG_ON(prev == next);
        GEM_BUG_ON(!assert_priority_queue(prev, next));
 
-       if (!can_merge_ctx(prev->hw_context, next->hw_context))
-               return false;
-
-       return true;
-}
-
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
-       GEM_BUG_ON(rq == port_request(port));
-
-       if (port_isset(port))
-               i915_request_put(port_request(port));
-
-       port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
-       struct intel_engine_execlists *execlists = &engine->execlists;
-       struct intel_context *ce = engine->preempt_context;
-       unsigned int n;
-
-       GEM_BUG_ON(execlists->preempt_complete_status !=
-                  upper_32_bits(ce->lrc_desc));
-
        /*
-        * Switch to our empty preempt context so
-        * the state of the GPU is known (idle).
+        * We do not submit known completed requests. Therefore if the next
+        * request is already completed, we can pretend to merge it in
+        * with the previous context (and we will skip updating the ELSP
+        * and tracking). Thus hopefully keeping the ELSP full with active
+        * contexts, despite the best efforts of preempt-to-busy to confuse
+        * us.
         */
-       GEM_TRACE("%s\n", engine->name);
-       for (n = execlists_num_ports(execlists); --n; )
-               write_desc(execlists, 0, n);
-
-       write_desc(execlists, ce->lrc_desc, n);
-
-       /* we need to manually load the submit queue */
-       if (execlists->ctrl_reg)
-               writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
-       execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
-       (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
-       GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+       if (i915_request_completed(next))
+               return true;
 
-       if (inject_preempt_hang(execlists))
-               return;
+       if (!can_merge_ctx(prev->hw_context, next->hw_context))
+               return false;
 
-       execlists_cancel_port_requests(execlists);
-       __unwind_incomplete_requests(container_of(execlists,
-                                                 struct intel_engine_cs,
-                                                 execlists));
+       return true;
 }
 
 static void virtual_update_register_offsets(u32 *regs,
@@ -792,7 +884,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
         * we reuse the register offsets). This is a very small
         * hysteresis on the greedy selection algorithm.
         */
-       inflight = READ_ONCE(ve->context.inflight);
+       inflight = intel_context_inflight(&ve->context);
        if (inflight && inflight != engine)
                return false;
 
@@ -815,13 +907,120 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
        spin_unlock(&old->breadcrumbs.irq_lock);
 }
 
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+       struct i915_request * const *last = READ_ONCE(execlists->active);
+
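+       /*
+        * Skip over requests that have already completed but, courtesy of
+        * preempt-to-busy, still occupy a slot in active[] until the next
+        * CSB event is processed.
+        */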
+       while (*last && i915_request_completed(*last))
+               last++;
+
+       return *last;
+}
+
+static void defer_request(struct i915_request *rq, struct list_head * const pl)
+{
+       LIST_HEAD(list);
+
+       /*
+        * We want to move the interrupted request to the back of
+        * the round-robin list (i.e. its priority level), but
+        * in doing so, we must then move all requests that were in
+        * flight and were waiting for the interrupted request to
+        * be run after it again.
+        */
+       do {
+               struct i915_dependency *p;
+
+               GEM_BUG_ON(i915_request_is_active(rq));
+               list_move_tail(&rq->sched.link, pl);
+
+               list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+                       struct i915_request *w =
+                               container_of(p->waiter, typeof(*w), sched);
+
+                       /* Leave semaphores spinning on the other engines */
+                       if (w->engine != rq->engine)
+                               continue;
+
+                       /* No waiter should start before its signaler */
+                       GEM_BUG_ON(i915_request_started(w) &&
+                                  !i915_request_completed(rq));
+
+                       GEM_BUG_ON(i915_request_is_active(w));
+                       if (list_empty(&w->sched.link))
+                               continue; /* Not yet submitted; unready */
+
+                       if (rq_prio(w) < rq_prio(rq))
+                               continue;
+
+                       GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+                       list_move_tail(&w->sched.link, &list);
+               }
+
+               rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+       } while (rq);
+}
+
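+/*
+ * Used on timeslice expiry: unwind the requests currently in flight and
+ * push them to the back of their priority level so that an equally
+ * important rival gets a chance to execute.
+ */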
+static void defer_active(struct intel_engine_cs *engine)
+{
+       struct i915_request *rq;
+
+       rq = __unwind_incomplete_requests(engine);
+       if (!rq)
+               return;
+
+       defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+}
+
+static bool
+need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+       int hint;
+
+       if (!intel_engine_has_semaphores(engine))
+               return false;
+
+       if (list_is_last(&rq->sched.link, &engine->active.requests))
+               return false;
+
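+       /*
+        * Compare against whichever is more important: the request queued
+        * immediately behind us in the active list or the best of the
+        * priority queue. Only an equal or higher priority rival expires
+        * our timeslice.
+        */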
+       hint = max(rq_prio(list_next_entry(rq, sched.link)),
+                  engine->execlists.queue_priority_hint);
+
+       return hint >= effective_prio(rq);
+}
+
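+/*
+ * Record the priority of the request that will run after the one being
+ * submitted; enable_timeslice() compares it against the running request
+ * once the promotion event is seen in the CSB.
+ */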
+static int
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+       if (list_is_last(&rq->sched.link, &engine->active.requests))
+               return INT_MIN;
+
+       return rq_prio(list_next_entry(rq, sched.link));
+}
+
+static bool
+enable_timeslice(const struct intel_engine_execlists *execlists)
+{
+       const struct i915_request *rq = *execlists->active;
+
+       if (i915_request_completed(rq))
+               return false;
+
+       return execlists->switch_priority_hint >= effective_prio(rq);
+}
+
+static void record_preemption(struct intel_engine_execlists *execlists)
+{
+       (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
-       const struct execlist_port * const last_port =
-               &execlists->port[execlists->port_mask];
-       struct i915_request *last = port_request(port);
+       struct i915_request **port = execlists->pending;
+       struct i915_request ** const last_port = port + execlists->port_mask;
+       struct i915_request *last;
        struct rb_node *rb;
        bool submit = false;
 
@@ -867,65 +1066,100 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                break;
        }
 
+       /*
+        * If the queue is higher priority than the last
+        * request in the currently active context, submit afresh.
+        * We will resubmit again afterwards in case we need to split
+        * the active context to interject the preemption request,
+        * i.e. we will retrigger preemption following the ack in case
+        * of trouble.
+        */
+       last = last_active(execlists);
        if (last) {
-               /*
-                * Don't resubmit or switch until all outstanding
-                * preemptions (lite-restore) are seen. Then we
-                * know the next preemption status we see corresponds
-                * to this ELSP update.
-                */
-               GEM_BUG_ON(!execlists_is_active(execlists,
-                                               EXECLISTS_ACTIVE_USER));
-               GEM_BUG_ON(!port_count(&port[0]));
+               if (need_preempt(engine, last, rb)) {
+                       GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
+                                 engine->name,
+                                 last->fence.context,
+                                 last->fence.seqno,
+                                 last->sched.attr.priority,
+                                 execlists->queue_priority_hint);
+                       record_preemption(execlists);
 
-               /*
-                * If we write to ELSP a second time before the HW has had
-                * a chance to respond to the previous write, we can confuse
-                * the HW and hit "undefined behaviour". After writing to ELSP,
-                * we must then wait until we see a context-switch event from
-                * the HW to indicate that it has had a chance to respond.
-                */
-               if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-                       return;
+                       /*
+                        * Don't let the RING_HEAD advance past the breadcrumb
+                        * as we unwind (and until we resubmit) so that we do
+                        * not accidentally tell it to go backwards.
+                        */
+                       ring_set_paused(engine, 1);
 
-               if (need_preempt(engine, last, rb)) {
-                       inject_preempt_context(engine);
-                       return;
-               }
+                       /*
+                        * Note that we have not stopped the GPU at this point,
+                        * so we are unwinding the incomplete requests while
+                        * they remain inflight; by the time the preemption
+                        * completes, some of the unwound requests may already
+                        * have completed!
+                        */
+                       __unwind_incomplete_requests(engine);
 
-               /*
-                * In theory, we could coalesce more requests onto
-                * the second port (the first port is active, with
-                * no preemptions pending). However, that means we
-                * then have to deal with the possible lite-restore
-                * of the second port (as we submit the ELSP, there
-                * may be a context-switch) but also we may complete
-                * the resubmission before the context-switch. Ergo,
-                * coalescing onto the second port will cause a
-                * preemption event, but we cannot predict whether
-                * that will affect port[0] or port[1].
-                *
-                * If the second port is already active, we can wait
-                * until the next context-switch before contemplating
-                * new requests. The GPU will be busy and we should be
-                * able to resubmit the new ELSP before it idles,
-                * avoiding pipeline bubbles (momentary pauses where
-                * the driver is unable to keep up the supply of new
-                * work). However, we have to double check that the
-                * priorities of the ports haven't been switch.
-                */
-               if (port_count(&port[1]))
-                       return;
+                       /*
+                        * If we need to return to the preempted context, we
+                        * need to skip the lite-restore and force it to
+                        * reload the RING_TAIL. Otherwise, the HW has a
+                        * tendency to ignore us rewinding the TAIL to the
+                        * end of an earlier request.
+                        */
+                       last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+                       last = NULL;
+               } else if (need_timeslice(engine, last) &&
+                          !timer_pending(&engine->execlists.timer)) {
+                       GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
+                                 engine->name,
+                                 last->fence.context,
+                                 last->fence.seqno,
+                                 last->sched.attr.priority,
+                                 execlists->queue_priority_hint);
 
-               /*
-                * WaIdleLiteRestore:bdw,skl
-                * Apply the wa NOOPs to prevent
-                * ring:HEAD == rq:TAIL as we resubmit the
-                * request. See gen8_emit_fini_breadcrumb() for
-                * where we prepare the padding after the
-                * end of the request.
-                */
-               last->tail = last->wa_tail;
+                       ring_set_paused(engine, 1);
+                       defer_active(engine);
+
+                       /*
+                        * Unlike for preemption, if we rewind and continue
+                        * executing the same context as previously active,
+                        * the order of execution will remain the same and
+                        * the tail will only advance. We do not need to
+                        * force a full context restore, as a lite-restore
+                        * is sufficient to resample the monotonic TAIL.
+                        *
+                        * If we switch to any other context, we similarly will
+                        * not rewind the TAIL of the current context; the
+                        * normal save/restore will preserve its state and
+                        * allow us to continue executing the same request later.
+                        */
+                       last = NULL;
+               } else {
+                       /*
+                        * Otherwise if we already have a request pending
+                        * for execution after the current one, we can
+                        * just wait until the next CS event before
+                        * queuing more. In either case we will force a
+                        * lite-restore preemption event, but if we wait
+                        * we hopefully coalesce several updates into a single
+                        * submission.
+                        */
+                       if (!list_is_last(&last->sched.link,
+                                         &engine->active.requests))
+                               return;
+
+                       /*
+                        * WaIdleLiteRestore:bdw,skl
+                        * Apply the wa NOOPs to prevent
+                        * ring:HEAD == rq:TAIL as we resubmit the
+                        * request. See gen8_emit_fini_breadcrumb() for
+                        * where we prepare the padding after the
+                        * end of the request.
+                        */
+                       last->tail = last->wa_tail;
+               }
        }
 
        while (rb) { /* XXX virtual is always taking precedence */
@@ -957,7 +1191,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
                        if (last && !can_merge_rq(last, rq)) {
                                spin_unlock(&ve->base.active.lock);
-                               return; /* leave this rq for another engine */
+                               return; /* leave this for another */
                        }
 
                        GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
@@ -1005,10 +1239,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                GEM_BUG_ON(ve->siblings[0] != engine);
                        }
 
-                       __i915_request_submit(rq);
-                       trace_i915_request_in(rq, port_index(port, execlists));
-                       submit = true;
-                       last = rq;
+                       if (__i915_request_submit(rq)) {
+                               submit = true;
+                               last = rq;
+                       }
+                       i915_request_put(rq);
+
+                       /*
+                        * Hmm, we have a bunch of virtual engine requests,
+                        * but the first one was already completed (thanks
+                        * preempt-to-busy!). Keep looking at the veng queue
+                        * until we have no more relevant requests (i.e.
+                        * the normal submit queue has higher priority).
+                        */
+                       if (!submit) {
+                               spin_unlock(&ve->base.active.lock);
+                               rb = rb_first_cached(&execlists->virtual);
+                               continue;
+                       }
                }
 
                spin_unlock(&ve->base.active.lock);
@@ -1021,6 +1269,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                int i;
 
                priolist_for_each_request_consume(rq, rn, p, i) {
+                       bool merge = true;
+
                        /*
                         * Can we combine this request with the current port?
                         * It has to be the same context/ringbuffer and not
@@ -1060,19 +1310,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                    ctx_single_port_submission(rq->hw_context))
                                        goto done;
 
-
-                               if (submit)
-                                       port_assign(port, last);
-                               port++;
-
-                               GEM_BUG_ON(port_isset(port));
+                               merge = false;
                        }
 
-                       __i915_request_submit(rq);
-                       trace_i915_request_in(rq, port_index(port, execlists));
+                       if (__i915_request_submit(rq)) {
+                               if (!merge) {
+                                       *port = execlists_schedule_in(last, port - execlists->pending);
+                                       port++;
+                                       last = NULL;
+                               }
 
-                       last = rq;
-                       submit = true;
+                               GEM_BUG_ON(last &&
+                                          !can_merge_ctx(last->hw_context,
+                                                         rq->hw_context));
+
+                               submit = true;
+                               last = rq;
+                       }
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
@@ -1097,54 +1351,34 @@ done:
         * interrupt for secondary ports).
         */
        execlists->queue_priority_hint = queue_prio(execlists);
+       GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
+                 engine->name, execlists->queue_priority_hint,
+                 yesno(submit));
 
        if (submit) {
-               port_assign(port, last);
+               *port = execlists_schedule_in(last, port - execlists->pending);
+               memset(port + 1, 0, (last_port - port) * sizeof(*port));
+               execlists->switch_priority_hint =
+                       switch_prio(engine, *execlists->pending);
                execlists_submit_ports(engine);
+       } else {
+               ring_set_paused(engine, 0);
        }
-
-       /* We must always keep the beast fed if we have work piled up */
-       GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-                  !port_isset(execlists->port));
-
-       /* Re-evaluate the executing context setup after each preemptive kick */
-       if (last)
-               execlists_user_begin(execlists, execlists->port);
-
-       /* If the engine is now idle, so should be the flag; and vice versa. */
-       GEM_BUG_ON(execlists_is_active(&engine->execlists,
-                                      EXECLISTS_ACTIVE_USER) ==
-                  !port_isset(engine->execlists.port));
 }
 
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-       struct execlist_port *port = execlists->port;
-       unsigned int num_ports = execlists_num_ports(execlists);
-
-       while (num_ports-- && port_isset(port)) {
-               struct i915_request *rq = port_request(port);
+       struct i915_request * const *port, *rq;
 
-               GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
-                         rq->engine->name,
-                         (unsigned int)(port - execlists->port),
-                         rq->fence.context, rq->fence.seqno,
-                         hwsp_seqno(rq));
-
-               GEM_BUG_ON(!execlists->active);
-               execlists_context_schedule_out(rq,
-                                              i915_request_completed(rq) ?
-                                              INTEL_CONTEXT_SCHEDULE_OUT :
-                                              INTEL_CONTEXT_SCHEDULE_PREEMPTED);
-
-               i915_request_put(rq);
-
-               memset(port, 0, sizeof(*port));
-               port++;
-       }
+       for (port = execlists->pending; (rq = *port); port++)
+               execlists_schedule_out(rq);
+       memset(execlists->pending, 0, sizeof(execlists->pending));
 
-       execlists_clear_all_active(execlists);
+       for (port = execlists->active; (rq = *port); port++)
+               execlists_schedule_out(rq);
+       execlists->active =
+               memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
 static inline void
@@ -1160,15 +1394,100 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
        return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
 }
 
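+/*
+ * Each CSB event is reduced to one of four actions on the local ports:
+ * do nothing, promote pending[] to inflight[], preempt (retire everything
+ * in inflight[] before promoting), or retire the head of inflight[].
+ */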
+enum csb_step {
+       CSB_NOP,
+       CSB_PROMOTE,
+       CSB_PREEMPT,
+       CSB_COMPLETE,
+};
+
+/*
+ * Starting with Gen12, the status has a new format:
+ *
+ *     bit  0:     switched to new queue
+ *     bit  1:     reserved
+ *     bit  2:     semaphore wait mode (poll or signal), only valid when
+ *                 switch detail is set to "wait on semaphore"
+ *     bits 3-5:   engine class
+ *     bits 6-11:  engine instance
+ *     bits 12-14: reserved
+ *     bits 15-25: sw context id of the lrc the GT switched to
+ *     bits 26-31: sw counter of the lrc the GT switched to
+ *     bits 32-35: context switch detail
+ *                  - 0: ctx complete
+ *                  - 1: wait on sync flip
+ *                  - 2: wait on vblank
+ *                  - 3: wait on scanline
+ *                  - 4: wait on semaphore
+ *                  - 5: context preempted (not on SEMAPHORE_WAIT or
+ *                       WAIT_FOR_EVENT)
+ *     bit  36:    reserved
+ *     bits 37-43: wait detail (for switch detail 1 to 4)
+ *     bits 44-46: reserved
+ *     bits 47-57: sw context id of the lrc the GT switched away from
+ *     bits 58-63: sw counter of the lrc the GT switched away from
+ *     bits 58-63: sw counter of the lrc the GT switched away from
+ */
+static inline enum csb_step
+gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+       u32 lower_dw = csb[0];
+       u32 upper_dw = csb[1];
+       bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
+       bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
+       bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+
+       if (!ctx_away_valid && ctx_to_valid)
+               return CSB_PROMOTE;
+
+       /*
+        * The context switch detail is not guaranteed to be 5 when a preemption
+        * occurs, so we can't just check for that. The check below works for
+        * all the cases we care about, including preemptions of WAIT
+        * instructions and lite-restore. Preempt-to-idle via the CTRL register
+        * would require some extra handling, but we don't support that.
+        */
+       if (new_queue && ctx_away_valid)
+               return CSB_PREEMPT;
+
+       /*
+        * switch detail = 5 is covered by the case above and we do not expect a
+        * context switch on an unsuccessful wait instruction since we always
+        * use polling mode.
+        */
+       GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+
+       if (*execlists->active) {
+               GEM_BUG_ON(!ctx_away_valid);
+               return CSB_COMPLETE;
+       }
+
+       return CSB_NOP;
+}
+
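+/*
+ * Before Gen12 the CSB event carries explicit status bits: idle->active
+ * signals a promotion and PREEMPTED a preemption; any other event while a
+ * context is still inflight is treated as a completion.
+ */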
+static inline enum csb_step
+gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+       unsigned int status = *csb;
+
+       if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
+               return CSB_PROMOTE;
+
+       if (status & GEN8_CTX_STATUS_PREEMPTED)
+               return CSB_PREEMPT;
+
+       if (*execlists->active)
+               return CSB_COMPLETE;
+
+       return CSB_NOP;
+}
+
 static void process_csb(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
        const u32 * const buf = execlists->csb_status;
        const u8 num_entries = execlists->csb_size;
        u8 head, tail;
 
-       lockdep_assert_held(&engine->active.lock);
        GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
        /*
@@ -1198,9 +1517,7 @@ static void process_csb(struct intel_engine_cs *engine)
        rmb();
 
        do {
-               struct i915_request *rq;
-               unsigned int status;
-               unsigned int count;
+               enum csb_step csb_step;
 
                if (++head == num_entries)
                        head = 0;
@@ -1223,68 +1540,43 @@ static void process_csb(struct intel_engine_cs *engine)
                 * status notifier.
                 */
 
-               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
                          engine->name, head,
-                         buf[2 * head + 0], buf[2 * head + 1],
-                         execlists->active);
-
-               status = buf[2 * head];
-               if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                             GEN8_CTX_STATUS_PREEMPTED))
-                       execlists_set_active(execlists,
-                                            EXECLISTS_ACTIVE_HWACK);
-               if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-                       execlists_clear_active(execlists,
-                                              EXECLISTS_ACTIVE_HWACK);
-
-               if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                       continue;
+                         buf[2 * head + 0], buf[2 * head + 1]);
 
-               /* We should never get a COMPLETED | IDLE_ACTIVE! */
-               GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+               if (INTEL_GEN(engine->i915) >= 12)
+                       csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+               else
+                       csb_step = gen8_csb_parse(execlists, buf + 2 * head);
 
-               if (status & GEN8_CTX_STATUS_COMPLETE &&
-                   buf[2*head + 1] == execlists->preempt_complete_status) {
-                       GEM_TRACE("%s preempt-idle\n", engine->name);
-                       complete_preempt_context(execlists);
-                       continue;
-               }
+               switch (csb_step) {
+               case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
+                       trace_ports(execlists, "preempted", execlists->active);
 
-               if (status & GEN8_CTX_STATUS_PREEMPTED &&
-                   execlists_is_active(execlists,
-                                       EXECLISTS_ACTIVE_PREEMPT))
-                       continue;
+                       while (*execlists->active)
+                               execlists_schedule_out(*execlists->active++);
 
-               GEM_BUG_ON(!execlists_is_active(execlists,
-                                               EXECLISTS_ACTIVE_USER));
+                       /* fallthrough */
+               case CSB_PROMOTE: /* switch pending to inflight */
+                       GEM_BUG_ON(*execlists->active);
+                       GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+                       execlists->active =
+                               memcpy(execlists->inflight,
+                                      execlists->pending,
+                                      execlists_num_ports(execlists) *
+                                      sizeof(*execlists->pending));
 
-               rq = port_unpack(port, &count);
-               GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-                         engine->name,
-                         port->context_id, count,
-                         rq ? rq->fence.context : 0,
-                         rq ? rq->fence.seqno : 0,
-                         rq ? hwsp_seqno(rq) : 0,
-                         rq ? rq_prio(rq) : 0);
+                       if (enable_timeslice(execlists))
+                               mod_timer(&execlists->timer, jiffies + 1);
 
-               /* Check the context/desc id for this event matches */
-               GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+                       if (!inject_preempt_hang(execlists))
+                               ring_set_paused(engine, 0);
 
-               GEM_BUG_ON(count == 0);
-               if (--count == 0) {
-                       /*
-                        * On the final event corresponding to the
-                        * submission of this context, we expect either
-                        * an element-switch event or a completion
-                        * event (and on completion, the active-idle
-                        * marker). No more preemptions, lite-restore
-                        * or otherwise.
-                        */
-                       GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                       GEM_BUG_ON(port_isset(&port[1]) &&
-                                  !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-                       GEM_BUG_ON(!port_isset(&port[1]) &&
-                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+                       WRITE_ONCE(execlists->pending[0], NULL);
+                       break;
+
+               case CSB_COMPLETE: /* port0 completed, advanced to port1 */
+                       trace_ports(execlists, "completed", execlists->active);
 
                        /*
                         * We rely on the hardware being strongly
@@ -1292,22 +1584,16 @@ static void process_csb(struct intel_engine_cs *engine)
                         * coherent (visible from the CPU) before the
                         * user interrupt and CSB is processed.
                         */
-                       GEM_BUG_ON(!i915_request_completed(rq));
+                       GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
+                                  !reset_in_progress(execlists));
+                       execlists_schedule_out(*execlists->active++);
 
-                       execlists_context_schedule_out(rq,
-                                                      INTEL_CONTEXT_SCHEDULE_OUT);
-                       i915_request_put(rq);
-
-                       GEM_TRACE("%s completed ctx=%d\n",
-                                 engine->name, port->context_id);
+                       GEM_BUG_ON(execlists->active - execlists->inflight >
+                                  execlists_num_ports(execlists));
+                       break;
 
-                       port = execlists_port_complete(execlists, port);
-                       if (port_isset(port))
-                               execlists_user_begin(execlists, port);
-                       else
-                               execlists_user_end(execlists);
-               } else {
-                       port_set(port, port_pack(rq, count));
+               case CSB_NOP:
+                       break;
                }
        } while (head != tail);
 
@@ -1330,10 +1616,11 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
        lockdep_assert_held(&engine->active.lock);
-
-       process_csb(engine);
-       if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+       if (!engine->execlists.pending[0]) {
+               rcu_read_lock(); /* protect peeking at execlists->active */
                execlists_dequeue(engine);
+               rcu_read_unlock();
+       }
 }
 
 /*
@@ -1345,14 +1632,21 @@ static void execlists_submission_tasklet(unsigned long data)
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        unsigned long flags;
 
-       GEM_TRACE("%s awake?=%d, active=%x\n",
-                 engine->name,
-                 !!intel_wakeref_active(&engine->wakeref),
-                 engine->execlists.active);
+       process_csb(engine);
+       if (!READ_ONCE(engine->execlists.pending[0])) {
+               spin_lock_irqsave(&engine->active.lock, flags);
+               __execlists_submission_tasklet(engine);
+               spin_unlock_irqrestore(&engine->active.lock, flags);
+       }
+}
+
+static void execlists_submission_timer(struct timer_list *timer)
+{
+       struct intel_engine_cs *engine =
+               from_timer(engine, timer, execlists.timer);
 
-       spin_lock_irqsave(&engine->active.lock, flags);
-       __execlists_submission_tasklet(engine);
-       spin_unlock_irqrestore(&engine->active.lock, flags);
+       /* Kick the tasklet for some interrupt coalescing and reset handling */
+       tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1376,12 +1670,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
                tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+                        const struct i915_request *rq)
 {
-       if (prio > engine->execlists.queue_priority_hint) {
-               engine->execlists.queue_priority_hint = prio;
-               __submit_queue_imm(engine);
-       }
+       struct intel_engine_execlists *execlists = &engine->execlists;
+
+       if (rq_prio(rq) <= execlists->queue_priority_hint)
+               return;
+
+       execlists->queue_priority_hint = rq_prio(rq);
+       __submit_queue_imm(engine);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1397,7 +1695,7 @@ static void execlists_submit_request(struct i915_request *request)
        GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
        GEM_BUG_ON(list_empty(&request->sched.link));
 
-       submit_queue(engine, rq_prio(request));
+       submit_queue(engine, request);
 
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -1405,9 +1703,7 @@ static void execlists_submit_request(struct i915_request *request)
 static void __execlists_context_fini(struct intel_context *ce)
 {
        intel_ring_put(ce->ring);
-
-       GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
-       i915_gem_object_put(ce->state->obj);
+       i915_vma_put(ce->state);
 }
 
 static void execlists_context_destroy(struct kref *kref)
@@ -1420,13 +1716,45 @@ static void execlists_context_destroy(struct kref *kref)
        if (ce->state)
                __execlists_context_fini(ce);
 
+       intel_context_fini(ce);
        intel_context_free(ce);
 }
 
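+/*
+ * In debug builds a page of POISON_INUSE is placed immediately after the
+ * context image; if the HW (or the driver) scribbles past the end of the
+ * context state, the damage is reported when the context is unpinned.
+ */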
+static void
+set_redzone(void *vaddr, const struct intel_engine_cs *engine)
+{
+       if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               return;
+
+       vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
+       vaddr += engine->context_size;
+
+       memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+}
+
+static void
+check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
+{
+       if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               return;
+
+       vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
+       vaddr += engine->context_size;
+
+       if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+               dev_err_once(engine->i915->drm.dev,
+                            "%s context redzone overwritten!\n",
+                            engine->name);
+}
+
 static void execlists_context_unpin(struct intel_context *ce)
 {
+       check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+                     ce->engine);
+
        i915_gem_context_unpin_hw_id(ce->gem_context);
        i915_gem_object_unpin_map(ce->state->obj);
+       intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
@@ -1444,9 +1772,12 @@ __execlists_update_reg_state(struct intel_context *ce,
        regs[CTX_RING_TAIL + 1] = ring->tail;
 
        /* RPCS */
-       if (engine->class == RENDER_CLASS)
+       if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE + 1] =
                        intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+
+               i915_oa_init_reg_state(engine, ce, regs);
+       }
 }
 
 static int
@@ -1456,19 +1787,12 @@ __execlists_context_pin(struct intel_context *ce,
        void *vaddr;
        int ret;
 
-       GEM_BUG_ON(!ce->gem_context->vm);
-
-       ret = execlists_context_deferred_alloc(ce, engine);
-       if (ret)
-               goto err;
        GEM_BUG_ON(!ce->state);
 
-       ret = intel_context_active_acquire(ce,
-                                          engine->i915->ggtt.pin_bias |
-                                          PIN_OFFSET_BIAS |
-                                          PIN_HIGH);
+       ret = intel_context_active_acquire(ce);
        if (ret)
                goto err;
+       GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
        vaddr = i915_gem_object_pin_map(ce->state->obj,
                                        i915_coherent_map_type(engine->i915) |
@@ -1501,6 +1825,11 @@ static int execlists_context_pin(struct intel_context *ce)
        return __execlists_context_pin(ce, ce->engine);
 }
 
+static int execlists_context_alloc(struct intel_context *ce)
+{
+       return __execlists_context_alloc(ce, ce->engine);
+}
+
 static void execlists_context_reset(struct intel_context *ce)
 {
        /*
@@ -1524,6 +1853,8 @@ static void execlists_context_reset(struct intel_context *ce)
 }
 
 static const struct intel_context_ops execlists_context_ops = {
+       .alloc = execlists_context_alloc,
+
        .pin = execlists_context_pin,
        .unpin = execlists_context_unpin,
 
@@ -1569,8 +1900,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 static int emit_pdps(struct i915_request *rq)
 {
        const struct intel_engine_cs * const engine = rq->engine;
-       struct i915_ppgtt * const ppgtt =
-               i915_vm_to_ppgtt(rq->gem_context->vm);
+       struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
        int err, i;
        u32 *cs;
 
@@ -1643,7 +1973,7 @@ static int execlists_request_alloc(struct i915_request *request)
         */
 
        /* Unconditionally invalidate GPU caches and TLBs. */
-       if (i915_vm_is_4lvl(request->gem_context->vm))
+       if (i915_vm_is_4lvl(request->hw_context->vm))
                ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
        else
                ret = emit_pdps(request);
@@ -1676,7 +2006,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
        /* NB no one else is allowed to scribble over scratch + 256! */
        *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = i915_scratch_offset(engine->i915) + 256;
+       *batch++ = intel_gt_scratch_offset(engine->gt,
+                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;
 
        *batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1690,12 +2021,19 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 
        *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = i915_scratch_offset(engine->i915) + 256;
+       *batch++ = intel_gt_scratch_offset(engine->gt,
+                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;
 
        return batch;
 }
 
+static u32 slm_offset(struct intel_engine_cs *engine)
+{
+       return intel_gt_scratch_offset(engine->gt,
+                                      INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
+}
+
 /*
  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
  * initialized at the beginning and shared across all contexts but this field
@@ -1727,8 +2065,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
                                       PIPE_CONTROL_GLOBAL_GTT_IVB |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
-                                      i915_scratch_offset(engine->i915) +
-                                      2 * CACHELINE_BYTES);
+                                      slm_offset(engine));
 
        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
@@ -1874,7 +2211,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
        if (IS_ERR(obj))
                return PTR_ERR(obj);
 
-       vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+       vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
@@ -1914,6 +2251,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
                return 0;
 
        switch (INTEL_GEN(engine->i915)) {
+       case 12:
        case 11:
                return 0;
        case 10:
@@ -1970,22 +2308,23 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 
 static void enable_execlists(struct intel_engine_cs *engine)
 {
+       u32 mode;
+
+       assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
+
        intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
 
        if (INTEL_GEN(engine->i915) >= 11)
-               ENGINE_WRITE(engine,
-                            RING_MODE_GEN7,
-                            _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+               mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
        else
-               ENGINE_WRITE(engine,
-                            RING_MODE_GEN7,
-                            _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+               mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
+       ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
 
-       ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+       ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 
-       ENGINE_WRITE(engine,
-                    RING_HWS_PGA,
-                    i915_ggtt_offset(engine->status_page.vma));
+       ENGINE_WRITE_FW(engine,
+                       RING_HWS_PGA,
+                       i915_ggtt_offset(engine->status_page.vma));
        ENGINE_POSTING_READ(engine, RING_HWS_PGA);
 }
 
@@ -1993,7 +2332,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
 {
        bool unexpected = false;
 
-       if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) {
+       if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
                DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
                unexpected = true;
        }
@@ -2041,34 +2380,32 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
        __tasklet_disable_sync_once(&execlists->tasklet);
        GEM_BUG_ON(!reset_in_progress(execlists));
 
-       intel_engine_stop_cs(engine);
-
        /* And flush any current direct submission. */
        spin_lock_irqsave(&engine->active.lock, flags);
        spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
-       const struct intel_ring *ring = rq->ring;
-       const u32 *regs = rq->hw_context->lrc_reg_state;
-
-       /* Quick spot check for the common signs of context corruption */
-
-       if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
-           (RING_CTL_SIZE(ring->size) | RING_VALID))
-               return false;
 
-       if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
-               return false;
-
-       return true;
+       /*
+        * We stop the engines; otherwise we might get a failed reset and
+        * a dead gpu (on elk). A gpu as modern as kbl can also suffer a
+        * system hang if a batchbuffer is progressing when the reset is
+        * issued, regardless of the READY_TO_RESET ack. Thus we assume it
+        * is best to stop the engines on all gens where we have a gpu
+        * reset.
+        *
+        * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+        *
+        * FIXME: Wa for more modern gens needs to be validated
+        */
+       intel_engine_stop_cs(engine);
 }
 
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+static void reset_csb_pointers(struct intel_engine_cs *engine)
 {
+       struct intel_engine_execlists * const execlists = &engine->execlists;
        const unsigned int reset_value = execlists->csb_size - 1;
 
+       ring_set_paused(engine, 0);
+
        /*
         * After a reset, the HW starts writing into CSB entry [0]. We
         * therefore have to set our HEAD pointer back one entry so that
@@ -2088,15 +2425,19 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
 
 static struct i915_request *active_request(struct i915_request *rq)
 {
-       const struct list_head * const list = &rq->engine->active.requests;
-       const struct intel_context * const context = rq->hw_context;
+       const struct intel_context * const ce = rq->hw_context;
        struct i915_request *active = NULL;
+       struct list_head *list;
 
-       list_for_each_entry_from_reverse(rq, list, sched.link) {
+       if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+               return rq;
+
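+       /*
+        * Walk back along the context's timeline to find the oldest
+        * request that has not yet completed: that is the point from
+        * which the ring must be replayed after the reset.
+        */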
+       list = &rq->timeline->requests;
+       list_for_each_entry_from_reverse(rq, list, link) {
                if (i915_request_completed(rq))
                        break;
 
-               if (rq->hw_context != context)
+               if (rq->hw_context != ce)
                        break;
 
                active = rq;
@@ -2115,33 +2456,27 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
        process_csb(engine); /* drain preemption events */
 
        /* Following the reset, we need to reload the CSB read/write pointers */
-       reset_csb_pointers(&engine->execlists);
+       reset_csb_pointers(engine);
 
        /*
         * Save the currently executing context, even if we completed
         * its request, it was still running at the time of the
         * reset and will have been clobbered.
         */
-       if (!port_isset(execlists->port))
-               goto out_clear;
+       rq = execlists_active(execlists);
+       if (!rq)
+               goto unwind;
 
-       rq = port_request(execlists->port);
        ce = rq->hw_context;
-
-       /*
-        * Catch up with any missed context-switch interrupts.
-        *
-        * Ideally we would just read the remaining CSB entries now that we
-        * know the gpu is idle. However, the CSB registers are sometimes^W
-        * often trashed across a GPU reset! Instead we have to rely on
-        * guessing the missed context-switch events by looking at what
-        * requests were completed.
-        */
-       execlists_cancel_port_requests(execlists);
-
+       GEM_BUG_ON(i915_active_is_idle(&ce->active));
+       GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
        rq = active_request(rq);
-       if (!rq)
+       if (!rq) {
+               ce->ring->head = ce->ring->tail;
                goto out_replay;
+       }
+
+       ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
 
        /*
         * If this request hasn't started yet, e.g. it is waiting on a
@@ -2155,7 +2490,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         * Otherwise, if we have not started yet, the request should replay
         * perfectly and we do not need to flag the result as being erroneous.
         */
-       if (!i915_request_started(rq) && lrc_regs_ok(rq))
+       if (!i915_request_started(rq))
                goto out_replay;
 
        /*
@@ -2169,8 +2504,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         * and have to at least restore the RING register in the context
         * image back to the expected values to skip over the guilty request.
         */
-       i915_reset_request(rq, stalled);
-       if (!stalled && lrc_regs_ok(rq))
+       __i915_request_reset(rq, stalled);
+       if (!stalled)
                goto out_replay;
 
        /*
@@ -2190,17 +2525,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
        execlists_init_reg_state(regs, ce, engine, ce->ring);
 
 out_replay:
-       /* Rerun the request; its payload has been neutered (if guilty). */
-       ce->ring->head =
-               rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+       GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
+                 engine->name, ce->ring->head, ce->ring->tail);
        intel_ring_update_space(ce->ring);
        __execlists_update_reg_state(ce, engine);
 
+unwind:
        /* Push back any incomplete requests for replay after the reset. */
+       cancel_port_requests(execlists);
        __unwind_incomplete_requests(engine);
-
-out_clear:
-       execlists_clear_all_active(execlists);
 }
 
 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2249,12 +2582,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        __execlists_reset(engine, true);
 
        /* Mark all executing requests as skipped. */
-       list_for_each_entry(rq, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(rq))
-                       dma_fence_set_error(&rq->fence, -EIO);
-
-               i915_request_mark_complete(rq);
-       }
+       list_for_each_entry(rq, &engine->active.requests, sched.link)
+               mark_eio(rq);
 
        /* Flush the queued requests to the timeline list (for retiring). */
        while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2262,10 +2591,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                int i;
 
                priolist_for_each_request_consume(rq, rn, p, i) {
-                       list_del_init(&rq->sched.link);
+                       mark_eio(rq);
                        __i915_request_submit(rq);
-                       dma_fence_set_error(&rq->fence, -EIO);
-                       i915_request_mark_complete(rq);
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
@@ -2281,13 +2608,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                RB_CLEAR_NODE(rb);
 
                spin_lock(&ve->base.active.lock);
-               if (ve->request) {
-                       ve->request->engine = engine;
-                       __i915_request_submit(ve->request);
-                       dma_fence_set_error(&ve->request->fence, -EIO);
-                       i915_request_mark_complete(ve->request);
+               rq = fetch_and_zero(&ve->request);
+               if (rq) {
+                       mark_eio(rq);
+
+                       rq->engine = engine;
+                       __i915_request_submit(rq);
+                       i915_request_put(rq);
+
                        ve->base.execlists.queue_priority_hint = INT_MIN;
-                       ve->request = NULL;
                }
                spin_unlock(&ve->base.active.lock);
        }
@@ -2296,7 +2625,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
        execlists->queue_priority_hint = INT_MIN;
        execlists->queue = RB_ROOT_CACHED;
-       GEM_BUG_ON(port_isset(execlists->port));
 
        GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
        execlists->tasklet.func = nop_submission_tasklet;
@@ -2434,7 +2762,8 @@ static int gen8_emit_flush_render(struct i915_request *request,
 {
        struct intel_engine_cs *engine = request->engine;
        u32 scratch_addr =
-               i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
+               intel_gt_scratch_offset(engine->gt,
+                                       INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
        bool vf_flush_wa = false, dc_flush_wa = false;
        u32 *cs, flags = 0;
        int len;
@@ -2499,6 +2828,63 @@ static int gen8_emit_flush_render(struct i915_request *request,
        return 0;
 }
 
+static int gen11_emit_flush_render(struct i915_request *request,
+                                  u32 mode)
+{
+       struct intel_engine_cs *engine = request->engine;
+       const u32 scratch_addr =
+               intel_gt_scratch_offset(engine->gt,
+                                       INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+
+       if (mode & EMIT_FLUSH) {
+               u32 *cs;
+               u32 flags = 0;
+
+               flags |= PIPE_CONTROL_CS_STALL;
+
+               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+               flags |= PIPE_CONTROL_FLUSH_ENABLE;
+               flags |= PIPE_CONTROL_QW_WRITE;
+               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+               cs = intel_ring_begin(request, 6);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+
+               cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+               intel_ring_advance(request, cs);
+       }
+
+       if (mode & EMIT_INVALIDATE) {
+               u32 *cs;
+               u32 flags = 0;
+
+               flags |= PIPE_CONTROL_CS_STALL;
+
+               flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TLB_INVALIDATE;
+               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_QW_WRITE;
+               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+               cs = intel_ring_begin(request, 6);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+
+               cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+               intel_ring_advance(request, cs);
+       }
+
+       return 0;
+}
+
 /*
  * Reserve space for 2 NOOPs at the end of each request to be
  * used as a workaround for not being allowed to do lite
@@ -2514,15 +2900,28 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
        return cs;
 }
 
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
 {
-       cs = gen8_emit_ggtt_write(cs,
-                                 request->fence.seqno,
-                                 request->timeline->hwsp_offset,
-                                 0);
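+       /*
+        * Busywait on the preempt dword in the HWSP: MI_SEMAPHORE_WAIT with
+        * SAD_EQ_SDD polls the status page and only proceeds once the value
+        * reads back as zero again.
+        */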
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_EQ_SDD;
+       *cs++ = 0;
+       *cs++ = intel_hws_preempt_address(request->engine);
+       *cs++ = 0;
+
+       return cs;
+}
 
+static __always_inline u32 *
+gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
+                                u32 *cs)
+{
        *cs++ = MI_USER_INTERRUPT;
+
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       if (intel_engine_has_semaphores(request->engine))
+               cs = emit_preempt_busywait(request, cs);
 
        request->tail = intel_ring_offset(request, cs);
        assert_ring_tail_valid(request->ring, request->tail);
@@ -2530,51 +2929,53 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
        return gen8_emit_wa_tail(request, cs);
 }
 
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+       cs = gen8_emit_ggtt_write(cs,
+                                 request->fence.seqno,
+                                 request->timeline->hwsp_offset,
+                                 0);
+
+       return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
-       /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      request->fence.seqno,
                                      request->timeline->hwsp_offset,
                                      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                      PIPE_CONTROL_DC_FLUSH_ENABLE);
+
+       /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
        cs = gen8_emit_pipe_control(cs,
                                    PIPE_CONTROL_FLUSH_ENABLE |
                                    PIPE_CONTROL_CS_STALL,
                                    0);
 
-       *cs++ = MI_USER_INTERRUPT;
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       request->tail = intel_ring_offset(request, cs);
-       assert_ring_tail_valid(request->ring, request->tail);
-
-       return gen8_emit_wa_tail(request, cs);
+       return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
-static int gen8_init_rcs_context(struct i915_request *rq)
+static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
+                                          u32 *cs)
 {
-       int ret;
-
-       ret = intel_engine_emit_ctx_wa(rq);
-       if (ret)
-               return ret;
-
-       ret = intel_rcs_context_init_mocs(rq);
-       /*
-        * Failing to program the MOCS is non-fatal.The system will not
-        * run at peak performance. So generate an error and carry on.
-        */
-       if (ret)
-               DRM_ERROR("MOCS failed to program: expect performance issues.\n");
+       cs = gen8_emit_ggtt_write_rcs(cs,
+                                     request->fence.seqno,
+                                     request->timeline->hwsp_offset,
+                                     PIPE_CONTROL_CS_STALL |
+                                     PIPE_CONTROL_TILE_CACHE_FLUSH |
+                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                     PIPE_CONTROL_DC_FLUSH_ENABLE |
+                                     PIPE_CONTROL_FLUSH_ENABLE);
 
-       return i915_gem_render_state_emit(rq);
+       return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
 static void execlists_park(struct intel_engine_cs *engine)
 {
-       intel_engine_park(engine);
+       del_timer(&engine->execlists.timer);
 }
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -2592,11 +2993,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->unpark = NULL;
 
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-       if (!intel_vgpu_active(engine->i915))
+       if (!intel_vgpu_active(engine->i915)) {
                engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-       if (engine->preempt_context &&
-           HAS_LOGICAL_RING_PREEMPTION(engine->i915))
-               engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+               if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+                       engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+       }
 }
 
 static void execlists_destroy(struct intel_engine_cs *engine)
@@ -2665,22 +3066,32 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
 
-int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+static void rcs_submission_override(struct intel_engine_cs *engine)
 {
-       /* Intentionally left blank. */
-       engine->buffer = NULL;
+       switch (INTEL_GEN(engine->i915)) {
+       case 12:
+       case 11:
+               engine->emit_flush = gen11_emit_flush_render;
+               engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
+               break;
+       default:
+               engine->emit_flush = gen8_emit_flush_render;
+               engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+               break;
+       }
+}
 
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+{
        tasklet_init(&engine->execlists.tasklet,
                     execlists_submission_tasklet, (unsigned long)engine);
+       timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
 
        logical_ring_default_vfuncs(engine);
        logical_ring_default_irqs(engine);
 
-       if (engine->class == RENDER_CLASS) {
-               engine->init_context = gen8_init_rcs_context;
-               engine->emit_flush = gen8_emit_flush_render;
-               engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
-       }
+       if (engine->class == RENDER_CLASS)
+               rcs_submission_override(engine);
 
        return 0;
 }
@@ -2697,9 +3108,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
-       intel_engine_init_workarounds(engine);
-       intel_engine_init_whitelist(engine);
-
        if (intel_init_workaround_bb(engine))
                /*
                 * We continue even if we fail to initialize WA batch
@@ -2718,11 +3126,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
                        i915_mmio_reg_offset(RING_ELSP(base));
        }
 
-       execlists->preempt_complete_status = ~0u;
-       if (engine->preempt_context)
-               execlists->preempt_complete_status =
-                       upper_32_bits(engine->preempt_context->lrc_desc);
-
        execlists->csb_status =
                &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
@@ -2734,7 +3137,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
        else
                execlists->csb_size = GEN11_CSB_ENTRIES;
 
-       reset_csb_pointers(execlists);
+       reset_csb_pointers(engine);
 
        return 0;
 }
@@ -2747,6 +3150,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
        default:
                MISSING_CASE(INTEL_GEN(engine->i915));
                /* fall through */
+       case 12:
+               indirect_ctx_offset =
+                       GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+               break;
        case 11:
                indirect_ctx_offset =
                        GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
@@ -2773,7 +3180,7 @@ static void execlists_init_reg_state(u32 *regs,
                                     struct intel_engine_cs *engine,
                                     struct intel_ring *ring)
 {
-       struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
+       struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
        bool rcs = engine->class == RENDER_CLASS;
        u32 base = engine->mmio_base;
 
@@ -2864,8 +3271,6 @@ static void execlists_init_reg_state(u32 *regs,
        if (rcs) {
                regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
                CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
-               i915_oa_init_reg_state(engine, ce, regs);
        }
 
        regs[CTX_END] = MI_BATCH_BUFFER_END;
@@ -2890,6 +3295,8 @@ populate_lr_context(struct intel_context *ce,
                return ret;
        }
 
+       set_redzone(vaddr, engine);
+
        if (engine->default_state) {
                /*
                 * We only want to copy over the template context state;
@@ -2917,11 +3324,6 @@ populate_lr_context(struct intel_context *ce,
        if (!engine->default_state)
                regs[CTX_CONTEXT_CONTROL + 1] |=
                        _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-       if (ce->gem_context == engine->i915->preempt_context &&
-           INTEL_GEN(engine->i915) < 11)
-               regs[CTX_CONTEXT_CONTROL + 1] |=
-                       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                                          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
        ret = 0;
 err_unpin_ctx:
@@ -2932,27 +3334,16 @@ err_unpin_ctx:
        return ret;
 }
 
-static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
-{
-       if (ctx->timeline)
-               return i915_timeline_get(ctx->timeline);
-       else
-               return i915_timeline_create(ctx->i915, NULL);
-}
-
-static int execlists_context_deferred_alloc(struct intel_context *ce,
-                                           struct intel_engine_cs *engine)
+static int __execlists_context_alloc(struct intel_context *ce,
+                                    struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_object *ctx_obj;
+       struct intel_ring *ring;
        struct i915_vma *vma;
        u32 context_size;
-       struct intel_ring *ring;
-       struct i915_timeline *timeline;
        int ret;
 
-       if (ce->state)
-               return 0;
-
+       GEM_BUG_ON(ce->state);
        context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
 
        /*
@@ -2960,27 +3351,32 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
         * for our own use and for sharing with the GuC.
         */
        context_size += LRC_HEADER_PAGES * PAGE_SIZE;
+       if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 
        ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
        if (IS_ERR(ctx_obj))
                return PTR_ERR(ctx_obj);
 
-       vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
+       vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto error_deref_obj;
        }
 
-       timeline = get_timeline(ce->gem_context);
-       if (IS_ERR(timeline)) {
-               ret = PTR_ERR(timeline);
-               goto error_deref_obj;
+       if (!ce->timeline) {
+               struct intel_timeline *tl;
+
+               tl = intel_timeline_create(engine->gt, NULL);
+               if (IS_ERR(tl)) {
+                       ret = PTR_ERR(tl);
+                       goto error_deref_obj;
+               }
+
+               ce->timeline = tl;
        }
 
-       ring = intel_engine_create_ring(engine,
-                                       timeline,
-                                       ce->gem_context->ring_size);
-       i915_timeline_put(timeline);
+       ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
        if (IS_ERR(ring)) {
                ret = PTR_ERR(ring);
                goto error_deref_obj;
@@ -3038,6 +3434,7 @@ static void virtual_context_destroy(struct kref *kref)
 
        if (ve->context.state)
                __execlists_context_fini(&ve->context);
+       intel_context_fini(&ve->context);
 
        kfree(ve->bonds);
        kfree(ve);
@@ -3090,6 +3487,8 @@ static void virtual_context_enter(struct intel_context *ce)
 
        for (n = 0; n < ve->num_siblings; n++)
                intel_engine_pm_get(ve->siblings[n]);
+
+       intel_timeline_enter(ce->timeline);
 }
 
 static void virtual_context_exit(struct intel_context *ce)
@@ -3097,6 +3496,8 @@ static void virtual_context_exit(struct intel_context *ce)
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
        unsigned int n;
 
+       intel_timeline_exit(ce->timeline);
+
        for (n = 0; n < ve->num_siblings; n++)
                intel_engine_pm_put(ve->siblings[n]);
 }
@@ -3219,6 +3620,8 @@ submit_engine:
 static void virtual_submit_request(struct i915_request *rq)
 {
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
+       struct i915_request *old;
+       unsigned long flags;
 
        GEM_TRACE("%s: rq=%llx:%lld\n",
                  ve->base.name,
@@ -3227,15 +3630,31 @@ static void virtual_submit_request(struct i915_request *rq)
 
        GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
 
-       GEM_BUG_ON(ve->request);
-       GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+       spin_lock_irqsave(&ve->base.active.lock, flags);
 
-       ve->base.execlists.queue_priority_hint = rq_prio(rq);
-       WRITE_ONCE(ve->request, rq);
+       old = ve->request;
+       if (old) { /* background completion event from preempt-to-busy */
+               GEM_BUG_ON(!i915_request_completed(old));
+               __i915_request_submit(old);
+               i915_request_put(old);
+       }
+
+       if (i915_request_completed(rq)) {
+               __i915_request_submit(rq);
+
+               ve->base.execlists.queue_priority_hint = INT_MIN;
+               ve->request = NULL;
+       } else {
+               ve->base.execlists.queue_priority_hint = rq_prio(rq);
+               ve->request = i915_request_get(rq);
 
-       list_move_tail(&rq->sched.link, virtual_queue(ve));
+               GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+               list_move_tail(&rq->sched.link, virtual_queue(ve));
 
-       tasklet_schedule(&ve->base.execlists.tasklet);
+               tasklet_schedule(&ve->base.execlists.tasklet);
+       }
+
+       spin_unlock_irqrestore(&ve->base.active.lock, flags);
 }
 
 static struct ve_bond *
@@ -3256,18 +3675,22 @@ static void
 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 {
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
+       intel_engine_mask_t allowed, exec;
        struct ve_bond *bond;
 
+       allowed = ~to_request(signal)->engine->mask;
+
        bond = virtual_find_bond(ve, to_request(signal)->engine);
-       if (bond) {
-               intel_engine_mask_t old, new, cmp;
+       if (bond)
+               allowed &= bond->sibling_mask;
 
-               cmp = READ_ONCE(rq->execution_mask);
-               do {
-                       old = cmp;
-                       new = cmp & bond->sibling_mask;
-               } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
-       }
+       /* Restrict the bonded request to run on only the available engines */
+       exec = READ_ONCE(rq->execution_mask);
+       while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+               ;
+
+       /* Prevent the master from being re-run on the bonded engines */
+       to_request(signal)->execution_mask &= ~allowed;
 }
 
 struct intel_context *
@@ -3290,11 +3713,11 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
                return ERR_PTR(-ENOMEM);
 
        ve->base.i915 = ctx->i915;
+       ve->base.gt = siblings[0]->gt;
        ve->base.id = -1;
        ve->base.class = OTHER_CLASS;
        ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
        ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
-       ve->base.flags = I915_ENGINE_IS_VIRTUAL;
 
        /*
         * The decision on whether to submit a request using semaphores
@@ -3391,8 +3814,18 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
                ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
                ve->base.emit_fini_breadcrumb_dw =
                        sibling->emit_fini_breadcrumb_dw;
+
+               ve->base.flags = sibling->flags;
        }
 
+       ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+
+       err = __execlists_context_alloc(&ve->context, siblings[0]);
+       if (err)
+               goto err_put;
+
+       __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
+
        return &ve->context;
 
 err_put: