drm/i915: Only enqueue already completed requests
Author:     Chris Wilson <chris@chris-wilson.co.uk>
AuthorDate: Mon, 23 Sep 2019 11:00:55 +0000 (12:00 +0100)
Commit:     Rodrigo Vivi <rodrigo.vivi@intel.com>
CommitDate: Wed, 9 Oct 2019 20:18:26 +0000 (13:18 -0700)
If we are asked to submit a completed request, just move it onto the
active-list without modifying its payload. If we try to emit the
modified payload of a completed request, we risk racing with the
ring->head update during retirement, which may advance the head past our
breadcrumb, and so we generate a warning for the emission being behind
the RING_HEAD.

v2: Commentary for the sneaky, shared responsibility between functions.
v3: Spelling mistakes and bonus assertion

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-3-chris@chris-wilson.co.uk
(cherry picked from commit c0bb487dc19fc45dbeede7dcf8f513df51a3cd33)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 21f06d5..b952031 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -796,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev,
        GEM_BUG_ON(prev == next);
        GEM_BUG_ON(!assert_priority_queue(prev, next));
 
+       /*
+        * We do not submit known completed requests. Therefore if the next
+        * request is already completed, we can pretend to merge it in
+        * with the previous context (and we will skip updating the ELSP
+        * and tracking). Thus hopefully keeping the ELSP full with active
+        * contexts, despite the best efforts of preempt-to-busy to confuse
+        * us.
+        */
+       if (i915_request_completed(next))
+               return true;
+
        if (!can_merge_ctx(prev->hw_context, next->hw_context))
                return false;
 
@@ -1171,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                continue;
                        }
 
-                       if (i915_request_completed(rq)) {
-                               ve->request = NULL;
-                               ve->base.execlists.queue_priority_hint = INT_MIN;
-                               rb_erase_cached(rb, &execlists->virtual);
-                               RB_CLEAR_NODE(rb);
-
-                               rq->engine = engine;
-                               __i915_request_submit(rq);
-
-                               spin_unlock(&ve->base.active.lock);
-
-                               rb = rb_first_cached(&execlists->virtual);
-                               continue;
-                       }
-
                        if (last && !can_merge_rq(last, rq)) {
                                spin_unlock(&ve->base.active.lock);
                                return; /* leave this for another */
@@ -1236,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                GEM_BUG_ON(ve->siblings[0] != engine);
                        }
 
-                       __i915_request_submit(rq);
-                       if (!i915_request_completed(rq)) {
+                       if (__i915_request_submit(rq)) {
                                submit = true;
                                last = rq;
                        }
+
+                       /*
+                        * Hmm, we have a bunch of virtual engine requests,
+                        * but the first one was already completed (thanks
+                        * preempt-to-busy!). Keep looking at the veng queue
+                        * until we have no more relevant requests (i.e.
+                        * the normal submit queue has higher priority).
+                        */
+                       if (!submit) {
+                               spin_unlock(&ve->base.active.lock);
+                               rb = rb_first_cached(&execlists->virtual);
+                               continue;
+                       }
                }
 
                spin_unlock(&ve->base.active.lock);
@@ -1253,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                int i;
 
                priolist_for_each_request_consume(rq, rn, p, i) {
-                       if (i915_request_completed(rq))
-                               goto skip;
+                       bool merge = true;
 
                        /*
                         * Can we combine this request with the current port?
@@ -1295,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                    ctx_single_port_submission(rq->hw_context))
                                        goto done;
 
-                               *port = execlists_schedule_in(last, port - execlists->pending);
-                               port++;
+                               merge = false;
                        }
 
-                       last = rq;
-                       submit = true;
-skip:
-                       __i915_request_submit(rq);
+                       if (__i915_request_submit(rq)) {
+                               if (!merge) {
+                                       *port = execlists_schedule_in(last, port - execlists->pending);
+                                       port++;
+                                       last = NULL;
+                               }
+
+                               GEM_BUG_ON(last &&
+                                          !can_merge_ctx(last->hw_context,
+                                                         rq->hw_context));
+
+                               submit = true;
+                               last = rq;
+                       }
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8f88851..1c55068 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
        return 0;
 }
 
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
 {
        struct intel_engine_cs *engine = request->engine;
+       bool result = false;
 
        GEM_TRACE("%s fence %llx:%lld, current %d\n",
                  engine->name,
@@ -389,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
        GEM_BUG_ON(!irqs_disabled());
        lockdep_assert_held(&engine->active.lock);
 
+       /*
+        * With the advent of preempt-to-busy, we frequently encounter
+        * requests that we have unsubmitted from HW, but left running
+        * until the next ack and so have completed in the meantime. On
+        * resubmission of that completed request, we can skip
+        * updating the payload, and execlists can even skip submitting
+        * the request.
+        *
+        * We must remove the request from the caller's priority queue,
+        * and the caller must only call us when the request is in their
+        * priority queue, under the active.lock. This ensures that the
+        * request has *not* yet been retired and we can safely move
+        * the request into the engine->active.list where it will be
+        * dropped upon retiring. (Otherwise if we resubmit a *retired*
+        * request, this would be a horrible use-after-free.)
+        */
+       if (i915_request_completed(request))
+               goto xfer;
+
        if (i915_gem_context_is_banned(request->gem_context))
                i915_request_skip(request, -EIO);
 
@@ -412,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
            i915_sw_fence_signaled(&request->semaphore))
                engine->saturated |= request->sched.semaphores;
 
-       /* We may be recursing from the signal callback of another i915 fence */
-       spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+       engine->emit_fini_breadcrumb(request,
+                                    request->ring->vaddr + request->postfix);
+
+       trace_i915_request_execute(request);
+       engine->serial++;
+       result = true;
 
-       list_move_tail(&request->sched.link, &engine->active.requests);
+xfer:  /* We may be recursing from the signal callback of another i915 fence */
+       spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-       GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
-       set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+       if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+               list_move_tail(&request->sched.link, &engine->active.requests);
 
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
            !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -429,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
 
        spin_unlock(&request->lock);
 
-       engine->emit_fini_breadcrumb(request,
-                                    request->ring->vaddr + request->postfix);
-
-       engine->serial++;
-
-       trace_i915_request_execute(request);
+       return result;
 }
 
 void i915_request_submit(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8ac6e12..e4dd013 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
 
 void i915_request_add(struct i915_request *rq);
 
-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
 void i915_request_skip(struct i915_request *request, int error);
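
With the prototype change above, a submission backend can tell whether a
request was actually placed in flight or merely moved onto the active
list because it had already completed. A condensed restatement of the
caller pattern from the execlists_dequeue() hunks earlier in this patch
(loop structure and locking trimmed):

	if (__i915_request_submit(rq)) {
		/* The request still needs to run; track it for the ELSP. */
		submit = true;
		last = rq;
	}
	/* Otherwise the request was already complete and is only tracked
	 * for retirement, leaving the ELSP untouched. */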