drm/i915: Defer context state allocation for legacy ring submission

author Chris Wilson <chris@chris-wilson.co.uk>

Thu, 27 Apr 2017 10:46:51 +0000 (11:46 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Thu, 27 Apr 2017 11:22:13 +0000 (12:22 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 27 Apr 2017 10:46:51 +0000 (11:46 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 27 Apr 2017 11:22:13 +0000 (12:22 +0100)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c

index 8bd0c49..d46a69d 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -151,45 +151,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
         kfree(ctx);
  }
  
-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
-       struct drm_i915_gem_object *obj;
-       int ret;
-
-       lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-       obj = i915_gem_object_create(dev_priv, size);
-       if (IS_ERR(obj))
-               return obj;
-
-       /*
-        * Try to make the context utilize L3 as well as LLC.
-        *
-        * On VLV we don't have L3 controls in the PTEs so we
-        * shouldn't touch the cache level, especially as that
-        * would make the object snooped which might have a
-        * negative performance impact.
-        *
-        * Snooping is required on non-llc platforms in execlist
-        * mode, but since all GGTT accesses use PAT entry 0 we
-        * get snooping anyway regardless of cache_level.
-        *
-        * This is only applicable for Ivy Bridge devices since
-        * later platforms don't have L3 control bits in the PTE.
-        */
-       if (IS_IVYBRIDGE(dev_priv)) {
-               ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
-               /* Failure shouldn't ever happen this early */
-               if (WARN_ON(ret)) {
-                       i915_gem_object_put(obj);
-                       return ERR_PTR(ret);
-               }
-       }
-
-       return obj;
-}
-
  static void context_close(struct i915_gem_context *ctx)
  {
         i915_gem_context_set_closed(ctx);
@@ -266,26 +227,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
         list_add_tail(&ctx->link, &dev_priv->context_list);
         ctx->i915 = dev_priv;
  
-       if (dev_priv->hw_context_size) {
-               struct drm_i915_gem_object *obj;
-               struct i915_vma *vma;
-
-               obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
-               if (IS_ERR(obj)) {
-                       ret = PTR_ERR(obj);
-                       goto err_out;
-               }
-
-               vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-               if (IS_ERR(vma)) {
-                       i915_gem_object_put(obj);
-                       ret = PTR_ERR(vma);
-                       goto err_out;
-               }
-
-               ctx->engine[RCS].state = vma;
-       }
-
         /* Default context will never have a file_priv */
         ret = DEFAULT_CONTEXT_HANDLE;
         if (file_priv) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index 6836efb..61f6124 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx)
                             PIN_GLOBAL | PIN_HIGH);
  }
  
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *i915 = engine->i915;
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+
+       obj = i915_gem_object_create(i915, i915->hw_context_size);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       /*
+        * The object created above is hw_context_size bytes and is what
+        * the caller stores as the context state; the rest of this
+        * function tunes its caching and wraps it in a global GTT vma.
+        *
+        * Try to make the context utilize L3 as well as LLC. On VLV we
+        * don't have L3 controls in the PTEs so we shouldn't touch the
+        * cache level, especially as that would make the object snooped
+        * which might have a negative performance impact.
+        *
+        * Snooping is required on non-llc platforms in execlist mode,
+        * but since all GGTT accesses use PAT entry 0 we get snooping
+        * anyway regardless of cache_level. Only Ivy Bridge is handled
+        * because later platforms lack L3 control bits in the PTE.
+        */
+       if (IS_IVYBRIDGE(i915)) {
+               /* Best effort: the result is deliberately ignored, this is only an optimisation */
+               i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
+       }
+
+       vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
+       if (IS_ERR(vma))
+               i915_gem_object_put(obj); /* release obj; the error in vma is returned below */
+
+       return vma; /* a usable vma, or the error pointer from i915_vma_instance() */
+}
+
  static int intel_ring_context_pin(struct intel_engine_cs *engine,
                                   struct i915_gem_context *ctx)
  {
@@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
                 return 0;
         GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
  
+       if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) {
+               struct i915_vma *vma;
+
+               vma = alloc_context_vma(engine);
+               if (IS_ERR(vma)) {
+                       ret = PTR_ERR(vma);
+                       goto error;
+               }
+
+               ce->state = vma;
+       }
+
         if (ce->state) {
                 ret = context_pin(ctx);
                 if (ret)
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 27 Apr 2017 10:46:51 +0000 (11:46 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 27 Apr 2017 11:22:13 +0000 (12:22 +0100)
drivers/gpu/drm/i915/i915_gem_context.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_ringbuffer.c		patch \| blob \| history