drm/i915/gt: Pipelined clear
author     Chris Wilson <chris@chris-wilson.co.uk>
           Thu, 17 Jun 2021 06:30:14 +0000 (08:30 +0200)
committer  Matthew Auld <matthew.auld@intel.com>
           Thu, 17 Jun 2021 13:23:09 +0000 (14:23 +0100)
Update the PTE and emit a clear within a single unpreemptible packet
such that we can schedule and pipeline clears.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-9-thomas.hellstrom@linux.intel.com
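
As a rough illustration of the new entry point, the sketch below shows how a caller might clear an object's backing store with the blitter and wait for the result. It is modelled on the selftest further down; clear_object_sync() is a hypothetical helper (not part of this patch), and the object is assumed to be locked and have its pages pinned within the caller's ww transaction.

/*
 * Hypothetical helper, modelled on the selftest in this patch: fill an
 * object's backing store with @value via the pipelined clear and wait for
 * the blit to complete.  Assumes @obj is locked in @ww and its pages are
 * already pinned.  Include paths are relative to drivers/gpu/drm/i915.
 */
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_object.h"
#include "gt/intel_migrate.h"
#include "i915_request.h"

static int clear_object_sync(struct intel_migrate *m,
			     struct i915_gem_ww_ctx *ww,
			     struct drm_i915_gem_object *obj,
			     u32 value)
{
	struct i915_request *rq = NULL;
	int err;

	/* Queue the pipelined clear; no external fence to await here. */
	err = intel_migrate_clear(m, ww, NULL,
				  obj->mm.pages->sgl,
				  obj->cache_level,
				  i915_gem_object_is_lmem(obj),
				  value, &rq);

	/* The last request of the chunked clear is returned in rq. */
	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0 && !err)
			err = -ETIME;
		i915_request_put(rq);
	}

	return err;
}

A caller that already holds a pinned migration context can instead use intel_context_migrate_clear() directly, as the __global_clear() selftest below does.
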
drivers/gpu/drm/i915/gt/intel_migrate.c
drivers/gpu/drm/i915/gt/intel_migrate.h
drivers/gpu/drm/i915/gt/selftest_migrate.c

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index e2e8600..ba40091 100644
@@ -488,6 +488,114 @@ out_ce:
        return err;
 }
 
+static int emit_clear(struct i915_request *rq, int size, u32 value)
+{
+       const int gen = INTEL_GEN(rq->engine->i915);
+       u32 instance = rq->engine->instance;
+       u32 *cs;
+
+       GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
+
+       cs = intel_ring_begin(rq, gen >= 8 ? 8 : 6);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       if (gen >= 8) {
+               *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
+               *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+               *cs++ = 0;
+               *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+               *cs++ = 0; /* offset */
+               *cs++ = instance;
+               *cs++ = value;
+               *cs++ = MI_NOOP;
+       } else {
+               GEM_BUG_ON(instance);
+               *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+               *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+               *cs++ = 0;
+               *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+               *cs++ = 0;
+               *cs++ = value;
+       }
+
+       intel_ring_advance(rq, cs);
+       return 0;
+}
+
+int
+intel_context_migrate_clear(struct intel_context *ce,
+                           struct dma_fence *await,
+                           struct scatterlist *sg,
+                           enum i915_cache_level cache_level,
+                           bool is_lmem,
+                           u32 value,
+                           struct i915_request **out)
+{
+       struct sgt_dma it = sg_sgt(sg);
+       struct i915_request *rq;
+       int err;
+
+       *out = NULL;
+
+       GEM_BUG_ON(ce->ring->size < SZ_64K);
+
+       do {
+               int len;
+
+               rq = i915_request_create(ce);
+               if (IS_ERR(rq)) {
+                       err = PTR_ERR(rq);
+                       goto out_ce;
+               }
+
+               if (await) {
+                       err = i915_request_await_dma_fence(rq, await);
+                       if (err)
+                               goto out_rq;
+
+                       if (rq->engine->emit_init_breadcrumb) {
+                               err = rq->engine->emit_init_breadcrumb(rq);
+                               if (err)
+                                       goto out_rq;
+                       }
+
+                       await = NULL;
+               }
+
+               /* The PTE updates + clear must not be interrupted. */
+               err = emit_no_arbitration(rq);
+               if (err)
+                       goto out_rq;
+
+               len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ);
+               if (len <= 0) {
+                       err = len;
+                       goto out_rq;
+               }
+
+               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+               if (err)
+                       goto out_rq;
+
+               err = emit_clear(rq, len, value);
+
+               /* Arbitration is re-enabled between requests. */
+out_rq:
+               if (*out)
+                       i915_request_put(*out);
+               *out = i915_request_get(rq);
+               i915_request_add(rq);
+               if (err || !it.sg || !sg_dma_len(it.sg))
+                       break;
+
+               cond_resched();
+       } while (1);
+
+out_ce:
+       return err;
+}
+
 int intel_migrate_copy(struct intel_migrate *m,
                       struct i915_gem_ww_ctx *ww,
                       struct dma_fence *await,
@@ -526,6 +634,41 @@ out:
        return err;
 }
 
+int
+intel_migrate_clear(struct intel_migrate *m,
+                   struct i915_gem_ww_ctx *ww,
+                   struct dma_fence *await,
+                   struct scatterlist *sg,
+                   enum i915_cache_level cache_level,
+                   bool is_lmem,
+                   u32 value,
+                   struct i915_request **out)
+{
+       struct intel_context *ce;
+       int err;
+
+       *out = NULL;
+       if (!m->context)
+               return -ENODEV;
+
+       ce = intel_migrate_create_context(m);
+       if (IS_ERR(ce))
+               ce = intel_context_get(m->context);
+       GEM_BUG_ON(IS_ERR(ce));
+
+       err = intel_context_pin_ww(ce, ww);
+       if (err)
+               goto out;
+
+       err = intel_context_migrate_clear(ce, await, sg, cache_level,
+                                         is_lmem, value, out);
+
+       intel_context_unpin(ce);
+out:
+       intel_context_put(ce);
+       return err;
+}
+
 void intel_migrate_fini(struct intel_migrate *m)
 {
        struct intel_context *ce;
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h
index 32c6119..4e18e75 100644
@@ -6,6 +6,8 @@
 #ifndef __INTEL_MIGRATE__
 #define __INTEL_MIGRATE__
 
+#include <linux/types.h>
+
 #include "intel_migrate_types.h"
 
 struct dma_fence;
@@ -40,6 +42,24 @@ int intel_context_migrate_copy(struct intel_context *ce,
                               bool dst_is_lmem,
                               struct i915_request **out);
 
+int
+intel_migrate_clear(struct intel_migrate *m,
+                   struct i915_gem_ww_ctx *ww,
+                   struct dma_fence *await,
+                   struct scatterlist *sg,
+                   enum i915_cache_level cache_level,
+                   bool is_lmem,
+                   u32 value,
+                   struct i915_request **out);
+int
+intel_context_migrate_clear(struct intel_context *ce,
+                           struct dma_fence *await,
+                           struct scatterlist *sg,
+                           enum i915_cache_level cache_level,
+                           bool is_lmem,
+                           u32 value,
+                           struct i915_request **out);
+
 void intel_migrate_fini(struct intel_migrate *m);
 
 #endif /* __INTEL_MIGRATE__ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 9784d14..159c865 100644
@@ -129,6 +129,82 @@ err_free_src:
        return err;
 }
 
+static int clear(struct intel_migrate *migrate,
+                int (*fn)(struct intel_migrate *migrate,
+                          struct i915_gem_ww_ctx *ww,
+                          struct drm_i915_gem_object *obj,
+                          u32 value,
+                          struct i915_request **out),
+                u32 sz, struct rnd_state *prng)
+{
+       struct drm_i915_private *i915 = migrate->context->engine->i915;
+       struct drm_i915_gem_object *obj;
+       struct i915_request *rq;
+       struct i915_gem_ww_ctx ww;
+       u32 *vaddr;
+       int err = 0;
+       int i;
+
+       obj = create_lmem_or_internal(i915, sz);
+       if (IS_ERR(obj))
+               return 0;
+
+       for_i915_gem_ww(&ww, err, true) {
+               err = i915_gem_object_lock(obj, &ww);
+               if (err)
+                       continue;
+
+               vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+               if (IS_ERR(vaddr)) {
+                       err = PTR_ERR(vaddr);
+                       continue;
+               }
+
+               for (i = 0; i < sz / sizeof(u32); i++)
+                       vaddr[i] = ~i;
+               i915_gem_object_flush_map(obj);
+
+               err = fn(migrate, &ww, obj, sz, &rq);
+               if (!err)
+                       continue;
+
+               if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
+                       pr_err("%ps failed, size: %u\n", fn, sz);
+               if (rq) {
+                       i915_request_wait(rq, 0, HZ);
+                       i915_request_put(rq);
+               }
+               i915_gem_object_unpin_map(obj);
+       }
+       if (err)
+               goto err_out;
+
+       if (rq) {
+               if (i915_request_wait(rq, 0, HZ) < 0) {
+                       pr_err("%ps timed out, size: %u\n", fn, sz);
+                       err = -ETIME;
+               }
+               i915_request_put(rq);
+       }
+
+       for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
+               int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
+
+               if (vaddr[x] != sz) {
+                       pr_err("%ps failed, size: %u, offset: %zu\n",
+                              fn, sz, x * sizeof(u32));
+                       igt_hexdump(vaddr + i * 1024, 4096);
+                       err = -EINVAL;
+               }
+       }
+
+       i915_gem_object_unpin_map(obj);
+err_out:
+       i915_gem_object_put(obj);
+
+       return err;
+}
+
 static int __migrate_copy(struct intel_migrate *migrate,
                          struct i915_gem_ww_ctx *ww,
                          struct drm_i915_gem_object *src,
@@ -169,6 +245,44 @@ global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
        return copy(migrate, __global_copy, sz, prng);
 }
 
+static int __migrate_clear(struct intel_migrate *migrate,
+                          struct i915_gem_ww_ctx *ww,
+                          struct drm_i915_gem_object *obj,
+                          u32 value,
+                          struct i915_request **out)
+{
+       return intel_migrate_clear(migrate, ww, NULL,
+                                  obj->mm.pages->sgl,
+                                  obj->cache_level,
+                                  i915_gem_object_is_lmem(obj),
+                                  value, out);
+}
+
+static int __global_clear(struct intel_migrate *migrate,
+                         struct i915_gem_ww_ctx *ww,
+                         struct drm_i915_gem_object *obj,
+                         u32 value,
+                         struct i915_request **out)
+{
+       return intel_context_migrate_clear(migrate->context, NULL,
+                                          obj->mm.pages->sgl,
+                                          obj->cache_level,
+                                          i915_gem_object_is_lmem(obj),
+                                          value, out);
+}
+
+static int
+migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
+{
+       return clear(migrate, __migrate_clear, sz, prng);
+}
+
+static int
+global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
+{
+       return clear(migrate, __global_clear, sz, prng);
+}
+
 static int live_migrate_copy(void *arg)
 {
        struct intel_migrate *migrate = arg;
@@ -190,6 +304,28 @@ static int live_migrate_copy(void *arg)
        return 0;
 }
 
+static int live_migrate_clear(void *arg)
+{
+       struct intel_migrate *migrate = arg;
+       struct drm_i915_private *i915 = migrate->context->engine->i915;
+       I915_RND_STATE(prng);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+               int err;
+
+               err = migrate_clear(migrate, sizes[i], &prng);
+               if (err == 0)
+                       err = global_clear(migrate, sizes[i], &prng);
+
+               i915_gem_drain_freed_objects(i915);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
 struct threaded_migrate {
        struct intel_migrate *migrate;
        struct task_struct *tsk;
@@ -271,12 +407,39 @@ static int thread_global_copy(void *arg)
        return threaded_migrate(arg, __thread_global_copy, 0);
 }
 
+static int __thread_migrate_clear(void *arg)
+{
+       struct threaded_migrate *tm = arg;
+
+       return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
+}
+
+static int __thread_global_clear(void *arg)
+{
+       struct threaded_migrate *tm = arg;
+
+       return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
+}
+
+static int thread_migrate_clear(void *arg)
+{
+       return threaded_migrate(arg, __thread_migrate_clear, 0);
+}
+
+static int thread_global_clear(void *arg)
+{
+       return threaded_migrate(arg, __thread_global_clear, 0);
+}
+
 int intel_migrate_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(live_migrate_copy),
+               SUBTEST(live_migrate_clear),
                SUBTEST(thread_migrate_copy),
+               SUBTEST(thread_migrate_clear),
                SUBTEST(thread_global_copy),
+               SUBTEST(thread_global_clear),
        };
        struct intel_migrate m;
        int err;