drm/i915/gt: Setup a default migration context on the GT
author:    Chris Wilson <chris@chris-wilson.co.uk>
           Thu, 17 Jun 2021 06:30:15 +0000 (08:30 +0200)
committer: Matthew Auld <matthew.auld@intel.com>
           Thu, 17 Jun 2021 13:23:11 +0000 (14:23 +0100)
Set up a default migration context on the GT and use it from the
selftests.
Add a perf selftest and make sure we exercise LMEM if available.
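
For illustration, a minimal sketch of how a user of the new default context
could look, mirroring what the selftests below do (clear_obj() is a
hypothetical helper name; the object is assumed to be locked with its pages
pinned, as in the selftests):

static int clear_obj(struct intel_gt *gt, struct drm_i915_gem_object *obj)
{
	struct i915_request *rq;
	int err;

	/* Without a usable copy engine there is no default context. */
	if (!gt->migrate.context)
		return -ENODEV;

	err = intel_context_migrate_clear(gt->migrate.context, NULL,
					  obj->mm.pages->sgl, I915_CACHE_NONE,
					  i915_gem_object_is_lmem(obj),
					  0 /* clear value */, &rq);
	if (rq) {
		if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
			err = -EIO;
		i915_request_put(rq);
	}

	return err;
}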

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-10-thomas.hellstrom@linux.intel.com
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt_types.h
drivers/gpu/drm/i915/gt/intel_migrate.c
drivers/gpu/drm/i915/gt/selftest_migrate.c
drivers/gpu/drm/i915/selftests/i915_perf_selftests.h

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 2161bf0..67ef057 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -13,6 +13,7 @@
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
+#include "intel_migrate.h"
 #include "intel_mocs.h"
 #include "intel_rc6.h"
 #include "intel_renderstate.h"
@@ -626,6 +627,8 @@ int intel_gt_init(struct intel_gt *gt)
        if (err)
                goto err_gt;
 
+       intel_migrate_init(&gt->migrate, gt);
+
        goto out_fw;
 err_gt:
        __intel_gt_disable(gt);
@@ -649,6 +652,7 @@ void intel_gt_driver_remove(struct intel_gt *gt)
 {
        __intel_gt_disable(gt);
 
+       intel_migrate_fini(&gt->migrate);
        intel_uc_driver_remove(&gt->uc);
 
        intel_engines_release(gt);
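
For reference, the two entry points wired up above have the following shape,
inferred from the selftest code removed later in this patch (see
intel_migrate.h for the actual declarations):

int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt);
void intel_migrate_fini(struct intel_migrate *m);

Note that the return value of intel_migrate_init() is ignored in
intel_gt_init(): migration is optional, and on failure gt->migrate.context
presumably remains NULL, which is the condition the selftests below check
before running.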
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index fecfacf..7450935 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -24,6 +24,7 @@
 #include "intel_reset_types.h"
 #include "intel_rc6_types.h"
 #include "intel_rps_types.h"
+#include "intel_migrate_types.h"
 #include "intel_wakeref.h"
 
 struct drm_i915_private;
@@ -145,6 +146,8 @@ struct intel_gt {
 
        struct i915_vma *scratch;
 
+       struct intel_migrate migrate;
+
        struct intel_gt_info {
                intel_engine_mask_t engine_mask;
                u8 num_engines;
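
Since struct intel_migrate is embedded by value in struct intel_gt, only the
types-only header intel_migrate_types.h is needed here, in keeping with the
i915 convention of keeping *_types.h headers light on dependencies.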
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index ba40091..23c59ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -418,6 +418,7 @@ intel_context_migrate_copy(struct intel_context *ce,
        struct i915_request *rq;
        int err;
 
+       GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
        *out = NULL;
 
        GEM_BUG_ON(ce->ring->size < SZ_64K);
@@ -536,6 +537,7 @@ intel_context_migrate_clear(struct intel_context *ce,
        struct i915_request *rq;
        int err;
 
+       GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
        *out = NULL;
 
        GEM_BUG_ON(ce->ring->size < SZ_64K);
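
The two asserts added above sanity-check that any context handed to the
migration routines shares the address space of the GT's default migration
context; the copy/clear emission presumably programs PTEs for that VM, so a
context bound to a different VM would be a caller bug worth catching loudly.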
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 159c865..12ef283 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -3,6 +3,8 @@
  * Copyright © 2020 Intel Corporation
  */
 
+#include <linux/sort.h>
+
 #include "selftests/i915_random.h"
 
 static const unsigned int sizes[] = {
@@ -18,13 +20,11 @@ static const unsigned int sizes[] = {
 static struct drm_i915_gem_object *
 create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
 {
-       if (HAS_LMEM(i915)) {
-               struct drm_i915_gem_object *obj;
+       struct drm_i915_gem_object *obj;
 
-               obj = i915_gem_object_create_lmem(i915, size, 0);
-               if (!IS_ERR(obj))
-                       return obj;
-       }
+       obj = i915_gem_object_create_lmem(i915, size, 0);
+       if (!IS_ERR(obj))
+               return obj;
 
        return i915_gem_object_create_internal(i915, size);
 }
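
The HAS_LMEM() guard is dropped above because i915_gem_object_create_lmem()
presumably already fails cleanly on platforms without local memory, in which
case the internal-object fallback is taken exactly as before.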
@@ -441,14 +441,229 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915)
                SUBTEST(thread_global_copy),
                SUBTEST(thread_global_clear),
        };
-       struct intel_migrate m;
+       struct intel_gt *gt = &i915->gt;
+
+       if (!gt->migrate.context)
+               return 0;
+
+       return i915_subtests(tests, &gt->migrate);
+}
+
+static struct drm_i915_gem_object *
+create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
+{
+       struct drm_i915_gem_object *obj = NULL;
        int err;
 
-       if (intel_migrate_init(&m, &i915->gt))
+       if (try_lmem)
+               obj = i915_gem_object_create_lmem(gt->i915, sz, 0);
+
+       if (IS_ERR_OR_NULL(obj)) {
+               obj = i915_gem_object_create_internal(gt->i915, sz);
+               if (IS_ERR(obj))
+                       return obj;
+       }
+
+       i915_gem_object_trylock(obj);
+       err = i915_gem_object_pin_pages(obj);
+       if (err) {
+               i915_gem_object_unlock(obj);
+               i915_gem_object_put(obj);
+               return ERR_PTR(err);
+       }
+
+       return obj;
+}
+
+static int wrap_ktime_compare(const void *A, const void *B)
+{
+       const ktime_t *a = A, *b = B;
+
+       return ktime_compare(*a, *b);
+}
+
+static int __perf_clear_blt(struct intel_context *ce,
+                           struct scatterlist *sg,
+                           enum i915_cache_level cache_level,
+                           bool is_lmem,
+                           size_t sz)
+{
+       ktime_t t[5];
+       int pass;
+       int err = 0;
+
+       for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
+               struct i915_request *rq;
+               ktime_t t0, t1;
+
+               t0 = ktime_get();
+
+               err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
+                                                 is_lmem, 0, &rq);
+               if (rq) {
+                       if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
+                               err = -EIO;
+                       i915_request_put(rq);
+               }
+               if (err)
+                       break;
+
+               t1 = ktime_get();
+               t[pass] = ktime_sub(t1, t0);
+       }
+       if (err)
+               return err;
+
+       sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+       pr_info("%s: %zd KiB fill: %lld MiB/s\n",
+               ce->engine->name, sz >> 10,
+               div64_u64(mul_u32_u32(4 * sz,
+                                     1000 * 1000 * 1000),
+                         t[1] + 2 * t[2] + t[3]) >> 20);
+       return 0;
+}
+
+static int perf_clear_blt(void *arg)
+{
+       struct intel_gt *gt = arg;
+       static const unsigned long sizes[] = {
+               SZ_4K,
+               SZ_64K,
+               SZ_2M,
+               SZ_64M
+       };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+               struct drm_i915_gem_object *dst;
+               int err;
+
+               dst = create_init_lmem_internal(gt, sizes[i], true);
+               if (IS_ERR(dst))
+                       return PTR_ERR(dst);
+
+               err = __perf_clear_blt(gt->migrate.context,
+                                      dst->mm.pages->sgl,
+                                      I915_CACHE_NONE,
+                                      i915_gem_object_is_lmem(dst),
+                                      sizes[i]);
+
+               i915_gem_object_unlock(dst);
+               i915_gem_object_put(dst);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int __perf_copy_blt(struct intel_context *ce,
+                          struct scatterlist *src,
+                          enum i915_cache_level src_cache_level,
+                          bool src_is_lmem,
+                          struct scatterlist *dst,
+                          enum i915_cache_level dst_cache_level,
+                          bool dst_is_lmem,
+                          size_t sz)
+{
+       ktime_t t[5];
+       int pass;
+       int err = 0;
+
+       for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
+               struct i915_request *rq;
+               ktime_t t0, t1;
+
+               t0 = ktime_get();
+
+               err = intel_context_migrate_copy(ce, NULL,
+                                                src, src_cache_level,
+                                                src_is_lmem,
+                                                dst, dst_cache_level,
+                                                dst_is_lmem,
+                                                &rq);
+               if (rq) {
+                       if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
+                               err = -EIO;
+                       i915_request_put(rq);
+               }
+               if (err)
+                       break;
+
+               t1 = ktime_get();
+               t[pass] = ktime_sub(t1, t0);
+       }
+       if (err)
+               return err;
+
+       sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+       pr_info("%s: %zd KiB copy: %lld MiB/s\n",
+               ce->engine->name, sz >> 10,
+               div64_u64(mul_u32_u32(4 * sz,
+                                     1000 * 1000 * 1000),
+                         t[1] + 2 * t[2] + t[3]) >> 20);
+       return 0;
+}
+
+static int perf_copy_blt(void *arg)
+{
+       struct intel_gt *gt = arg;
+       static const unsigned long sizes[] = {
+               SZ_4K,
+               SZ_64K,
+               SZ_2M,
+               SZ_64M
+       };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+               struct drm_i915_gem_object *src, *dst;
+               int err;
+
+               src = create_init_lmem_internal(gt, sizes[i], true);
+               if (IS_ERR(src))
+                       return PTR_ERR(src);
+
+               dst = create_init_lmem_internal(gt, sizes[i], false);
+               if (IS_ERR(dst)) {
+                       err = PTR_ERR(dst);
+                       goto err_src;
+               }
+
+               err = __perf_copy_blt(gt->migrate.context,
+                                     src->mm.pages->sgl,
+                                     I915_CACHE_NONE,
+                                     i915_gem_object_is_lmem(src),
+                                     dst->mm.pages->sgl,
+                                     I915_CACHE_NONE,
+                                     i915_gem_object_is_lmem(dst),
+                                     sizes[i]);
+
+               i915_gem_object_unlock(dst);
+               i915_gem_object_put(dst);
+err_src:
+               i915_gem_object_unlock(src);
+               i915_gem_object_put(src);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+int intel_migrate_perf_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(perf_clear_blt),
+               SUBTEST(perf_copy_blt),
+       };
+       struct intel_gt *gt = &i915->gt;
+
+       if (intel_gt_is_wedged(gt))
                return 0;
 
-       err = i915_subtests(tests, &m);
-       intel_migrate_fini(&m);
+       if (!gt->migrate.context)
+               return 0;
 
-       return err;
+       return intel_gt_live_subtests(tests, gt);
 }
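
A note on the numbers reported by the __perf_clear_blt() and __perf_copy_blt()
helpers above: five passes are timed and sorted, the fastest and slowest
samples are dropped, and the reported bandwidth is a weighted average of the
middle three with the median counted twice, i.e.

	rate = 4 * sz * NSEC_PER_SEC / (t[1] + 2 * t[2] + t[3])	/* bytes/s */

with the final >> 20 converting bytes/s to MiB/s (the ktime deltas are in
nanoseconds, hence the 10^9 factor).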
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index c2389f8..5077dc3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -17,5 +17,6 @@
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
 selftest(request, i915_request_perf_selftests)
+selftest(migrate, intel_migrate_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
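
With the selftest(migrate, ...) entry registered above, the new tests run as
part of the i915 perf selftests; assuming the standard selftest module
parameters, loading the driver with i915.perf_selftests=-1 should exercise
them, with the pr_info() bandwidth figures landing in dmesg.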