drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2019 Intel Corporation
   4  */
   5
   6 #include <linux/sort.h>
   7
   8 #include "gt/intel_gt.h"
   9 #include "gt/intel_engine_user.h"
  10
  11 #include "i915_selftest.h"
  12
  13 #include "gem/i915_gem_context.h"
  14 #include "selftests/igt_flush_test.h"
  15 #include "selftests/i915_random.h"
  16 #include "selftests/mock_drm.h"
  17 #include "huge_gem_object.h"
  18 #include "mock_context.h"
  19
  20 static int wrap_ktime_compare(const void *A, const void *B)
  21 {
  22         const ktime_t *a = A, *b = B;
  23
  24         return ktime_compare(*a, *b);
  25 }
  26
  27 static int __perf_fill_blt(struct drm_i915_gem_object *obj)
  28 {
  29         struct drm_i915_private *i915 = to_i915(obj->base.dev);
  30         int inst = 0;
  31
  32         do {
  33                 struct intel_engine_cs *engine;
  34                 ktime_t t[5];
  35                 int pass;
  36                 int err;
  37
  38                 engine = intel_engine_lookup_user(i915,
  39                                                   I915_ENGINE_CLASS_COPY,
  40                                                   inst++);
  41                 if (!engine)
  42                         return 0;
  43
  44                 intel_engine_pm_get(engine);
  45                 for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
  46                         struct intel_context *ce = engine->kernel_context;
  47                         ktime_t t0, t1;
  48
  49                         t0 = ktime_get();
  50
  51                         err = i915_gem_object_fill_blt(obj, ce, 0);
  52                         if (err)
  53                                 break;
  54
  55                         err = i915_gem_object_wait(obj,
  56                                                    I915_WAIT_ALL,
  57                                                    MAX_SCHEDULE_TIMEOUT);
  58                         if (err)
  59                                 break;
  60
  61                         t1 = ktime_get();
  62                         t[pass] = ktime_sub(t1, t0);
  63                 }
  64                 intel_engine_pm_put(engine);
  65                 if (err)
  66                         return err;
  67
  68                 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
  69                 pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
  70                         engine->name,
  71                         obj->base.size >> 10,
  72                         div64_u64(mul_u32_u32(4 * obj->base.size,
  73                                               1000 * 1000 * 1000),
  74                                   t[1] + 2 * t[2] + t[3]) >> 20);
  75         } while (1);
  76 }
  77
  78 static int perf_fill_blt(void *arg)
  79 {
  80         struct drm_i915_private *i915 = arg;
  81         static const unsigned long sizes[] = {
  82                 SZ_4K,
  83                 SZ_64K,
  84                 SZ_2M,
  85                 SZ_64M
  86         };
  87         int i;
  88
  89         for (i = 0; i < ARRAY_SIZE(sizes); i++) {
  90                 struct drm_i915_gem_object *obj;
  91                 int err;
  92
  93                 obj = i915_gem_object_create_internal(i915, sizes[i]);
  94                 if (IS_ERR(obj))
  95                         return PTR_ERR(obj);
  96
  97                 err = __perf_fill_blt(obj);
  98                 i915_gem_object_put(obj);
  99                 if (err)
 100                         return err;
 101         }
 102
 103         return 0;
 104 }
 105
 106 static int __perf_copy_blt(struct drm_i915_gem_object *src,
 107                            struct drm_i915_gem_object *dst)
 108 {
 109         struct drm_i915_private *i915 = to_i915(src->base.dev);
 110         int inst = 0;
 111
 112         do {
 113                 struct intel_engine_cs *engine;
 114                 ktime_t t[5];
 115                 int pass;
 116                 int err = 0;
 117
 118                 engine = intel_engine_lookup_user(i915,
 119                                                   I915_ENGINE_CLASS_COPY,
 120                                                   inst++);
 121                 if (!engine)
 122                         return 0;
 123
 124                 intel_engine_pm_get(engine);
 125                 for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
 126                         struct intel_context *ce = engine->kernel_context;
 127                         ktime_t t0, t1;
 128
 129                         t0 = ktime_get();
 130
 131                         err = i915_gem_object_copy_blt(src, dst, ce);
 132                         if (err)
 133                                 break;
 134
 135                         err = i915_gem_object_wait(dst,
 136                                                    I915_WAIT_ALL,
 137                                                    MAX_SCHEDULE_TIMEOUT);
 138                         if (err)
 139                                 break;
 140
 141                         t1 = ktime_get();
 142                         t[pass] = ktime_sub(t1, t0);
 143                 }
 144                 intel_engine_pm_put(engine);
 145                 if (err)
 146                         return err;
 147
 148                 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
 149                 pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
 150                         engine->name,
 151                         src->base.size >> 10,
 152                         div64_u64(mul_u32_u32(4 * src->base.size,
 153                                               1000 * 1000 * 1000),
 154                                   t[1] + 2 * t[2] + t[3]) >> 20);
 155         } while (1);
 156 }
 157
 158 static int perf_copy_blt(void *arg)
 159 {
 160         struct drm_i915_private *i915 = arg;
 161         static const unsigned long sizes[] = {
 162                 SZ_4K,
 163                 SZ_64K,
 164                 SZ_2M,
 165                 SZ_64M
 166         };
 167         int i;
 168
 169         for (i = 0; i < ARRAY_SIZE(sizes); i++) {
 170                 struct drm_i915_gem_object *src, *dst;
 171                 int err;
 172
 173                 src = i915_gem_object_create_internal(i915, sizes[i]);
 174                 if (IS_ERR(src))
 175                         return PTR_ERR(src);
 176
 177                 dst = i915_gem_object_create_internal(i915, sizes[i]);
 178                 if (IS_ERR(dst)) {
 179                         err = PTR_ERR(dst);
 180                         goto err_src;
 181                 }
 182
 183                 err = __perf_copy_blt(src, dst);
 184
 185                 i915_gem_object_put(dst);
 186 err_src:
 187                 i915_gem_object_put(src);
 188                 if (err)
 189                         return err;
 190         }
 191
 192         return 0;
 193 }
 194
/* Arguments handed to each blt worker kthread started by igt_threaded_blt(). */
struct igt_thread_arg {
	/* Engine passed to live_context_for_engine() and used to reach i915 */
	struct intel_engine_cs *engine;
	/* Context shared by all workers, or NULL so each worker creates its own */
	struct i915_gem_context *ctx;
	/* File passed to live_context_for_engine() when a worker creates a context */
	struct file *file;
	/* Worker-private random state, seeded by the spawning thread */
	struct rnd_state prng;
	/* Number of workers spawned; used to split the address-space budget */
	unsigned int n_cpus;
};
 202
 203 static int igt_fill_blt_thread(void *arg)
 204 {
 205         struct igt_thread_arg *thread = arg;
 206         struct intel_engine_cs *engine = thread->engine;
 207         struct rnd_state *prng = &thread->prng;
 208         struct drm_i915_gem_object *obj;
 209         struct i915_gem_context *ctx;
 210         struct intel_context *ce;
 211         unsigned int prio;
 212         IGT_TIMEOUT(end);
 213         u64 total, max;
 214         int err;
 215
 216         ctx = thread->ctx;
 217         if (!ctx) {
 218                 ctx = live_context_for_engine(engine, thread->file);
 219                 if (IS_ERR(ctx))
 220                         return PTR_ERR(ctx);
 221
 222                 prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
 223                 ctx->sched.priority = prio;
 224         }
 225
 226         ce = i915_gem_context_get_engine(ctx, 0);
 227         GEM_BUG_ON(IS_ERR(ce));
 228
 229         /*
 230          * If we have a tiny shared address space, like for the GGTT
 231          * then we can't be too greedy.
 232          */
 233         max = ce->vm->total;
 234         if (i915_is_ggtt(ce->vm) || thread->ctx)
 235                 max = div_u64(max, thread->n_cpus);
 236         max >>= 4;
 237
 238         total = PAGE_SIZE;
 239         do {
 240                 /* Aim to keep the runtime under reasonable bounds! */
 241                 const u32 max_phys_size = SZ_64K;
 242                 u32 val = prandom_u32_state(prng);
 243                 u32 phys_sz;
 244                 u32 sz;
 245                 u32 *vaddr;
 246                 u32 i;
 247
 248                 total = min(total, max);
 249                 sz = i915_prandom_u32_max_state(total, prng) + 1;
 250                 phys_sz = sz % max_phys_size + 1;
 251
 252                 sz = round_up(sz, PAGE_SIZE);
 253                 phys_sz = round_up(phys_sz, PAGE_SIZE);
 254                 phys_sz = min(phys_sz, sz);
 255
 256                 pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
 257                          phys_sz, sz, val);
 258
 259                 obj = huge_gem_object(engine->i915, phys_sz, sz);
 260                 if (IS_ERR(obj)) {
 261                         err = PTR_ERR(obj);
 262                         goto err_flush;
 263                 }
 264
 265                 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
 266                 if (IS_ERR(vaddr)) {
 267                         err = PTR_ERR(vaddr);
 268                         goto err_put;
 269                 }
 270
 271                 /*
 272                  * Make sure the potentially async clflush does its job, if
 273                  * required.
 274                  */
 275                 memset32(vaddr, val ^ 0xdeadbeaf,
 276                          huge_gem_object_phys_size(obj) / sizeof(u32));
 277
 278                 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 279                         obj->cache_dirty = true;
 280
 281                 err = i915_gem_object_fill_blt(obj, ce, val);
 282                 if (err)
 283                         goto err_unpin;
 284
 285                 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
 286                 if (err)
 287                         goto err_unpin;
 288
 289                 for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
 290                         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 291                                 drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));
 292
 293                         if (vaddr[i] != val) {
 294                                 pr_err("vaddr[%u]=%x, expected=%x\n", i,
 295                                        vaddr[i], val);
 296                                 err = -EINVAL;
 297                                 goto err_unpin;
 298                         }
 299                 }
 300
 301                 i915_gem_object_unpin_map(obj);
 302                 i915_gem_object_put(obj);
 303
 304                 total <<= 1;
 305         } while (!time_after(jiffies, end));
 306
 307         goto err_flush;
 308
 309 err_unpin:
 310         i915_gem_object_unpin_map(obj);
 311 err_put:
 312         i915_gem_object_put(obj);
 313 err_flush:
 314         if (err == -ENOMEM)
 315                 err = 0;
 316
 317         intel_context_put(ce);
 318         return err;
 319 }
 320
 321 static int igt_copy_blt_thread(void *arg)
 322 {
 323         struct igt_thread_arg *thread = arg;
 324         struct intel_engine_cs *engine = thread->engine;
 325         struct rnd_state *prng = &thread->prng;
 326         struct drm_i915_gem_object *src, *dst;
 327         struct i915_gem_context *ctx;
 328         struct intel_context *ce;
 329         unsigned int prio;
 330         IGT_TIMEOUT(end);
 331         u64 total, max;
 332         int err;
 333
 334         ctx = thread->ctx;
 335         if (!ctx) {
 336                 ctx = live_context_for_engine(engine, thread->file);
 337                 if (IS_ERR(ctx))
 338                         return PTR_ERR(ctx);
 339
 340                 prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
 341                 ctx->sched.priority = prio;
 342         }
 343
 344         ce = i915_gem_context_get_engine(ctx, 0);
 345         GEM_BUG_ON(IS_ERR(ce));
 346
 347         /*
 348          * If we have a tiny shared address space, like for the GGTT
 349          * then we can't be too greedy.
 350          */
 351         max = ce->vm->total;
 352         if (i915_is_ggtt(ce->vm) || thread->ctx)
 353                 max = div_u64(max, thread->n_cpus);
 354         max >>= 4;
 355
 356         total = PAGE_SIZE;
 357         do {
 358                 /* Aim to keep the runtime under reasonable bounds! */
 359                 const u32 max_phys_size = SZ_64K;
 360                 u32 val = prandom_u32_state(prng);
 361                 u32 phys_sz;
 362                 u32 sz;
 363                 u32 *vaddr;
 364                 u32 i;
 365
 366                 total = min(total, max);
 367                 sz = i915_prandom_u32_max_state(total, prng) + 1;
 368                 phys_sz = sz % max_phys_size + 1;
 369
 370                 sz = round_up(sz, PAGE_SIZE);
 371                 phys_sz = round_up(phys_sz, PAGE_SIZE);
 372                 phys_sz = min(phys_sz, sz);
 373
 374                 pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
 375                          phys_sz, sz, val);
 376
 377                 src = huge_gem_object(engine->i915, phys_sz, sz);
 378                 if (IS_ERR(src)) {
 379                         err = PTR_ERR(src);
 380                         goto err_flush;
 381                 }
 382
 383                 vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB);
 384                 if (IS_ERR(vaddr)) {
 385                         err = PTR_ERR(vaddr);
 386                         goto err_put_src;
 387                 }
 388
 389                 memset32(vaddr, val,
 390                          huge_gem_object_phys_size(src) / sizeof(u32));
 391
 392                 i915_gem_object_unpin_map(src);
 393
 394                 if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 395                         src->cache_dirty = true;
 396
 397                 dst = huge_gem_object(engine->i915, phys_sz, sz);
 398                 if (IS_ERR(dst)) {
 399                         err = PTR_ERR(dst);
 400                         goto err_put_src;
 401                 }
 402
 403                 vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB);
 404                 if (IS_ERR(vaddr)) {
 405                         err = PTR_ERR(vaddr);
 406                         goto err_put_dst;
 407                 }
 408
 409                 memset32(vaddr, val ^ 0xdeadbeaf,
 410                          huge_gem_object_phys_size(dst) / sizeof(u32));
 411
 412                 if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 413                         dst->cache_dirty = true;
 414
 415                 err = i915_gem_object_copy_blt(src, dst, ce);
 416                 if (err)
 417                         goto err_unpin;
 418
 419                 err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
 420                 if (err)
 421                         goto err_unpin;
 422
 423                 for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
 424                         if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 425                                 drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));
 426
 427                         if (vaddr[i] != val) {
 428                                 pr_err("vaddr[%u]=%x, expected=%x\n", i,
 429                                        vaddr[i], val);
 430                                 err = -EINVAL;
 431                                 goto err_unpin;
 432                         }
 433                 }
 434
 435                 i915_gem_object_unpin_map(dst);
 436
 437                 i915_gem_object_put(src);
 438                 i915_gem_object_put(dst);
 439
 440                 total <<= 1;
 441         } while (!time_after(jiffies, end));
 442
 443         goto err_flush;
 444
 445 err_unpin:
 446         i915_gem_object_unpin_map(dst);
 447 err_put_dst:
 448         i915_gem_object_put(dst);
 449 err_put_src:
 450         i915_gem_object_put(src);
 451 err_flush:
 452         if (err == -ENOMEM)
 453                 err = 0;
 454
 455         intel_context_put(ce);
 456         return err;
 457 }
 458
 459 static int igt_threaded_blt(struct intel_engine_cs *engine,
 460                             int (*blt_fn)(void *arg),
 461                             unsigned int flags)
 462 #define SINGLE_CTX BIT(0)
 463 {
 464         struct igt_thread_arg *thread;
 465         struct task_struct **tsk;
 466         unsigned int n_cpus, i;
 467         I915_RND_STATE(prng);
 468         int err = 0;
 469
 470         n_cpus = num_online_cpus() + 1;
 471
 472         tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
 473         if (!tsk)
 474                 return 0;
 475
 476         thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
 477         if (!thread)
 478                 goto out_tsk;
 479
 480         thread[0].file = mock_file(engine->i915);
 481         if (IS_ERR(thread[0].file)) {
 482                 err = PTR_ERR(thread[0].file);
 483                 goto out_thread;
 484         }
 485
 486         if (flags & SINGLE_CTX) {
 487                 thread[0].ctx = live_context_for_engine(engine, thread[0].file);
 488                 if (IS_ERR(thread[0].ctx)) {
 489                         err = PTR_ERR(thread[0].ctx);
 490                         goto out_file;
 491                 }
 492         }
 493
 494         for (i = 0; i < n_cpus; ++i) {
 495                 thread[i].engine = engine;
 496                 thread[i].file = thread[0].file;
 497                 thread[i].ctx = thread[0].ctx;
 498                 thread[i].n_cpus = n_cpus;
 499                 thread[i].prng =
 500                         I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
 501
 502                 tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
 503                 if (IS_ERR(tsk[i])) {
 504                         err = PTR_ERR(tsk[i]);
 505                         break;
 506                 }
 507
 508                 get_task_struct(tsk[i]);
 509         }
 510
 511         yield(); /* start all threads before we kthread_stop() */
 512
 513         for (i = 0; i < n_cpus; ++i) {
 514                 int status;
 515
 516                 if (IS_ERR_OR_NULL(tsk[i]))
 517                         continue;
 518
 519                 status = kthread_stop(tsk[i]);
 520                 if (status && !err)
 521                         err = status;
 522
 523                 put_task_struct(tsk[i]);
 524         }
 525
 526 out_file:
 527         fput(thread[0].file);
 528 out_thread:
 529         kfree(thread);
 530 out_tsk:
 531         kfree(tsk);
 532         return err;
 533 }
 534
 535 static int test_copy_engines(struct drm_i915_private *i915,
 536                              int (*fn)(void *arg),
 537                              unsigned int flags)
 538 {
 539         struct intel_engine_cs *engine;
 540         int ret;
 541
 542         for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
 543                 ret = igt_threaded_blt(engine, fn, flags);
 544                 if (ret)
 545                         return ret;
 546         }
 547
 548         return 0;
 549 }
 550
 551 static int igt_fill_blt(void *arg)
 552 {
 553         return test_copy_engines(arg, igt_fill_blt_thread, 0);
 554 }
 555
 556 static int igt_fill_blt_ctx0(void *arg)
 557 {
 558         return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
 559 }
 560
 561 static int igt_copy_blt(void *arg)
 562 {
 563         return test_copy_engines(arg, igt_copy_blt_thread, 0);
 564 }
 565
 566 static int igt_copy_blt_ctx0(void *arg)
 567 {
 568         return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
 569 }
 570
 571 int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 572 {
 573         static const struct i915_subtest tests[] = {
 574                 SUBTEST(igt_fill_blt),
 575                 SUBTEST(igt_fill_blt_ctx0),
 576                 SUBTEST(igt_copy_blt),
 577                 SUBTEST(igt_copy_blt_ctx0),
 578         };
 579
 580         if (intel_gt_is_wedged(&i915->gt))
 581                 return 0;
 582
 583         return i915_live_subtests(tests, i915);
 584 }
 585
 586 int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
 587 {
 588         static const struct i915_subtest tests[] = {
 589                 SUBTEST(perf_fill_blt),
 590                 SUBTEST(perf_copy_blt),
 591         };
 592
 593         if (intel_gt_is_wedged(&i915->gt))
 594                 return 0;
 595
 596         return i915_live_subtests(tests, i915);
 597 }