1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/prime_numbers.h>
26
27 #include "../i915_selftest.h"
28 #include "i915_random.h"
29 #include "igt_flush_test.h"
30
31 #include "mock_drm.h"
32 #include "mock_gem_device.h"
33 #include "huge_gem_object.h"
34
35 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
36
37 struct live_test {
38         struct drm_i915_private *i915;
39         const char *func;
40         const char *name;
41
42         unsigned int reset_global;
43         unsigned int reset_engine[I915_NUM_ENGINES];
44 };
45
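/*
 * Take a snapshot of the global and per-engine GPU reset counters (and
 * clear the missed-interrupt mask) after idling the GPU, so that
 * end_live_test() can detect any reset or missed interrupt that happens
 * while the test body runs.
 */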
46 static int begin_live_test(struct live_test *t,
47                            struct drm_i915_private *i915,
48                            const char *func,
49                            const char *name)
50 {
51         struct intel_engine_cs *engine;
52         enum intel_engine_id id;
53         int err;
54
55         t->i915 = i915;
56         t->func = func;
57         t->name = name;
58
59         err = i915_gem_wait_for_idle(i915,
60                                      I915_WAIT_LOCKED,
61                                      MAX_SCHEDULE_TIMEOUT);
62         if (err) {
63                 pr_err("%s(%s): failed to idle before, with err=%d!\n",
64                        func, name, err);
65                 return err;
66         }
67
68         i915->gpu_error.missed_irq_rings = 0;
69         t->reset_global = i915_reset_count(&i915->gpu_error);
70
71         for_each_engine(engine, i915, id)
72                 t->reset_engine[id] =
73                         i915_reset_engine_count(&i915->gpu_error, engine);
74
75         return 0;
76 }
77
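/*
 * Flush any outstanding work and compare the current reset counters and
 * missed-interrupt mask against the snapshot taken in begin_live_test();
 * any unexpected reset or missed interrupt fails the test with -EIO.
 */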
78 static int end_live_test(struct live_test *t)
79 {
80         struct drm_i915_private *i915 = t->i915;
81         struct intel_engine_cs *engine;
82         enum intel_engine_id id;
83
84         if (igt_flush_test(i915, I915_WAIT_LOCKED))
85                 return -EIO;
86
87         if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
88                 pr_err("%s(%s): GPU was reset %d times!\n",
89                        t->func, t->name,
90                        i915_reset_count(&i915->gpu_error) - t->reset_global);
91                 return -EIO;
92         }
93
94         for_each_engine(engine, i915, id) {
95                 if (t->reset_engine[id] ==
96                     i915_reset_engine_count(&i915->gpu_error, engine))
97                         continue;
98
99                 pr_err("%s(%s): engine '%s' was reset %d times!\n",
100                        t->func, t->name, engine->name,
101                        i915_reset_engine_count(&i915->gpu_error, engine) -
102                        t->reset_engine[id]);
103                 return -EIO;
104         }
105
106         if (i915->gpu_error.missed_irq_rings) {
107                 pr_err("%s(%s): Missed interrupts on engines %lx\n",
108                        t->func, t->name, i915->gpu_error.missed_irq_rings);
109                 return -EIO;
110         }
111
112         return 0;
113 }
114
115 static int live_nop_switch(void *arg)
116 {
117         const unsigned int nctx = 1024;
118         struct drm_i915_private *i915 = arg;
119         struct intel_engine_cs *engine;
120         struct i915_gem_context **ctx;
121         enum intel_engine_id id;
122         struct drm_file *file;
123         struct live_test t;
124         unsigned long n;
125         int err = -ENODEV;
126
127         /*
128          * Create as many contexts as we can feasibly get away with
129          * and check we can switch between them rapidly.
130          *
131          * Serves as a very simple stress test for submission and HW switching
132          * between contexts.
133          */
134
135         if (!DRIVER_CAPS(i915)->has_logical_contexts)
136                 return 0;
137
138         file = mock_file(i915);
139         if (IS_ERR(file))
140                 return PTR_ERR(file);
141
142         mutex_lock(&i915->drm.struct_mutex);
143         intel_runtime_pm_get(i915);
144
145         ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
146         if (!ctx) {
147                 err = -ENOMEM;
148                 goto out_unlock;
149         }
150
151         for (n = 0; n < nctx; n++) {
152                 ctx[n] = i915_gem_create_context(i915, file->driver_priv);
153                 if (IS_ERR(ctx[n])) {
154                         err = PTR_ERR(ctx[n]);
155                         goto out_unlock;
156                 }
157         }
158
159         for_each_engine(engine, i915, id) {
160                 struct i915_request *rq;
161                 unsigned long end_time, prime;
162                 ktime_t times[2] = {};
163
164                 times[0] = ktime_get_raw();
165                 for (n = 0; n < nctx; n++) {
166                         rq = i915_request_alloc(engine, ctx[n]);
167                         if (IS_ERR(rq)) {
168                                 err = PTR_ERR(rq);
169                                 goto out_unlock;
170                         }
171                         i915_request_add(rq);
172                 }
173                 if (i915_request_wait(rq,
174                                       I915_WAIT_LOCKED,
175                                       HZ / 5) < 0) {
176                 pr_err("Failed to populate %d contexts\n", nctx);
177                         i915_gem_set_wedged(i915);
178                         err = -EIO;
179                         goto out_unlock;
180                 }
181
182                 times[1] = ktime_get_raw();
183
184                 pr_info("Populated %d contexts on %s in %lluns\n",
185                         nctx, engine->name, ktime_to_ns(times[1] - times[0]));
186
187                 err = begin_live_test(&t, i915, __func__, engine->name);
188                 if (err)
189                         goto out_unlock;
190
191                 end_time = jiffies + i915_selftest.timeout_jiffies;
192                 for_each_prime_number_from(prime, 2, 8192) {
193                         times[1] = ktime_get_raw();
194
195                         for (n = 0; n < prime; n++) {
196                                 rq = i915_request_alloc(engine, ctx[n % nctx]);
197                                 if (IS_ERR(rq)) {
198                                         err = PTR_ERR(rq);
199                                         goto out_unlock;
200                                 }
201
202                                 /*
203                                  * This space is left intentionally blank.
204                                  *
205                                  * We do not actually want to perform any
206                                  * action with this request, we just want
207                                  * to measure the latency in allocation
208                                  * and submission of our breadcrumbs -
209                                  * ensuring that the bare request is sufficient
210                                  * for the system to work (i.e. proper HEAD
211                                  * tracking of the rings, interrupt handling,
212                                  * etc). It also gives us the lowest bounds
213                                  * for latency.
214                                  */
215
216                                 i915_request_add(rq);
217                         }
218                         if (i915_request_wait(rq,
219                                               I915_WAIT_LOCKED,
220                                               HZ / 5) < 0) {
221                                 pr_err("Switching between %ld contexts timed out\n",
222                                        prime);
223                                 i915_gem_set_wedged(i915);
224                                 break;
225                         }
226
227                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
228                         if (prime == 2)
229                                 times[0] = times[1];
230
231                         if (__igt_timeout(end_time, NULL))
232                                 break;
233                 }
234
235                 err = end_live_test(&t);
236                 if (err)
237                         goto out_unlock;
238
239                 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
240                         engine->name,
241                         ktime_to_ns(times[0]),
242                         prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
243         }
244
245 out_unlock:
246         intel_runtime_pm_put(i915);
247         mutex_unlock(&i915->drm.struct_mutex);
248         mock_file_free(i915, file);
249         return err;
250 }
251
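/*
 * Build a batch of MI_STORE_DWORD_IMM commands (in the encoding
 * appropriate for the GPU generation) that writes 'value' at 'offset'
 * within the target vma, stepping one page per store for 'count' pages.
 * The batch is pinned into the same address space as the target vma.
 */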
252 static struct i915_vma *
253 gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
254 {
255         struct drm_i915_gem_object *obj;
256         const int gen = INTEL_GEN(vma->vm->i915);
257         unsigned long n, size;
258         u32 *cmd;
259         int err;
260
261         size = (4 * count + 1) * sizeof(u32);
262         size = round_up(size, PAGE_SIZE);
263         obj = i915_gem_object_create_internal(vma->vm->i915, size);
264         if (IS_ERR(obj))
265                 return ERR_CAST(obj);
266
267         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
268         if (IS_ERR(cmd)) {
269                 err = PTR_ERR(cmd);
270                 goto err;
271         }
272
273         GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
274         offset += vma->node.start;
275
276         for (n = 0; n < count; n++) {
277                 if (gen >= 8) {
278                         *cmd++ = MI_STORE_DWORD_IMM_GEN4;
279                         *cmd++ = lower_32_bits(offset);
280                         *cmd++ = upper_32_bits(offset);
281                         *cmd++ = value;
282                 } else if (gen >= 4) {
283                         *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
284                                 (gen < 6 ? MI_USE_GGTT : 0);
285                         *cmd++ = 0;
286                         *cmd++ = offset;
287                         *cmd++ = value;
288                 } else {
289                         *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
290                         *cmd++ = offset;
291                         *cmd++ = value;
292                 }
293                 offset += PAGE_SIZE;
294         }
295         *cmd = MI_BATCH_BUFFER_END;
296         i915_gem_object_unpin_map(obj);
297
298         err = i915_gem_object_set_to_gtt_domain(obj, false);
299         if (err)
300                 goto err;
301
302         vma = i915_vma_instance(obj, vma->vm, NULL);
303         if (IS_ERR(vma)) {
304                 err = PTR_ERR(vma);
305                 goto err;
306         }
307
308         err = i915_vma_pin(vma, 0, 0, PIN_USER);
309         if (err)
310                 goto err;
311
312         return vma;
313
314 err:
315         i915_gem_object_put(obj);
316         return ERR_PTR(err);
317 }
318
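/*
 * A huge_gem_object has a smaller physical backing store than its GTT
 * footprint: real_page_count() is the number of physical pages actually
 * allocated, fake_page_count() the number of GTT pages aliased onto them.
 */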
319 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
320 {
321         return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
322 }
323
324 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
325 {
326         return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
327 }
328
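/*
 * Using 'ctx' on 'engine', write the value 'dw' into the dw-th dword of
 * every physical page of 'obj'. Each pass goes through a different
 * aliased range of the object's GTT mapping (see the comment below), so
 * successive dwords exercise successive views of the object.
 */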
329 static int gpu_fill(struct drm_i915_gem_object *obj,
330                     struct i915_gem_context *ctx,
331                     struct intel_engine_cs *engine,
332                     unsigned int dw)
333 {
334         struct drm_i915_private *i915 = to_i915(obj->base.dev);
335         struct i915_address_space *vm =
336                 ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
337         struct i915_request *rq;
338         struct i915_vma *vma;
339         struct i915_vma *batch;
340         unsigned int flags;
341         int err;
342
343         GEM_BUG_ON(obj->base.size > vm->total);
344         GEM_BUG_ON(!intel_engine_can_store_dword(engine));
345
346         vma = i915_vma_instance(obj, vm, NULL);
347         if (IS_ERR(vma))
348                 return PTR_ERR(vma);
349
350         err = i915_gem_object_set_to_gtt_domain(obj, false);
351         if (err)
352                 return err;
353
354         err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
355         if (err)
356                 return err;
357
358         /* Within the GTT the huge object maps every page onto
359          * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
360          * We set the nth dword within the page using the nth
361          * mapping via the GTT - this should exercise the GTT mapping
362          * whilst checking that each context provides a unique view
363          * into the object.
364          */
365         batch = gpu_fill_dw(vma,
366                             (dw * real_page_count(obj)) << PAGE_SHIFT |
367                             (dw * sizeof(u32)),
368                             real_page_count(obj),
369                             dw);
370         if (IS_ERR(batch)) {
371                 err = PTR_ERR(batch);
372                 goto err_vma;
373         }
374
375         rq = i915_request_alloc(engine, ctx);
376         if (IS_ERR(rq)) {
377                 err = PTR_ERR(rq);
378                 goto err_batch;
379         }
380
381         flags = 0;
382         if (INTEL_GEN(vm->i915) <= 5)
383                 flags |= I915_DISPATCH_SECURE;
384
385         err = engine->emit_bb_start(rq,
386                                     batch->node.start, batch->node.size,
387                                     flags);
388         if (err)
389                 goto err_request;
390
391         err = i915_vma_move_to_active(batch, rq, 0);
392         if (err)
393                 goto skip_request;
394
395         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
396         if (err)
397                 goto skip_request;
398
399         i915_gem_object_set_active_reference(batch->obj);
400         i915_vma_unpin(batch);
401         i915_vma_close(batch);
402
403         i915_vma_unpin(vma);
404
405         i915_request_add(rq);
406
407         return 0;
408
409 skip_request:
410         i915_request_skip(rq, err);
411 err_request:
412         i915_request_add(rq);
413 err_batch:
414         i915_vma_unpin(batch);
415         i915_vma_put(batch);
416 err_vma:
417         i915_vma_unpin(vma);
418         return err;
419 }
420
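/*
 * Fill every dword of the object's physical backing store with 'value'
 * from the CPU, flushing the writes out to memory on !llc platforms.
 */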
421 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
422 {
423         const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
424         unsigned int n, m, need_flush;
425         int err;
426
427         err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
428         if (err)
429                 return err;
430
431         for (n = 0; n < real_page_count(obj); n++) {
432                 u32 *map;
433
434                 map = kmap_atomic(i915_gem_object_get_page(obj, n));
435                 for (m = 0; m < DW_PER_PAGE; m++)
436                         map[m] = value;
437                 if (!has_llc)
438                         drm_clflush_virt_range(map, PAGE_SIZE);
439                 kunmap_atomic(map);
440         }
441
442         i915_gem_obj_finish_shmem_access(obj);
443         obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
444         obj->write_domain = 0;
445         return 0;
446 }
447
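/*
 * Read the object back on the CPU: the first 'max' dwords of each
 * physical page must hold their own index (as written by gpu_fill()),
 * and the remaining dwords must still hold the STACK_MAGIC poison
 * written by cpu_fill().
 */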
448 static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
449 {
450         unsigned int n, m, needs_flush;
451         int err;
452
453         err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
454         if (err)
455                 return err;
456
457         for (n = 0; n < real_page_count(obj); n++) {
458                 u32 *map;
459
460                 map = kmap_atomic(i915_gem_object_get_page(obj, n));
461                 if (needs_flush & CLFLUSH_BEFORE)
462                         drm_clflush_virt_range(map, PAGE_SIZE);
463
464                 for (m = 0; m < max; m++) {
465                         if (map[m] != m) {
466                                 pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
467                                        n, m, map[m], m);
468                                 err = -EINVAL;
469                                 goto out_unmap;
470                         }
471                 }
472
473                 for (; m < DW_PER_PAGE; m++) {
474                         if (map[m] != STACK_MAGIC) {
475                                 pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
476                                        n, m, map[m], STACK_MAGIC);
477                                 err = -EINVAL;
478                                 goto out_unmap;
479                         }
480                 }
481
482 out_unmap:
483                 kunmap_atomic(map);
484                 if (err)
485                         break;
486         }
487
488         i915_gem_obj_finish_shmem_access(obj);
489         return err;
490 }
491
492 static int file_add_object(struct drm_file *file,
493                             struct drm_i915_gem_object *obj)
494 {
495         int err;
496
497         GEM_BUG_ON(obj->base.handle_count);
498
499         /* tie the object to the drm_file for easy reaping */
500         err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
501         if (err < 0)
502                 return err;
503
504         i915_gem_object_get(obj);
505         obj->base.handle_count++;
506         return 0;
507 }
508
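/*
 * Create a huge_gem_object sized to fit within the context's vm, tie it
 * to the mock drm_file for reaping, poison it with STACK_MAGIC from the
 * CPU and add it to the list of objects to be checked at the end.
 */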
509 static struct drm_i915_gem_object *
510 create_test_object(struct i915_gem_context *ctx,
511                    struct drm_file *file,
512                    struct list_head *objects)
513 {
514         struct drm_i915_gem_object *obj;
515         struct i915_address_space *vm =
516                 ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm;
517         u64 size;
518         int err;
519
520         size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
521         size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
522
523         obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size);
524         if (IS_ERR(obj))
525                 return obj;
526
527         err = file_add_object(file, obj);
528         i915_gem_object_put(obj);
529         if (err)
530                 return ERR_PTR(err);
531
532         err = cpu_fill(obj, STACK_MAGIC);
533         if (err) {
534                 pr_err("Failed to fill object with cpu, err=%d\n",
535                        err);
536                 return ERR_PTR(err);
537         }
538
539         list_add_tail(&obj->st_link, objects);
540         return obj;
541 }
542
543 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
544 {
545         unsigned long npages = fake_page_count(obj);
546
547         GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
548         return npages / DW_PER_PAGE;
549 }
550
551 static int igt_ctx_exec(void *arg)
552 {
553         struct drm_i915_private *i915 = arg;
554         struct drm_i915_gem_object *obj = NULL;
555         unsigned long ncontexts, ndwords, dw;
556         struct drm_file *file;
557         IGT_TIMEOUT(end_time);
558         LIST_HEAD(objects);
559         struct live_test t;
560         int err = -ENODEV;
561
562         /*
563          * Create a few different contexts (with different mm) and write
564          * through each ctx/mm using the GPU, making sure those writes end
565          * up in the expected pages of our obj.
566          */
567
568         if (!DRIVER_CAPS(i915)->has_logical_contexts)
569                 return 0;
570
571         file = mock_file(i915);
572         if (IS_ERR(file))
573                 return PTR_ERR(file);
574
575         mutex_lock(&i915->drm.struct_mutex);
576
577         err = begin_live_test(&t, i915, __func__, "");
578         if (err)
579                 goto out_unlock;
580
581         ncontexts = 0;
582         ndwords = 0;
583         dw = 0;
584         while (!time_after(jiffies, end_time)) {
585                 struct intel_engine_cs *engine;
586                 struct i915_gem_context *ctx;
587                 unsigned int id;
588
589                 ctx = i915_gem_create_context(i915, file->driver_priv);
590                 if (IS_ERR(ctx)) {
591                         err = PTR_ERR(ctx);
592                         goto out_unlock;
593                 }
594
595                 for_each_engine(engine, i915, id) {
596                         if (!engine->context_size)
597                                 continue; /* No logical context support in HW */
598
599                         if (!intel_engine_can_store_dword(engine))
600                                 continue;
601
602                         if (!obj) {
603                                 obj = create_test_object(ctx, file, &objects);
604                                 if (IS_ERR(obj)) {
605                                         err = PTR_ERR(obj);
606                                         goto out_unlock;
607                                 }
608                         }
609
610                         intel_runtime_pm_get(i915);
611                         err = gpu_fill(obj, ctx, engine, dw);
612                         intel_runtime_pm_put(i915);
613                         if (err) {
614                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
615                                        ndwords, dw, max_dwords(obj),
616                                        engine->name, ctx->hw_id,
617                                        yesno(!!ctx->ppgtt), err);
618                                 goto out_unlock;
619                         }
620
621                         if (++dw == max_dwords(obj)) {
622                                 obj = NULL;
623                                 dw = 0;
624                         }
625                         ndwords++;
626                 }
627                 ncontexts++;
628         }
629         pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
630                 ncontexts, INTEL_INFO(i915)->num_rings, ndwords);
631
632         dw = 0;
633         list_for_each_entry(obj, &objects, st_link) {
634                 unsigned int rem =
635                         min_t(unsigned int, ndwords - dw, max_dwords(obj));
636
637                 err = cpu_check(obj, rem);
638                 if (err)
639                         break;
640
641                 dw += rem;
642         }
643
644 out_unlock:
645         if (end_live_test(&t))
646                 err = -EIO;
647         mutex_unlock(&i915->drm.struct_mutex);
648
649         mock_file_free(i915, file);
650         return err;
651 }
652
653 static int igt_ctx_readonly(void *arg)
654 {
655         struct drm_i915_private *i915 = arg;
656         struct drm_i915_gem_object *obj = NULL;
657         struct i915_gem_context *ctx;
658         struct i915_hw_ppgtt *ppgtt;
659         unsigned long ndwords, dw;
660         struct drm_file *file;
661         I915_RND_STATE(prng);
662         IGT_TIMEOUT(end_time);
663         LIST_HEAD(objects);
664         struct live_test t;
665         int err = -ENODEV;
666
667         /*
668          * Create a few read-only objects (with the occasional writable object)
669          * and try to write into these objects, checking that the GPU discards
670          * any write to a read-only object.
671          */
672
673         file = mock_file(i915);
674         if (IS_ERR(file))
675                 return PTR_ERR(file);
676
677         mutex_lock(&i915->drm.struct_mutex);
678
679         err = begin_live_test(&t, i915, __func__, "");
680         if (err)
681                 goto out_unlock;
682
683         ctx = i915_gem_create_context(i915, file->driver_priv);
684         if (IS_ERR(ctx)) {
685                 err = PTR_ERR(ctx);
686                 goto out_unlock;
687         }
688
689         ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt;
690         if (!ppgtt || !ppgtt->vm.has_read_only) {
691                 err = 0;
692                 goto out_unlock;
693         }
694
695         ndwords = 0;
696         dw = 0;
697         while (!time_after(jiffies, end_time)) {
698                 struct intel_engine_cs *engine;
699                 unsigned int id;
700
701                 for_each_engine(engine, i915, id) {
702                         if (!intel_engine_can_store_dword(engine))
703                                 continue;
704
705                         if (!obj) {
706                                 obj = create_test_object(ctx, file, &objects);
707                                 if (IS_ERR(obj)) {
708                                         err = PTR_ERR(obj);
709                                         goto out_unlock;
710                                 }
711
712                                 if (prandom_u32_state(&prng) & 1)
713                                         i915_gem_object_set_readonly(obj);
714                         }
715
716                         intel_runtime_pm_get(i915);
717                         err = gpu_fill(obj, ctx, engine, dw);
718                         intel_runtime_pm_put(i915);
719                         if (err) {
720                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
721                                        ndwords, dw, max_dwords(obj),
722                                        engine->name, ctx->hw_id,
723                                        yesno(!!ctx->ppgtt), err);
724                                 goto out_unlock;
725                         }
726
727                         if (++dw == max_dwords(obj)) {
728                                 obj = NULL;
729                                 dw = 0;
730                         }
731                         ndwords++;
732                 }
733         }
734         pr_info("Submitted %lu dwords (across %u engines)\n",
735                 ndwords, INTEL_INFO(i915)->num_rings);
736
737         dw = 0;
738         list_for_each_entry(obj, &objects, st_link) {
739                 unsigned int rem =
740                         min_t(unsigned int, ndwords - dw, max_dwords(obj));
741                 unsigned int num_writes;
742
743                 num_writes = rem;
744                 if (i915_gem_object_is_readonly(obj))
745                         num_writes = 0;
746
747                 err = cpu_check(obj, num_writes);
748                 if (err)
749                         break;
750
751                 dw += rem;
752         }
753
754 out_unlock:
755         if (end_live_test(&t))
756                 err = -EIO;
757         mutex_unlock(&i915->drm.struct_mutex);
758
759         mock_file_free(i915, file);
760         return err;
761 }
762
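/*
 * The scratch tests rely on the chosen offset being unpopulated in the
 * context's ppgtt, so that the access is redirected to the vm's scratch
 * page; reject any offset that overlaps an existing node in the vm.
 */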
763 static int check_scratch(struct i915_gem_context *ctx, u64 offset)
764 {
765         struct drm_mm_node *node =
766                 __drm_mm_interval_first(&ctx->ppgtt->vm.mm,
767                                         offset, offset + sizeof(u32) - 1);
768         if (!node || node->start > offset)
769                 return 0;
770
771         GEM_BUG_ON(offset >= node->start + node->size);
772
773         pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
774                upper_32_bits(offset), lower_32_bits(offset));
775         return -EINVAL;
776 }
777
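/*
 * Emit a single MI_STORE_DWORD_IMM through 'ctx' on 'engine' that writes
 * 'value' to 'offset' in the context's ppgtt. The target offset is never
 * bound to an object, so if the vm is properly isolated the write lands
 * in that vm's scratch page and is never visible to other contexts.
 */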
778 static int write_to_scratch(struct i915_gem_context *ctx,
779                             struct intel_engine_cs *engine,
780                             u64 offset, u32 value)
781 {
782         struct drm_i915_private *i915 = ctx->i915;
783         struct drm_i915_gem_object *obj;
784         struct i915_request *rq;
785         struct i915_vma *vma;
786         u32 *cmd;
787         int err;
788
789         GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
790
791         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
792         if (IS_ERR(obj))
793                 return PTR_ERR(obj);
794
795         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
796         if (IS_ERR(cmd)) {
797                 err = PTR_ERR(cmd);
798                 goto err;
799         }
800
801         *cmd++ = MI_STORE_DWORD_IMM_GEN4;
802         if (INTEL_GEN(i915) >= 8) {
803                 *cmd++ = lower_32_bits(offset);
804                 *cmd++ = upper_32_bits(offset);
805         } else {
806                 *cmd++ = 0;
807                 *cmd++ = offset;
808         }
809         *cmd++ = value;
810         *cmd = MI_BATCH_BUFFER_END;
811         i915_gem_object_unpin_map(obj);
812
813         err = i915_gem_object_set_to_gtt_domain(obj, false);
814         if (err)
815                 goto err;
816
817         vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
818         if (IS_ERR(vma)) {
819                 err = PTR_ERR(vma);
820                 goto err;
821         }
822
823         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
824         if (err)
825                 goto err;
826
827         err = check_scratch(ctx, offset);
828         if (err)
829                 goto err_unpin;
830
831         rq = i915_request_alloc(engine, ctx);
832         if (IS_ERR(rq)) {
833                 err = PTR_ERR(rq);
834                 goto err_unpin;
835         }
836
837         err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
838         if (err)
839                 goto err_request;
840
841         err = i915_vma_move_to_active(vma, rq, 0);
842         if (err)
843                 goto skip_request;
844
845         i915_gem_object_set_active_reference(obj);
846         i915_vma_unpin(vma);
847         i915_vma_close(vma);
848
849         i915_request_add(rq);
850
851         return 0;
852
853 skip_request:
854         i915_request_skip(rq, err);
855 err_request:
856         i915_request_add(rq);
857 err_unpin:
858         i915_vma_unpin(vma);
859 err:
860         i915_gem_object_put(obj);
861         return err;
862 }
863
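/*
 * Load the dword at 'offset' in the context's ppgtt into a GPR using
 * MI_LOAD_REGISTER_MEM, store that GPR back into the batch object with
 * MI_STORE_REGISTER_MEM, and return the value as read back by the CPU.
 * If the vm is isolated, the load hits the scratch page and returns 0.
 */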
864 static int read_from_scratch(struct i915_gem_context *ctx,
865                              struct intel_engine_cs *engine,
866                              u64 offset, u32 *value)
867 {
868         struct drm_i915_private *i915 = ctx->i915;
869         struct drm_i915_gem_object *obj;
870         const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
871         const u32 result = 0x100;
872         struct i915_request *rq;
873         struct i915_vma *vma;
874         u32 *cmd;
875         int err;
876
877         GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
878
879         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
880         if (IS_ERR(obj))
881                 return PTR_ERR(obj);
882
883         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
884         if (IS_ERR(cmd)) {
885                 err = PTR_ERR(cmd);
886                 goto err;
887         }
888
889         memset(cmd, POISON_INUSE, PAGE_SIZE);
890         if (INTEL_GEN(i915) >= 8) {
891                 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
892                 *cmd++ = RCS_GPR0;
893                 *cmd++ = lower_32_bits(offset);
894                 *cmd++ = upper_32_bits(offset);
895                 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
896                 *cmd++ = RCS_GPR0;
897                 *cmd++ = result;
898                 *cmd++ = 0;
899         } else {
900                 *cmd++ = MI_LOAD_REGISTER_MEM;
901                 *cmd++ = RCS_GPR0;
902                 *cmd++ = offset;
903                 *cmd++ = MI_STORE_REGISTER_MEM;
904                 *cmd++ = RCS_GPR0;
905                 *cmd++ = result;
906         }
907         *cmd = MI_BATCH_BUFFER_END;
908         i915_gem_object_unpin_map(obj);
909
910         err = i915_gem_object_set_to_gtt_domain(obj, false);
911         if (err)
912                 goto err;
913
914         vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
915         if (IS_ERR(vma)) {
916                 err = PTR_ERR(vma);
917                 goto err;
918         }
919
920         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
921         if (err)
922                 goto err;
923
924         err = check_scratch(ctx, offset);
925         if (err)
926                 goto err_unpin;
927
928         rq = i915_request_alloc(engine, ctx);
929         if (IS_ERR(rq)) {
930                 err = PTR_ERR(rq);
931                 goto err_unpin;
932         }
933
934         err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
935         if (err)
936                 goto err_request;
937
938         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
939         if (err)
940                 goto skip_request;
941
942         i915_vma_unpin(vma);
943         i915_vma_close(vma);
944
945         i915_request_add(rq);
946
947         err = i915_gem_object_set_to_cpu_domain(obj, false);
948         if (err)
949                 goto err;
950
951         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
952         if (IS_ERR(cmd)) {
953                 err = PTR_ERR(cmd);
954                 goto err;
955         }
956
957         *value = cmd[result / sizeof(*cmd)];
958         i915_gem_object_unpin_map(obj);
959         i915_gem_object_put(obj);
960
961         return 0;
962
963 skip_request:
964         i915_request_skip(rq, err);
965 err_request:
966         i915_request_add(rq);
967 err_unpin:
968         i915_vma_unpin(vma);
969 err:
970         i915_gem_object_put(obj);
971         return err;
972 }
973
974 static int igt_vm_isolation(void *arg)
975 {
976         struct drm_i915_private *i915 = arg;
977         struct i915_gem_context *ctx_a, *ctx_b;
978         struct intel_engine_cs *engine;
979         struct drm_file *file;
980         I915_RND_STATE(prng);
981         unsigned long count;
982         struct live_test t;
983         unsigned int id;
984         u64 vm_total;
985         int err;
986
987         if (INTEL_GEN(i915) < 7)
988                 return 0;
989
990         /*
991          * The simple goal here is that a write into one context is not
992          * observed in a second (separate page tables and scratch).
993          */
994
995         file = mock_file(i915);
996         if (IS_ERR(file))
997                 return PTR_ERR(file);
998
999         mutex_lock(&i915->drm.struct_mutex);
1000
1001         err = begin_live_test(&t, i915, __func__, "");
1002         if (err)
1003                 goto out_unlock;
1004
1005         ctx_a = i915_gem_create_context(i915, file->driver_priv);
1006         if (IS_ERR(ctx_a)) {
1007                 err = PTR_ERR(ctx_a);
1008                 goto out_unlock;
1009         }
1010
1011         ctx_b = i915_gem_create_context(i915, file->driver_priv);
1012         if (IS_ERR(ctx_b)) {
1013                 err = PTR_ERR(ctx_b);
1014                 goto out_unlock;
1015         }
1016
1017         /* We can only test vm isolation if the vms are distinct */
1018         if (ctx_a->ppgtt == ctx_b->ppgtt)
1019                 goto out_unlock;
1020
1021         vm_total = ctx_a->ppgtt->vm.total;
1022         GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total);
1023         vm_total -= I915_GTT_PAGE_SIZE;
1024
1025         intel_runtime_pm_get(i915);
1026
1027         count = 0;
1028         for_each_engine(engine, i915, id) {
1029                 IGT_TIMEOUT(end_time);
1030                 unsigned long this = 0;
1031
1032                 if (!intel_engine_can_store_dword(engine))
1033                         continue;
1034
1035                 while (!__igt_timeout(end_time, NULL)) {
1036                         u32 value = 0xc5c5c5c5;
1037                         u64 offset;
1038
1039                         div64_u64_rem(i915_prandom_u64_state(&prng),
1040                                       vm_total, &offset);
1041                         offset &= -sizeof(u32);
1042                         offset += I915_GTT_PAGE_SIZE;
1043
1044                         err = write_to_scratch(ctx_a, engine,
1045                                                offset, 0xdeadbeef);
1046                         if (err == 0)
1047                                 err = read_from_scratch(ctx_b, engine,
1048                                                         offset, &value);
1049                         if (err)
1050                                 goto out_rpm;
1051
1052                         if (value) {
1053                                 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1054                                        engine->name, value,
1055                                        upper_32_bits(offset),
1056                                        lower_32_bits(offset),
1057                                        this);
1058                                 err = -EINVAL;
1059                                 goto out_rpm;
1060                         }
1061
1062                         this++;
1063                 }
1064                 count += this;
1065         }
1066         pr_info("Checked %lu scratch offsets across %d engines\n",
1067                 count, INTEL_INFO(i915)->num_rings);
1068
1069 out_rpm:
1070         intel_runtime_pm_put(i915);
1071 out_unlock:
1072         if (end_live_test(&t))
1073                 err = -EIO;
1074         mutex_unlock(&i915->drm.struct_mutex);
1075
1076         mock_file_free(i915, file);
1077         return err;
1078 }
1079
1080 static __maybe_unused const char *
1081 __engine_name(struct drm_i915_private *i915, unsigned int engines)
1082 {
1083         struct intel_engine_cs *engine;
1084         unsigned int tmp;
1085
1086         if (engines == ALL_ENGINES)
1087                 return "all";
1088
1089         for_each_engine_masked(engine, i915, engines, tmp)
1090                 return engine->name;
1091
1092         return "none";
1093 }
1094
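/*
 * Exercise one switch-to-kernel-context cycle on the selected engines:
 * dirty each engine with a request, ask for the switch, and check both
 * that every engine ends up idling in the kernel context and that a
 * second switch while already idle emits no further requests.
 */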
1095 static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
1096                                           struct i915_gem_context *ctx,
1097                                           unsigned int engines)
1098 {
1099         struct intel_engine_cs *engine;
1100         unsigned int tmp;
1101         int err;
1102
1103         GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
1104         for_each_engine_masked(engine, i915, engines, tmp) {
1105                 struct i915_request *rq;
1106
1107                 rq = i915_request_alloc(engine, ctx);
1108                 if (IS_ERR(rq))
1109                         return PTR_ERR(rq);
1110
1111                 i915_request_add(rq);
1112         }
1113
1114         err = i915_gem_switch_to_kernel_context(i915);
1115         if (err)
1116                 return err;
1117
1118         for_each_engine_masked(engine, i915, engines, tmp) {
1119                 if (!engine_has_kernel_context_barrier(engine)) {
1120                         pr_err("kernel context not last on engine %s!\n",
1121                                engine->name);
1122                         return -EINVAL;
1123                 }
1124         }
1125
1126         err = i915_gem_wait_for_idle(i915,
1127                                      I915_WAIT_LOCKED,
1128                                      MAX_SCHEDULE_TIMEOUT);
1129         if (err)
1130                 return err;
1131
1132         GEM_BUG_ON(i915->gt.active_requests);
1133         for_each_engine_masked(engine, i915, engines, tmp) {
1134                 if (engine->last_retired_context->gem_context != i915->kernel_context) {
1135                         pr_err("engine %s not idling in kernel context!\n",
1136                                engine->name);
1137                         return -EINVAL;
1138                 }
1139         }
1140
1141         err = i915_gem_switch_to_kernel_context(i915);
1142         if (err)
1143                 return err;
1144
1145         if (i915->gt.active_requests) {
1146                 pr_err("switch-to-kernel-context emitted %d requests even though it should already be idling in the kernel context\n",
1147                        i915->gt.active_requests);
1148                 return -EINVAL;
1149         }
1150
1151         for_each_engine_masked(engine, i915, engines, tmp) {
1152                 if (!intel_engine_has_kernel_context(engine)) {
1153                         pr_err("kernel context not last on engine %s!\n",
1154                                engine->name);
1155                         return -EINVAL;
1156                 }
1157         }
1158
1159         return 0;
1160 }
1161
1162 static int igt_switch_to_kernel_context(void *arg)
1163 {
1164         struct drm_i915_private *i915 = arg;
1165         struct intel_engine_cs *engine;
1166         struct i915_gem_context *ctx;
1167         enum intel_engine_id id;
1168         int err;
1169
1170         /*
1171          * A core premise of switching to the kernel context is that
1172          * if an engine is already idling in the kernel context, we
1173          * do not emit another request and wake it up. The other is that
1174          * we do indeed end up idling in the kernel context.
1175          */
1176
1177         mutex_lock(&i915->drm.struct_mutex);
1178         intel_runtime_pm_get(i915);
1179
1180         ctx = kernel_context(i915);
1181         if (IS_ERR(ctx)) {
1182                 mutex_unlock(&i915->drm.struct_mutex);
1183                 return PTR_ERR(ctx);
1184         }
1185
1186         /* First check idling each individual engine */
1187         for_each_engine(engine, i915, id) {
1188                 err = __igt_switch_to_kernel_context(i915, ctx, BIT(id));
1189                 if (err)
1190                         goto out_unlock;
1191         }
1192
1193         /* Now en masse */
1194         err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES);
1195         if (err)
1196                 goto out_unlock;
1197
1198 out_unlock:
1199         GEM_TRACE_DUMP_ON(err);
1200         if (igt_flush_test(i915, I915_WAIT_LOCKED))
1201                 err = -EIO;
1202
1203         intel_runtime_pm_put(i915);
1204         mutex_unlock(&i915->drm.struct_mutex);
1205
1206         kernel_context_close(ctx);
1207         return err;
1208 }
1209
1210 int i915_gem_context_mock_selftests(void)
1211 {
1212         static const struct i915_subtest tests[] = {
1213                 SUBTEST(igt_switch_to_kernel_context),
1214         };
1215         struct drm_i915_private *i915;
1216         int err;
1217
1218         i915 = mock_gem_device();
1219         if (!i915)
1220                 return -ENOMEM;
1221
1222         err = i915_subtests(tests, i915);
1223
1224         drm_dev_put(&i915->drm);
1225         return err;
1226 }
1227
1228 int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
1229 {
1230         static const struct i915_subtest tests[] = {
1231                 SUBTEST(igt_switch_to_kernel_context),
1232                 SUBTEST(live_nop_switch),
1233                 SUBTEST(igt_ctx_exec),
1234                 SUBTEST(igt_ctx_readonly),
1235                 SUBTEST(igt_vm_isolation),
1236         };
1237
1238         if (i915_terminally_wedged(&dev_priv->gpu_error))
1239                 return 0;
1240
1241         return i915_subtests(tests, dev_priv);
1242 }