1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/prime_numbers.h>
26
27 #include "../i915_selftest.h"
28 #include "i915_random.h"
29 #include "igt_flush_test.h"
30
31 #include "mock_drm.h"
32 #include "mock_gem_device.h"
33 #include "huge_gem_object.h"
34
35 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
36
37 struct live_test {
38         struct drm_i915_private *i915;
39         const char *func;
40         const char *name;
41
42         unsigned int reset_global;
43         unsigned int reset_engine[I915_NUM_ENGINES];
44 };
45
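/*
 * Take a snapshot of the global and per-engine GPU reset counters (and
 * clear the missed-interrupt mask) after idling the GPU, so that
 * end_live_test() can detect any reset or missed interrupt that happens
 * while the test body runs.
 */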
46 static int begin_live_test(struct live_test *t,
47                            struct drm_i915_private *i915,
48                            const char *func,
49                            const char *name)
50 {
51         struct intel_engine_cs *engine;
52         enum intel_engine_id id;
53         int err;
54
55         t->i915 = i915;
56         t->func = func;
57         t->name = name;
58
59         err = i915_gem_wait_for_idle(i915,
60                                      I915_WAIT_LOCKED,
61                                      MAX_SCHEDULE_TIMEOUT);
62         if (err) {
63                 pr_err("%s(%s): failed to idle before, with err=%d!\n",
64                        func, name, err);
65                 return err;
66         }
67
68         i915->gpu_error.missed_irq_rings = 0;
69         t->reset_global = i915_reset_count(&i915->gpu_error);
70
71         for_each_engine(engine, i915, id)
72                 t->reset_engine[id] =
73                         i915_reset_engine_count(&i915->gpu_error, engine);
74
75         return 0;
76 }
77
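/*
 * Flush any outstanding work and compare the current reset counters and
 * missed-interrupt mask against the snapshot taken in begin_live_test();
 * any unexpected reset or missed interrupt fails the test with -EIO.
 */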
78 static int end_live_test(struct live_test *t)
79 {
80         struct drm_i915_private *i915 = t->i915;
81         struct intel_engine_cs *engine;
82         enum intel_engine_id id;
83
84         if (igt_flush_test(i915, I915_WAIT_LOCKED))
85                 return -EIO;
86
87         if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
88                 pr_err("%s(%s): GPU was reset %d times!\n",
89                        t->func, t->name,
90                        i915_reset_count(&i915->gpu_error) - t->reset_global);
91                 return -EIO;
92         }
93
94         for_each_engine(engine, i915, id) {
95                 if (t->reset_engine[id] ==
96                     i915_reset_engine_count(&i915->gpu_error, engine))
97                         continue;
98
99                 pr_err("%s(%s): engine '%s' was reset %d times!\n",
100                        t->func, t->name, engine->name,
101                        i915_reset_engine_count(&i915->gpu_error, engine) -
102                        t->reset_engine[id]);
103                 return -EIO;
104         }
105
106         if (i915->gpu_error.missed_irq_rings) {
107                 pr_err("%s(%s): Missed interrupts on engines %lx\n",
108                        t->func, t->name, i915->gpu_error.missed_irq_rings);
109                 return -EIO;
110         }
111
112         return 0;
113 }
114
115 static int live_nop_switch(void *arg)
116 {
117         const unsigned int nctx = 1024;
118         struct drm_i915_private *i915 = arg;
119         struct intel_engine_cs *engine;
120         struct i915_gem_context **ctx;
121         enum intel_engine_id id;
122         struct drm_file *file;
123         struct live_test t;
124         unsigned long n;
125         int err = -ENODEV;
126
127         /*
128          * Create as many contexts as we can feasibly get away with
129          * and check we can switch between them rapidly.
130          *
131          * Serves as a very simple stress test for submission and HW switching
132          * between contexts.
133          */
134
135         if (!DRIVER_CAPS(i915)->has_logical_contexts)
136                 return 0;
137
138         file = mock_file(i915);
139         if (IS_ERR(file))
140                 return PTR_ERR(file);
141
142         mutex_lock(&i915->drm.struct_mutex);
143         intel_runtime_pm_get(i915);
144
145         ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
146         if (!ctx) {
147                 err = -ENOMEM;
148                 goto out_unlock;
149         }
150
151         for (n = 0; n < nctx; n++) {
152                 ctx[n] = i915_gem_create_context(i915, file->driver_priv);
153                 if (IS_ERR(ctx[n])) {
154                         err = PTR_ERR(ctx[n]);
155                         goto out_unlock;
156                 }
157         }
158
159         for_each_engine(engine, i915, id) {
160                 struct i915_request *rq;
161                 unsigned long end_time, prime;
162                 ktime_t times[2] = {};
163
164                 times[0] = ktime_get_raw();
165                 for (n = 0; n < nctx; n++) {
166                         rq = i915_request_alloc(engine, ctx[n]);
167                         if (IS_ERR(rq)) {
168                                 err = PTR_ERR(rq);
169                                 goto out_unlock;
170                         }
171                         i915_request_add(rq);
172                 }
173                 if (i915_request_wait(rq,
174                                       I915_WAIT_LOCKED,
175                                       HZ / 5) < 0) {
176                 pr_err("Failed to populate %d contexts\n", nctx);
177                         i915_gem_set_wedged(i915);
178                         err = -EIO;
179                         goto out_unlock;
180                 }
181
182                 times[1] = ktime_get_raw();
183
184                 pr_info("Populated %d contexts on %s in %lluns\n",
185                         nctx, engine->name, ktime_to_ns(times[1] - times[0]));
186
187                 err = begin_live_test(&t, i915, __func__, engine->name);
188                 if (err)
189                         goto out_unlock;
190
191                 end_time = jiffies + i915_selftest.timeout_jiffies;
192                 for_each_prime_number_from(prime, 2, 8192) {
193                         times[1] = ktime_get_raw();
194
195                         for (n = 0; n < prime; n++) {
196                                 rq = i915_request_alloc(engine, ctx[n % nctx]);
197                                 if (IS_ERR(rq)) {
198                                         err = PTR_ERR(rq);
199                                         goto out_unlock;
200                                 }
201
202                                 /*
203                                  * This space is left intentionally blank.
204                                  *
205                                  * We do not actually want to perform any
206                                  * action with this request, we just want
207                                  * to measure the latency in allocation
208                                  * and submission of our breadcrumbs -
209                                  * ensuring that the bare request is sufficient
210                                  * for the system to work (i.e. proper HEAD
211                                  * tracking of the rings, interrupt handling,
212                                  * etc). It also gives us the lowest bounds
213                                  * for latency.
214                                  */
215
216                                 i915_request_add(rq);
217                         }
218                         if (i915_request_wait(rq,
219                                               I915_WAIT_LOCKED,
220                                               HZ / 5) < 0) {
221                                 pr_err("Switching between %ld contexts timed out\n",
222                                        prime);
223                                 i915_gem_set_wedged(i915);
224                                 break;
225                         }
226
227                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
228                         if (prime == 2)
229                                 times[0] = times[1];
230
231                         if (__igt_timeout(end_time, NULL))
232                                 break;
233                 }
234
235                 err = end_live_test(&t);
236                 if (err)
237                         goto out_unlock;
238
239                 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
240                         engine->name,
241                         ktime_to_ns(times[0]),
242                         prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
243         }
244
245 out_unlock:
246         intel_runtime_pm_put(i915);
247         mutex_unlock(&i915->drm.struct_mutex);
248         mock_file_free(i915, file);
249         return err;
250 }
251
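/*
 * Build a batch of MI_STORE_DWORD_IMM commands (in the encoding
 * appropriate for the GPU generation) that writes 'value' at 'offset'
 * within the target vma, stepping one page per store for 'count' pages.
 * The batch is pinned into the same address space as the target vma.
 */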
252 static struct i915_vma *
253 gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
254 {
255         struct drm_i915_gem_object *obj;
256         const int gen = INTEL_GEN(vma->vm->i915);
257         unsigned long n, size;
258         u32 *cmd;
259         int err;
260
261         size = (4 * count + 1) * sizeof(u32);
262         size = round_up(size, PAGE_SIZE);
263         obj = i915_gem_object_create_internal(vma->vm->i915, size);
264         if (IS_ERR(obj))
265                 return ERR_CAST(obj);
266
267         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
268         if (IS_ERR(cmd)) {
269                 err = PTR_ERR(cmd);
270                 goto err;
271         }
272
273         GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
274         offset += vma->node.start;
275
276         for (n = 0; n < count; n++) {
277                 if (gen >= 8) {
278                         *cmd++ = MI_STORE_DWORD_IMM_GEN4;
279                         *cmd++ = lower_32_bits(offset);
280                         *cmd++ = upper_32_bits(offset);
281                         *cmd++ = value;
282                 } else if (gen >= 4) {
283                         *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
284                                 (gen < 6 ? MI_USE_GGTT : 0);
285                         *cmd++ = 0;
286                         *cmd++ = offset;
287                         *cmd++ = value;
288                 } else {
289                         *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
290                         *cmd++ = offset;
291                         *cmd++ = value;
292                 }
293                 offset += PAGE_SIZE;
294         }
295         *cmd = MI_BATCH_BUFFER_END;
296         i915_gem_object_unpin_map(obj);
297
298         err = i915_gem_object_set_to_gtt_domain(obj, false);
299         if (err)
300                 goto err;
301
302         vma = i915_vma_instance(obj, vma->vm, NULL);
303         if (IS_ERR(vma)) {
304                 err = PTR_ERR(vma);
305                 goto err;
306         }
307
308         err = i915_vma_pin(vma, 0, 0, PIN_USER);
309         if (err)
310                 goto err;
311
312         return vma;
313
314 err:
315         i915_gem_object_put(obj);
316         return ERR_PTR(err);
317 }
318
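/*
 * A huge_gem_object has a smaller physical backing store than its GTT
 * footprint: real_page_count() is the number of physical pages actually
 * allocated, fake_page_count() the number of GTT pages aliased onto them.
 */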
319 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
320 {
321         return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
322 }
323
324 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
325 {
326         return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
327 }
328
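/*
 * Using 'ctx' on 'engine', write the value 'dw' into the dw-th dword of
 * every physical page of 'obj'. Each pass goes through a different
 * aliased range of the object's GTT mapping (see the comment below), so
 * successive dwords exercise successive views of the object.
 */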
329 static int gpu_fill(struct drm_i915_gem_object *obj,
330                     struct i915_gem_context *ctx,
331                     struct intel_engine_cs *engine,
332                     unsigned int dw)
333 {
334         struct drm_i915_private *i915 = to_i915(obj->base.dev);
335         struct i915_address_space *vm =
336                 ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
337         struct i915_request *rq;
338         struct i915_vma *vma;
339         struct i915_vma *batch;
340         unsigned int flags;
341         int err;
342
343         GEM_BUG_ON(obj->base.size > vm->total);
344         GEM_BUG_ON(!intel_engine_can_store_dword(engine));
345
346         vma = i915_vma_instance(obj, vm, NULL);
347         if (IS_ERR(vma))
348                 return PTR_ERR(vma);
349
350         err = i915_gem_object_set_to_gtt_domain(obj, false);
351         if (err)
352                 return err;
353
354         err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
355         if (err)
356                 return err;
357
358         /* Within the GTT the huge object maps every page onto
359          * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
360          * We set the nth dword within the page using the nth
361          * mapping via the GTT - this should exercise the GTT mapping
362          * whilst checking that each context provides a unique view
363          * into the object.
364          */
365         batch = gpu_fill_dw(vma,
366                             (dw * real_page_count(obj)) << PAGE_SHIFT |
367                             (dw * sizeof(u32)),
368                             real_page_count(obj),
369                             dw);
370         if (IS_ERR(batch)) {
371                 err = PTR_ERR(batch);
372                 goto err_vma;
373         }
374
375         rq = i915_request_alloc(engine, ctx);
376         if (IS_ERR(rq)) {
377                 err = PTR_ERR(rq);
378                 goto err_batch;
379         }
380
381         flags = 0;
382         if (INTEL_GEN(vm->i915) <= 5)
383                 flags |= I915_DISPATCH_SECURE;
384
385         err = engine->emit_bb_start(rq,
386                                     batch->node.start, batch->node.size,
387                                     flags);
388         if (err)
389                 goto err_request;
390
391         err = i915_vma_move_to_active(batch, rq, 0);
392         if (err)
393                 goto skip_request;
394
395         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
396         if (err)
397                 goto skip_request;
398
399         i915_gem_object_set_active_reference(batch->obj);
400         i915_vma_unpin(batch);
401         i915_vma_close(batch);
402
403         i915_vma_unpin(vma);
404
405         i915_request_add(rq);
406
407         return 0;
408
409 skip_request:
410         i915_request_skip(rq, err);
411 err_request:
412         i915_request_add(rq);
413 err_batch:
414         i915_vma_unpin(batch);
415         i915_vma_put(batch);
416 err_vma:
417         i915_vma_unpin(vma);
418         return err;
419 }
420
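/*
 * Fill every dword of the object's physical backing store with 'value'
 * from the CPU, flushing the writes out to memory on !llc platforms.
 */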
421 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
422 {
423         const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
424         unsigned int n, m, need_flush;
425         int err;
426
427         err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
428         if (err)
429                 return err;
430
431         for (n = 0; n < real_page_count(obj); n++) {
432                 u32 *map;
433
434                 map = kmap_atomic(i915_gem_object_get_page(obj, n));
435                 for (m = 0; m < DW_PER_PAGE; m++)
436                         map[m] = value;
437                 if (!has_llc)
438                         drm_clflush_virt_range(map, PAGE_SIZE);
439                 kunmap_atomic(map);
440         }
441
442         i915_gem_obj_finish_shmem_access(obj);
443         obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
444         obj->write_domain = 0;
445         return 0;
446 }
447
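/*
 * Read the object back on the CPU: the first 'max' dwords of each
 * physical page must hold their own index (as written by gpu_fill()),
 * and the remaining dwords must still hold the STACK_MAGIC poison
 * written by cpu_fill().
 */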
448 static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
449 {
450         unsigned int n, m, needs_flush;
451         int err;
452
453         err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
454         if (err)
455                 return err;
456
457         for (n = 0; n < real_page_count(obj); n++) {
458                 u32 *map;
459
460                 map = kmap_atomic(i915_gem_object_get_page(obj, n));
461                 if (needs_flush & CLFLUSH_BEFORE)
462                         drm_clflush_virt_range(map, PAGE_SIZE);
463
464                 for (m = 0; m < max; m++) {
465                         if (map[m] != m) {
466                                 pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
467                                        n, m, map[m], m);
468                                 err = -EINVAL;
469                                 goto out_unmap;
470                         }
471                 }
472
473                 for (; m < DW_PER_PAGE; m++) {
474                         if (map[m] != STACK_MAGIC) {
475                                 pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
476                                        n, m, map[m], STACK_MAGIC);
477                                 err = -EINVAL;
478                                 goto out_unmap;
479                         }
480                 }
481
482 out_unmap:
483                 kunmap_atomic(map);
484                 if (err)
485                         break;
486         }
487
488         i915_gem_obj_finish_shmem_access(obj);
489         return err;
490 }
491
492 static int file_add_object(struct drm_file *file,
493                             struct drm_i915_gem_object *obj)
494 {
495         int err;
496
497         GEM_BUG_ON(obj->base.handle_count);
498
499         /* tie the object to the drm_file for easy reaping */
500         err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
501         if (err < 0)
502                 return err;
503
504         i915_gem_object_get(obj);
505         obj->base.handle_count++;
506         return 0;
507 }
508
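/*
 * Create a huge_gem_object sized to fit within the context's vm, tie it
 * to the mock drm_file for reaping, poison it with STACK_MAGIC from the
 * CPU and add it to the list of objects to be checked at the end.
 */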
509 static struct drm_i915_gem_object *
510 create_test_object(struct i915_gem_context *ctx,
511                    struct drm_file *file,
512                    struct list_head *objects)
513 {
514         struct drm_i915_gem_object *obj;
515         struct i915_address_space *vm =
516                 ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm;
517         u64 size;
518         int err;
519
520         size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
521         size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
522
523         obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size);
524         if (IS_ERR(obj))
525                 return obj;
526
527         err = file_add_object(file, obj);
528         i915_gem_object_put(obj);
529         if (err)
530                 return ERR_PTR(err);
531
532         err = cpu_fill(obj, STACK_MAGIC);
533         if (err) {
534                 pr_err("Failed to fill object with cpu, err=%d\n",
535                        err);
536                 return ERR_PTR(err);
537         }
538
539         list_add_tail(&obj->st_link, objects);
540         return obj;
541 }
542
543 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
544 {
545         unsigned long npages = fake_page_count(obj);
546
547         GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
548         return npages / DW_PER_PAGE;
549 }
550
551 static int igt_ctx_exec(void *arg)
552 {
553         struct drm_i915_private *i915 = arg;
554         struct drm_i915_gem_object *obj = NULL;
555         unsigned long ncontexts, ndwords, dw;
556         struct drm_file *file;
557         IGT_TIMEOUT(end_time);
558         LIST_HEAD(objects);
559         struct live_test t;
560         int err = -ENODEV;
561
562         /*
563          * Create a few different contexts (with different mm) and write
564          * through each ctx/mm using the GPU, making sure those writes end
565          * up in the expected pages of our obj.
566          */
567
568         if (!DRIVER_CAPS(i915)->has_logical_contexts)
569                 return 0;
570
571         file = mock_file(i915);
572         if (IS_ERR(file))
573                 return PTR_ERR(file);
574
575         mutex_lock(&i915->drm.struct_mutex);
576
577         err = begin_live_test(&t, i915, __func__, "");
578         if (err)
579                 goto out_unlock;
580
581         ncontexts = 0;
582         ndwords = 0;
583         dw = 0;
584         while (!time_after(jiffies, end_time)) {
585                 struct intel_engine_cs *engine;
586                 struct i915_gem_context *ctx;
587                 unsigned int id;
588
589                 ctx = i915_gem_create_context(i915, file->driver_priv);
590                 if (IS_ERR(ctx)) {
591                         err = PTR_ERR(ctx);
592                         goto out_unlock;
593                 }
594
595                 for_each_engine(engine, i915, id) {
596                         if (!engine->context_size)
597                                 continue; /* No logical context support in HW */
598
599                         if (!intel_engine_can_store_dword(engine))
600                                 continue;
601
602                         if (!obj) {
603                                 obj = create_test_object(ctx, file, &objects);
604                                 if (IS_ERR(obj)) {
605                                         err = PTR_ERR(obj);
606                                         goto out_unlock;
607                                 }
608                         }
609
610                         intel_runtime_pm_get(i915);
611                         err = gpu_fill(obj, ctx, engine, dw);
612                         intel_runtime_pm_put(i915);
613                         if (err) {
614                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
615                                        ndwords, dw, max_dwords(obj),
616                                        engine->name, ctx->hw_id,
617                                        yesno(!!ctx->ppgtt), err);
618                                 goto out_unlock;
619                         }
620
621                         if (++dw == max_dwords(obj)) {
622                                 obj = NULL;
623                                 dw = 0;
624                         }
625                         ndwords++;
626                 }
627                 ncontexts++;
628         }
629         pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
630                 ncontexts, INTEL_INFO(i915)->num_rings, ndwords);
631
632         dw = 0;
633         list_for_each_entry(obj, &objects, st_link) {
634                 unsigned int rem =
635                         min_t(unsigned int, ndwords - dw, max_dwords(obj));
636
637                 err = cpu_check(obj, rem);
638                 if (err)
639                         break;
640
641                 dw += rem;
642         }
643
644 out_unlock:
645         if (end_live_test(&t))
646                 err = -EIO;
647         mutex_unlock(&i915->drm.struct_mutex);
648
649         mock_file_free(i915, file);
650         return err;
651 }
652
653 static int igt_ctx_readonly(void *arg)
654 {
655         struct drm_i915_private *i915 = arg;
656         struct drm_i915_gem_object *obj = NULL;
657         struct i915_gem_context *ctx;
658         struct i915_hw_ppgtt *ppgtt;
659         unsigned long ndwords, dw;
660         struct drm_file *file;
661         I915_RND_STATE(prng);
662         IGT_TIMEOUT(end_time);
663         LIST_HEAD(objects);
664         struct live_test t;
665         int err = -ENODEV;
666
667         /*
668          * Create a few read-only objects (with the occasional writable object)
669          * and try to write into these objects, checking that the GPU discards
670          * any write to a read-only object.
671          */
672
673         file = mock_file(i915);
674         if (IS_ERR(file))
675                 return PTR_ERR(file);
676
677         mutex_lock(&i915->drm.struct_mutex);
678
679         err = begin_live_test(&t, i915, __func__, "");
680         if (err)
681                 goto out_unlock;
682
683         ctx = i915_gem_create_context(i915, file->driver_priv);
684         if (IS_ERR(ctx)) {
685                 err = PTR_ERR(ctx);
686                 goto out_unlock;
687         }
688
689         ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt;
690         if (!ppgtt || !ppgtt->vm.has_read_only) {
691                 err = 0;
692                 goto out_unlock;
693         }
694
695         ndwords = 0;
696         dw = 0;
697         while (!time_after(jiffies, end_time)) {
698                 struct intel_engine_cs *engine;
699                 unsigned int id;
700
701                 for_each_engine(engine, i915, id) {
702                         if (!intel_engine_can_store_dword(engine))
703                                 continue;
704
705                         if (!obj) {
706                                 obj = create_test_object(ctx, file, &objects);
707                                 if (IS_ERR(obj)) {
708                                         err = PTR_ERR(obj);
709                                         goto out_unlock;
710                                 }
711
712                                 if (prandom_u32_state(&prng) & 1)
713                                         i915_gem_object_set_readonly(obj);
714                         }
715
716                         intel_runtime_pm_get(i915);
717                         err = gpu_fill(obj, ctx, engine, dw);
718                         intel_runtime_pm_put(i915);
719                         if (err) {
720                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
721                                        ndwords, dw, max_dwords(obj),
722                                        engine->name, ctx->hw_id,
723                                        yesno(!!ctx->ppgtt), err);
724                                 goto out_unlock;
725                         }
726
727                         if (++dw == max_dwords(obj)) {
728                                 obj = NULL;
729                                 dw = 0;
730                         }
731                         ndwords++;
732                 }
733         }
734         pr_info("Submitted %lu dwords (across %u engines)\n",
735                 ndwords, INTEL_INFO(i915)->num_rings);
736
737         dw = 0;
738         list_for_each_entry(obj, &objects, st_link) {
739                 unsigned int rem =
740                         min_t(unsigned int, ndwords - dw, max_dwords(obj));
741                 unsigned int num_writes;
742
743                 num_writes = rem;
744                 if (i915_gem_object_is_readonly(obj))
745                         num_writes = 0;
746
747                 err = cpu_check(obj, num_writes);
748                 if (err)
749                         break;
750
751                 dw += rem;
752         }
753
754 out_unlock:
755         if (end_live_test(&t))
756                 err = -EIO;
757         mutex_unlock(&i915->drm.struct_mutex);
758
759         mock_file_free(i915, file);
760         return err;
761 }
762
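/*
 * The scratch tests rely on the chosen offset being unpopulated in the
 * context's ppgtt, so that the access is redirected to the vm's scratch
 * page; reject any offset that overlaps an existing node in the vm.
 */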
763 static int check_scratch(struct i915_gem_context *ctx, u64 offset)
764 {
765         struct drm_mm_node *node =
766                 __drm_mm_interval_first(&ctx->ppgtt->vm.mm,
767                                         offset, offset + sizeof(u32) - 1);
768         if (!node || node->start > offset)
769                 return 0;
770
771         GEM_BUG_ON(offset >= node->start + node->size);
772
773         pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
774                upper_32_bits(offset), lower_32_bits(offset));
775         return -EINVAL;
776 }
777
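/*
 * Emit a single MI_STORE_DWORD_IMM through 'ctx' on 'engine' that writes
 * 'value' to 'offset' in the context's ppgtt. The target offset is never
 * bound to an object, so if the vm is properly isolated the write lands
 * in that vm's scratch page and is never visible to other contexts.
 */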
778 static int write_to_scratch(struct i915_gem_context *ctx,
779                             struct intel_engine_cs *engine,
780                             u64 offset, u32 value)
781 {
782         struct drm_i915_private *i915 = ctx->i915;
783         struct drm_i915_gem_object *obj;
784         struct i915_request *rq;
785         struct i915_vma *vma;
786         u32 *cmd;
787         int err;
788
789         GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
790
791         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
792         if (IS_ERR(obj))
793                 return PTR_ERR(obj);
794
795         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
796         if (IS_ERR(cmd)) {
797                 err = PTR_ERR(cmd);
798                 goto err;
799         }
800
801         *cmd++ = MI_STORE_DWORD_IMM_GEN4;
802         if (INTEL_GEN(i915) >= 8) {
803                 *cmd++ = lower_32_bits(offset);
804                 *cmd++ = upper_32_bits(offset);
805         } else {
806                 *cmd++ = 0;
807                 *cmd++ = offset;
808         }
809         *cmd++ = value;
810         *cmd = MI_BATCH_BUFFER_END;
811         i915_gem_object_unpin_map(obj);
812
813         err = i915_gem_object_set_to_gtt_domain(obj, false);
814         if (err)
815                 goto err;
816
817         vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
818         if (IS_ERR(vma)) {
819                 err = PTR_ERR(vma);
820                 goto err;
821         }
822
823         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
824         if (err)
825                 goto err;
826
827         err = check_scratch(ctx, offset);
828         if (err)
829                 goto err_unpin;
830
831         rq = i915_request_alloc(engine, ctx);
832         if (IS_ERR(rq)) {
833                 err = PTR_ERR(rq);
834                 goto err_unpin;
835         }
836
837         err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
838         if (err)
839                 goto err_request;
840
841         err = i915_vma_move_to_active(vma, rq, 0);
842         if (err)
843                 goto skip_request;
844
845         i915_gem_object_set_active_reference(obj);
846         i915_vma_unpin(vma);
847         i915_vma_close(vma);
848
849         i915_request_add(rq);
850
851         return 0;
852
853 skip_request:
854         i915_request_skip(rq, err);
855 err_request:
856         i915_request_add(rq);
857 err_unpin:
858         i915_vma_unpin(vma);
859 err:
860         i915_gem_object_put(obj);
861         return err;
862 }
863
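/*
 * Load the dword at 'offset' in the context's ppgtt into a GPR using
 * MI_LOAD_REGISTER_MEM, store that GPR back into the batch object with
 * MI_STORE_REGISTER_MEM, and return the value as read back by the CPU.
 * If the vm is isolated, the load hits the scratch page and returns 0.
 */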
864 static int read_from_scratch(struct i915_gem_context *ctx,
865                              struct intel_engine_cs *engine,
866                              u64 offset, u32 *value)
867 {
868         struct drm_i915_private *i915 = ctx->i915;
869         struct drm_i915_gem_object *obj;
870         const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
871         const u32 result = 0x100;
872         struct i915_request *rq;
873         struct i915_vma *vma;
874         u32 *cmd;
875         int err;
876
877         GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
878
879         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
880         if (IS_ERR(obj))
881                 return PTR_ERR(obj);
882
883         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
884         if (IS_ERR(cmd)) {
885                 err = PTR_ERR(cmd);
886                 goto err;
887         }
888
889         memset(cmd, POISON_INUSE, PAGE_SIZE);
890         if (INTEL_GEN(i915) >= 8) {
891                 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
892                 *cmd++ = RCS_GPR0;
893                 *cmd++ = lower_32_bits(offset);
894                 *cmd++ = upper_32_bits(offset);
895                 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
896                 *cmd++ = RCS_GPR0;
897                 *cmd++ = result;
898                 *cmd++ = 0;
899         } else {
900                 *cmd++ = MI_LOAD_REGISTER_MEM;
901                 *cmd++ = RCS_GPR0;
902                 *cmd++ = offset;
903                 *cmd++ = MI_STORE_REGISTER_MEM;
904                 *cmd++ = RCS_GPR0;
905                 *cmd++ = result;
906         }
907         *cmd = MI_BATCH_BUFFER_END;
908         i915_gem_object_unpin_map(obj);
909
910         err = i915_gem_object_set_to_gtt_domain(obj, false);
911         if (err)
912                 goto err;
913
914         vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
915         if (IS_ERR(vma)) {
916                 err = PTR_ERR(vma);
917                 goto err;
918         }
919
920         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
921         if (err)
922                 goto err;
923
924         err = check_scratch(ctx, offset);
925         if (err)
926                 goto err_unpin;
927
928         rq = i915_request_alloc(engine, ctx);
929         if (IS_ERR(rq)) {
930                 err = PTR_ERR(rq);
931                 goto err_unpin;
932         }
933
934         err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
935         if (err)
936                 goto err_request;
937
938         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
939         if (err)
940                 goto skip_request;
941
942         i915_vma_unpin(vma);
943         i915_vma_close(vma);
944
945         i915_request_add(rq);
946
947         err = i915_gem_object_set_to_cpu_domain(obj, false);
948         if (err)
949                 goto err;
950
951         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
952         if (IS_ERR(cmd)) {
953                 err = PTR_ERR(cmd);
954                 goto err;
955         }
956
957         *value = cmd[result / sizeof(*cmd)];
958         i915_gem_object_unpin_map(obj);
959         i915_gem_object_put(obj);
960
961         return 0;
962
963 skip_request:
964         i915_request_skip(rq, err);
965 err_request:
966         i915_request_add(rq);
967 err_unpin:
968         i915_vma_unpin(vma);
969 err:
970         i915_gem_object_put(obj);
971         return err;
972 }
973
974 static int igt_vm_isolation(void *arg)
975 {
976         struct drm_i915_private *i915 = arg;
977         struct i915_gem_context *ctx_a, *ctx_b;
978         struct intel_engine_cs *engine;
979         struct drm_file *file;
980         I915_RND_STATE(prng);
981         unsigned long count;
982         struct live_test t;
983         unsigned int id;
984         u64 vm_total;
985         int err;
986
987         if (INTEL_GEN(i915) < 7)
988                 return 0;
989
990         /*
991          * The simple goal here is that a write into one context is not
992          * observed in a second (separate page tables and scratch).
993          */
994
995         file = mock_file(i915);
996         if (IS_ERR(file))
997                 return PTR_ERR(file);
998
999         mutex_lock(&i915->drm.struct_mutex);
1000
1001         err = begin_live_test(&t, i915, __func__, "");
1002         if (err)
1003                 goto out_unlock;
1004
1005         ctx_a = i915_gem_create_context(i915, file->driver_priv);
1006         if (IS_ERR(ctx_a)) {
1007                 err = PTR_ERR(ctx_a);
1008                 goto out_unlock;
1009         }
1010
1011         ctx_b = i915_gem_create_context(i915, file->driver_priv);
1012         if (IS_ERR(ctx_b)) {
1013                 err = PTR_ERR(ctx_b);
1014                 goto out_unlock;
1015         }
1016
1017         /* We can only test vm isolation if the vms are distinct */
1018         if (ctx_a->ppgtt == ctx_b->ppgtt)
1019                 goto out_unlock;
1020
1021         vm_total = ctx_a->ppgtt->vm.total;
1022         GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total);
1023         vm_total -= I915_GTT_PAGE_SIZE;
1024
1025         intel_runtime_pm_get(i915);
1026
1027         count = 0;
1028         for_each_engine(engine, i915, id) {
1029                 IGT_TIMEOUT(end_time);
1030                 unsigned long this = 0;
1031
1032                 if (!intel_engine_can_store_dword(engine))
1033                         continue;
1034
1035                 while (!__igt_timeout(end_time, NULL)) {
1036                         u32 value = 0xc5c5c5c5;
1037                         u64 offset;
1038
1039                         div64_u64_rem(i915_prandom_u64_state(&prng),
1040                                       vm_total, &offset);
1041                         offset &= -sizeof(u32);
1042                         offset += I915_GTT_PAGE_SIZE;
1043
1044                         err = write_to_scratch(ctx_a, engine,
1045                                                offset, 0xdeadbeef);
1046                         if (err == 0)
1047                                 err = read_from_scratch(ctx_b, engine,
1048                                                         offset, &value);
1049                         if (err)
1050                                 goto out_rpm;
1051
1052                         if (value) {
1053                                 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1054                                        engine->name, value,
1055                                        upper_32_bits(offset),
1056                                        lower_32_bits(offset),
1057                                        this);
1058                                 err = -EINVAL;
1059                                 goto out_rpm;
1060                         }
1061
1062                         this++;
1063                 }
1064                 count += this;
1065         }
1066         pr_info("Checked %lu scratch offsets across %d engines\n",
1067                 count, INTEL_INFO(i915)->num_rings);
1068
1069 out_rpm:
1070         intel_runtime_pm_put(i915);
1071 out_unlock:
1072         if (end_live_test(&t))
1073                 err = -EIO;
1074         mutex_unlock(&i915->drm.struct_mutex);
1075
1076         mock_file_free(i915, file);
1077         return err;
1078 }
1079
1080 static __maybe_unused const char *
1081 __engine_name(struct drm_i915_private *i915, unsigned int engines)
1082 {
1083         struct intel_engine_cs *engine;
1084         unsigned int tmp;
1085
1086         if (engines == ALL_ENGINES)
1087                 return "all";
1088
1089         for_each_engine_masked(engine, i915, engines, tmp)
1090                 return engine->name;
1091
1092         return "none";
1093 }
1094
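/*
 * Exercise one switch-to-kernel-context cycle on the selected engines:
 * dirty each engine with a request, ask for the switch, and check both
 * that every engine ends up idling in the kernel context and that a
 * second switch while already idle emits no further requests.
 */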
1095 static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
1096                                           struct i915_gem_context *ctx,
1097                                           unsigned int engines)
1098 {
1099         struct intel_engine_cs *engine;
1100         unsigned int tmp;
1101         int err;
1102
1103         GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
1104         for_each_engine_masked(engine, i915, engines, tmp) {
1105                 struct i915_request *rq;
1106
1107                 rq = i915_request_alloc(engine, ctx);
1108                 if (IS_ERR(rq))
1109                         return PTR_ERR(rq);
1110
1111                 i915_request_add(rq);
1112         }
1113
1114         err = i915_gem_switch_to_kernel_context(i915);
1115         if (err)
1116                 return err;
1117
1118         for_each_engine_masked(engine, i915, engines, tmp) {
1119                 if (!engine_has_kernel_context_barrier(engine)) {
1120                         pr_err("kernel context not last on engine %s!\n",
1121                                engine->name);
1122                         return -EINVAL;
1123                 }
1124         }
1125
1126         err = i915_gem_wait_for_idle(i915,
1127                                      I915_WAIT_LOCKED,
1128                                      MAX_SCHEDULE_TIMEOUT);
1129         if (err)
1130                 return err;
1131
1132         GEM_BUG_ON(i915->gt.active_requests);
1133         for_each_engine_masked(engine, i915, engines, tmp) {
1134                 if (engine->last_retired_context->gem_context != i915->kernel_context) {
1135                         pr_err("engine %s not idling in kernel context!\n",
1136                                engine->name);
1137                         return -EINVAL;
1138                 }
1139         }
1140
1141         err = i915_gem_switch_to_kernel_context(i915);
1142         if (err)
1143                 return err;
1144
1145         if (i915->gt.active_requests) {
1146                 pr_err("switch-to-kernel-context emitted %d requests even though it should already be idling in the kernel context\n",
1147                        i915->gt.active_requests);
1148                 return -EINVAL;
1149         }
1150
1151         for_each_engine_masked(engine, i915, engines, tmp) {
1152                 if (!intel_engine_has_kernel_context(engine)) {
1153                         pr_err("kernel context not last on engine %s!\n",
1154                                engine->name);
1155                         return -EINVAL;
1156                 }
1157         }
1158
1159         return 0;
1160 }
1161
1162 static int igt_switch_to_kernel_context(void *arg)
1163 {
1164         struct drm_i915_private *i915 = arg;
1165         struct intel_engine_cs *engine;
1166         struct i915_gem_context *ctx;
1167         enum intel_engine_id id;
1168         int err;
1169
1170         /*
1171          * A core premise of switching to the kernel context is that
1172          * if an engine is already idling in the kernel context, we
1173          * do not emit another request and wake it up. The other is that
1174          * we do indeed end up idling in the kernel context.
1175          */
1176
1177         mutex_lock(&i915->drm.struct_mutex);
1178         intel_runtime_pm_get(i915);
1179
1180         ctx = kernel_context(i915);
1181         if (IS_ERR(ctx)) {
1182                 mutex_unlock(&i915->drm.struct_mutex);
1183                 return PTR_ERR(ctx);
1184         }
1185
1186         /* First check idling each individual engine */
1187         for_each_engine(engine, i915, id) {
1188                 err = __igt_switch_to_kernel_context(i915, ctx, BIT(id));
1189                 if (err)
1190                         goto out_unlock;
1191         }
1192
1193         /* Now en masse */
1194         err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES);
1195         if (err)
1196                 goto out_unlock;
1197
1198 out_unlock:
1199         GEM_TRACE_DUMP_ON(err);
1200         if (igt_flush_test(i915, I915_WAIT_LOCKED))
1201                 err = -EIO;
1202
1203         intel_runtime_pm_put(i915);
1204         mutex_unlock(&i915->drm.struct_mutex);
1205
1206         kernel_context_close(ctx);
1207         return err;
1208 }
1209
1210 int i915_gem_context_mock_selftests(void)
1211 {
1212         static const struct i915_subtest tests[] = {
1213                 SUBTEST(igt_switch_to_kernel_context),
1214         };
1215         struct drm_i915_private *i915;
1216         int err;
1217
1218         i915 = mock_gem_device();
1219         if (!i915)
1220                 return -ENOMEM;
1221
1222         err = i915_subtests(tests, i915);
1223
1224         drm_dev_put(&i915->drm);
1225         return err;
1226 }
1227
1228 int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
1229 {
1230         static const struct i915_subtest tests[] = {
1231                 SUBTEST(igt_switch_to_kernel_context),
1232                 SUBTEST(live_nop_switch),
1233                 SUBTEST(igt_ctx_exec),
1234                 SUBTEST(igt_ctx_readonly),
1235                 SUBTEST(igt_vm_isolation),
1236         };
1237
1238         if (i915_terminally_wedged(&dev_priv->gpu_error))
1239                 return 0;
1240
1241         return i915_subtests(tests, dev_priv);
1242 }