/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "../i915_selftest.h"

#include <linux/prime_numbers.h>

#include "igt_gem_utils.h"
#include "mock_context.h"
#include "mock_drm.h"
#include "mock_gem_device.h"
#include "i915_random.h"

static const unsigned int page_sizes[] = {
	I915_GTT_PAGE_SIZE_2M,
	I915_GTT_PAGE_SIZE_64K,
	I915_GTT_PAGE_SIZE_4K,
};

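/*
 * Return the largest device-supported page size that still fits within the
 * remaining length, or 0 if none of the supported page sizes fit.
 */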
static unsigned int get_largest_page_size(struct drm_i915_private *i915,
					  u64 rem)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
		unsigned int page_size = page_sizes[i];

		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
			return page_size;
	}

	return 0;
}

static void huge_pages_free_pages(struct sg_table *st)
{
	struct scatterlist *sg;

	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
		if (sg_page(sg))
			__free_pages(sg_page(sg), get_order(sg->length));
	}

	sg_free_table(st);
	kfree(st);
}

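/*
 * ->get_pages() hook for huge_pages_object(): back the object with real
 * high-order allocations, one run of chunks per page size set in the
 * object's page_mask. Allocation failures unwind through
 * huge_pages_free_pages(), which skips the (NULL) entry we were filling.
 */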
static int get_huge_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
	unsigned int page_mask = obj->mm.page_mask;
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;

	/*
	 * Our goal here is simple, we want to greedily fill the object from
	 * largest to smallest page-size, while ensuring that we use *every*
	 * page-size as per the given page-mask.
	 */
	do {
		unsigned int bit = ilog2(page_mask);
		unsigned int page_size = BIT(bit);
		int order = get_order(page_size);

		do {
			struct page *page;

			GEM_BUG_ON(order >= MAX_ORDER);
			page = alloc_pages(GFP | __GFP_ZERO, order);
			if (!page)
				goto err;

			sg_set_page(sg, page, page_size, 0);
			sg_page_sizes |= page_size;
			st->nents++;

			rem -= page_size;
			if (!rem) {
				sg_mark_end(sg);
				break;
			}

			sg = __sg_next(sg);
		} while ((rem - ((page_size-1) & page_mask)) >= page_size);

		page_mask &= (page_size-1);
	} while (page_mask);

	if (i915_gem_gtt_prepare_pages(obj, st))
		goto err;

	obj->mm.madv = I915_MADV_DONTNEED;

	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err:
	sg_set_page(sg, NULL, 0, 0);
	sg_mark_end(sg);
	huge_pages_free_pages(st);

	return -ENOMEM;
}

static void put_huge_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	i915_gem_gtt_finish_pages(obj, pages);
	huge_pages_free_pages(pages);

	obj->mm.dirty = false;
	obj->mm.madv = I915_MADV_WILLNEED;
}

static const struct drm_i915_gem_object_ops huge_page_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = get_huge_pages,
	.put_pages = put_huge_pages,
};

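/*
 * Create an object whose backing store mixes page sizes -- every page size
 * set in @page_mask is used at least once, hence the requirement that @size
 * be aligned to the smallest page size in the mask.
 */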
static struct drm_i915_gem_object *
huge_pages_object(struct drm_i915_private *i915,
		  u64 size,
		  unsigned int page_mask)
{
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));

	if (size >> PAGE_SHIFT > INT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);
	i915_gem_object_init(obj, &huge_page_ops);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->cache_level = I915_CACHE_NONE;

	obj->mm.page_mask = page_mask;

	return obj;
}

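/*
 * The "fake" flavours below carry no real backing storage at all; the
 * scatterlist is merely filled with suitably sized chunks. As a trick for
 * the mock device, sg_dma_address() is set to the chunk's page size, which
 * gives the GTT insertion paths a dma address aligned to exactly that page
 * size.
 */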
static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	const u64 max_len = rounddown_pow_of_two(UINT_MAX);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	/* Use optimal page sized chunks to fill in the sg table */
	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	do {
		unsigned int page_size = get_largest_page_size(i915, rem);
		unsigned int len = min(page_size * div_u64(rem, page_size),
				       max_len);

		GEM_BUG_ON(!page_size);

		sg->offset = 0;
		sg->length = len;
		sg_dma_len(sg) = len;
		sg_dma_address(sg) = page_size;

		sg_page_sizes |= len;

		st->nents++;

		rem -= len;
		if (!rem) {
			sg_mark_end(sg);
			break;
		}

		sg = sg_next(sg);
	} while (1);

	i915_sg_trim(st);

	obj->mm.madv = I915_MADV_DONTNEED;

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;
}

static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int page_size;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	st->nents = 1;

	page_size = get_largest_page_size(i915, obj->base.size);
	GEM_BUG_ON(!page_size);

	sg->offset = 0;
	sg->length = obj->base.size;
	sg_dma_len(sg) = obj->base.size;
	sg_dma_address(sg) = page_size;

	obj->mm.madv = I915_MADV_DONTNEED;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;
#undef GFP
}

static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages)
{
	sg_free_table(pages);
	kfree(pages);
}

static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
				struct sg_table *pages)
{
	fake_free_huge_pages(obj, pages);
	obj->mm.dirty = false;
	obj->mm.madv = I915_MADV_WILLNEED;
}

static const struct drm_i915_gem_object_ops fake_ops = {
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages,
	.put_pages = fake_put_huge_pages,
};

static const struct drm_i915_gem_object_ops fake_ops_single = {
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages_single,
	.put_pages = fake_put_huge_pages,
};

static struct drm_i915_gem_object *
fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
{
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));

	if (size >> PAGE_SHIFT > UINT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);

	if (single)
		i915_gem_object_init(obj, &fake_ops_single);
	else
		i915_gem_object_init(obj, &fake_ops);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->cache_level = I915_CACHE_NONE;

	return obj;
}

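/*
 * Verify the page-size bookkeeping for a bound vma: the sg and gtt page
 * sizes must be supported by the device, the vma must mirror the object's
 * view of the backing store, and page_sizes.gtt must only ever be tracked
 * on the vma itself, never on the object.
 */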
static int igt_check_page_sizes(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	unsigned int supported = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj = vma->obj;
	int err = 0;

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
		       vma->page_sizes.sg & ~supported, supported);
		err = -EINVAL;
	}

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
		       vma->page_sizes.gtt & ~supported, supported);
		err = -EINVAL;
	}

	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
		err = -EINVAL;
	}

	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
		err = -EINVAL;
	}

	if (obj->mm.page_sizes.gtt) {
		pr_err("obj->page_sizes.gtt(%u) should never be set\n",
		       obj->mm.page_sizes.gtt);
		err = -EINVAL;
	}

	return err;
}

static int igt_mock_exhaust_device_supported_pages(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned int saved_mask = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int i, j, single;
	int err;

	/*
	 * Sanity check creating objects with every valid page support
	 * combination for our mock device.
	 */

	for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
		unsigned int combination = 0;

		for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
			if (i & BIT(j))
				combination |= page_sizes[j];
		}

		mkwrite_device_info(i915)->page_sizes = combination;

		for (single = 0; single <= 1; ++single) {
			obj = fake_huge_pages_object(i915, combination,
						     !!single);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			if (obj->base.size != combination) {
				pr_err("obj->base.size=%zu, expected=%u\n",
				       obj->base.size, combination);
				err = -EINVAL;
				goto out_put;
			}

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_put;
			}

			err = i915_vma_pin(vma, 0, 0, PIN_USER);
			if (err)
				goto out_close;

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.sg != combination) {
				pr_err("page_sizes.sg=%u, expected=%u\n",
				       vma->page_sizes.sg, combination);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);
			i915_vma_close(vma);

			i915_gem_object_put(obj);

			if (err)
				goto out_device;
		}
	}

	goto out_device;

out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_device:
	mkwrite_device_info(i915)->page_sizes = saved_mask;

	return err;
}

static int igt_mock_ppgtt_misaligned_dma(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	int bit;
	int err;

	/*
	 * Sanity check dma misalignment for huge pages -- the dma addresses we
	 * insert into the paging structures need to always respect the page
	 * size alignment.
	 */

	bit = ilog2(I915_GTT_PAGE_SIZE_64K);

	for_each_set_bit_from(bit, &supported,
			      ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		IGT_TIMEOUT(end_time);
		unsigned int page_size = BIT(bit);
		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
		unsigned int offset;
		unsigned int size =
			round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
		struct i915_vma *vma;

		obj = fake_huge_pages_object(i915, size, true);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zu, expected=%u\n",
			       obj->base.size, size);
			err = -EINVAL;
			goto out_put;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		/* Force the page size for this object */
		obj->mm.page_sizes.sg = page_size;

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_unpin;
		}

		err = i915_vma_pin(vma, 0, 0, flags);
		if (err) {
			i915_vma_close(vma);
			goto out_unpin;
		}

		err = igt_check_page_sizes(vma);

		if (vma->page_sizes.gtt != page_size) {
			pr_err("page_sizes.gtt=%u, expected %u\n",
			       vma->page_sizes.gtt, page_size);
			err = -EINVAL;
		}

		i915_vma_unpin(vma);

		if (err) {
			i915_vma_close(vma);
			goto out_unpin;
		}

		/*
		 * Try all the other valid offsets until the next
		 * boundary -- should always fall back to using 4K
		 * pages.
		 */
		for (offset = 4096; offset < page_size; offset += 4096) {
			err = i915_vma_unbind(vma);
			if (err) {
				i915_vma_close(vma);
				goto out_unpin;
			}

			err = i915_vma_pin(vma, 0, 0, flags | offset);
			if (err) {
				i915_vma_close(vma);
				goto out_unpin;
			}

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
				pr_err("page_sizes.gtt=%u, expected %llu\n",
				       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);

			if (err) {
				i915_vma_close(vma);
				goto out_unpin;
			}

			if (igt_timeout(end_time,
					"%s timed out at offset %x with page-size %x\n",
					__func__, offset, page_size))
				break;
		}

		i915_vma_close(vma);

		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

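/*
 * Tear down a list of objects: drop the vma first (if one was
 * instantiated), then unpin and release the backing store before putting
 * the object itself.
 */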
static void close_object_list(struct list_head *objects,
			      struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		struct i915_vma *vma;

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (!IS_ERR(vma))
			i915_vma_close(vma);

		list_del(&obj->st_link);
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		i915_gem_object_put(obj);
	}
}

static int igt_mock_ppgtt_huge_fill(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
	unsigned long page_num;
	bool single = false;
	LIST_HEAD(objects);
	IGT_TIMEOUT(end_time);
	int err = -ENODEV;

	for_each_prime_number_from(page_num, 1, max_pages) {
		struct drm_i915_gem_object *obj;
		u64 size = page_num << PAGE_SHIFT;
		struct i915_vma *vma;
		unsigned int expected_gtt = 0;
		int i;

		obj = fake_huge_pages_object(i915, size, single);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zd, expected=%llu\n",
			       obj->base.size, size);
			i915_gem_object_put(obj);
			err = -EINVAL;
			break;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			break;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER);
		if (err)
			break;

		err = igt_check_page_sizes(vma);
		if (err) {
			i915_vma_unpin(vma);
			break;
		}

		/*
		 * Figure out the expected gtt page size knowing that we go from
		 * largest to smallest page size sg chunks, and that we align to
		 * the largest page size.
		 */
		for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
			unsigned int page_size = page_sizes[i];

			if (HAS_PAGE_SIZES(i915, page_size) &&
			    size >= page_size) {
				expected_gtt |= page_size;
				size &= page_size-1;
			}
		}

		GEM_BUG_ON(!expected_gtt);
		GEM_BUG_ON(size);

		if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
			expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;

		i915_vma_unpin(vma);

		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
			if (!IS_ALIGNED(vma->node.start,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.start(%llx) not aligned to 2M\n",
				       vma->node.start);
				err = -EINVAL;
				break;
			}

			if (!IS_ALIGNED(vma->node.size,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.size(%llx) not aligned to 2M\n",
				       vma->node.size);
				err = -EINVAL;
				break;
			}
		}

		if (vma->page_sizes.gtt != expected_gtt) {
			pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
			       vma->page_sizes.gtt, expected_gtt,
			       obj->base.size, yesno(!!single));
			err = -EINVAL;
			break;
		}

		if (igt_timeout(end_time,
				"%s timed out at size %zd\n",
				__func__, obj->base.size))
			break;

		single = !single;
	}

	close_object_list(&objects, ppgtt);

	if (err == -ENOMEM || err == -ENOSPC)
		err = 0;

	return err;
}

static int igt_mock_ppgtt_64K(void *arg)
{
	struct i915_hw_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct drm_i915_gem_object *obj;
	const struct object_info {
		unsigned int size;
		unsigned int gtt;
		unsigned int offset;
	} objects[] = {
		/* Cases with forced padding/alignment */
		{
			.size = SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_64K + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_64K - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		/* Try without any forced padding/alignment */
		{
			.size = SZ_64K,
			.offset = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
		{
			.size = SZ_128K,
			.offset = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
	};
	struct i915_vma *vma;
	int i, single;
	int err;

	/*
	 * Sanity check some of the trickiness with 64K pages -- either we can
	 * safely mark the whole page-table (2M block) as 64K, or we have to
	 * always fall back to 4K.
	 */

	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
		return 0;

	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
		unsigned int size = objects[i].size;
		unsigned int expected_gtt = objects[i].gtt;
		unsigned int offset = objects[i].offset;
		unsigned int flags = PIN_USER;

		for (single = 0; single <= 1; single++) {
			obj = fake_huge_pages_object(i915, size, !!single);
			if (IS_ERR(obj))
				return PTR_ERR(obj);

			err = i915_gem_object_pin_pages(obj);
			if (err)
				goto out_object_put;

			/*
			 * Disable 2M pages -- We only want to use 64K/4K pages
			 * for this test.
			 */
			obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_object_unpin;
			}

			if (offset)
				flags |= PIN_OFFSET_FIXED | offset;

			err = i915_vma_pin(vma, 0, 0, flags);
			if (err)
				goto out_vma_close;

			err = igt_check_page_sizes(vma);
			if (err)
				goto out_vma_unpin;

			if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				if (!IS_ALIGNED(vma->node.start,
						I915_GTT_PAGE_SIZE_2M)) {
					pr_err("node.start(%llx) not aligned to 2M\n",
					       vma->node.start);
					err = -EINVAL;
					goto out_vma_unpin;
				}

				if (!IS_ALIGNED(vma->node.size,
						I915_GTT_PAGE_SIZE_2M)) {
					pr_err("node.size(%llx) not aligned to 2M\n",
					       vma->node.size);
					err = -EINVAL;
					goto out_vma_unpin;
				}
			}

			if (vma->page_sizes.gtt != expected_gtt) {
				pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
				       vma->page_sizes.gtt, expected_gtt, i,
				       yesno(!!single));
				err = -EINVAL;
				goto out_vma_unpin;
			}

			i915_vma_unpin(vma);
			i915_vma_close(vma);

			i915_gem_object_unpin_pages(obj);
			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
			i915_gem_object_put(obj);
		}
	}

	return 0;

out_vma_unpin:
	i915_vma_unpin(vma);
out_vma_close:
	i915_vma_close(vma);
out_object_unpin:
	i915_gem_object_unpin_pages(obj);
out_object_put:
	i915_gem_object_put(obj);

	return err;
}

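/*
 * Build a batch which writes @val at the given dword of every page of the
 * target vma, using MI_STORE_DWORD_IMM with the gen-appropriate addressing
 * (64b addresses on gen8+, 32b before that). The returned batch vma is
 * already pinned into the same address space.
 */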
static struct i915_vma *
gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	const int gen = INTEL_GEN(i915);
	unsigned int count = vma->size >> PAGE_SHIFT;
	struct drm_i915_gem_object *obj;
	struct i915_vma *batch;
	unsigned int size;
	u32 *cmd;
	int n;
	int err;

	size = (1 + 4 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	offset += vma->node.start;

	for (n = 0; n < count; n++) {
		if (gen >= 8) {
			*cmd++ = MI_STORE_DWORD_IMM_GEN4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = val;
		} else if (gen >= 4) {
			*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
				(gen < 6 ? MI_USE_GGTT : 0);
			*cmd++ = 0;
			*cmd++ = offset;
			*cmd++ = val;
		} else {
			*cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
			*cmd++ = offset;
			*cmd++ = val;
		}

		offset += PAGE_SIZE;
	}

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(i915);

	i915_gem_object_unpin_map(obj);

	batch = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (err)
		goto err;

	return batch;

err:
	i915_gem_object_put(obj);

	return ERR_PTR(err);
}

static int gpu_write(struct i915_vma *vma,
		     struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine,
		     u32 dword,
		     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(engine));

	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
	if (err)
		return err;

	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto err_request;

	i915_gem_object_set_active_reference(batch->obj);

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto err_request;

	err = engine->emit_bb_start(rq,
				    batch->node.start, batch->node.size,
				    0);
err_request:
	if (err)
		i915_request_skip(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
	i915_vma_close(batch);

	return err;
}

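/*
 * Read the object back through the CPU and check that every page carries
 * the expected dword value, flushing stale cachelines first where the
 * mapping is not coherent.
 */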
static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned int needs_flush;
	unsigned long n;
	int err;

	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));

		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(ptr, PAGE_SIZE);

		if (ptr[dword] != val) {
			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
			       n, dword, ptr[dword], val);
			kunmap_atomic(ptr);
			err = -EINVAL;
			break;
		}

		kunmap_atomic(ptr);
	}

	i915_gem_obj_finish_shmem_access(obj);

	return err;
}

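/*
 * Bind the object at the given fixed offset, write a dword from the given
 * engine and confirm the write landed where expected. A -ENOSPC when
 * pinning into the ggtt is tolerated, since some ranges may be reserved.
 */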
static int __igt_write_huge(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 size, u64 offset,
			    u32 dword, u32 val)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_unbind(vma);
	if (err)
		goto out_vma_close;

	err = i915_vma_pin(vma, size, 0, flags | offset);
	if (err) {
		/*
		 * The ggtt may have some pages reserved so
		 * refrain from erroring out.
		 */
		if (err == -ENOSPC && i915_is_ggtt(vm))
			err = 0;

		goto out_vma_close;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_vma_unpin;

	err = gpu_write(vma, ctx, engine, dword, val);
	if (err) {
		pr_err("gpu-write failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

	err = cpu_check(obj, dword, val);
	if (err)
		pr_err("cpu-check failed at offset=%llx\n", offset);

out_vma_unpin:
	i915_vma_unpin(vma);
out_vma_close:
	i915_vma_destroy(vma);

	return err;
}

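/*
 * Exercise gpu writes across a randomized selection of engines and a
 * spread of low/high offsets within the address space, until the timeout
 * expires.
 */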
static int igt_write_huge(struct i915_gem_context *ctx,
			  struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
	struct intel_engine_cs *engine;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int max_page_size;
	unsigned int id;
	u64 max;
	u64 num;
	u64 size;
	int *order;
	int i, n;
	int err = 0;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	size = obj->base.size;
	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
		size = round_up(size, I915_GTT_PAGE_SIZE_2M);

	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
	max = div_u64((vm->total - size), max_page_size);

	n = 0;
	for_each_engine(engine, i915, id) {
		if (!intel_engine_can_store_dword(engine)) {
			pr_info("store-dword-imm not supported on engine=%u\n",
				id);
			continue;
		}
		engines[n++] = engine;
	}

	if (!n)
		return 0;

	/*
	 * To keep things interesting when alternating between engines in our
	 * randomized order, let's also make feeding to the same engine a few
	 * times in succession a possibility by enlarging the permutation array.
	 */
	order = i915_random_order(n * I915_NUM_ENGINES, &prng);
	if (!order)
		return -ENOMEM;

	/*
	 * Try various offsets in an ascending/descending fashion until we
	 * timeout -- we want to avoid issues hidden by effectively always using
	 * offset = 0.
	 */
	i = 0;
	for_each_prime_number_from(num, 0, max) {
		u64 offset_low = num * max_page_size;
		u64 offset_high = (max - num) * max_page_size;
		u32 dword = offset_in_page(num) / 4;

		engine = engines[order[i] % n];
		i = (i + 1) % (n * I915_NUM_ENGINES);

		/*
		 * In order to utilize 64K pages we need to both pad the vma
		 * size and ensure the vma offset is at the start of the pt
		 * boundary, however to improve coverage we opt for testing both
		 * aligned and unaligned offsets.
		 */
		if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
			offset_low = round_down(offset_low,
						I915_GTT_PAGE_SIZE_2M);

		err = __igt_write_huge(ctx, engine, obj, size, offset_low,
				       dword, num + 1);
		if (err)
			break;

		err = __igt_write_huge(ctx, engine, obj, size, offset_high,
				       dword, num + 1);
		if (err)
			break;

		if (igt_timeout(end_time,
				"%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
				__func__, engine->id, offset_low, offset_high,
				max_page_size))
			break;
	}

	kfree(order);

	return err;
}

static int igt_ppgtt_exhaust_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	static unsigned int pages[ARRAY_SIZE(page_sizes)];
	struct drm_i915_gem_object *obj;
	unsigned int size_mask;
	unsigned int page_mask;
	int n, i;
	int err = -ENODEV;

	if (supported == I915_GTT_PAGE_SIZE_4K)
		return 0;

	/*
	 * Sanity check creating objects with a varying mix of page sizes --
	 * ensuring that our writes land in the right place.
	 */

	n = 0;
	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
		pages[n++] = BIT(i);

	for (size_mask = 2; size_mask < BIT(n); size_mask++) {
		unsigned int size = 0;

		for (i = 0; i < n; i++) {
			if (size_mask & BIT(i))
				size |= pages[i];
		}

		/*
		 * For our page mask we want to enumerate all the page-size
		 * combinations which will fit into our chosen object size.
		 */
		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
			unsigned int page_sizes = 0;

			for (i = 0; i < n; i++) {
				if (page_mask & BIT(i))
					page_sizes |= pages[i];
			}

			/*
			 * Ensure that we can actually fill the given object
			 * with our chosen page mask.
			 */
			if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
				continue;

			obj = huge_pages_object(i915, size, page_sizes);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			err = i915_gem_object_pin_pages(obj);
			if (err) {
				i915_gem_object_put(obj);

				if (err == -ENOMEM) {
					pr_info("unable to get pages, size=%u, pages=%u\n",
						size, page_sizes);
					err = 0;
					break;
				}

				pr_err("pin_pages failed, size=%u, pages=%u\n",
				       size_mask, page_mask);

				goto out_device;
			}

			/* Force the page-size for the gtt insertion */
			obj->mm.page_sizes.sg = page_sizes;

			err = igt_write_huge(ctx, obj);
			if (err) {
				pr_err("exhaust write-huge failed with size=%u\n",
				       size);
				goto out_unpin;
			}

			i915_gem_object_unpin_pages(obj);
			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
			i915_gem_object_put(obj);
		}
	}

	goto out_device;

out_unpin:
	i915_gem_object_unpin_pages(obj);
	i915_gem_object_put(obj);
out_device:
	mkwrite_device_info(i915)->page_sizes = supported;

	return err;
}

static int igt_ppgtt_internal_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	static const unsigned int sizes[] = {
		SZ_64K,
		SZ_128K,
		SZ_256K,
		SZ_512K,
		SZ_1M,
		SZ_2M,
	};
	int i;
	int err;

	/*
	 * Sanity check that the HW uses huge pages correctly through our
	 * internal backing store -- ensure that our writes land in the right
	 * place.
	 */

	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
		unsigned int size = sizes[i];

		obj = i915_gem_object_create_internal(i915, size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
			pr_info("internal unable to allocate huge-page(s) with size=%u\n",
				size);
			goto out_unpin;
		}

		err = igt_write_huge(ctx, obj);
		if (err) {
			pr_err("internal write-huge failed with size=%u\n",
			       size);
			goto out_unpin;
		}

		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

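/* Transparent hugepage support is required for gemfs-backed huge pages. */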
static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
{
	return i915->mm.gemfs && has_transparent_hugepage();
}

static int igt_ppgtt_gemfs_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	static const unsigned int sizes[] = {
		SZ_2M,
		SZ_4M,
		SZ_8M,
		SZ_16M,
		SZ_32M,
	};
	int i;
	int err;

	/*
	 * Sanity check that the HW uses huge pages correctly through gemfs --
	 * ensure that our writes land in the right place.
	 */

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		return 0;
	}

	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
		unsigned int size = sizes[i];

		obj = i915_gem_object_create(i915, size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
			pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n",
				size);
			goto out_unpin;
		}

		err = igt_write_huge(ctx, obj);
		if (err) {
			pr_err("gemfs write-huge failed with size=%u\n",
			       size);
			goto out_unpin;
		}

		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static int igt_ppgtt_pin_update(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *dev_priv = ctx->i915;
	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	int first, last;
	int err;

	/*
	 * Make sure there's no funny business when doing a PIN_UPDATE -- in the
	 * past we had a subtle issue with being able to incorrectly do multiple
	 * alloc va ranges on the same object when doing a PIN_UPDATE, which
	 * resulted in some pretty nasty bugs, though only when using
	 * huge-gtt-pages.
	 */

	if (!ppgtt || !i915_vm_is_4lvl(&ppgtt->vm)) {
		pr_info("48b PPGTT not supported, skipping\n");
		return 0;
	}

	first = ilog2(I915_GTT_PAGE_SIZE_64K);
	last = ilog2(I915_GTT_PAGE_SIZE_2M);

	for_each_set_bit_from(first, &supported, last + 1) {
		unsigned int page_size = BIT(first);

		obj = i915_gem_object_create_internal(dev_priv, page_size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_put;
		}

		err = i915_vma_pin(vma, SZ_2M, 0, flags);
		if (err)
			goto out_close;

		if (vma->page_sizes.sg < page_size) {
			pr_info("Unable to allocate page-size %x, finishing test early\n",
				page_size);
			goto out_unpin;
		}

		err = igt_check_page_sizes(vma);
		if (err)
			goto out_unpin;

		if (vma->page_sizes.gtt != page_size) {
			dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0);

			/*
			 * The only valid reason for this to ever fail would be
			 * if the dma-mapper screwed us over when we did the
			 * dma_map_sg(), since it has the final say over the dma
			 * address.
			 */
			if (IS_ALIGNED(addr, page_size)) {
				pr_err("page_sizes.gtt=%u, expected=%u\n",
				       vma->page_sizes.gtt, page_size);
				err = -EINVAL;
			} else {
				pr_info("dma address misaligned, finishing test early\n");
			}

			goto out_unpin;
		}

		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
		if (err)
			goto out_unpin;

		i915_vma_unpin(vma);
		i915_vma_close(vma);

		i915_gem_object_put(obj);
	}

	obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	/*
	 * Make sure we don't end up with something like where the pde is still
	 * pointing to the 2M page, and the pt we just filled-in is dangling --
	 * we can check this by writing to the first page where it would then
	 * land in the now stale 2M page.
	 */

	err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf);
	if (err)
		goto out_unpin;

	err = cpu_check(obj, 0, 0xdeadbeaf);

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static int igt_tmpfs_fallback(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct vfsmount *gemfs = i915->mm.gemfs;
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *vaddr;
	int err = 0;

	/*
	 * Make sure that we don't burst into a ball of flames upon falling back
	 * to tmpfs, which we rely on if on the off-chance we encounter a failure
	 * when setting up gemfs.
	 */

	i915->mm.gemfs = NULL;

	obj = i915_gem_object_create(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_restore;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}
	*vaddr = 0xdeadbeaf;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_close;

	err = igt_check_page_sizes(vma);

	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_restore:
	i915->mm.gemfs = gemfs;

	return err;
}

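/*
 * The shrinker should be able to truncate a THP-backed object once its
 * pages are unpinned; on re-pin the backing store must be rebuilt from
 * scratch, with all page-size tracking cleared in between.
 */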
static int igt_shrink_thp(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags = PIN_USER;
	int err;

	/*
	 * Sanity check shrinking huge-paged object -- make sure nothing blows
	 * up.
	 */

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		return 0;
	}

	obj = i915_gem_object_create(i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
		pr_info("failed to allocate THP, finishing test early\n");
		goto out_unpin;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_unpin;

	err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf);
	if (err)
		goto out_unpin;

	i915_vma_unpin(vma);

	/*
	 * Now that the pages are *unpinned* shrink-all should invoke
	 * shmem to truncate our pages.
	 */
	i915_gem_shrink_all(i915);
	if (i915_gem_object_has_pages(obj)) {
		pr_err("shrink-all didn't truncate the pages\n");
		err = -EINVAL;
		goto out_close;
	}

	if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
		pr_err("residual page-size bits left\n");
		err = -EINVAL;
		goto out_close;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	err = cpu_check(obj, 0, 0xdeadbeaf);

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);

	return err;
}

int i915_gem_huge_page_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_exhaust_device_supported_pages),
		SUBTEST(igt_mock_ppgtt_misaligned_dma),
		SUBTEST(igt_mock_ppgtt_huge_fill),
		SUBTEST(igt_mock_ppgtt_64K),
	};
	struct drm_i915_private *dev_priv;
	struct i915_hw_ppgtt *ppgtt;
	int err;

	dev_priv = mock_gem_device();
	if (!dev_priv)
		return -ENOMEM;

	/* Pretend to be a device which supports the 48b PPGTT */
	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
	mkwrite_device_info(dev_priv)->ppgtt_size = 48;

	mutex_lock(&dev_priv->drm.struct_mutex);
	ppgtt = i915_ppgtt_create(dev_priv);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_unlock;
	}

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		pr_err("failed to create 48b PPGTT\n");
		err = -EINVAL;
		goto out_close;
	}

	/* If we ever hit this then it's time to mock the 64K scratch */
	if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
		pr_err("PPGTT missing 64K scratch page\n");
		err = -EINVAL;
		goto out_close;
	}

	err = i915_subtests(tests, ppgtt);

out_close:
	i915_ppgtt_put(ppgtt);

out_unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	drm_dev_put(&dev_priv->drm);

	return err;
}

int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_shrink_thp),
		SUBTEST(igt_ppgtt_pin_update),
		SUBTEST(igt_tmpfs_fallback),
		SUBTEST(igt_ppgtt_exhaust_huge),
		SUBTEST(igt_ppgtt_gemfs_huge),
		SUBTEST(igt_ppgtt_internal_huge),
	};
	struct drm_file *file;
	struct i915_gem_context *ctx;
	intel_wakeref_t wakeref;
	int err;

	if (!HAS_PPGTT(dev_priv)) {
		pr_info("PPGTT not supported, skipping live-selftests\n");
		return 0;
	}

	if (i915_terminally_wedged(dev_priv))
		return 0;

	file = mock_file(dev_priv);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&dev_priv->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(dev_priv);

	ctx = live_context(dev_priv, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_unlock;
	}

	if (ctx->ppgtt)
		ctx->ppgtt->vm.scrub_64K = true;

	err = i915_subtests(tests, ctx);

out_unlock:
	intel_runtime_pm_put(dev_priv, wakeref);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	mock_file_free(dev_priv, file);

	return err;
}