// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/sort.h>

#include "../i915_selftest.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
#include "mock_region.h"

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_object_blt.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "i915_memcpy.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void close_objects(struct intel_memory_region *mem,
			  struct list_head *objects)
{
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		i915_gem_object_lock(obj, NULL);
		if (i915_gem_object_has_pinned_pages(obj))
			i915_gem_object_unpin_pages(obj);
		/* No polluting the memory region between tests */
		__i915_gem_object_put_pages(obj);
		i915_gem_object_unlock(obj);
		list_del(&obj->st_link);
		i915_gem_object_put(obj);
	}

	cond_resched();

	i915_gem_drain_freed_objects(i915);
}

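/*
 * Allocate prime-numbered multiples of the region's minimum chunk size until
 * the region is full, and check that allocation only fails with -ENXIO once
 * no space is actually left.
 */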
static int igt_mock_fill(void *arg)
{
	struct intel_memory_region *mem = arg;
	resource_size_t total = resource_size(&mem->region);
	resource_size_t page_size;
	resource_size_t rem;
	unsigned long max_pages;
	unsigned long page_num;
	LIST_HEAD(objects);
	int err = 0;

	page_size = mem->mm.chunk_size;
	max_pages = div64_u64(total, page_size);
	rem = total;

	for_each_prime_number_from(page_num, 1, max_pages) {
		resource_size_t size = page_num * page_size;
		struct drm_i915_gem_object *obj;

		obj = i915_gem_object_create_region(mem, size, 0);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);
		rem -= size;
	}

	if (err == -ENOMEM)
		err = 0;
	if (err == -ENXIO) {
		if (page_num * page_size <= rem) {
			pr_err("%s failed, space still left in region\n",
			       __func__);
			err = -EINVAL;
		} else {
			err = 0;
		}
	}

	close_objects(mem, &objects);

	return err;
}

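/* Create an object in @mem, pin its pages, and track it on @objects */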
static struct drm_i915_gem_object *
igt_object_create(struct intel_memory_region *mem,
		  struct list_head *objects,
		  u64 size,
		  unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_region(mem, size, flags);
	if (IS_ERR(obj))
		return obj;

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto put;

	list_add(&obj->st_link, objects);
	return obj;

put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

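/* Undo igt_object_create(): unpin, release the pages and drop the object */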
static void igt_object_release(struct drm_i915_gem_object *obj)
{
	i915_gem_object_lock(obj, NULL);
	i915_gem_object_unpin_pages(obj);
	__i915_gem_object_put_pages(obj);
	i915_gem_object_unlock(obj);
	list_del(&obj->st_link);
	i915_gem_object_put(obj);
}

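/* Check that the object's dma addresses form one contiguous range */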
static bool is_contiguous(struct drm_i915_gem_object *obj)
{
	struct scatterlist *sg;
	dma_addr_t addr = -1;

	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
		if (addr != -1 && sg_dma_address(sg) != addr)
			return false;

		addr = sg_dma_address(sg) + sg_dma_len(sg);
	}

	return true;
}

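/*
 * Reserve a randomly sized, randomly placed chunk in each 32M slice of the
 * region, then check that every byte of the remaining space can still be
 * allocated.
 */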
static int igt_mock_reserve(void *arg)
{
	struct intel_memory_region *mem = arg;
	resource_size_t avail = resource_size(&mem->region);
	struct drm_i915_gem_object *obj;
	const u32 chunk_size = SZ_32M;
	u32 i, offset, count, *order;
	u64 allocated, cur_avail;
	I915_RND_STATE(prng);
	LIST_HEAD(objects);
	int err = 0;

	if (!list_empty(&mem->reserved)) {
		pr_err("%s region reserved list is not empty\n", __func__);
		return -EINVAL;
	}

	count = avail / chunk_size;
	order = i915_random_order(count, &prng);
	if (!order)
		return 0;

	/* Reserve a bunch of ranges within the region */
	for (i = 0; i < count; ++i) {
		u64 start = order[i] * chunk_size;
		u64 size = i915_prandom_u32_max_state(chunk_size, &prng);

		/* Allow for some really big holes */
		if (!size)
			continue;

		size = round_up(size, PAGE_SIZE);
		offset = igt_random_offset(&prng, 0, chunk_size, size,
					   PAGE_SIZE);

		err = intel_memory_region_reserve(mem, start + offset, size);
		if (err) {
			pr_err("%s failed to reserve range\n", __func__);
			goto out_close;
		}

		/* XXX: maybe sanity check the block range here? */
		avail -= size;
	}

	/* Try to see if we can allocate from the remaining space */
	allocated = 0;
	cur_avail = avail;
	do {
		u32 size = i915_prandom_u32_max_state(cur_avail, &prng);

		size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
		obj = igt_object_create(mem, &objects, size, 0);
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -ENXIO)
				break;

			err = PTR_ERR(obj);
			goto out_close;
		}
		cur_avail -= size;
		allocated += size;
	} while (1);

	if (allocated != avail) {
		pr_err("%s mismatch between allocation and free space\n", __func__);
		err = -EINVAL;
	}

out_close:
	kfree(order); /* i915_random_order() allocates; don't leak it */
	close_objects(mem, &objects);
	i915_buddy_free_list(&mem->mm, &mem->reserved);
	return err;
}

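/*
 * Exercise I915_BO_ALLOC_CONTIGUOUS: minimum and maximum sized objects, exact
 * object sizes in the face of internal fragmentation, and that a contiguous
 * allocation fails once no single block is big enough, even while plenty of
 * total space remains.
 */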
static int igt_mock_contiguous(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_gem_object *obj;
	unsigned long n_objects;
	LIST_HEAD(objects);
	LIST_HEAD(holes);
	I915_RND_STATE(prng);
	resource_size_t total;
	resource_size_t min;
	u64 target;
	int err = 0;

	total = resource_size(&mem->region);

	/* Min size */
	obj = igt_object_create(mem, &objects, mem->mm.chunk_size,
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (!is_contiguous(obj)) {
		pr_err("%s min object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/* Max size */
	obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (!is_contiguous(obj)) {
		pr_err("%s max object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/* Internal fragmentation should not bleed into the object size */
	target = i915_prandom_u64_state(&prng);
	div64_u64_rem(target, total, &target);
	target = round_up(target, PAGE_SIZE);
	target = max_t(u64, PAGE_SIZE, target);

	obj = igt_object_create(mem, &objects, target,
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (obj->base.size != target) {
		pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
		       obj->base.size, target);
		err = -EINVAL;
		goto err_close_objects;
	}

	if (!is_contiguous(obj)) {
		pr_err("%s object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/*
	 * Try to fragment the address space, such that half of it is free, but
	 * the max contiguous block size is SZ_64K.
	 */
	target = SZ_64K;
	n_objects = div64_u64(total, target);

	while (n_objects--) {
		struct list_head *list;

		if (n_objects % 2)
			list = &holes;
		else
			list = &objects;

		obj = igt_object_create(mem, list, target,
					I915_BO_ALLOC_CONTIGUOUS);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_close_objects;
		}
	}

	close_objects(mem, &holes);

	min = target;
	target = total >> 1;

	/* Make sure we can still allocate all the fragmented space */
	obj = igt_object_create(mem, &objects, target, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_close_objects;
	}

	igt_object_release(obj);

	/*
	 * Even though we have enough free space, we don't have a big enough
	 * contiguous block. Make sure that holds true.
	 */
	do {
		bool should_fail = target > min;

		obj = igt_object_create(mem, &objects, target,
					I915_BO_ALLOC_CONTIGUOUS);
		if (should_fail != IS_ERR(obj)) {
			pr_err("%s target allocation(%llx) mismatch\n",
			       __func__, target);
			err = -EINVAL;
			goto err_close_objects;
		}

		target >>= 1;
	} while (target >= mem->mm.chunk_size);

err_close_objects:
	list_splice_tail(&holes, &objects);
	close_objects(mem, &objects);
	return err;
}

static int igt_mock_splintered_region(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj;
	unsigned int expected_order;
	LIST_HEAD(objects);
	u64 size;
	int err = 0;

	/*
	 * Sanity check we can still allocate everything even if the
	 * mm.max_order != mm.size, i.e. our starting address space size is not
	 * a power-of-two.
	 */
	size = (SZ_4G - 1) & PAGE_MASK;
	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	if (mem->mm.size != size) {
		pr_err("%s size mismatch(%llu != %llu)\n",
		       __func__, mem->mm.size, size);
		err = -EINVAL;
		goto out_put;
	}

	expected_order = get_order(rounddown_pow_of_two(size));
	if (mem->mm.max_order != expected_order) {
		pr_err("%s order mismatch(%u != %u)\n",
		       __func__, mem->mm.max_order, expected_order);
		err = -EINVAL;
		goto out_put;
	}

	obj = igt_object_create(mem, &objects, size, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_close;
	}

	close_objects(mem, &objects);

	/*
	 * While we should be able to allocate everything without any flag
	 * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are
	 * actually limited to the largest power-of-two for the region size,
	 * i.e. max_order, due to the inner workings of the buddy allocator.
	 * So make sure that does indeed hold true.
	 */
	obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS);
	if (!IS_ERR(obj)) {
		pr_err("%s too large contiguous allocation was not rejected\n",
		       __func__);
		err = -EINVAL;
		goto out_close;
	}

	obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size),
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj)) {
		pr_err("%s largest possible contiguous allocation failed\n",
		       __func__);
		err = PTR_ERR(obj);
		goto out_close;
	}

out_close:
	close_objects(mem, &objects);
out_put:
	intel_memory_region_put(mem);
	return err;
}

#ifndef SZ_8G
#define SZ_8G BIT_ULL(33)
#endif

static int igt_mock_max_segment(void *arg)
{
	const unsigned int max_segment = i915_sg_segment_size();
	struct intel_memory_region *mem = arg;
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj;
	struct i915_buddy_block *block;
	struct scatterlist *sg;
	LIST_HEAD(objects);
	u64 size;
	int err = 0;

	/*
	 * While we may create very large contiguous blocks, we may need
	 * to break those down for consumption elsewhere. In particular,
	 * dma-mapping with scatterlist elements has an implicit limit of
	 * UINT_MAX on each element.
	 */
	size = SZ_8G;
	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	obj = igt_object_create(mem, &objects, size, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_put;
	}

	size = 0;
	list_for_each_entry(block, &obj->mm.blocks, link) {
		if (i915_buddy_block_size(&mem->mm, block) > size)
			size = i915_buddy_block_size(&mem->mm, block);
	}
	if (size < max_segment) {
		pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
		       __func__, max_segment, size);
		err = -EINVAL;
		goto out_close;
	}

	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
		if (sg->length > max_segment) {
			pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
			       __func__, sg->length, max_segment);
			err = -EINVAL;
			goto out_close;
		}
	}

out_close:
	close_objects(mem, &objects);
out_put:
	intel_memory_region_put(mem);
	return err;
}

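/* Write @value into one dword of every page spanned by @vma, from the GPU */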
static int igt_gpu_write_dw(struct intel_context *ce,
			    struct i915_vma *vma,
			    u32 dword,
			    u32 value)
{
	return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
			       vma->size >> PAGE_SHIFT, value);
}

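/* Read the same dword back in every page from the CPU and compare */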
static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned long n = obj->base.size >> PAGE_SHIFT;
	u32 *ptr;
	int err;

	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	ptr += dword;
	while (n--) {
		if (*ptr != val) {
			pr_err("base[%u]=%08x, val=%08x\n",
			       dword, *ptr, val);
			err = -EINVAL;
			break;
		}

		ptr += PAGE_SIZE / sizeof(*ptr);
	}

	i915_gem_object_unpin_map(obj);
	return err;
}

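/*
 * Write a random dword per page from randomly chosen engines (any engine
 * that can store a dword), verifying each write from the CPU, until the
 * timeout expires.
 */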
static int igt_gpu_write(struct i915_gem_context *ctx,
			 struct drm_i915_gem_object *obj)
{
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct intel_context *ce;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int count;
	struct i915_vma *vma;
	int *order;
	int i, n;
	int err = 0;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	n = 0;
	count = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		count++;
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		vm = ce->vm;
		n++;
	}
	i915_gem_context_unlock_engines(ctx);
	if (!n)
		return 0;

	order = i915_random_order(count * count, &prng);
	if (!order)
		return -ENOMEM;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_free;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_free;

	i = 0;
	engines = i915_gem_context_lock_engines(ctx);
	do {
		u32 rng = prandom_u32_state(&prng);
		u32 dword = offset_in_page(rng) / 4;

		ce = engines->engines[order[i] % engines->num_engines];
		i = (i + 1) % (count * count);
		if (!ce || !intel_engine_can_store_dword(ce->engine))
			continue;

		err = igt_gpu_write_dw(ce, vma, dword, rng);
		if (err)
			break;

		err = igt_cpu_check(obj, dword, rng);
		if (err)
			break;
	} while (!__igt_timeout(end_time, NULL));
	i915_gem_context_unlock_engines(ctx);

out_free:
	kfree(order);

	if (err == -ENOMEM)
		err = 0;

	return err;
}

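/* Smoke test: create a minimal LMEM object and pin its backing pages */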
static int igt_lmem_create(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	int err = 0;

	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

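/* Write to a randomly sized LMEM object from the GPU and verify the result */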
static int igt_lmem_write_gpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct file *file;
	I915_RND_STATE(prng);
	u32 sz;
	int err;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);

	obj = i915_gem_object_create_lmem(i915, sz, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_file;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	err = igt_gpu_write(ctx, obj);
	if (err)
		pr_err("igt_gpu_write failed(%d)\n", err);

	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);
out_file:
	fput(file);
	return err;
}

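/* Pick a random engine from the given uabi class */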
static struct intel_engine_cs *
random_engine_class(struct drm_i915_private *i915,
		    unsigned int class,
		    struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for (engine = intel_engine_lookup_user(i915, class, 0);
	     engine && engine->uabi_class == class;
	     engine = rb_entry_safe(rb_next(&engine->uabi_node),
				    typeof(*engine), uabi_node))
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	return intel_engine_lookup_user(i915, class, count);
}

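/*
 * Fill an LMEM object from the GPU, then stress CPU writes through a WC
 * mapping using randomly sized and aligned memset32() bursts, sampling one
 * dword per burst to verify it landed.
 */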
static int igt_lmem_write_cpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	u32 bytes[] = {
		0, /* rng placeholder */
		sizeof(u32),
		sizeof(u64),
		64, /* cl */
		PAGE_SIZE,
		PAGE_SIZE - sizeof(u32),
		PAGE_SIZE - sizeof(u64),
		PAGE_SIZE - 64,
	};
	struct intel_engine_cs *engine;
	u32 *vaddr;
	u32 sz;
	u32 i;
	int *order;
	int count;
	int err;

	engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
	if (!engine)
		return 0;

	pr_info("%s: using %s\n", __func__, engine->name);

	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
	sz = max_t(u32, 2 * PAGE_SIZE, sz);

	obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}

	/* Put the pages into a known state -- from the gpu for added fun */
	intel_engine_pm_get(engine);
	err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf);
	intel_engine_pm_put(engine);
	if (err)
		goto out_unpin;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_wc_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_unpin;

	count = ARRAY_SIZE(bytes);
	order = i915_random_order(count * count, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_unpin;
	}

	/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
	bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
	GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));

	i = 0;
	do {
		u32 offset;
		u32 align;
		u32 dword;
		u32 size;
		u32 val;

		size = bytes[order[i] % count];
		i = (i + 1) % (count * count);

		align = bytes[order[i] % count];
		i = (i + 1) % (count * count);

		align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));

		offset = igt_random_offset(&prng, 0, obj->base.size,
					   size, align);

		val = prandom_u32_state(&prng);
		memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
			 size / sizeof(u32));

		/*
		 * Sample random dw -- don't waste precious time reading every
		 * single dw.
		 */
		dword = igt_random_offset(&prng, offset,
					  offset + size,
					  sizeof(u32), sizeof(u32));
		dword /= sizeof(u32);
		if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
			pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
			       __func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
			       size, align, offset);
			err = -EINVAL;
			break;
		}
	} while (!__igt_timeout(end_time, NULL));

	kfree(order);
out_unpin:
	i915_gem_object_unpin_map(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static const char *repr_type(u32 type)
{
	switch (type) {
	case I915_MAP_WB:
		return "WB";
	case I915_MAP_WC:
		return "WC";
	}

	return "";
}

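/*
 * Create and map an object in @mr, returning the vaddr via @out_addr.
 * Regions that cannot support the request (-ENOSPC from stolen memory,
 * -ENXIO for an unsupported mapping type) are translated to -ENODEV so
 * callers can treat them as "skip".
 */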
static struct drm_i915_gem_object *
create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
			  void **out_addr)
{
	struct drm_i915_gem_object *obj;
	void *addr;

	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj)) {
		if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
			return ERR_PTR(-ENODEV);
		return obj;
	}

	addr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(addr)) {
		i915_gem_object_put(obj);
		if (PTR_ERR(addr) == -ENXIO)
			return ERR_PTR(-ENODEV);
		return addr;
	}

	*out_addr = addr;
	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

static void igt_memcpy_long(void *dst, const void *src, size_t size)
{
	unsigned long *tmp = dst;
	const unsigned long *s = src;

	size = size / sizeof(unsigned long);
	while (size--)
		*tmp++ = *s++;
}

static inline void igt_memcpy(void *dst, const void *src, size_t size)
{
	memcpy(dst, src, size);
}

static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
	i915_memcpy_from_wc(dst, src, size);
}

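/*
 * Time one (src region/type, dst region/type, size) combination for each
 * copy routine: five ktime samples, sorted, with throughput reported from a
 * weighted mean of the middle measurements.
 */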
static int _perf_memcpy(struct intel_memory_region *src_mr,
			struct intel_memory_region *dst_mr,
			u64 size, u32 src_type, u32 dst_type)
{
	struct drm_i915_private *i915 = src_mr->i915;
	const struct {
		const char *name;
		void (*copy)(void *dst, const void *src, size_t size);
		bool skip;
	} tests[] = {
		{ "memcpy", igt_memcpy },
		{ "memcpy_long", igt_memcpy_long },
		{ "memcpy_from_wc", igt_memcpy_from_wc,
		  !i915_has_memcpy_from_wc() },
	};
	struct drm_i915_gem_object *src, *dst;
	void *src_addr, *dst_addr;
	int ret = 0;
	int i;

	src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
	if (IS_ERR(src)) {
		ret = PTR_ERR(src);
		goto out;
	}

	dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
	if (IS_ERR(dst)) {
		ret = PTR_ERR(dst);
		goto out_unpin_src;
	}

	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
		ktime_t t[5];
		int pass;

		if (tests[i].skip)
			continue;

		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			ktime_t t0, t1;

			t0 = ktime_get();

			tests[i].copy(dst_addr, src_addr, size);

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		if (t[0] <= 0) {
			/* ignore the impossible to protect our sanity */
			pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
				 __func__,
				 src_mr->name, repr_type(src_type),
				 dst_mr->name, repr_type(dst_type),
				 tests[i].name, size >> 10,
				 t[0], t[4]);
			continue;
		}

		pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
			__func__,
			src_mr->name, repr_type(src_type),
			dst_mr->name, repr_type(dst_type),
			tests[i].name, size >> 10,
			div64_u64(mul_u32_u32(4 * size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);

		cond_resched();
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_put(dst);
out_unpin_src:
	i915_gem_object_unpin_map(src);
	i915_gem_object_put(src);

	i915_gem_drain_freed_objects(i915);
out:
	return ret;
}

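/* Sweep every src region x dst region x size x mapping-type combination */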
static int perf_memcpy(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const u32 types[] = {
		I915_MAP_WB,
		I915_MAP_WC,
	};
	static const u32 sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_4M,
	};
	struct intel_memory_region *src_mr, *dst_mr;
	int src_id, dst_id;
	int i, j, k;
	int ret;

	for_each_memory_region(src_mr, i915, src_id) {
		for_each_memory_region(dst_mr, i915, dst_id) {
			for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
				for (j = 0; j < ARRAY_SIZE(types); ++j) {
					for (k = 0; k < ARRAY_SIZE(types); ++k) {
						ret = _perf_memcpy(src_mr,
								   dst_mr,
								   sizes[i],
								   types[j],
								   types[k]);
						if (ret)
							return ret;
					}
				}
			}
		}
	}

	return 0;
}

int intel_memory_region_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_reserve),
		SUBTEST(igt_mock_fill),
		SUBTEST(igt_mock_contiguous),
		SUBTEST(igt_mock_splintered_region),
		SUBTEST(igt_mock_max_segment),
	};
	struct intel_memory_region *mem;
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
	if (IS_ERR(mem)) {
		pr_err("failed to create memory region\n");
		err = PTR_ERR(mem);
		goto out_unref;
	}

	err = i915_subtests(tests, mem);

	intel_memory_region_put(mem);
out_unref:
	mock_destroy_device(i915);
	return err;
}

int intel_memory_region_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_lmem_create),
		SUBTEST(igt_lmem_write_cpu),
		SUBTEST(igt_lmem_write_gpu),
	};

	if (!HAS_LMEM(i915)) {
		pr_info("device lacks LMEM support, skipping\n");
		return 0;
	}

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}

int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_memcpy),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}