Merge drm/drm-next into drm-intel-gt-next
author     Tvrtko Ursulin <tvrtko.ursulin@intel.com>
           Mon, 22 Nov 2021 12:18:15 +0000
committer  Tvrtko Ursulin <tvrtko.ursulin@intel.com>
           Mon, 22 Nov 2021 12:18:15 +0000
Thomas needs the dma_resv_for_each_fence API for the i915/ttm async migration
work.
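
For reference, dma_resv_for_each_fence() is the locked reservation-object
fence iterator from <linux/dma-resv.h>. A minimal usage sketch, assuming the
(cursor, resv, all_fences, fence) form the helper has at this point; "resv"
and the per-fence handling are illustrative placeholders, not code taken from
this merge:

    struct dma_resv_iter cursor;
    struct dma_fence *fence;

    /* The locked iterator requires the reservation lock to be held. */
    dma_resv_assert_held(resv);

    /* Walk the exclusive fence plus all shared fences (all_fences = true). */
    dma_resv_for_each_fence(&cursor, resv, true, fence) {
            /* e.g. collect or await each fence before starting the move */
    }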

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
78 files changed:
MAINTAINERS
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/gem/i915_gem_clflush.c
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/i915_gem_domain.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_internal.c
drivers/gpu/drm/i915/gem/i915_gem_object.c
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
drivers/gpu/drm/i915/gem/i915_gem_pages.c
drivers/gpu/drm/i915/gem/i915_gem_shmem.c
drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
drivers/gpu/drm/i915/gem/i915_gem_ttm.c
drivers/gpu/drm/i915/gem/i915_gem_ttm.h
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c [new file with mode: 0644]
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h [new file with mode: 0644]
drivers/gpu/drm/i915/gem/selftests/huge_pages.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
drivers/gpu/drm/i915/gt/gen6_ppgtt.c
drivers/gpu/drm/i915/gt/gen6_ppgtt.h
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/intel_context.c
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_stats.h
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
drivers/gpu/drm/i915/gt/intel_gt_pm.c
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_mocs.c
drivers/gpu/drm/i915/gt/intel_region_lmem.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
drivers/gpu/drm/i915/gt/intel_rps.c
drivers/gpu/drm/i915/gt/intel_rps.h
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/mock_engine.c
drivers/gpu/drm/i915/gt/selftest_engine_pm.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_scatterlist.c
drivers/gpu/drm/i915/i915_scatterlist.h
drivers/gpu/drm/i915/i915_sysfs.c
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/i915_vma.h
drivers/gpu/drm/i915/i915_vma_types.h
drivers/gpu/drm/i915/intel_device_info.c
drivers/gpu/drm/i915/intel_device_info.h
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_region_ttm.c
drivers/gpu/drm/i915/intel_region_ttm.h
drivers/gpu/drm/i915/intel_step.c
drivers/gpu/drm/i915/intel_step.h
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/i915/intel_uncore.h
drivers/gpu/drm/i915/selftests/i915_gem_evict.c
drivers/gpu/drm/i915/selftests/i915_request.c
drivers/gpu/drm/i915/selftests/igt_reset.c
drivers/gpu/drm/i915/selftests/mock_gem_device.c
drivers/gpu/drm/i915/selftests/mock_region.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 5250298..8e4745a 100644
@@ -9479,6 +9479,7 @@ INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
 M:     Jani Nikula <jani.nikula@linux.intel.com>
 M:     Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
 M:     Rodrigo Vivi <rodrigo.vivi@intel.com>
+M:     Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
 L:     intel-gfx@lists.freedesktop.org
 S:     Supported
 W:     https://01.org/linuxgraphics/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 660bb03..8ef1487 100644
@@ -154,6 +154,7 @@ gem-y += \
        gem/i915_gem_throttle.o \
        gem/i915_gem_tiling.o \
        gem/i915_gem_ttm.o \
+       gem/i915_gem_ttm_move.o \
        gem/i915_gem_ttm_pm.o \
        gem/i915_gem_userptr.o \
        gem/i915_gem_wait.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index f0435c6..8a24800 100644
@@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
                             unsigned int flags)
 {
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct clflush *clflush;
 
        assert_object_held(obj);
 
+       if (IS_DGFX(i915)) {
+               WARN_ON_ONCE(obj->cache_dirty);
+               return false;
+       }
+
        /*
         * Stolen memory is always coherent with the GPU as it is explicitly
         * marked as wc by the system, or the system is cache-coherent.
@@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
        if (clflush) {
                i915_sw_fence_await_reservation(&clflush->base.chain,
                                                obj->base.resv, NULL, true,
-                                               i915_fence_timeout(to_i915(obj->base.dev)),
+                                               i915_fence_timeout(i915),
                                                I915_FENCE_GFP);
                dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
                dma_fence_work_commit(&clflush->base);
+               /*
+                * We must have successfully populated the pages(since we are
+                * holding a pin on the pages as per the flush worker) to reach
+                * this point, which must mean we have already done the required
+                * flush-on-acquire, hence resetting cache_dirty here should be
+                * safe.
+                */
+               obj->cache_dirty = false;
        } else if (obj->mm.pages) {
                __do_clflush(obj);
+               obj->cache_dirty = false;
        } else {
                GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
        }
 
-       obj->cache_dirty = false;
        return true;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index e8a58c9..f291cf4 100644
 
 MODULE_IMPORT_NS(DMA_BUF);
 
+#if defined(CONFIG_X86)
+#include <asm/smp.h>
+#else
+#define wbinvd_on_all_cpus() \
+       pr_warn(DRIVER_NAME ": Missing cache flush in %s\n", __func__)
+#endif
+
 I915_SELFTEST_DECLARE(static bool force_different_devices;)
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
@@ -248,8 +255,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
-       /* XXX: consider doing a vmap flush or something */
-       if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
+       /*
+        * DG1 is special here since it still snoops transactions even with
+        * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We
+        * might need to revisit this as we add new discrete platforms.
+        *
+        * XXX: Consider doing a vmap flush or something, where possible.
+        * Currently we just do a heavy handed wbinvd_on_all_cpus() here since
+        * the underlying sg_table might not even point to struct pages, so we
+        * can't just call drm_clflush_sg or similar, like we do elsewhere in
+        * the driver.
+        */
+       if (i915_gem_object_can_bypass_llc(obj) ||
+           (!HAS_LLC(i915) && !IS_DG1(i915)))
                wbinvd_on_all_cpus();
 
        sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index b684a62..26532c0 100644
 
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+       if (IS_DGFX(i915))
+               return false;
+
        return !(obj->cache_level == I915_CACHE_NONE ||
                 obj->cache_level == I915_CACHE_WT);
 }
 
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+       if (obj->cache_dirty)
+               return false;
+
+       if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+               return true;
+
+       if (IS_DGFX(i915))
+               return false;
+
+       /* Currently in use by HW (display engine)? Keep flushed. */
+       return i915_gem_object_is_framebuffer(obj);
+}
+
 static void
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4d7da07..879a8d5 100644
@@ -990,7 +990,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
                }
 
                if (!(ev->flags & EXEC_OBJECT_WRITE)) {
-                       err = dma_resv_reserve_shared(vma->resv, 1);
+                       err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
                        if (err)
                                return err;
                }
@@ -2164,7 +2164,7 @@ static int eb_parse(struct i915_execbuffer *eb)
                goto err_trampoline;
        }
 
-       err = dma_resv_reserve_shared(shadow->resv, 1);
+       err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
        if (err)
                goto err_trampoline;
 
@@ -3114,7 +3114,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
                /* Allocate a request for this batch buffer nice and early. */
                eb->requests[i] = i915_request_create(eb_find_context(eb, i));
                if (IS_ERR(eb->requests[i])) {
-                       out_fence = ERR_PTR(PTR_ERR(eb->requests[i]));
+                       out_fence = ERR_CAST(eb->requests[i]);
                        eb->requests[i] = NULL;
                        return out_fence;
                }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index a57a6b7..c5150a1 100644
@@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
        .put_pages = i915_gem_object_put_pages_internal,
 };
 
-/**
- * i915_gem_object_create_internal: create an object with volatile pages
- * @i915: the i915 device
- * @size: the size in bytes of backing storage to allocate for the object
- *
- * Creates a new object that wraps some internal memory for private use.
- * This object is not backed by swappable storage, and as such its contents
- * are volatile and only valid whilst pinned. If the object is reaped by the
- * shrinker, its pages and data will be discarded. Equally, it is not a full
- * GEM object and so not valid for access from userspace. This makes it useful
- * for hardware interfaces like ringbuffers (which are pinned from the time
- * the request is written to the time the hardware stops accessing it), but
- * not for contexts (which need to be preserved when not active for later
- * reuse). Note that it is not cleared upon allocation.
- */
 struct drm_i915_gem_object *
-i915_gem_object_create_internal(struct drm_i915_private *i915,
-                               phys_addr_t size)
+__i915_gem_object_create_internal(struct drm_i915_private *i915,
+                                 const struct drm_i915_gem_object_ops *ops,
+                                 phys_addr_t size)
 {
        static struct lock_class_key lock_class;
        struct drm_i915_gem_object *obj;
@@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
                return ERR_PTR(-ENOMEM);
 
        drm_gem_private_object_init(&i915->drm, &obj->base, size);
-       i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0);
+       i915_gem_object_init(obj, ops, &lock_class, 0);
        obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
 
        /*
@@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
 
        return obj;
 }
+
+/**
+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+                               phys_addr_t size)
+{
+       return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 1e426a4..591ee3c 100644
@@ -114,18 +114,21 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
                                         unsigned int cache_level)
 {
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
        obj->cache_level = cache_level;
 
        if (cache_level != I915_CACHE_NONE)
                obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
                                       I915_BO_CACHE_COHERENT_FOR_WRITE);
-       else if (HAS_LLC(to_i915(obj->base.dev)))
+       else if (HAS_LLC(i915))
                obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
        else
                obj->cache_coherent = 0;
 
        obj->cache_dirty =
-               !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+               !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+               !IS_DGFX(i915);
 }
 
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
@@ -363,15 +366,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
         */
        atomic_inc(&i915->mm.free_count);
 
-       /*
-        * This serializes freeing with the shrinker. Since the free
-        * is delayed, first by RCU then by the workqueue, we want the
-        * shrinker to be able to free pages of unreferenced objects,
-        * or else we may oom whilst there are plenty of deferred
-        * freed objects.
-        */
-       i915_gem_object_make_unshrinkable(obj);
-
        /*
         * Since we require blocking on struct_mutex to unbind the freed
         * object from the GPU before releasing resources back to the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 5920180..133963b 100644
@@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
@@ -295,6 +294,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
        return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
 }
 
+static inline bool
+i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj)
+{
+       return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
+}
+
 static inline bool
 i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
 {
@@ -449,7 +454,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 }
 
 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
 
 /**
@@ -517,6 +522,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj);
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
@@ -533,25 +539,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 
-static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-       if (obj->cache_dirty)
-               return false;
-
-       if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-               return true;
-
-       /* Currently in use by HW (display engine)? Keep flushed. */
-       return i915_gem_object_is_framebuffer(obj);
-}
-
 static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
        obj->read_domains = I915_GEM_DOMAIN_CPU;
        obj->write_domain = I915_GEM_DOMAIN_CPU;
-       if (cpu_write_needs_clflush(obj))
+       if (i915_gem_cpu_write_needs_clflush(obj))
                obj->cache_dirty = true;
 }
 
@@ -613,6 +609,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
 bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
                                        enum intel_memory_type type);
 
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+                        size_t size, struct intel_memory_region *mr,
+                        struct address_space *mapping,
+                        unsigned int max_segment);
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+                        bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline bool
 i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da85169..604ed5a 100644
@@ -34,9 +34,11 @@ struct i915_lut_handle {
 
 struct drm_i915_gem_object_ops {
        unsigned int flags;
-#define I915_GEM_OBJECT_IS_SHRINKABLE  BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY       BIT(2)
-#define I915_GEM_OBJECT_NO_MMAP                BIT(3)
+#define I915_GEM_OBJECT_IS_SHRINKABLE                  BIT(1)
+/* Skip the shrinker management in set_pages/unset_pages */
+#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST       BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY                       BIT(3)
+#define I915_GEM_OBJECT_NO_MMAP                                BIT(4)
 
        /* Interface between the GEM object and its backing storage.
         * get_pages() is called once prior to the use of the associated set
@@ -54,8 +56,10 @@ struct drm_i915_gem_object_ops {
        int (*get_pages)(struct drm_i915_gem_object *obj);
        void (*put_pages)(struct drm_i915_gem_object *obj,
                          struct sg_table *pages);
-       void (*truncate)(struct drm_i915_gem_object *obj);
+       int (*truncate)(struct drm_i915_gem_object *obj);
        void (*writeback)(struct drm_i915_gem_object *obj);
+       int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
+                                     bool should_writeback);
 
        int (*pread)(struct drm_i915_gem_object *obj,
                     const struct drm_i915_gem_pread *arg);
@@ -486,8 +490,36 @@ struct drm_i915_gem_object {
                 * instead go through the pin/unpin interfaces.
                 */
                atomic_t pages_pin_count;
+
+               /**
+                * @shrink_pin: Prevents the pages from being made visible to
+                * the shrinker, while the shrink_pin is non-zero. Most users
+                * should pretty much never have to care about this, outside of
+                * some special use cases.
+                *
+                * By default most objects will start out as visible to the
+                * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
+                * backing pages are attached to the object, like in
+                * __i915_gem_object_set_pages(). They will then be removed the
+                * shrinker list once the pages are released.
+                *
+                * The @shrink_pin is incremented by calling
+                * i915_gem_object_make_unshrinkable(), which will also remove
+                * the object from the shrinker list, if the pin count was zero.
+                *
+                * Callers will then typically call
+                * i915_gem_object_make_shrinkable() or
+                * i915_gem_object_make_purgeable() to decrement the pin count,
+                * and make the pages visible again.
+                */
                atomic_t shrink_pin;
 
+               /**
+                * @ttm_shrinkable: True when the object is using shmem pages
+                * underneath. Protected by the object lock.
+                */
+               bool ttm_shrinkable;
+
                /**
                 * Priority list of potential placements for this object.
                 */
@@ -512,6 +544,7 @@ struct drm_i915_gem_object {
                 */
                struct list_head region_link;
 
+               struct i915_refct_sgt *rsgt;
                struct sg_table *pages;
                void *mapping;
 
@@ -547,7 +580,7 @@ struct drm_i915_gem_object {
                struct i915_gem_object_page_iter get_dma_page;
 
                /**
-                * Element within i915->mm.unbound_list or i915->mm.bound_list,
+                * Element within i915->mm.shrink_list or i915->mm.purge_list,
                 * locked by i915->mm.obj_lock.
                 */
                struct list_head link;
@@ -565,7 +598,7 @@ struct drm_i915_gem_object {
        } mm;
 
        struct {
-               struct sg_table *cached_io_st;
+               struct i915_refct_sgt *cached_io_rsgt;
                struct i915_gem_object_page_iter get_io_page;
                struct drm_i915_gem_object *backup;
                bool created:1;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a..c4f684b 100644
@@ -26,6 +26,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 
        /* Make the pages coherent with the GPU (flushing any swapin). */
        if (obj->cache_dirty) {
+               WARN_ON_ONCE(IS_DGFX(i915));
                obj->write_domain = 0;
                if (i915_gem_object_has_struct_page(obj))
                        drm_clflush_sg(pages);
@@ -68,7 +69,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
                shrinkable = false;
        }
 
-       if (shrinkable) {
+       if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
                struct list_head *list;
                unsigned long flags;
 
@@ -158,11 +159,13 @@ retry:
 }
 
 /* Immediately discard the backing storage */
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 {
        drm_gem_free_mmap_offset(&obj->base);
        if (obj->ops->truncate)
-               obj->ops->truncate(obj);
+               return obj->ops->truncate(obj);
+
+       return 0;
 }
 
 /* Try to discard unwanted pages */
@@ -208,7 +211,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
        if (i915_gem_object_is_volatile(obj))
                obj->mm.madv = I915_MADV_WILLNEED;
 
-       i915_gem_object_make_unshrinkable(obj);
+       if (!i915_gem_object_has_self_managed_shrink_list(obj))
+               i915_gem_object_make_unshrinkable(obj);
 
        if (obj->mm.mapping) {
                unmap_object(obj, page_mask_bits(obj->mm.mapping));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index d77da59..4a88c89 100644
@@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec)
        cond_resched();
 }
 
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+                        bool dirty, bool backup)
 {
-       struct drm_i915_private *i915 = to_i915(obj->base.dev);
-       struct intel_memory_region *mem = obj->mm.region;
-       const unsigned long page_count = obj->base.size / PAGE_SIZE;
+       struct sgt_iter sgt_iter;
+       struct pagevec pvec;
+       struct page *page;
+
+       mapping_clear_unevictable(mapping);
+
+       pagevec_init(&pvec);
+       for_each_sgt_page(page, sgt_iter, st) {
+               if (dirty)
+                       set_page_dirty(page);
+
+               if (backup)
+                       mark_page_accessed(page);
+
+               if (!pagevec_add(&pvec, page))
+                       check_release_pagevec(&pvec);
+       }
+       if (pagevec_count(&pvec))
+               check_release_pagevec(&pvec);
+
+       sg_free_table(st);
+}
+
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+                        size_t size, struct intel_memory_region *mr,
+                        struct address_space *mapping,
+                        unsigned int max_segment)
+{
+       const unsigned long page_count = size / PAGE_SIZE;
        unsigned long i;
-       struct address_space *mapping;
-       struct sg_table *st;
        struct scatterlist *sg;
-       struct sgt_iter sgt_iter;
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
-       unsigned int max_segment = i915_sg_segment_size();
-       unsigned int sg_page_sizes;
        gfp_t noreclaim;
        int ret;
 
-       /*
-        * Assert that the object is not currently in any GPU domain. As it
-        * wasn't in the GTT, there shouldn't be any way it could have been in
-        * a GPU cache
-        */
-       GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-       GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
        /*
         * If there's no chance of allocating enough pages for the whole
         * object, bail early.
         */
-       if (obj->base.size > resource_size(&mem->region))
+       if (size > resource_size(&mr->region))
                return -ENOMEM;
 
-       st = kmalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
+       if (sg_alloc_table(st, page_count, GFP_KERNEL))
                return -ENOMEM;
 
-rebuild_st:
-       if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
-               kfree(st);
-               return -ENOMEM;
-       }
-
        /*
         * Get the list of pages out of our struct file.  They'll be pinned
         * at this point until we release them.
         *
         * Fail silently without starting the shrinker
         */
-       mapping = obj->base.filp->f_mapping;
        mapping_set_unevictable(mapping);
        noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
        noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 
        sg = st->sgl;
        st->nents = 0;
-       sg_page_sizes = 0;
        for (i = 0; i < page_count; i++) {
                const unsigned int shrink[] = {
                        I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +140,9 @@ rebuild_st:
                if (!i ||
                    sg->length >= max_segment ||
                    page_to_pfn(page) != last_pfn + 1) {
-                       if (i) {
-                               sg_page_sizes |= sg->length;
+                       if (i)
                                sg = sg_next(sg);
-                       }
+
                        st->nents++;
                        sg_set_page(sg, page, PAGE_SIZE, 0);
                } else {
@@ -149,14 +153,67 @@ rebuild_st:
                /* Check that the i965g/gm workaround works. */
                GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
        }
-       if (sg) { /* loop terminated early; short sg table */
-               sg_page_sizes |= sg->length;
+       if (sg) /* loop terminated early; short sg table */
                sg_mark_end(sg);
-       }
 
        /* Trim unused sg entries to avoid wasting memory. */
        i915_sg_trim(st);
 
+       return 0;
+err_sg:
+       sg_mark_end(sg);
+       if (sg != st->sgl) {
+               shmem_sg_free_table(st, mapping, false, false);
+       } else {
+               mapping_clear_unevictable(mapping);
+               sg_free_table(st);
+       }
+
+       /*
+        * shmemfs first checks if there is enough memory to allocate the page
+        * and reports ENOSPC should there be insufficient, along with the usual
+        * ENOMEM for a genuine allocation failure.
+        *
+        * We use ENOSPC in our driver to mean that we have run out of aperture
+        * space and so want to translate the error from shmemfs back to our
+        * usual understanding of ENOMEM.
+        */
+       if (ret == -ENOSPC)
+               ret = -ENOMEM;
+
+       return ret;
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+       struct intel_memory_region *mem = obj->mm.region;
+       struct address_space *mapping = obj->base.filp->f_mapping;
+       const unsigned long page_count = obj->base.size / PAGE_SIZE;
+       unsigned int max_segment = i915_sg_segment_size();
+       struct sg_table *st;
+       struct sgt_iter sgt_iter;
+       struct page *page;
+       int ret;
+
+       /*
+        * Assert that the object is not currently in any GPU domain. As it
+        * wasn't in the GTT, there shouldn't be any way it could have been in
+        * a GPU cache
+        */
+       GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+       GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+rebuild_st:
+       st = kmalloc(sizeof(*st), GFP_KERNEL);
+       if (!st)
+               return -ENOMEM;
+
+       ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
+                                  max_segment);
+       if (ret)
+               goto err_st;
+
        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                /*
@@ -168,6 +225,7 @@ rebuild_st:
                        for_each_sgt_page(page, sgt_iter, st)
                                put_page(page);
                        sg_free_table(st);
+                       kfree(st);
 
                        max_segment = PAGE_SIZE;
                        goto rebuild_st;
@@ -185,28 +243,12 @@ rebuild_st:
        if (i915_gem_object_can_bypass_llc(obj))
                obj->cache_dirty = true;
 
-       __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+       __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
 
        return 0;
 
-err_sg:
-       sg_mark_end(sg);
 err_pages:
-       mapping_clear_unevictable(mapping);
-       if (sg != st->sgl) {
-               struct pagevec pvec;
-
-               pagevec_init(&pvec);
-               for_each_sgt_page(page, sgt_iter, st) {
-                       if (!pagevec_add(&pvec, page))
-                               check_release_pagevec(&pvec);
-               }
-               if (pagevec_count(&pvec))
-                       check_release_pagevec(&pvec);
-       }
-       sg_free_table(st);
-       kfree(st);
-
+       shmem_sg_free_table(st, mapping, false, false);
        /*
         * shmemfs first checks if there is enough memory to allocate the page
         * and reports ENOSPC should there be insufficient, along with the usual
@@ -216,13 +258,16 @@ err_pages:
         * space and so want to translate the error from shmemfs back to our
         * usual understanding of ENOMEM.
         */
+err_st:
        if (ret == -ENOSPC)
                ret = -ENOMEM;
 
+       kfree(st);
+
        return ret;
 }
 
-static void
+static int
 shmem_truncate(struct drm_i915_gem_object *obj)
 {
        /*
@@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj)
        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
        obj->mm.madv = __I915_MADV_PURGED;
        obj->mm.pages = ERR_PTR(-EFAULT);
+
+       return 0;
 }
 
-static void
-shmem_writeback(struct drm_i915_gem_object *obj)
+void __shmem_writeback(size_t size, struct address_space *mapping)
 {
-       struct address_space *mapping;
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
                .nr_to_write = SWAP_CLUSTER_MAX,
@@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj)
         * instead of invoking writeback so they are aged and paged out
         * as normal.
         */
-       mapping = obj->base.filp->f_mapping;
 
        /* Begin writeback on each dirty page */
-       for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+       for (i = 0; i < size >> PAGE_SHIFT; i++) {
                struct page *page;
 
                page = find_lock_page(mapping, i);
@@ -281,6 +325,12 @@ put:
        }
 }
 
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+       __shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
                                struct sg_table *pages,
@@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
 {
-       struct sgt_iter sgt_iter;
-       struct pagevec pvec;
-       struct page *page;
-
-       GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
        __i915_gem_object_release_shmem(obj, pages, true);
 
        i915_gem_gtt_finish_pages(obj, pages);
@@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_save_bit_17_swizzle(obj, pages);
 
-       mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-       pagevec_init(&pvec);
-       for_each_sgt_page(page, sgt_iter, pages) {
-               if (obj->mm.dirty)
-                       set_page_dirty(page);
-
-               if (obj->mm.madv == I915_MADV_WILLNEED)
-                       mark_page_accessed(page);
-
-               if (!pagevec_add(&pvec, page))
-                       check_release_pagevec(&pvec);
-       }
-       if (pagevec_count(&pvec))
-               check_release_pagevec(&pvec);
-       obj->mm.dirty = false;
-
-       sg_free_table(pages);
+       shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
+                           obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
        kfree(pages);
+       obj->mm.dirty = false;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 5ab136f..dde0a5c 100644
@@ -56,19 +56,24 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
        return false;
 }
 
-static void try_to_writeback(struct drm_i915_gem_object *obj,
-                            unsigned int flags)
+static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
 {
+       if (obj->ops->shrinker_release_pages)
+               return obj->ops->shrinker_release_pages(obj,
+                                                       flags & I915_SHRINK_WRITEBACK);
+
        switch (obj->mm.madv) {
        case I915_MADV_DONTNEED:
                i915_gem_object_truncate(obj);
-               return;
+               return 0;
        case __I915_MADV_PURGED:
-               return;
+               return 0;
        }
 
        if (flags & I915_SHRINK_WRITEBACK)
                i915_gem_object_writeback(obj);
+
+       return 0;
 }
 
 /**
@@ -222,8 +227,8 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
                                }
 
                                if (!__i915_gem_object_put_pages(obj)) {
-                                       try_to_writeback(obj, shrink);
-                                       count += obj->base.size >> PAGE_SHIFT;
+                                       if (!try_to_writeback(obj, shrink))
+                                               count += obj->base.size >> PAGE_SHIFT;
                                }
                                if (!ww)
                                        i915_gem_object_unlock(obj);
@@ -458,6 +463,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 #define obj_to_i915(obj__) to_i915((obj__)->base.dev)
 
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default all object types that support shrinking(see IS_SHRINKABLE), will also
+ * make the object visible to the shrinker after allocating the system memory
+ * pages.
+ * @obj: The GEM object.
+ *
+ * This is typically used for special kernel internal objects that can't be
+ * easily processed by the shrinker, like if they are perma-pinned.
+ */
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *i915 = obj_to_i915(obj);
@@ -482,13 +497,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
        spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
-static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
-                                             struct list_head *head)
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+                                              struct list_head *head)
 {
        struct drm_i915_private *i915 = obj_to_i915(obj);
        unsigned long flags;
 
-       GEM_BUG_ON(!i915_gem_object_has_pages(obj));
        if (!i915_gem_object_is_shrinkable(obj))
                return;
 
@@ -508,14 +522,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
        spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+       ___i915_gem_object_make_shrinkable(obj,
+                                          &obj_to_i915(obj)->mm.shrink_list);
+}
+
+/**
+ * __i915_gem_object_make_purgeable - Move the object to the tail of the
+ * purgeable list. Objects on this list might be swapped out. Used with
+ * DONTNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+       ___i915_gem_object_make_shrinkable(obj,
+                                          &obj_to_i915(obj)->mm.purge_list);
+}
+
+/**
+ * i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
 {
-       __i915_gem_object_make_shrinkable(obj,
-                                         &obj_to_i915(obj)->mm.shrink_list);
+       GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+       __i915_gem_object_make_shrinkable(obj);
 }
 
+/**
+ * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
+ * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
+ * shrinker will attempt to discard the backing pages, instead of trying to swap
+ * them out.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
 {
-       __i915_gem_object_make_shrinkable(obj,
-                                         &obj_to_i915(obj)->mm.purge_list);
+       GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+       __i915_gem_object_make_purgeable(obj);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 74a1ffd..d08a270 100644
 #include "gem/i915_gem_object.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
 #include "gem/i915_gem_ttm_pm.h"
 
-
-#include "gt/intel_engine_pm.h"
-#include "gt/intel_gt.h"
-#include "gt/intel_migrate.h"
-
 #define I915_TTM_PRIO_PURGE     0
 #define I915_TTM_PRIO_NO_PAGES  1
 #define I915_TTM_PRIO_HAS_PAGES 2
@@ -34,7 +30,9 @@
  * struct i915_ttm_tt - TTM page vector with additional private information
  * @ttm: The base TTM page vector.
  * @dev: The struct device used for dma mapping and unmapping.
- * @cached_st: The cached scatter-gather table.
+ * @cached_rsgt: The cached scatter-gather table.
+ * @is_shmem: Set if using shmem.
+ * @filp: The shmem file, if using shmem backend.
  *
  * Note that DMA may be going on right up to the point where the page-
  * vector is unpopulated in delayed destroy. Hence keep the
 struct i915_ttm_tt {
        struct ttm_tt ttm;
        struct device *dev;
-       struct sg_table *cached_st;
+       struct i915_refct_sgt cached_rsgt;
+
+       bool is_shmem;
+       struct file *filp;
 };
 
 static const struct ttm_place sys_placement_flags = {
@@ -103,37 +104,15 @@ static int i915_ttm_err_to_gem(int err)
        return err;
 }
 
-static bool gpu_binds_iomem(struct ttm_resource *mem)
-{
-       return mem->mem_type != TTM_PL_SYSTEM;
-}
-
-static bool cpu_maps_iomem(struct ttm_resource *mem)
-{
-       /* Once / if we support GGTT, this is also false for cached ttm_tts */
-       return mem->mem_type != TTM_PL_SYSTEM;
-}
-
-static enum i915_cache_level
-i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
-                    struct ttm_tt *ttm)
-{
-       return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) &&
-               ttm->caching == ttm_cached) ? I915_CACHE_LLC :
-               I915_CACHE_NONE;
-}
-
-static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
-
 static enum ttm_caching
 i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
 {
        /*
-        * Objects only allowed in system get cached cpu-mappings.
-        * Other objects get WC mapping for now. Even if in system.
+        * Objects only allowed in system get cached cpu-mappings, or when
+        * evicting lmem-only buffers to system for swapping. Other objects get
+        * WC mapping for now. Even if in system.
         */
-       if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
-           obj->mm.n_placements <= 1)
+       if (obj->mm.n_placements <= 1)
                return ttm_cached;
 
        return ttm_write_combined;
@@ -179,12 +158,97 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
        placement->busy_placement = busy;
 }
 
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+                                     struct ttm_tt *ttm,
+                                     struct ttm_operation_ctx *ctx)
+{
+       struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+       struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+       struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+       const unsigned int max_segment = i915_sg_segment_size();
+       const size_t size = ttm->num_pages << PAGE_SHIFT;
+       struct file *filp = i915_tt->filp;
+       struct sgt_iter sgt_iter;
+       struct sg_table *st;
+       struct page *page;
+       unsigned long i;
+       int err;
+
+       if (!filp) {
+               struct address_space *mapping;
+               gfp_t mask;
+
+               filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
+               if (IS_ERR(filp))
+                       return PTR_ERR(filp);
+
+               mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+
+               mapping = filp->f_mapping;
+               mapping_set_gfp_mask(mapping, mask);
+               GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+               i915_tt->filp = filp;
+       }
+
+       st = &i915_tt->cached_rsgt.table;
+       err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping,
+                                  max_segment);
+       if (err)
+               return err;
+
+       err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+                             DMA_ATTR_SKIP_CPU_SYNC);
+       if (err)
+               goto err_free_st;
+
+       i = 0;
+       for_each_sgt_page(page, sgt_iter, st)
+               ttm->pages[i++] = page;
+
+       if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+               ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+
+       return 0;
+
+err_free_st:
+       shmem_sg_free_table(st, filp->f_mapping, false, false);
+
+       return err;
+}
+
+static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
+{
+       struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+       bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+       shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping,
+                           backup, backup);
+}
+
+static void i915_ttm_tt_release(struct kref *ref)
+{
+       struct i915_ttm_tt *i915_tt =
+               container_of(ref, typeof(*i915_tt), cached_rsgt.kref);
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+       GEM_WARN_ON(st->sgl);
+
+       kfree(i915_tt);
+}
+
+static const struct i915_refct_sgt_ops tt_rsgt_ops = {
+       .release = i915_ttm_tt_release
+};
+
 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
                                         uint32_t page_flags)
 {
        struct ttm_resource_manager *man =
                ttm_manager_type(bo->bdev, bo->resource->mem_type);
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       enum ttm_caching caching = i915_ttm_select_tt_caching(obj);
        struct i915_ttm_tt *i915_tt;
        int ret;
 
@@ -196,38 +260,65 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
            man->use_tt)
                page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
 
-       ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
-                         i915_ttm_select_tt_caching(obj));
-       if (ret) {
-               kfree(i915_tt);
-               return NULL;
+       if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+               page_flags |= TTM_TT_FLAG_EXTERNAL |
+                             TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+               i915_tt->is_shmem = true;
        }
 
+       ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching);
+       if (ret)
+               goto err_free;
+
+       __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size,
+                             &tt_rsgt_ops);
+
        i915_tt->dev = obj->base.dev->dev;
 
        return &i915_tt->ttm;
+
+err_free:
+       kfree(i915_tt);
+       return NULL;
+}
+
+static int i915_ttm_tt_populate(struct ttm_device *bdev,
+                               struct ttm_tt *ttm,
+                               struct ttm_operation_ctx *ctx)
+{
+       struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+       if (i915_tt->is_shmem)
+               return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
+
+       return ttm_pool_alloc(&bdev->pool, ttm, ctx);
 }
 
 static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 {
        struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
 
-       if (i915_tt->cached_st) {
-               dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
-                                 DMA_BIDIRECTIONAL, 0);
-               sg_free_table(i915_tt->cached_st);
-               kfree(i915_tt->cached_st);
-               i915_tt->cached_st = NULL;
+       if (st->sgl)
+               dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+
+       if (i915_tt->is_shmem) {
+               i915_ttm_tt_shmem_unpopulate(ttm);
+       } else {
+               sg_free_table(st);
+               ttm_pool_free(&bdev->pool, ttm);
        }
-       ttm_pool_free(&bdev->pool, ttm);
 }
 
 static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
 {
        struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
 
+       if (i915_tt->filp)
+               fput(i915_tt->filp);
+
        ttm_tt_fini(ttm);
-       kfree(i915_tt);
+       i915_refct_sgt_put(&i915_tt->cached_rsgt);
 }
 
 static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
@@ -235,6 +326,14 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
 {
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
+       /*
+        * EXTERNAL objects should never be swapped out by TTM, instead we need
+        * to handle that ourselves. TTM will already skip such objects for us,
+        * but we would like to avoid grabbing locks for no good reason.
+        */
+       if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+               return -EBUSY;
+
        /* Will do for now. Our pinned objects are still on TTM's LRU lists */
        return i915_gem_object_evictable(obj);
 }
@@ -245,28 +344,19 @@ static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
        *placement = i915_sys_placement;
 }
 
-static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
-{
-       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       int ret;
-
-       ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
-       if (ret)
-               return ret;
-
-       ret = __i915_gem_object_put_pages(obj);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information
+ * @obj: The GEM object
+ * This function frees any LMEM-related information that is cached on
+ * the object. For example the radix tree for fast page lookup and the
+ * cached refcounted sg-table
+ */
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj)
 {
        struct radix_tree_iter iter;
        void __rcu **slot;
 
-       if (!obj->ttm.cached_io_st)
+       if (!obj->ttm.cached_io_rsgt)
                return;
 
        rcu_read_lock();
@@ -274,63 +364,65 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
                radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
        rcu_read_unlock();
 
-       sg_free_table(obj->ttm.cached_io_st);
-       kfree(obj->ttm.cached_io_st);
-       obj->ttm.cached_io_st = NULL;
+       i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
+       obj->ttm.cached_io_rsgt = NULL;
 }
 
-static void
-i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_purge - Clear an object of its memory
+ * @obj: The object
+ *
+ * This function is called to clear an object of it's memory when it is
+ * marked as not needed anymore.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int i915_ttm_purge(struct drm_i915_gem_object *obj)
 {
        struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+       struct i915_ttm_tt *i915_tt =
+               container_of(bo->ttm, typeof(*i915_tt), ttm);
+       struct ttm_operation_ctx ctx = {
+               .interruptible = true,
+               .no_wait_gpu = false,
+       };
+       struct ttm_placement place = {};
+       int ret;
 
-       if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
-               obj->write_domain = I915_GEM_DOMAIN_WC;
-               obj->read_domains = I915_GEM_DOMAIN_WC;
-       } else {
-               obj->write_domain = I915_GEM_DOMAIN_CPU;
-               obj->read_domains = I915_GEM_DOMAIN_CPU;
-       }
-}
+       if (obj->mm.madv == __I915_MADV_PURGED)
+               return 0;
 
-static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
-{
-       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
-       unsigned int cache_level;
-       unsigned int i;
+       ret = ttm_bo_validate(bo, &place, &ctx);
+       if (ret)
+               return ret;
 
-       /*
-        * If object was moved to an allowable region, update the object
-        * region to consider it migrated. Note that if it's currently not
-        * in an allowable region, it's evicted and we don't update the
-        * object region.
-        */
-       if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
-               for (i = 0; i < obj->mm.n_placements; ++i) {
-                       struct intel_memory_region *mr = obj->mm.placements[i];
-
-                       if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
-                           mr != obj->mm.region) {
-                               i915_gem_object_release_memory_region(obj);
-                               i915_gem_object_init_memory_region(obj, mr);
-                               break;
-                       }
-               }
+       if (bo->ttm && i915_tt->filp) {
+               /*
+                * The below fput(which eventually calls shmem_truncate) might
+                * be delayed by worker, so when directly called to purge the
+                * pages(like by the shrinker) we should try to be more
+                * aggressive and release the pages immediately.
+                */
+               shmem_truncate_range(file_inode(i915_tt->filp),
+                                    0, (loff_t)-1);
+               fput(fetch_and_zero(&i915_tt->filp));
        }
 
-       obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
-
-       obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
-               I915_BO_FLAG_STRUCT_PAGE;
+       obj->write_domain = 0;
+       obj->read_domains = 0;
+       i915_ttm_adjust_gem_after_move(obj);
+       i915_ttm_free_cached_io_rsgt(obj);
+       obj->mm.madv = __I915_MADV_PURGED;
 
-       cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-                                          bo->ttm);
-       i915_gem_object_set_cache_coherency(obj, cache_level);
+       return 0;
 }
 
-static void i915_ttm_purge(struct drm_i915_gem_object *obj)
+static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
+                                          bool should_writeback)
 {
        struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+       struct i915_ttm_tt *i915_tt =
+               container_of(bo->ttm, typeof(*i915_tt), ttm);
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
@@ -338,29 +430,35 @@ static void i915_ttm_purge(struct drm_i915_gem_object *obj)
        struct ttm_placement place = {};
        int ret;
 
-       if (obj->mm.madv == __I915_MADV_PURGED)
-               return;
+       if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
+               return 0;
+
+       GEM_BUG_ON(!i915_tt->is_shmem);
 
-       /* TTM's purge interface. Note that we might be reentering. */
+       if (!i915_tt->filp)
+               return 0;
+
+       switch (obj->mm.madv) {
+       case I915_MADV_DONTNEED:
+               return i915_ttm_purge(obj);
+       case __I915_MADV_PURGED:
+               return 0;
+       }
+
+       if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+               return 0;
+
+       bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED;
        ret = ttm_bo_validate(bo, &place, &ctx);
-       if (!ret) {
-               obj->write_domain = 0;
-               obj->read_domains = 0;
-               i915_ttm_adjust_gem_after_move(obj);
-               i915_ttm_free_cached_io_st(obj);
-               obj->mm.madv = __I915_MADV_PURGED;
+       if (ret) {
+               bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+               return ret;
        }
-}
 
-static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
-{
-       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       int ret = i915_ttm_move_notify(bo);
+       if (should_writeback)
+               __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
 
-       GEM_WARN_ON(ret);
-       GEM_WARN_ON(obj->ttm.cached_io_st);
-       if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
-               i915_ttm_purge(obj);
+       return 0;
 }
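
A hedged sketch (not part of this patch) of how the shrinker side is expected to reach the hook above once it is wired into i915_gem_ttm_obj_ops further down; the dispatcher actually lives in the gem shrinker code, and the helper name here is invented:

static int try_shrinker_release(struct drm_i915_gem_object *obj,
                                bool should_writeback)
{
        /* Backends that manage their own backing store supply the hook. */
        if (obj->ops->shrinker_release_pages)
                return obj->ops->shrinker_release_pages(obj, should_writeback);

        /*
         * Nothing to do in this sketch; the real shrinker falls back to its
         * regular truncate/writeback handling.
         */
        return 0;
}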
 
 static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
@@ -369,232 +467,97 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 
        if (likely(obj)) {
                __i915_gem_object_pages_fini(obj);
-               i915_ttm_free_cached_io_st(obj);
+               i915_ttm_free_cached_io_rsgt(obj);
        }
 }
 
-static struct intel_memory_region *
-i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
-{
-       struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
-
-       /* There's some room for optimization here... */
-       GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-                  ttm_mem_type < I915_PL_LMEM0);
-       if (ttm_mem_type == I915_PL_SYSTEM)
-               return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
-                                                 0);
-
-       return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
-                                         ttm_mem_type - I915_PL_LMEM0);
-}
-
-static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
 {
        struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
        struct sg_table *st;
        int ret;
 
-       if (i915_tt->cached_st)
-               return i915_tt->cached_st;
-
-       st = kzalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
-               return ERR_PTR(-ENOMEM);
+       if (i915_tt->cached_rsgt.table.sgl)
+               return i915_refct_sgt_get(&i915_tt->cached_rsgt);
 
+       st = &i915_tt->cached_rsgt.table;
        ret = sg_alloc_table_from_pages_segment(st,
                        ttm->pages, ttm->num_pages,
                        0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
                        i915_sg_segment_size(), GFP_KERNEL);
        if (ret) {
-               kfree(st);
+               st->sgl = NULL;
                return ERR_PTR(ret);
        }
 
        ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
        if (ret) {
                sg_free_table(st);
-               kfree(st);
                return ERR_PTR(ret);
        }
 
-       i915_tt->cached_st = st;
-       return st;
+       return i915_refct_sgt_get(&i915_tt->cached_rsgt);
 }
 
-static struct sg_table *
+/**
+ * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the
+ * resource memory
+ * @obj: The GEM object used for sg-table caching
+ * @res: The struct ttm_resource for which an sg-table is requested.
+ *
+ * This function returns a refcounted sg-table representing the memory
+ * pointed to by @res. If @res is the object's current resource it may also
+ * cache the sg_table on the object or attempt to access an already cached
+ * sg-table. The refcounted sg-table needs to be put when no longer in use.
+ *
+ * Return: A valid pointer to a struct i915_refct_sgt or error pointer on
+ * failure.
+ */
+struct i915_refct_sgt *
 i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
                         struct ttm_resource *res)
 {
        struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
 
-       if (!gpu_binds_iomem(res))
+       if (!i915_ttm_gtt_binds_lmem(res))
                return i915_ttm_tt_get_st(bo->ttm);
 
        /*
         * If CPU mapping differs, we need to add the ttm_tt pages to
         * the resulting st. Might make sense for GGTT.
         */
-       GEM_WARN_ON(!cpu_maps_iomem(res));
-       return intel_region_ttm_resource_to_st(obj->mm.region, res);
-}
-
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
-                              bool clear,
-                              struct ttm_resource *dst_mem,
-                              struct ttm_tt *dst_ttm,
-                              struct sg_table *dst_st)
-{
-       struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
-                                                    bdev);
-       struct ttm_resource_manager *src_man =
-               ttm_manager_type(bo->bdev, bo->resource->mem_type);
-       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       struct sg_table *src_st;
-       struct i915_request *rq;
-       struct ttm_tt *src_ttm = bo->ttm;
-       enum i915_cache_level src_level, dst_level;
-       int ret;
+       GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res));
+       if (bo->resource == res) {
+               if (!obj->ttm.cached_io_rsgt) {
+                       struct i915_refct_sgt *rsgt;
 
-       if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
-               return -EINVAL;
+                       rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+                                                                res);
+                       if (IS_ERR(rsgt))
+                               return rsgt;
 
-       dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
-       if (clear) {
-               if (bo->type == ttm_bo_type_kernel)
-                       return -EINVAL;
-
-               intel_engine_pm_get(i915->gt.migrate.context->engine);
-               ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
-                                                 dst_st->sgl, dst_level,
-                                                 gpu_binds_iomem(dst_mem),
-                                                 0, &rq);
-
-               if (!ret && rq) {
-                       i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-                       i915_request_put(rq);
-               }
-               intel_engine_pm_put(i915->gt.migrate.context->engine);
-       } else {
-               src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
-                       obj->ttm.cached_io_st;
-
-               src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
-               intel_engine_pm_get(i915->gt.migrate.context->engine);
-               ret = intel_context_migrate_copy(i915->gt.migrate.context,
-                                                NULL, src_st->sgl, src_level,
-                                                gpu_binds_iomem(bo->resource),
-                                                dst_st->sgl, dst_level,
-                                                gpu_binds_iomem(dst_mem),
-                                                &rq);
-               if (!ret && rq) {
-                       i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-                       i915_request_put(rq);
+                       obj->ttm.cached_io_rsgt = rsgt;
                }
-               intel_engine_pm_put(i915->gt.migrate.context->engine);
+               return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
        }
 
-       return ret;
-}
-
-static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
-                           struct ttm_resource *dst_mem,
-                           struct ttm_tt *dst_ttm,
-                           struct sg_table *dst_st,
-                           bool allow_accel)
-{
-       int ret = -EINVAL;
-
-       if (allow_accel)
-               ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
-       if (ret) {
-               struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-               struct intel_memory_region *dst_reg, *src_reg;
-               union {
-                       struct ttm_kmap_iter_tt tt;
-                       struct ttm_kmap_iter_iomap io;
-               } _dst_iter, _src_iter;
-               struct ttm_kmap_iter *dst_iter, *src_iter;
-
-               dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
-               src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
-               GEM_BUG_ON(!dst_reg || !src_reg);
-
-               dst_iter = !cpu_maps_iomem(dst_mem) ?
-                       ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
-                       ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
-                                                dst_st, dst_reg->region.start);
-
-               src_iter = !cpu_maps_iomem(bo->resource) ?
-                       ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
-                       ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
-                                                obj->ttm.cached_io_st,
-                                                src_reg->region.start);
-
-               ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
-       }
+       return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
 }
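
To make the refcounting contract in the kernel-doc above concrete, here is a hedged usage sketch: every successful i915_ttm_resource_get_st() is paired with an i915_refct_sgt_put(), as i915_gem_obj_copy_ttm() further down also does (the wrapper name is illustrative only):

static int with_resource_sgt(struct drm_i915_gem_object *obj,
                             struct ttm_buffer_object *bo)
{
        struct i915_refct_sgt *rsgt;

        rsgt = i915_ttm_resource_get_st(obj, bo->resource);
        if (IS_ERR(rsgt))
                return PTR_ERR(rsgt);

        /* Consume rsgt->table.sgl here, e.g. hand it to a blit. */

        i915_refct_sgt_put(rsgt);
        return 0;
}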
 
-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
-                        struct ttm_operation_ctx *ctx,
-                        struct ttm_resource *dst_mem,
-                        struct ttm_place *hop)
+static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
 {
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       struct ttm_resource_manager *dst_man =
-               ttm_manager_type(bo->bdev, dst_mem->mem_type);
-       struct ttm_tt *ttm = bo->ttm;
-       struct sg_table *dst_st;
-       bool clear;
-       int ret;
-
-       /* Sync for now. We could do the actual copy async. */
-       ret = ttm_bo_wait_ctx(bo, ctx);
-       if (ret)
-               return ret;
-
-       ret = i915_ttm_move_notify(bo);
-       if (ret)
-               return ret;
+       int ret = i915_ttm_move_notify(bo);
 
-       if (obj->mm.madv != I915_MADV_WILLNEED) {
+       GEM_WARN_ON(ret);
+       GEM_WARN_ON(obj->ttm.cached_io_rsgt);
+       if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
                i915_ttm_purge(obj);
-               ttm_resource_free(bo, &dst_mem);
-               return 0;
-       }
-
-       /* Populate ttm with pages if needed. Typically system memory. */
-       if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
-               ret = ttm_tt_populate(bo->bdev, ttm, ctx);
-               if (ret)
-                       return ret;
-       }
-
-       dst_st = i915_ttm_resource_get_st(obj, dst_mem);
-       if (IS_ERR(dst_st))
-               return PTR_ERR(dst_st);
-
-       clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
-       if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
-               __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
-
-       ttm_bo_move_sync_cleanup(bo, dst_mem);
-       i915_ttm_adjust_domains_after_move(obj);
-       i915_ttm_free_cached_io_st(obj);
-
-       if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
-               obj->ttm.cached_io_st = dst_st;
-               obj->ttm.get_io_page.sg_pos = dst_st->sgl;
-               obj->ttm.get_io_page.sg_idx = 0;
-       }
-
-       i915_ttm_adjust_gem_after_move(obj);
-       return 0;
 }
 
 static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
 {
-       if (!cpu_maps_iomem(mem))
+       if (!i915_ttm_cpu_maps_iomem(mem))
                return 0;
 
        mem->bus.caching = ttm_write_combined;
@@ -620,6 +583,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 
 static struct ttm_device_funcs i915_ttm_bo_driver = {
        .ttm_tt_create = i915_ttm_tt_create,
+       .ttm_tt_populate = i915_ttm_tt_populate,
        .ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
        .ttm_tt_destroy = i915_ttm_tt_destroy,
        .eviction_valuable = i915_ttm_eviction_valuable,
@@ -649,7 +613,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
                .interruptible = true,
                .no_wait_gpu = false,
        };
-       struct sg_table *st;
        int real_num_busy;
        int ret;
 
@@ -676,7 +639,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
                        return i915_ttm_err_to_gem(ret);
        }
 
-       i915_ttm_adjust_lru(obj);
        if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
                ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
                if (ret)
@@ -687,14 +649,19 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
        }
 
        if (!i915_gem_object_has_pages(obj)) {
-               /* Object either has a page vector or is an iomem object */
-               st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
-               if (IS_ERR(st))
-                       return PTR_ERR(st);
+               struct i915_refct_sgt *rsgt =
+                       i915_ttm_resource_get_st(obj, bo->resource);
 
-               __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+               if (IS_ERR(rsgt))
+                       return PTR_ERR(rsgt);
+
+               GEM_BUG_ON(obj->mm.rsgt);
+               obj->mm.rsgt = rsgt;
+               __i915_gem_object_set_pages(obj, &rsgt->table,
+                                           i915_sg_dma_sizes(rsgt->table.sgl));
        }
 
+       i915_ttm_adjust_lru(obj);
        return ret;
 }
 
@@ -766,12 +733,21 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
         * and shrinkers will move it out if needed.
         */
 
-       i915_ttm_adjust_lru(obj);
+       if (obj->mm.rsgt)
+               i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt));
 }
 
-static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists.
+ * @obj: The object
+ */
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 {
        struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+       struct i915_ttm_tt *i915_tt =
+               container_of(bo->ttm, typeof(*i915_tt), ttm);
+       bool shrinkable =
+               bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm);
 
        /*
         * Don't manipulate the TTM LRUs while in TTM bo destruction.
@@ -780,11 +756,54 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
        if (!kref_read(&bo->kref))
                return;
 
+       /*
+        * We skip managing the shrinker LRU in set_pages() and just manage
+        * everything here. This does at least solve the issue with having
+        * temporary shmem mappings (like with evicted lmem) not being visible
+        * to the shrinker. Only our shmem objects are shrinkable; everything
+        * else we keep as unshrinkable.
+        *
+        * To make sure everything plays nice we keep an extra shrink pin in TTM
+        * if the underlying pages are not currently shrinkable. Once we release
+        * our pin, like when the pages are moved to shmem, the pages will then
+        * be added to the shrinker LRU, assuming the caller isn't also holding
+        * a pin.
+        *
+        * TODO: consider maybe also bumping the shrinker list here when we have
+        * already unpinned it, which should give us something more like an LRU.
+        *
+        * TODO: There is a small window of opportunity for this function to
+        * get called from eviction after we've dropped the last GEM refcount,
+        * but before the TTM deleted flag is set on the object. Avoid
+        * adjusting the shrinker list in such cases, since the object is
+        * not available to the shrinker anyway due to its zero refcount.
+        * To fix this properly we should move to a TTM shrinker LRU list for
+        * these objects.
+        */
+       if (kref_get_unless_zero(&obj->base.refcount)) {
+               if (shrinkable != obj->mm.ttm_shrinkable) {
+                       if (shrinkable) {
+                               if (obj->mm.madv == I915_MADV_WILLNEED)
+                                       __i915_gem_object_make_shrinkable(obj);
+                               else
+                                       __i915_gem_object_make_purgeable(obj);
+                       } else {
+                               i915_gem_object_make_unshrinkable(obj);
+                       }
+
+                       obj->mm.ttm_shrinkable = shrinkable;
+               }
+               i915_gem_object_put(obj);
+       }
+
        /*
         * Put on the correct LRU list depending on the MADV status
         */
        spin_lock(&bo->bdev->lru_lock);
-       if (obj->mm.madv != I915_MADV_WILLNEED) {
+       if (shrinkable) {
+               /* Try to keep shmem_tt from being considered for shrinking. */
+               bo->priority = TTM_MAX_BO_PRIORITY - 1;
+       } else if (obj->mm.madv != I915_MADV_WILLNEED) {
                bo->priority = I915_TTM_PRIO_PURGE;
        } else if (!i915_gem_object_has_pages(obj)) {
                if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
@@ -825,13 +844,34 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
        struct vm_area_struct *area = vmf->vma;
        struct drm_i915_gem_object *obj =
                i915_ttm_to_gem(area->vm_private_data);
+       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+       struct drm_device *dev = bo->base.dev;
+       vm_fault_t ret;
+       int idx;
 
        /* Sanity check that we allow writing into this object */
        if (unlikely(i915_gem_object_is_readonly(obj) &&
                     area->vm_flags & VM_WRITE))
                return VM_FAULT_SIGBUS;
 
-       return ttm_bo_vm_fault(vmf);
+       ret = ttm_bo_vm_reserve(bo, vmf);
+       if (ret)
+               return ret;
+
+       if (drm_dev_enter(dev, &idx)) {
+               ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+                                              TTM_BO_VM_NUM_PREFAULT, 1);
+               drm_dev_exit(idx);
+       } else {
+               ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+       }
+       if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+               return ret;
+
+       i915_ttm_adjust_lru(obj);
+
+       dma_resv_unlock(bo->base.resv);
+       return ret;
 }
 
 static int
@@ -882,13 +922,18 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
        .name = "i915_gem_object_ttm",
+       .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
+                I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
 
        .get_pages = i915_ttm_get_pages,
        .put_pages = i915_ttm_put_pages,
        .truncate = i915_ttm_purge,
+       .shrinker_release_pages = i915_ttm_shrinker_release_pages,
+
        .adjust_lru = i915_ttm_adjust_lru,
        .delayed_free = i915_ttm_delayed_free,
        .migrate = i915_ttm_migrate,
+
        .mmap_offset = i915_ttm_mmap_offset,
        .mmap_ops = &vm_ops_ttm,
 };
@@ -901,6 +946,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
        mutex_destroy(&obj->ttm.get_io_page.lock);
 
        if (obj->ttm.created) {
+               /*
+                * We freely manage the shrinker LRU outside of the mm.pages
+                * life cycle. As a result, when destroying the object we
+                * should be extra paranoid and ensure we remove it from the
+                * LRU before we free the object.
+                *
+                * Touching the ttm_shrinkable outside of the object lock here
+                * should be safe now that the last GEM object ref was dropped.
+                */
+               if (obj->mm.ttm_shrinkable)
+                       i915_gem_object_make_unshrinkable(obj);
+
                i915_ttm_backup_free(obj);
 
                /* This releases all gem object bindings to the backend. */
@@ -943,7 +1000,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
        obj->mm.region = intel_memory_region_get(mem);
        INIT_LIST_HEAD(&obj->mm.region_link);
 
-       i915_gem_object_make_unshrinkable(obj);
        INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
        mutex_init(&obj->ttm.get_io_page.lock);
        bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
@@ -954,6 +1010,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
        /* Forcing the page size is kernel internal only */
        GEM_BUG_ON(page_size && obj->mm.n_placements);
 
+       /*
+        * Keep an extra shrink pin to prevent the object from being made
+        * shrinkable too early. If the ttm_tt is ever allocated in shmem, we
+        * drop the pin. The TTM backend manages the shrinker LRU itself,
+        * outside of the normal mm.pages life cycle.
+        */
+       i915_gem_object_make_unshrinkable(obj);
+
        /*
         * If this function fails, it will call the destructor, but
         * our caller still owns the object. So no freeing in the
@@ -1023,7 +1087,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
        struct ttm_operation_ctx ctx = {
                .interruptible = intr,
        };
-       struct sg_table *dst_st;
+       struct i915_refct_sgt *dst_rsgt;
        int ret;
 
        assert_object_held(dst);
@@ -1038,11 +1102,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
        if (ret)
                return ret;
 
-       dst_st = gpu_binds_iomem(dst_bo->resource) ?
-               dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
-
+       dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
        __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
-                       dst_st, allow_accel);
+                       dst_rsgt, allow_accel);
+
+       i915_refct_sgt_put(dst_rsgt);
 
        return 0;
 }
index 0b7291d..074a7c0 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef _I915_GEM_TTM_H_
 #define _I915_GEM_TTM_H_
 
+#include <drm/ttm/ttm_placement.h>
+
 #include "gem/i915_gem_object_types.h"
 
 /**
@@ -60,4 +62,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 
 struct ttm_placement *i915_ttm_sys_placement(void);
 
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj);
+
+struct i915_refct_sgt *
+i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
+                        struct ttm_resource *res);
+
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
+
+int i915_ttm_purge(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as LMEM for GTT binding purposes,
+ * false otherwise.
+ */
+static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem)
+{
+       return mem->mem_type != I915_PL_SYSTEM;
+}
+
+/**
+ * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as IOMEM for CPU mapping purposes.
+ */
+static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
+{
+       /* Once / if we support GGTT, this is also false for cached ttm_tts */
+       return mem->mem_type != I915_PL_SYSTEM;
+}
 #endif
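
As a hedged illustration of the two inline helpers above (the function below is made up for this sketch), note that they answer distinct questions even though the answers currently coincide for all non-system placements:

static const char *describe_mem(struct ttm_resource *mem)
{
        if (i915_ttm_gtt_binds_lmem(mem))
                return i915_ttm_cpu_maps_iomem(mem) ?
                        "GTT: LMEM, CPU: write-combined iomem" :
                        "GTT: LMEM, CPU: cached pages";

        return "system memory";
}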
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
new file mode 100644 (file)
index 0000000..ef22d4e
--- /dev/null
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_bo_driver.h>
+
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_object.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_migrate.h"
+
+/**
+ * DOC: Selftest failure modes for failsafe migration:
+ *
+ * For fail_gpu_migration, the gpu blit scheduled is always a clear blit
+ * rather than a copy blit, and then we force the failure paths as if
+ * the blit fence returned an error.
+ *
+ * For fail_work_allocation we fail the kmalloc of the async worker and
+ * sync the gpu blit. If that then fails, or fail_gpu_migration is set to
+ * true, a memcpy operation is performed synchronously.
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+static bool fail_gpu_migration;
+static bool fail_work_allocation;
+
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+                                       bool work_allocation)
+{
+       fail_gpu_migration = gpu_migration;
+       fail_work_allocation = work_allocation;
+}
+#endif
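
A hedged sketch of how the failure modes documented above are meant to be driven from a selftest: enable them around a migration pass and always restore them afterwards, as the migrate selftest later in this diff does (the wrapper name is invented):

static int run_migrate_with_failures(int (*subtest)(void *arg), void *arg)
{
        int ret;

        i915_ttm_migrate_set_failure_modes(true, true);
        ret = subtest(arg);
        i915_ttm_migrate_set_failure_modes(false, false);

        return ret;
}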
+
+static enum i915_cache_level
+i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
+                    struct ttm_tt *ttm)
+{
+       return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+               !i915_ttm_gtt_binds_lmem(res) &&
+               ttm->caching == ttm_cached) ? I915_CACHE_LLC :
+               I915_CACHE_NONE;
+}
+
+static struct intel_memory_region *
+i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
+{
+       struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+
+       /* There's some room for optimization here... */
+       GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
+                  ttm_mem_type < I915_PL_LMEM0);
+       if (ttm_mem_type == I915_PL_SYSTEM)
+               return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
+                                                 0);
+
+       return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
+                                         ttm_mem_type - I915_PL_LMEM0);
+}
+
+/**
+ * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
+ * TTM move
+ * @obj: The gem object
+ */
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+{
+       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+       if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
+               obj->write_domain = I915_GEM_DOMAIN_WC;
+               obj->read_domains = I915_GEM_DOMAIN_WC;
+       } else {
+               obj->write_domain = I915_GEM_DOMAIN_CPU;
+               obj->read_domains = I915_GEM_DOMAIN_CPU;
+       }
+}
+
+/**
+ * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
+ * @obj: The gem object
+ *
+ * Adjusts the GEM object's region, mem_flags and cache coherency after a
+ * TTM move.
+ */
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
+{
+       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+       unsigned int cache_level;
+       unsigned int i;
+
+       /*
+        * If the object was moved to an allowable region, update the object
+        * region to consider it migrated. Note that if it's currently not
+        * in an allowable region, it's evicted and we don't update the
+        * object region.
+        */
+       if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
+               for (i = 0; i < obj->mm.n_placements; ++i) {
+                       struct intel_memory_region *mr = obj->mm.placements[i];
+
+                       if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
+                           mr != obj->mm.region) {
+                               i915_gem_object_release_memory_region(obj);
+                               i915_gem_object_init_memory_region(obj, mr);
+                               break;
+                       }
+               }
+       }
+
+       obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
+
+       obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
+               I915_BO_FLAG_STRUCT_PAGE;
+
+       cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
+                                          bo->ttm);
+       i915_gem_object_set_cache_coherency(obj, cache_level);
+}
+
+/**
+ * i915_ttm_move_notify - Prepare an object for move
+ * @bo: The ttm buffer object.
+ *
+ * This function prepares an object for move by removing all GPU bindings,
+ * removing all CPU mappings and finally releasing the pages sg-table.
+ *
+ * Return: 0 if successful, negative error code on error.
+ */
+int i915_ttm_move_notify(struct ttm_buffer_object *bo)
+{
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       int ret;
+
+       ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+       if (ret)
+               return ret;
+
+       ret = __i915_gem_object_put_pages(obj);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+                                            bool clear,
+                                            struct ttm_resource *dst_mem,
+                                            struct ttm_tt *dst_ttm,
+                                            struct sg_table *dst_st)
+{
+       struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+                                                    bdev);
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       struct i915_request *rq;
+       struct ttm_tt *src_ttm = bo->ttm;
+       enum i915_cache_level src_level, dst_level;
+       int ret;
+
+       if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
+               return ERR_PTR(-EINVAL);
+
+       /* With fail_gpu_migration, we always perform a GPU clear. */
+       if (I915_SELFTEST_ONLY(fail_gpu_migration))
+               clear = true;
+
+       dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
+       if (clear) {
+               if (bo->type == ttm_bo_type_kernel &&
+                   !I915_SELFTEST_ONLY(fail_gpu_migration))
+                       return ERR_PTR(-EINVAL);
+
+               intel_engine_pm_get(i915->gt.migrate.context->engine);
+               ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
+                                                 dst_st->sgl, dst_level,
+                                                 i915_ttm_gtt_binds_lmem(dst_mem),
+                                                 0, &rq);
+       } else {
+               struct i915_refct_sgt *src_rsgt =
+                       i915_ttm_resource_get_st(obj, bo->resource);
+
+               if (IS_ERR(src_rsgt))
+                       return ERR_CAST(src_rsgt);
+
+               src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
+               intel_engine_pm_get(i915->gt.migrate.context->engine);
+               ret = intel_context_migrate_copy(i915->gt.migrate.context,
+                                                NULL, src_rsgt->table.sgl,
+                                                src_level,
+                                                i915_ttm_gtt_binds_lmem(bo->resource),
+                                                dst_st->sgl, dst_level,
+                                                i915_ttm_gtt_binds_lmem(dst_mem),
+                                                &rq);
+
+               i915_refct_sgt_put(src_rsgt);
+       }
+
+       intel_engine_pm_put(i915->gt.migrate.context->engine);
+
+       if (ret && rq) {
+               i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+               i915_request_put(rq);
+       }
+
+       return ret ? ERR_PTR(ret) : &rq->fence;
+}
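
A hedged sketch of consuming the dma_fence returned above while moves are still synchronous; __i915_ttm_move() below does the same on its out: path (the wrapper is illustrative only):

static int accel_move_sync(struct ttm_buffer_object *bo, bool clear,
                           struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
                           struct i915_refct_sgt *dst_rsgt)
{
        struct dma_fence *fence;

        fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
                                    &dst_rsgt->table);
        if (IS_ERR(fence))
                return PTR_ERR(fence);

        /* Sync for now; a fully async caller would add a fence callback. */
        dma_fence_wait(fence, false);
        dma_fence_put(fence);
        return 0;
}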
+
+/**
+ * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @num_pages: Number of pages to copy or clear.
+ * @clear: Whether to clear instead of copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_arg {
+       union {
+               struct ttm_kmap_iter_tt tt;
+               struct ttm_kmap_iter_iomap io;
+       } _dst_iter,
+       _src_iter;
+       struct ttm_kmap_iter *dst_iter;
+       struct ttm_kmap_iter *src_iter;
+       unsigned long num_pages;
+       bool clear;
+       struct i915_refct_sgt *src_rsgt;
+       struct i915_refct_sgt *dst_rsgt;
+};
+
+/**
+ * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
+ * @fence: The dma-fence.
+ * @work: The work struct used for the memcpy work.
+ * @lock: The fence lock. Not used to protect anything else ATM.
+ * @irq_work: Low latency worker to signal the fence since it can't be done
+ * from the callback for lockdep reasons.
+ * @cb: Callback for the accelerated migration fence.
+ * @arg: The argument for the memcpy functionality.
+ */
+struct i915_ttm_memcpy_work {
+       struct dma_fence fence;
+       struct work_struct work;
+       /* The fence lock */
+       spinlock_t lock;
+       struct irq_work irq_work;
+       struct dma_fence_cb cb;
+       struct i915_ttm_memcpy_arg arg;
+};
+
+static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
+{
+       ttm_move_memcpy(arg->clear, arg->num_pages,
+                       arg->dst_iter, arg->src_iter);
+}
+
+static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
+                                struct ttm_buffer_object *bo, bool clear,
+                                struct ttm_resource *dst_mem,
+                                struct ttm_tt *dst_ttm,
+                                struct i915_refct_sgt *dst_rsgt)
+{
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       struct intel_memory_region *dst_reg, *src_reg;
+
+       dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+       src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+       GEM_BUG_ON(!dst_reg || !src_reg);
+
+       arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
+               ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
+               ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
+                                        &dst_rsgt->table, dst_reg->region.start);
+
+       arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
+               ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
+               ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
+                                        &obj->ttm.cached_io_rsgt->table,
+                                        src_reg->region.start);
+       arg->clear = clear;
+       arg->num_pages = bo->base.size >> PAGE_SHIFT;
+
+       arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+       arg->src_rsgt = clear ? NULL :
+               i915_ttm_resource_get_st(obj, bo->resource);
+}
+
+static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
+{
+       i915_refct_sgt_put(arg->src_rsgt);
+       i915_refct_sgt_put(arg->dst_rsgt);
+}
+
+static void __memcpy_work(struct work_struct *work)
+{
+       struct i915_ttm_memcpy_work *copy_work =
+               container_of(work, typeof(*copy_work), work);
+       struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+       bool cookie = dma_fence_begin_signalling();
+
+       i915_ttm_move_memcpy(arg);
+       dma_fence_end_signalling(cookie);
+
+       dma_fence_signal(&copy_work->fence);
+
+       i915_ttm_memcpy_release(arg);
+       dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_irq_work(struct irq_work *irq_work)
+{
+       struct i915_ttm_memcpy_work *copy_work =
+               container_of(irq_work, typeof(*copy_work), irq_work);
+       struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+
+       dma_fence_signal(&copy_work->fence);
+       i915_ttm_memcpy_release(arg);
+       dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       struct i915_ttm_memcpy_work *copy_work =
+               container_of(cb, typeof(*copy_work), cb);
+
+       if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
+               INIT_WORK(&copy_work->work, __memcpy_work);
+               queue_work(system_unbound_wq, &copy_work->work);
+       } else {
+               init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
+               irq_work_queue(&copy_work->irq_work);
+       }
+}
+
+static const char *get_driver_name(struct dma_fence *fence)
+{
+       return "i915_ttm_memcpy_work";
+}
+
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+       return "unbound";
+}
+
+static const struct dma_fence_ops dma_fence_memcpy_ops = {
+       .get_driver_name = get_driver_name,
+       .get_timeline_name = get_timeline_name,
+};
+
+static struct dma_fence *
+i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
+                        struct dma_fence *dep)
+{
+       int ret;
+
+       spin_lock_init(&work->lock);
+       dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
+       dma_fence_get(&work->fence);
+       ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
+       if (ret) {
+               if (ret != -ENOENT)
+                       dma_fence_wait(dep, false);
+
+               return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
+                              dep->error);
+       }
+
+       return &work->fence;
+}
+
+/**
+ * __i915_ttm_move - helper to perform TTM moves or clears.
+ * @bo: The source buffer object.
+ * @clear: Whether this is a clear operation.
+ * @dst_mem: The destination ttm resource.
+ * @dst_ttm: The destination ttm page vector.
+ * @dst_rsgt: The destination refcounted sg-list.
+ * @allow_accel: Whether to allow acceleration.
+ */
+void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+                    struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+                    struct i915_refct_sgt *dst_rsgt, bool allow_accel)
+{
+       struct i915_ttm_memcpy_work *copy_work = NULL;
+       struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
+       struct dma_fence *fence = ERR_PTR(-EINVAL);
+
+       if (allow_accel) {
+               fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+                                           &dst_rsgt->table);
+
+               /*
+                * We only need to intercept the error when moving to lmem.
+                * When moving to system, TTM or shmem will provide us with
+                * cleared pages.
+                */
+               if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
+                   !I915_SELFTEST_ONLY(fail_gpu_migration ||
+                                       fail_work_allocation))
+                       goto out;
+       }
+
+       /* If we've scheduled gpu migration, try to arm the error intercept. */
+       if (!IS_ERR(fence)) {
+               struct dma_fence *dep = fence;
+
+               if (!I915_SELFTEST_ONLY(fail_work_allocation))
+                       copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+
+               if (copy_work) {
+                       arg = &copy_work->arg;
+                       i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+                                            dst_rsgt);
+                       fence = i915_ttm_memcpy_work_arm(copy_work, dep);
+               } else {
+                       dma_fence_wait(dep, false);
+                       fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
+                                       -EINVAL : fence->error);
+               }
+               dma_fence_put(dep);
+
+               if (!IS_ERR(fence))
+                       goto out;
+       }
+
+       /* Error intercept failed or no accelerated migration to start with */
+       if (!copy_work)
+               i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+                                    dst_rsgt);
+       i915_ttm_move_memcpy(arg);
+       i915_ttm_memcpy_release(arg);
+       kfree(copy_work);
+
+       return;
+out:
+       /* Sync here for now, forward the fence to caller when fully async. */
+       if (fence) {
+               dma_fence_wait(fence, false);
+               dma_fence_put(fence);
+       } else if (copy_work) {
+               i915_ttm_memcpy_release(arg);
+               kfree(copy_work);
+       }
+}
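
For reference, a hedged sketch of driving __i915_ttm_move() for an object-to-object copy, mirroring i915_gem_obj_copy_ttm() earlier in this diff (only the local names are made up):

static void copy_into_bo(struct ttm_buffer_object *src_bo,
                         struct ttm_buffer_object *dst_bo,
                         struct drm_i915_gem_object *dst_obj)
{
        struct i915_refct_sgt *dst_rsgt;

        dst_rsgt = i915_ttm_resource_get_st(dst_obj, dst_bo->resource);
        if (IS_ERR(dst_rsgt))
                return;

        __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
                        dst_rsgt, true);

        i915_refct_sgt_put(dst_rsgt);
}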
+
+/**
+ * i915_ttm_move - The TTM move callback used by i915.
+ * @bo: The buffer object.
+ * @evict: Whether this is an eviction.
+ * @ctx: The struct ttm_operation_ctx controlling how the move waits are done.
+ * @dst_mem: The destination ttm resource.
+ * @hop: If we need multihop, what temporary memory type to move to.
+ *
+ * Return: 0 if successful, negative error code otherwise.
+ */
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+                 struct ttm_operation_ctx *ctx,
+                 struct ttm_resource *dst_mem,
+                 struct ttm_place *hop)
+{
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       struct ttm_resource_manager *dst_man =
+               ttm_manager_type(bo->bdev, dst_mem->mem_type);
+       struct ttm_tt *ttm = bo->ttm;
+       struct i915_refct_sgt *dst_rsgt;
+       bool clear;
+       int ret;
+
+       /* Sync for now. We could do the actual copy async. */
+       ret = ttm_bo_wait_ctx(bo, ctx);
+       if (ret)
+               return ret;
+
+       ret = i915_ttm_move_notify(bo);
+       if (ret)
+               return ret;
+
+       if (obj->mm.madv != I915_MADV_WILLNEED) {
+               i915_ttm_purge(obj);
+               ttm_resource_free(bo, &dst_mem);
+               return 0;
+       }
+
+       /* Populate ttm with pages if needed. Typically system memory. */
+       if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
+               ret = ttm_tt_populate(bo->bdev, ttm, ctx);
+               if (ret)
+                       return ret;
+       }
+
+       dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
+       if (IS_ERR(dst_rsgt))
+               return PTR_ERR(dst_rsgt);
+
+       clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+       if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
+               __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
+
+       ttm_bo_move_sync_cleanup(bo, dst_mem);
+       i915_ttm_adjust_domains_after_move(obj);
+       i915_ttm_free_cached_io_rsgt(obj);
+
+       if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
+               obj->ttm.cached_io_rsgt = dst_rsgt;
+               obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
+               obj->ttm.get_io_page.sg_idx = 0;
+       } else {
+               i915_refct_sgt_put(dst_rsgt);
+       }
+
+       i915_ttm_adjust_lru(obj);
+       i915_ttm_adjust_gem_after_move(obj);
+       return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
new file mode 100644 (file)
index 0000000..75b87e7
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_GEM_TTM_MOVE_H_
+#define _I915_GEM_TTM_MOVE_H_
+
+#include <linux/types.h>
+
+#include "i915_selftest.h"
+
+struct ttm_buffer_object;
+struct ttm_operation_ctx;
+struct ttm_place;
+struct ttm_resource;
+struct ttm_tt;
+
+struct drm_i915_gem_object;
+struct i915_refct_sgt;
+
+int i915_ttm_move_notify(struct ttm_buffer_object *bo);
+
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+                                                             bool work_allocation));
+
+/* Internal I915 TTM declarations and definitions below. */
+
+void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+                    struct ttm_resource *dst_mem,
+                    struct ttm_tt *dst_ttm,
+                    struct i915_refct_sgt *dst_rsgt,
+                    bool allow_accel);
+
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+                 struct ttm_operation_ctx *ctx,
+                 struct ttm_resource *dst_mem,
+                 struct ttm_place *hop);
+
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj);
+
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj);
+
+#endif
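
The exported i915_ttm_move() is meant to be wired in as the TTM move callback; the actual hookup lives in i915_gem_ttm.c and is not shown in this hunk, so treat the snippet below as a hedged sketch of the expected wiring only:

static struct ttm_device_funcs example_ttm_driver = {
        .move = i915_ttm_move, /* i915_ttm_bo_driver is assumed to do the same */
};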
index b200313..257588b 100644 (file)
 #include "selftests/mock_region.h"
 #include "selftests/i915_random.h"
 
+static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
+                                            struct file *file)
+{
+       struct i915_gem_context *ctx = live_context(i915, file);
+       struct i915_address_space *vm;
+
+       if (IS_ERR(ctx))
+               return ctx;
+
+       vm = ctx->vm;
+       if (vm)
+               WRITE_ONCE(vm->scrub_64K, true);
+
+       return ctx;
+}
+
 static const unsigned int page_sizes[] = {
        I915_GTT_PAGE_SIZE_2M,
        I915_GTT_PAGE_SIZE_64K,
@@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg)
                        __i915_gem_object_put_pages(obj);
                        i915_gem_object_unlock(obj);
                        i915_gem_object_put(obj);
+
+                       i915_gem_drain_freed_objects(i915);
                }
        }
 
@@ -1080,10 +1098,6 @@ static int __igt_write_huge(struct intel_context *ce,
        if (IS_ERR(vma))
                return PTR_ERR(vma);
 
-       err = i915_vma_unbind(vma);
-       if (err)
-               return err;
-
        err = i915_vma_pin(vma, size, 0, flags | offset);
        if (err) {
                /*
@@ -1117,7 +1131,7 @@ out_vma_unpin:
        return err;
 }
 
-static int igt_write_huge(struct i915_gem_context *ctx,
+static int igt_write_huge(struct drm_i915_private *i915,
                          struct drm_i915_gem_object *obj)
 {
        struct i915_gem_engines *engines;
@@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
        IGT_TIMEOUT(end_time);
        unsigned int max_page_size;
        unsigned int count;
+       struct i915_gem_context *ctx;
+       struct file *file;
        u64 max;
        u64 num;
        u64 size;
@@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx,
        int i, n;
        int err = 0;
 
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       ctx = hugepage_ctx(i915, file);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out;
+       }
+
        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
        size = obj->base.size;
@@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx,
        }
        i915_gem_context_unlock_engines(ctx);
        if (!n)
-               return 0;
+               goto out;
 
        /*
         * To keep things interesting when alternating between engines in our
@@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
 
        kfree(order);
 
+out:
+       fput(file);
        return err;
 }
 
@@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng,
 
 static int igt_ppgtt_smoke_huge(void *arg)
 {
-       struct i915_gem_context *ctx = arg;
-       struct drm_i915_private *i915 = ctx->i915;
+       struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_object *obj;
        I915_RND_STATE(prng);
        struct {
@@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
                u32 min = backends[i].min;
                u32 max = backends[i].max;
                u32 size = max;
+
 try_again:
                size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
 
@@ -1336,7 +1364,7 @@ try_again:
                        goto out_unpin;
                }
 
-               err = igt_write_huge(ctx, obj);
+               err = igt_write_huge(i915, obj);
                if (err) {
                        pr_err("%s write-huge failed with size=%u, i=%d\n",
                               __func__, size, i);
@@ -1363,8 +1391,7 @@ out_put:
 
 static int igt_ppgtt_sanity_check(void *arg)
 {
-       struct i915_gem_context *ctx = arg;
-       struct drm_i915_private *i915 = ctx->i915;
+       struct drm_i915_private *i915 = arg;
        unsigned int supported = INTEL_INFO(i915)->page_sizes;
        struct {
                igt_create_fn fn;
@@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg)
                        if (pages)
                                obj->mm.page_sizes.sg = pages;
 
-                       err = igt_write_huge(ctx, obj);
+                       err = igt_write_huge(i915, obj);
 
                        i915_gem_object_lock(obj, NULL);
                        i915_gem_object_unpin_pages(obj);
@@ -1458,15 +1485,27 @@ out:
 
 static int igt_tmpfs_fallback(void *arg)
 {
-       struct i915_gem_context *ctx = arg;
-       struct drm_i915_private *i915 = ctx->i915;
+       struct drm_i915_private *i915 = arg;
+       struct i915_address_space *vm;
+       struct i915_gem_context *ctx;
        struct vfsmount *gemfs = i915->mm.gemfs;
-       struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
+       struct file *file;
        u32 *vaddr;
        int err = 0;
 
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       ctx = hugepage_ctx(i915, file);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out;
+       }
+       vm = i915_gem_context_get_eb_vm(ctx);
+
        /*
         * Make sure that we don't burst into a ball of flames upon falling back
         * to tmpfs, which we rely on if on the off-chance we encounter a failure
@@ -1510,33 +1549,47 @@ out_restore:
        i915->mm.gemfs = gemfs;
 
        i915_vm_put(vm);
+out:
+       fput(file);
        return err;
 }
 
 static int igt_shrink_thp(void *arg)
 {
-       struct i915_gem_context *ctx = arg;
-       struct drm_i915_private *i915 = ctx->i915;
-       struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
+       struct drm_i915_private *i915 = arg;
+       struct i915_address_space *vm;
+       struct i915_gem_context *ctx;
        struct drm_i915_gem_object *obj;
        struct i915_gem_engines_iter it;
        struct intel_context *ce;
        struct i915_vma *vma;
+       struct file *file;
        unsigned int flags = PIN_USER;
        unsigned int n;
        bool should_swap;
-       int err = 0;
+       int err;
+
+       if (!igt_can_allocate_thp(i915)) {
+               pr_info("missing THP support, skipping\n");
+               return 0;
+       }
+
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       ctx = hugepage_ctx(i915, file);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out;
+       }
+       vm = i915_gem_context_get_eb_vm(ctx);
 
        /*
         * Sanity check shrinking huge-paged object -- make sure nothing blows
         * up.
         */
 
-       if (!igt_can_allocate_thp(i915)) {
-               pr_info("missing THP support, skipping\n");
-               goto out_vm;
-       }
-
        obj = i915_gem_object_create_shmem(i915, SZ_2M);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
@@ -1626,7 +1679,8 @@ out_put:
        i915_gem_object_put(obj);
 out_vm:
        i915_vm_put(vm);
-
+out:
+       fput(file);
        return err;
 }
 
@@ -1687,10 +1741,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
                SUBTEST(igt_ppgtt_smoke_huge),
                SUBTEST(igt_ppgtt_sanity_check),
        };
-       struct i915_gem_context *ctx;
-       struct i915_address_space *vm;
-       struct file *file;
-       int err;
 
        if (!HAS_PPGTT(i915)) {
                pr_info("PPGTT not supported, skipping live-selftests\n");
@@ -1700,23 +1750,5 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
        if (intel_gt_is_wedged(&i915->gt))
                return 0;
 
-       file = mock_file(i915);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       ctx = live_context(i915, file);
-       if (IS_ERR(ctx)) {
-               err = PTR_ERR(ctx);
-               goto out_file;
-       }
-
-       vm = ctx->vm;
-       if (vm)
-               WRITE_ONCE(vm->scrub_64K, true);
-
-       err = i915_subtests(tests, ctx);
-
-out_file:
-       fput(file);
-       return err;
+       return i915_live_subtests(tests, i915);
 }
index 4a6bb64..3cc74b0 100644 (file)
@@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg)
        obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1);
        if (IS_ERR(obj)) {
                pr_err("__i915_gem_object_create_user failed with err=%ld\n",
-                      PTR_ERR(dmabuf));
+                      PTR_ERR(obj));
                err = PTR_ERR(obj);
                goto out_ret;
        }
@@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
                                            regions, num_regions);
        if (IS_ERR(obj)) {
                pr_err("__i915_gem_object_create_user failed with err=%ld\n",
-                      PTR_ERR(dmabuf));
+                      PTR_ERR(obj));
                err = PTR_ERR(obj);
                goto out_ret;
        }
index 28a700f..4b8e6b0 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include "gt/intel_migrate.h"
+#include "gem/i915_gem_ttm_move.h"
 
 static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
                                 bool fill)
@@ -227,13 +228,34 @@ out_put:
        return err;
 }
 
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+       int fail_gpu, fail_alloc, ret;
+
+       for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+               for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
+                       pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+                               fail_gpu, fail_alloc);
+                       i915_ttm_migrate_set_failure_modes(fail_gpu,
+                                                          fail_alloc);
+                       ret = igt_lmem_pages_migrate(arg);
+                       if (ret)
+                               goto out_err;
+               }
+       }
+
+out_err:
+       i915_ttm_migrate_set_failure_modes(false, false);
+       return ret;
+}
+
 int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_smem_create_migrate),
                SUBTEST(igt_lmem_create_migrate),
                SUBTEST(igt_same_create_migrate),
-               SUBTEST(igt_lmem_pages_migrate),
+               SUBTEST(igt_lmem_pages_failsafe_migrate),
        };
 
        if (!HAS_LMEM(i915))
index 890191f..4a166d2 100644 (file)
@@ -185,7 +185,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm,
 
                        pt = stash->pt[0];
                        __i915_gem_object_pin_pages(pt->base);
-                       i915_gem_object_make_unshrinkable(pt->base);
 
                        fill32_px(pt, vm->scratch[0]->encode);
 
@@ -262,13 +261,10 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
 
-       __i915_vma_put(ppgtt->vma);
-
        gen6_ppgtt_free_pd(ppgtt);
        free_scratch(vm);
 
        mutex_destroy(&ppgtt->flush);
-       mutex_destroy(&ppgtt->pin_mutex);
 
        free_pd(&ppgtt->base.vm, ppgtt->base.pd);
 }
@@ -331,37 +327,6 @@ static const struct i915_vma_ops pd_vma_ops = {
        .unbind_vma = pd_vma_unbind,
 };
 
-static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
-{
-       struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
-       struct i915_vma *vma;
-
-       GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
-       GEM_BUG_ON(size > ggtt->vm.total);
-
-       vma = i915_vma_alloc();
-       if (!vma)
-               return ERR_PTR(-ENOMEM);
-
-       i915_active_init(&vma->active, NULL, NULL, 0);
-
-       kref_init(&vma->ref);
-       mutex_init(&vma->pages_mutex);
-       vma->vm = i915_vm_get(&ggtt->vm);
-       vma->ops = &pd_vma_ops;
-       vma->private = ppgtt;
-
-       vma->size = size;
-       vma->fence_size = size;
-       atomic_set(&vma->flags, I915_VMA_GGTT);
-       vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
-
-       INIT_LIST_HEAD(&vma->obj_link);
-       INIT_LIST_HEAD(&vma->closed_link);
-
-       return vma;
-}
-
 int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
 {
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
@@ -378,42 +343,92 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
        if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
                return 0;
 
-       if (mutex_lock_interruptible(&ppgtt->pin_mutex))
-               return -EINTR;
+       /* grab the ppgtt resv to pin the object */
+       err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
+       if (err)
+               return err;
 
        /*
         * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
         * allocator works in address space sizes, so it's multiplied by page
         * size. We allocate at the top of the GTT to avoid fragmentation.
         */
-       err = 0;
-       if (!atomic_read(&ppgtt->pin_count))
+       if (!atomic_read(&ppgtt->pin_count)) {
                err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
+
+               GEM_BUG_ON(ppgtt->vma->fence);
+               clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
+       }
        if (!err)
                atomic_inc(&ppgtt->pin_count);
-       mutex_unlock(&ppgtt->pin_mutex);
 
        return err;
 }
 
-void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
 {
-       struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+       obj->mm.pages = ZERO_SIZE_PTR;
+       return 0;
+}
 
-       GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
-       if (atomic_dec_and_test(&ppgtt->pin_count))
-               i915_vma_unpin(ppgtt->vma);
+static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
+                                  struct sg_table *pages)
+{
 }
 
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
+static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
+       .name = "pd_dummy_obj",
+       .get_pages = pd_dummy_obj_get_pages,
+       .put_pages = pd_dummy_obj_put_pages,
+};
+
+static struct i915_page_directory *
+gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
 {
-       struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+       struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
+       struct i915_page_directory *pd;
+       int err;
 
-       if (!atomic_read(&ppgtt->pin_count))
-               return;
+       pd = __alloc_pd(I915_PDES);
+       if (unlikely(!pd))
+               return ERR_PTR(-ENOMEM);
 
-       i915_vma_unpin(ppgtt->vma);
-       atomic_set(&ppgtt->pin_count, 0);
+       pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
+                                                       &pd_dummy_obj_ops,
+                                                       I915_PDES * SZ_4K);
+       if (IS_ERR(pd->pt.base)) {
+               err = PTR_ERR(pd->pt.base);
+               pd->pt.base = NULL;
+               goto err_pd;
+       }
+
+       pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
+       pd->pt.base->shares_resv_from = &ppgtt->base.vm;
+
+       ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
+       if (IS_ERR(ppgtt->vma)) {
+               err = PTR_ERR(ppgtt->vma);
+               ppgtt->vma = NULL;
+               goto err_pd;
+       }
+
+       /* The dummy object we create is special, so override its ops. */
+       ppgtt->vma->ops = &pd_vma_ops;
+       ppgtt->vma->private = ppgtt;
+       return pd;
+
+err_pd:
+       free_pd(&ppgtt->base.vm, pd);
+       return ERR_PTR(err);
+}
+
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+       struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+       GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+       if (atomic_dec_and_test(&ppgtt->pin_count))
+               i915_vma_unpin(ppgtt->vma);
 }
 
 struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
@@ -427,7 +442,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
                return ERR_PTR(-ENOMEM);
 
        mutex_init(&ppgtt->flush);
-       mutex_init(&ppgtt->pin_mutex);
 
        ppgtt_init(&ppgtt->base, gt, 0);
        ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
@@ -442,19 +456,13 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
        ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
        ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
 
-       ppgtt->base.pd = __alloc_pd(I915_PDES);
-       if (!ppgtt->base.pd) {
-               err = -ENOMEM;
-               goto err_free;
-       }
-
        err = gen6_ppgtt_init_scratch(ppgtt);
        if (err)
-               goto err_pd;
+               goto err_free;
 
-       ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
-       if (IS_ERR(ppgtt->vma)) {
-               err = PTR_ERR(ppgtt->vma);
+       ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
+       if (IS_ERR(ppgtt->base.pd)) {
+               err = PTR_ERR(ppgtt->base.pd);
                goto err_scratch;
        }
 
@@ -462,10 +470,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 
 err_scratch:
        free_scratch(&ppgtt->base.vm);
-err_pd:
-       free_pd(&ppgtt->base.vm, ppgtt->base.pd);
 err_free:
-       mutex_destroy(&ppgtt->pin_mutex);
        kfree(ppgtt);
        return ERR_PTR(err);
 }
index 6a61a5c..5e5cf2e 100644 (file)
@@ -19,7 +19,6 @@ struct gen6_ppgtt {
        u32 pp_dir;
 
        atomic_t pin_count;
-       struct mutex pin_mutex;
 
        bool scan_for_unused_pt;
 };
@@ -71,7 +70,6 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
 
 int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
 void gen6_ppgtt_unpin(struct i915_ppgtt *base);
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
 void gen6_ppgtt_enable(struct intel_gt *gt);
 void gen7_ppgtt_enable(struct intel_gt *gt);
 struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
index 461844d..e320610 100644 (file)
@@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
                        vf_flush_wa = true;
 
                /* WaForGAMHang:kbl */
-               if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0))
+               if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
                        dc_flush_wa = true;
        }
 
index 037a9a6..9966e9d 100644 (file)
@@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
 
                        pt = stash->pt[!!lvl];
                        __i915_gem_object_pin_pages(pt->base);
-                       i915_gem_object_make_unshrinkable(pt->base);
 
                        fill_px(pt, vm->scratch[lvl]->encode);
 
@@ -652,7 +651,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 
        vm->scratch[0]->encode =
                gen8_pte_encode(px_dma(vm->scratch[0]),
-                               I915_CACHE_LLC, pte_flags);
+                               I915_CACHE_NONE, pte_flags);
 
        for (i = 1; i <= vm->top; i++) {
                struct drm_i915_gem_object *obj;
@@ -668,7 +667,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
                }
 
                fill_px(obj, vm->scratch[i - 1]->encode);
-               obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
+               obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);
 
                vm->scratch[i] = obj;
        }
index 5634d14..268c51f 100644 (file)
@@ -219,7 +219,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
         */
 
        err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
-       if (!err && ce->ring->vma->obj)
+       if (!err)
                err = i915_gem_object_lock(ce->ring->vma->obj, ww);
        if (!err && ce->state)
                err = i915_gem_object_lock(ce->state->obj, ww);
@@ -228,17 +228,17 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
        if (err)
                return err;
 
-       err = i915_active_acquire(&ce->active);
+       err = ce->ops->pre_pin(ce, ww, &vaddr);
        if (err)
                goto err_ctx_unpin;
 
-       err = ce->ops->pre_pin(ce, ww, &vaddr);
+       err = i915_active_acquire(&ce->active);
        if (err)
-               goto err_release;
+               goto err_post_unpin;
 
        err = mutex_lock_interruptible(&ce->pin_mutex);
        if (err)
-               goto err_post_unpin;
+               goto err_release;
 
        intel_engine_pm_might_get(ce->engine);
 
@@ -273,11 +273,11 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
 
 err_unlock:
        mutex_unlock(&ce->pin_mutex);
+err_release:
+       i915_active_release(&ce->active);
 err_post_unpin:
        if (!handoff)
                ce->ops->post_unpin(ce);
-err_release:
-       i915_active_release(&ce->active);
 err_ctx_unpin:
        intel_context_post_unpin(ce);
 
index ff6753c..3327560 100644 (file)
@@ -363,7 +363,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
                DRIVER_CAPS(i915)->has_logical_contexts = true;
 
        ewma__engine_latency_init(&engine->latency);
-       seqcount_init(&engine->stats.lock);
+       seqcount_init(&engine->stats.execlists.lock);
 
        ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 
@@ -1915,22 +1915,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
        intel_engine_print_breadcrumbs(engine, m);
 }
 
-static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
-                                           ktime_t *now)
-{
-       ktime_t total = engine->stats.total;
-
-       /*
-        * If the engine is executing something at the moment
-        * add it to the total.
-        */
-       *now = ktime_get();
-       if (READ_ONCE(engine->stats.active))
-               total = ktime_add(total, ktime_sub(*now, engine->stats.start));
-
-       return total;
-}
-
 /**
  * intel_engine_get_busy_time() - Return current accumulated engine busyness
  * @engine: engine to report on
@@ -1940,15 +1924,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
  */
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 {
-       unsigned int seq;
-       ktime_t total;
-
-       do {
-               seq = read_seqcount_begin(&engine->stats.lock);
-               total = __intel_engine_get_busy_time(engine, now);
-       } while (read_seqcount_retry(&engine->stats.lock, seq));
-
-       return total;
+       return engine->busyness(engine, now);
 }
 
 struct intel_context *
index 24fbdd9..8e762d6 100644 (file)
 
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
+       struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
        unsigned long flags;
 
-       if (engine->stats.active) {
-               engine->stats.active++;
+       if (stats->active) {
+               stats->active++;
                return;
        }
 
        /* The writer is serialised; but the pmu reader may be from hardirq */
        local_irq_save(flags);
-       write_seqcount_begin(&engine->stats.lock);
+       write_seqcount_begin(&stats->lock);
 
-       engine->stats.start = ktime_get();
-       engine->stats.active++;
+       stats->start = ktime_get();
+       stats->active++;
 
-       write_seqcount_end(&engine->stats.lock);
+       write_seqcount_end(&stats->lock);
        local_irq_restore(flags);
 
-       GEM_BUG_ON(!engine->stats.active);
+       GEM_BUG_ON(!stats->active);
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 {
+       struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
        unsigned long flags;
 
-       GEM_BUG_ON(!engine->stats.active);
-       if (engine->stats.active > 1) {
-               engine->stats.active--;
+       GEM_BUG_ON(!stats->active);
+       if (stats->active > 1) {
+               stats->active--;
                return;
        }
 
        local_irq_save(flags);
-       write_seqcount_begin(&engine->stats.lock);
+       write_seqcount_begin(&stats->lock);
 
-       engine->stats.active--;
-       engine->stats.total =
-               ktime_add(engine->stats.total,
-                         ktime_sub(ktime_get(), engine->stats.start));
+       stats->active--;
+       stats->total = ktime_add(stats->total,
+                                ktime_sub(ktime_get(), stats->start));
 
-       write_seqcount_end(&engine->stats.lock);
+       write_seqcount_end(&stats->lock);
        local_irq_restore(flags);
 }
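
As an aside on the stats split above (not part of the patch): the execlists busyness accounting keeps the writer cheap, with interrupts off only around a short seqcount write section, while readers retry until they observe a consistent total/start pair. A standalone sketch of that pattern, using hypothetical names (busy_stats, busy_stats_in, busy_stats_read) and showing only the idle-to-active edge on the writer side, might look like the following:

#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/ktime.h>
#include <linux/seqlock.h>

struct busy_stats {
	seqcount_t lock;	/* init with seqcount_init() before use */
	unsigned int active;	/* contexts currently scheduled in */
	ktime_t total;		/* busy time accumulated while idle */
	ktime_t start;		/* timestamp of last idle->active edge */
};

/* Writer side: only the idle->active transition publishes new state. */
static void busy_stats_in(struct busy_stats *stats)
{
	unsigned long flags;

	if (stats->active) {	/* already busy, no timestamp to publish */
		stats->active++;
		return;
	}

	local_irq_save(flags);	/* the reader may run from hardirq (PMU) */
	write_seqcount_begin(&stats->lock);
	stats->start = ktime_get();
	stats->active++;
	write_seqcount_end(&stats->lock);
	local_irq_restore(flags);
}

/* Reader side: retry until a torn update is ruled out. */
static ktime_t busy_stats_read(struct busy_stats *stats, ktime_t *now)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqcount_begin(&stats->lock);
		total = stats->total;
		*now = ktime_get();
		if (READ_ONCE(stats->active))
			total = ktime_add(total, ktime_sub(*now, stats->start));
	} while (read_seqcount_retry(&stats->lock, seq));

	return total;
}
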
 
index e0f7735..5732e0d 100644 (file)
@@ -257,6 +257,55 @@ struct intel_engine_execlists {
 
 #define INTEL_ENGINE_CS_MAX_NAME 8
 
+struct intel_engine_execlists_stats {
+       /**
+        * @active: Number of contexts currently scheduled in.
+        */
+       unsigned int active;
+
+       /**
+        * @lock: Lock protecting the below fields.
+        */
+       seqcount_t lock;
+
+       /**
+        * @total: Total time this engine was busy.
+        *
+        * Accumulated time not counting the most recent block in cases where
+        * engine is currently busy (active > 0).
+        */
+       ktime_t total;
+
+       /**
+        * @start: Timestamp of the last idle to active transition.
+        *
+        * Idle is defined as active == 0, active is active > 0.
+        */
+       ktime_t start;
+};
+
+struct intel_engine_guc_stats {
+       /**
+        * @running: Active state of the engine when busyness was last sampled.
+        */
+       bool running;
+
+       /**
+        * @prev_total: Previous value of total runtime clock cycles.
+        */
+       u32 prev_total;
+
+       /**
+        * @total_gt_clks: Total gt clock cycles this engine was busy.
+        */
+       u64 total_gt_clks;
+
+       /**
+        * @start_gt_clk: GT clock time of last idle to active transition.
+        */
+       u64 start_gt_clk;
+};
+
 struct intel_engine_cs {
        struct drm_i915_private *i915;
        struct intel_gt *gt;
@@ -439,6 +488,12 @@ struct intel_engine_cs {
        void            (*add_active_request)(struct i915_request *rq);
        void            (*remove_active_request)(struct i915_request *rq);
 
+       /*
+        * Get engine busyness and the time at which the busyness was sampled.
+        */
+       ktime_t         (*busyness)(struct intel_engine_cs *engine,
+                                   ktime_t *now);
+
        struct intel_engine_execlists execlists;
 
        /*
@@ -488,30 +543,10 @@ struct intel_engine_cs {
        u32 (*get_cmd_length_mask)(u32 cmd_header);
 
        struct {
-               /**
-                * @active: Number of contexts currently scheduled in.
-                */
-               unsigned int active;
-
-               /**
-                * @lock: Lock protecting the below fields.
-                */
-               seqcount_t lock;
-
-               /**
-                * @total: Total time this engine was busy.
-                *
-                * Accumulated time not counting the most recent block in cases
-                * where engine is currently busy (active > 0).
-                */
-               ktime_t total;
-
-               /**
-                * @start: Timestamp of the last idle to active transition.
-                *
-                * Idle is defined as active == 0, active is active > 0.
-                */
-               ktime_t start;
+               union {
+                       struct intel_engine_execlists_stats execlists;
+                       struct intel_engine_guc_stats guc;
+               };
 
                /**
                 * @rps: Utilisation at last RPS sampling.
index bedb800..ca03880 100644 (file)
@@ -3293,6 +3293,38 @@ static void execlists_release(struct intel_engine_cs *engine)
        lrc_fini_wa_ctx(engine);
 }
 
+static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine,
+                                          ktime_t *now)
+{
+       struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
+       ktime_t total = stats->total;
+
+       /*
+        * If the engine is executing something at the moment
+        * add it to the total.
+        */
+       *now = ktime_get();
+       if (READ_ONCE(stats->active))
+               total = ktime_add(total, ktime_sub(*now, stats->start));
+
+       return total;
+}
+
+static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine,
+                                        ktime_t *now)
+{
+       struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
+       unsigned int seq;
+       ktime_t total;
+
+       do {
+               seq = read_seqcount_begin(&stats->lock);
+               total = __execlists_engine_busyness(engine, now);
+       } while (read_seqcount_retry(&stats->lock, seq));
+
+       return total;
+}
+
 static void
 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
@@ -3349,6 +3381,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
                engine->emit_bb_start = gen8_emit_bb_start;
        else
                engine->emit_bb_start = gen8_emit_bb_start_noarb;
+
+       engine->busyness = execlists_engine_busyness;
 }
 
 static void logical_ring_default_irqs(struct intel_engine_cs *engine)
index 524eaf6..b4a8594 100644 (file)
@@ -86,6 +86,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
        intel_rc6_unpark(&gt->rc6);
        intel_rps_unpark(&gt->rps);
        i915_pmu_gt_unparked(i915);
+       intel_guc_busyness_unpark(gt);
 
        intel_gt_unpark_requests(gt);
        runtime_begin(gt);
@@ -104,6 +105,7 @@ static int __gt_park(struct intel_wakeref *wf)
        runtime_end(gt);
        intel_gt_park_requests(gt);
 
+       intel_guc_busyness_park(gt);
        i915_vma_parked(gt);
        i915_pmu_gt_parked(i915);
        intel_rps_park(&gt->rps);
index 67d14af..0dd254c 100644 (file)
@@ -7,6 +7,8 @@
 
 #include <linux/fault-inject.h>
 
+#include <drm/drm_cache.h>
+
 #include "gem/i915_gem_lmem.h"
 #include "i915_trace.h"
 #include "intel_gt.h"
@@ -273,6 +275,7 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch)
                val = POISON_FREE;
 
        memset(vaddr, val, scratch->base.size);
+       drm_clflush_virt_range(vaddr, scratch->base.size);
 }
 
 int setup_scratch_page(struct i915_address_space *vm)
index 15f9ada..9c253ba 100644 (file)
@@ -424,7 +424,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 
        table->unused_entries_index = I915_MOCS_PTE;
        if (IS_DG2(i915)) {
-               if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) {
+               if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
                        table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
                        table->table = dg2_mocs_table_g10_ax;
                } else {
index afb35d2..aec838e 100644 (file)
@@ -158,7 +158,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
 static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
                                     u64 *start, u32 *size)
 {
-       if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0))
+       if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0))
                return false;
 
        *start = 0;
index 91200c4..0fbd6db 100644 (file)
@@ -1367,20 +1367,27 @@ void intel_gt_handle_error(struct intel_gt *gt,
        /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
        synchronize_rcu_expedited();
 
-       /* Prevent any other reset-engine attempt. */
-       for_each_engine(engine, gt, tmp) {
-               while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-                                       &gt->reset.flags))
-                       wait_on_bit(&gt->reset.flags,
-                                   I915_RESET_ENGINE + engine->id,
-                                   TASK_UNINTERRUPTIBLE);
+       /*
+        * Prevent any other reset-engine attempt. We don't do this for GuC
+        * submission because the GuC owns the per-engine reset, not the i915.
+        */
+       if (!intel_uc_uses_guc_submission(&gt->uc)) {
+               for_each_engine(engine, gt, tmp) {
+                       while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+                                               &gt->reset.flags))
+                               wait_on_bit(&gt->reset.flags,
+                                           I915_RESET_ENGINE + engine->id,
+                                           TASK_UNINTERRUPTIBLE);
+               }
        }
 
        intel_gt_reset_global(gt, engine_mask, msg);
 
-       for_each_engine(engine, gt, tmp)
-               clear_bit_unlock(I915_RESET_ENGINE + engine->id,
-                                &gt->reset.flags);
+       if (!intel_uc_uses_guc_submission(&gt->uc)) {
+               for_each_engine(engine, gt, tmp)
+                       clear_bit_unlock(I915_RESET_ENGINE + engine->id,
+                                        &gt->reset.flags);
+       }
        clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
        smp_mb__after_atomic();
        wake_up_all(&gt->reset.queue);
@@ -1441,6 +1448,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
        BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
                     I915_WEDGED_ON_INIT);
        intel_gt_set_wedged(gt);
+       i915_disable_error_state(gt->i915, -ENODEV);
        set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
 
        /* Wedged on init is non-recoverable */
@@ -1450,6 +1458,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
 void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
 {
        intel_gt_set_wedged(gt);
+       i915_disable_error_state(gt->i915, -ENODEV);
        set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
        intel_gt_retire_requests(gt); /* cleanup any wedged requests */
 }
index 586dca1..3e6fac0 100644 (file)
@@ -1357,7 +1357,7 @@ retry:
        err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
        if (!err && gen7_wa_vma)
                err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
-       if (!err && engine->legacy.ring->vma->obj)
+       if (!err)
                err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
        if (!err)
                err = intel_timeline_pin(timeline, &ww);
index 5e275f8..07ff7ba 100644 (file)
@@ -936,8 +936,70 @@ void intel_rps_park(struct intel_rps *rps)
        GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 }
 
+u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
+{
+       struct intel_guc_slpc *slpc;
+
+       if (rps_uses_slpc(rps)) {
+               slpc = rps_to_slpc(rps);
+
+               return slpc->boost_freq;
+       } else {
+               return intel_gpu_freq(rps, rps->boost_freq);
+       }
+}
+
+static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
+{
+       bool boost = false;
+
+       /* Validate against (static) hardware limits */
+       val = intel_freq_opcode(rps, val);
+       if (val < rps->min_freq || val > rps->max_freq)
+               return -EINVAL;
+
+       mutex_lock(&rps->lock);
+       if (val != rps->boost_freq) {
+               rps->boost_freq = val;
+               boost = atomic_read(&rps->num_waiters);
+       }
+       mutex_unlock(&rps->lock);
+       if (boost)
+               schedule_work(&rps->work);
+
+       return 0;
+}
+
+int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
+{
+       struct intel_guc_slpc *slpc;
+
+       if (rps_uses_slpc(rps)) {
+               slpc = rps_to_slpc(rps);
+
+               return intel_guc_slpc_set_boost_freq(slpc, freq);
+       } else {
+               return rps_set_boost_freq(rps, freq);
+       }
+}
+
+void intel_rps_dec_waiters(struct intel_rps *rps)
+{
+       struct intel_guc_slpc *slpc;
+
+       if (rps_uses_slpc(rps)) {
+               slpc = rps_to_slpc(rps);
+
+               intel_guc_slpc_dec_waiters(slpc);
+       } else {
+               atomic_dec(&rps->num_waiters);
+       }
+}
+
 void intel_rps_boost(struct i915_request *rq)
 {
+       struct intel_guc_slpc *slpc;
+
        if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
                return;
 
@@ -945,6 +1007,16 @@ void intel_rps_boost(struct i915_request *rq)
        if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
                struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
 
+               if (rps_uses_slpc(rps)) {
+                       slpc = rps_to_slpc(rps);
+
+                       /* Return if old value is non zero */
+                       if (!atomic_fetch_inc(&slpc->num_waiters))
+                               schedule_work(&slpc->boost_work);
+
+                       return;
+               }
+
                if (atomic_fetch_inc(&rps->num_waiters))
                        return;
 
index 11960d6..aee12f3 100644 (file)
@@ -23,6 +23,9 @@ void intel_rps_disable(struct intel_rps *rps);
 void intel_rps_park(struct intel_rps *rps);
 void intel_rps_unpark(struct intel_rps *rps);
 void intel_rps_boost(struct i915_request *rq);
+void intel_rps_dec_waiters(struct intel_rps *rps);
+u32 intel_rps_get_boost_frequency(struct intel_rps *rps);
+int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq);
 
 int intel_rps_set(struct intel_rps *rps, u8 val);
 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
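
As an aside on the new exports above (not part of the patch): intel_rps_get_boost_frequency() and intel_rps_set_boost_frequency() give callers a single entry point that routes to SLPC when GuC-based frequency management is in use and to the legacy RPS path otherwise. A hypothetical caller, for instance a sysfs-style show/store pair, could use them roughly as below; only the two intel_rps_*_boost_frequency() calls come from this patch, while the function names and the way the intel_rps pointer is obtained are illustrative.

#include <linux/kernel.h>
#include <linux/sysfs.h>
/* assumes the updated "intel_rps.h" above is in scope */

static ssize_t boost_freq_mhz_show(struct intel_rps *rps, char *buf)
{
	/* Reports the SLPC boost frequency or the legacy RPS one, in MHz. */
	return sysfs_emit(buf, "%u\n", intel_rps_get_boost_frequency(rps));
}

static ssize_t boost_freq_mhz_store(struct intel_rps *rps,
				    const char *buf, size_t count)
{
	u32 val;
	int err;

	err = kstrtou32(buf, 0, &val);	/* userspace writes MHz */
	if (err)
		return err;

	/* The legacy path rejects out-of-range values with -EINVAL. */
	err = intel_rps_set_boost_frequency(rps, val);

	return err ?: count;
}
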
index e1f3625..a972744 100644 (file)
@@ -482,7 +482,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen9_ctx_workarounds_init(engine, wal);
 
        /* WaToEnableHwFixForPushConstHWBug:kbl */
-       if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER))
+       if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
@@ -558,6 +558,22 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
        wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 }
 
+/*
+ * These settings aren't actually workarounds, but general tuning settings that
+ * need to be programmed on the dg2 platform.
+ */
+static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+                                  struct i915_wa_list *wal)
+{
+       wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+                        REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
+       wa_add(wal,
+              FF_MODE2,
+              FF_MODE2_TDS_TIMER_MASK,
+              FF_MODE2_TDS_TIMER_128,
+              0, false);
+}
+
 /*
  * These settings aren't actually workarounds, but general tuning settings that
  * need to be programmed on several platforms.
@@ -644,6 +660,42 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
                     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
 }
 
+static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
+                                    struct i915_wa_list *wal)
+{
+       dg2_ctx_gt_tuning_init(engine, wal);
+
+       /* Wa_16011186671:dg2_g11 */
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+               wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+               wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+               /* Wa_14010469329:dg2_g10 */
+               wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
+                            XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+
+               /*
+                * Wa_22010465075:dg2_g10
+                * Wa_22010613112:dg2_g10
+                * Wa_14010698770:dg2_g10
+                */
+               wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
+                            GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+       }
+
+       /* Wa_16013271637:dg2 */
+       wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
+                    MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+
+       /* Wa_22012532006:dg2 */
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+           IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+               wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                            DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+}
+
 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
                                         struct i915_wa_list *wal)
 {
@@ -730,7 +782,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
        if (engine->class != RENDER_CLASS)
                goto done;
 
-       if (IS_DG1(i915))
+       if (IS_DG2(i915))
+               dg2_ctx_workarounds_init(engine, wal);
+       else if (IS_XEHPSDV(i915))
+               ; /* noop; none at this time */
+       else if (IS_DG1(i915))
                dg1_ctx_workarounds_init(engine, wal);
        else if (GRAPHICS_VER(i915) == 12)
                gen12_ctx_workarounds_init(engine, wal);
@@ -877,11 +933,52 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
 }
 
+static void
+gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+       const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
+       unsigned int slice, subslice;
+       u32 mcr, mcr_mask;
+
+       GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
+
+       /*
+        * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
+        * Before any MMIO read into slice/subslice specific registers, MCR
+        * packet control register needs to be programmed to point to any
+        * enabled s/ss pair. Otherwise, incorrect values will be returned.
+        * This means each subsequent MMIO read will be forwarded to an
+        * This means each subsequent MMIO read will be forwarded to a
+        * specific s/ss combination, but this is OK since these registers
+        * occasions, such as INSTDONE, where this value is dependent
+        * on s/ss combo, the read should be done with read_subslice_reg.
+        */
+       slice = ffs(sseu->slice_mask) - 1;
+       GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+       subslice = ffs(intel_sseu_get_subslices(sseu, slice));
+       GEM_BUG_ON(!subslice);
+       subslice--;
+
+       /*
+        * We use GEN8_MCR..() macros to calculate the |mcr| value for
+        * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
+        */
+       mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+       mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+
+       drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
+
+       wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+}
+
 static void
 gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
        struct drm_i915_private *i915 = gt->i915;
 
+       /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
+       gen9_wa_init_mcr(i915, wal);
+
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
                wa_write_or(wal,
@@ -916,7 +1013,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
        /* WaInPlaceDecompressionHang:skl */
-       if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0))
+       if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
@@ -928,7 +1025,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        gen9_gt_workarounds_init(gt, wal);
 
        /* WaDisableDynamicCreditSharing:kbl */
-       if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0))
+       if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -1136,7 +1233,7 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 
        /* Wa_1607087056:icl,ehl,jsl */
        if (IS_ICELAKE(i915) ||
-           IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0))
+           IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1190,19 +1287,19 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        gen12_gt_workarounds_init(gt, wal);
 
        /* Wa_1409420604:tgl */
-       if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+       if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
                            SUBSLICE_UNIT_LEVEL_CLKGATE2,
                            CPSSUNIT_CLKGATE_DIS);
 
        /* Wa_1607087056:tgl also known as BUG:1409180338 */
-       if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+       if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /* Wa_1408615072:tgl[a0] */
-       if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+       if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
                            VSUNIT_CLKGATE_DIS_TGL);
 }
@@ -1215,7 +1312,7 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        gen12_gt_workarounds_init(gt, wal);
 
        /* Wa_1607087056:dg1 */
-       if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
+       if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1236,7 +1333,179 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 static void
 xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
+       struct drm_i915_private *i915 = gt->i915;
+
+       xehp_init_mcr(gt, wal);
+
+       /* Wa_1409757795:xehpsdv */
+       wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+
+       /* Wa_18011725039:xehpsdv */
+       if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
+               wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+               wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+       }
+
+       /* Wa_16011155590:xehpsdv */
+       if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
+               wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+                           TSGUNIT_CLKGATE_DIS);
+
+       /* Wa_14011780169:xehpsdv */
+       if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
+               wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
+                           GAMTLBVDBOX7_CLKGATE_DIS |
+                           GAMTLBVDBOX6_CLKGATE_DIS |
+                           GAMTLBVDBOX5_CLKGATE_DIS |
+                           GAMTLBVDBOX4_CLKGATE_DIS |
+                           GAMTLBVDBOX3_CLKGATE_DIS |
+                           GAMTLBVDBOX2_CLKGATE_DIS |
+                           GAMTLBVDBOX1_CLKGATE_DIS |
+                           GAMTLBVDBOX0_CLKGATE_DIS |
+                           GAMTLBKCR_CLKGATE_DIS |
+                           GAMTLBGUC_CLKGATE_DIS |
+                           GAMTLBBLT_CLKGATE_DIS);
+               wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
+                           GAMTLBGFXA1_CLKGATE_DIS |
+                           GAMTLBCOMPA0_CLKGATE_DIS |
+                           GAMTLBCOMPA1_CLKGATE_DIS |
+                           GAMTLBCOMPB0_CLKGATE_DIS |
+                           GAMTLBCOMPB1_CLKGATE_DIS |
+                           GAMTLBCOMPC0_CLKGATE_DIS |
+                           GAMTLBCOMPC1_CLKGATE_DIS |
+                           GAMTLBCOMPD0_CLKGATE_DIS |
+                           GAMTLBCOMPD1_CLKGATE_DIS |
+                           GAMTLBMERT_CLKGATE_DIS   |
+                           GAMTLBVEBOX3_CLKGATE_DIS |
+                           GAMTLBVEBOX2_CLKGATE_DIS |
+                           GAMTLBVEBOX1_CLKGATE_DIS |
+                           GAMTLBVEBOX0_CLKGATE_DIS);
+       }
+
+       /* Wa_14012362059:xehpsdv */
+       wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+
+       /* Wa_16012725990:xehpsdv */
+       if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
+               wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
+
+       /* Wa_14011060649:xehpsdv */
+       wa_14011060649(gt, wal);
+
+       /* Wa_14014368820:xehpsdv */
+       wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+                   GLOBAL_INVALIDATION_MODE);
+}
+
+static void
+dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+       struct intel_engine_cs *engine;
+       int id;
+
        xehp_init_mcr(gt, wal);
+
+       /* Wa_14011060649:dg2 */
+       wa_14011060649(gt, wal);
+
+       /*
+        * Although there are per-engine instances of these registers,
+        * they technically exist outside the engine itself and are not
+        * impacted by engine resets.  Furthermore, they're part of the
+        * GuC blacklist so trying to treat them as engine workarounds
+        * will result in GuC initialization failure and a wedged GPU.
+        */
+       for_each_engine(engine, gt, id) {
+               if (engine->class != VIDEO_DECODE_CLASS)
+                       continue;
+
+               /* Wa_16010515920:dg2_g10 */
+               if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
+                       wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
+                                   ALNUNIT_CLKGATE_DIS);
+       }
+
+       if (IS_DG2_G10(gt->i915)) {
+               /* Wa_22010523718:dg2 */
+               wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+                           CG3DDISCFEG_CLKGATE_DIS);
+
+               /* Wa_14011006942:dg2 */
+               wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE,
+                           DSS_ROUTER_CLKGATE_DIS);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
+               /* Wa_14010680813:dg2_g10 */
+               wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
+                           EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
+
+               /* Wa_14010948348:dg2_g10 */
+               wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
+
+               /* Wa_14011037102:dg2_g10 */
+               wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
+
+               /* Wa_14011371254:dg2_g10 */
+               wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+
+               /* Wa_14011431319:dg2_g10 */
+               wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
+                           GAMTLBVDBOX7_CLKGATE_DIS |
+                           GAMTLBVDBOX6_CLKGATE_DIS |
+                           GAMTLBVDBOX5_CLKGATE_DIS |
+                           GAMTLBVDBOX4_CLKGATE_DIS |
+                           GAMTLBVDBOX3_CLKGATE_DIS |
+                           GAMTLBVDBOX2_CLKGATE_DIS |
+                           GAMTLBVDBOX1_CLKGATE_DIS |
+                           GAMTLBVDBOX0_CLKGATE_DIS |
+                           GAMTLBKCR_CLKGATE_DIS |
+                           GAMTLBGUC_CLKGATE_DIS |
+                           GAMTLBBLT_CLKGATE_DIS);
+               wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
+                           GAMTLBGFXA1_CLKGATE_DIS |
+                           GAMTLBCOMPA0_CLKGATE_DIS |
+                           GAMTLBCOMPA1_CLKGATE_DIS |
+                           GAMTLBCOMPB0_CLKGATE_DIS |
+                           GAMTLBCOMPB1_CLKGATE_DIS |
+                           GAMTLBCOMPC0_CLKGATE_DIS |
+                           GAMTLBCOMPC1_CLKGATE_DIS |
+                           GAMTLBCOMPD0_CLKGATE_DIS |
+                           GAMTLBCOMPD1_CLKGATE_DIS |
+                           GAMTLBMERT_CLKGATE_DIS   |
+                           GAMTLBVEBOX3_CLKGATE_DIS |
+                           GAMTLBVEBOX2_CLKGATE_DIS |
+                           GAMTLBVEBOX1_CLKGATE_DIS |
+                           GAMTLBVEBOX0_CLKGATE_DIS);
+
+               /* Wa_14010569222:dg2_g10 */
+               wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+                           GAMEDIA_CLKGATE_DIS);
+
+               /* Wa_14011028019:dg2_g10 */
+               wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
+           IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
+               /* Wa_14012362059:dg2 */
+               wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+       }
+
+       /* Wa_1509235366:dg2 */
+       wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+                   GLOBAL_INVALIDATION_MODE);
+
+       /* Wa_14014830051:dg2 */
+       wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+
+       /*
+        * The following are not actually "workarounds" but rather
+        * recommended tuning settings documented in the bspec's
+        * performance guide section.
+        */
+       wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+       wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
 }
 
 static void
@@ -1244,7 +1513,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
        struct drm_i915_private *i915 = gt->i915;
 
-       if (IS_XEHPSDV(i915))
+       if (IS_DG2(i915))
+               dg2_gt_workarounds_init(gt, wal);
+       else if (IS_XEHPSDV(i915))
                xehpsdv_gt_workarounds_init(gt, wal);
        else if (IS_DG1(i915))
                dg1_gt_workarounds_init(gt, wal);
@@ -1518,7 +1789,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine)
                          RING_FORCE_TO_NONPRIV_RANGE_4);
 }
 
-static void cml_whitelist_build(struct intel_engine_cs *engine)
+static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
 {
        struct i915_wa_list *w = &engine->whitelist;
 
@@ -1526,6 +1797,11 @@ static void cml_whitelist_build(struct intel_engine_cs *engine)
                whitelist_reg_ext(w,
                                  RING_CTX_TIMESTAMP(engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
+}
+
+static void cml_whitelist_build(struct intel_engine_cs *engine)
+{
+       allow_read_ctx_timestamp(engine);
 
        cfl_whitelist_build(engine);
 }
@@ -1534,6 +1810,8 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
 {
        struct i915_wa_list *w = &engine->whitelist;
 
+       allow_read_ctx_timestamp(engine);
+
        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
@@ -1569,15 +1847,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
                /* hucStatus2RegOffset */
                whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
-               whitelist_reg_ext(w,
-                                 RING_CTX_TIMESTAMP(engine->mmio_base),
-                                 RING_FORCE_TO_NONPRIV_ACCESS_RD);
                break;
 
        default:
-               whitelist_reg_ext(w,
-                                 RING_CTX_TIMESTAMP(engine->mmio_base),
-                                 RING_FORCE_TO_NONPRIV_ACCESS_RD);
                break;
        }
 }
@@ -1586,6 +1858,8 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
 {
        struct i915_wa_list *w = &engine->whitelist;
 
+       allow_read_ctx_timestamp(engine);
+
        switch (engine->class) {
        case RENDER_CLASS:
                /*
@@ -1609,9 +1883,6 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
                whitelist_reg(w, HIZ_CHICKEN);
                break;
        default:
-               whitelist_reg_ext(w,
-                                 RING_CTX_TIMESTAMP(engine->mmio_base),
-                                 RING_FORCE_TO_NONPRIV_ACCESS_RD);
                break;
        }
 }
@@ -1623,13 +1894,46 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine)
        tgl_whitelist_build(engine);
 
        /* GEN:BUG:1409280441:dg1 */
-       if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) &&
+       if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
            (engine->class == RENDER_CLASS ||
             engine->class == COPY_ENGINE_CLASS))
                whitelist_reg_ext(w, RING_ID(engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
 }
 
+static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
+{
+       allow_read_ctx_timestamp(engine);
+}
+
+static void dg2_whitelist_build(struct intel_engine_cs *engine)
+{
+       struct i915_wa_list *w = &engine->whitelist;
+
+       allow_read_ctx_timestamp(engine);
+
+       switch (engine->class) {
+       case RENDER_CLASS:
+               /*
+                * Wa_1507100340:dg2_g10
+                *
+                * This covers 4 registers which are next to one another :
+                *   - PS_INVOCATION_COUNT
+                *   - PS_INVOCATION_COUNT_UDW
+                *   - PS_DEPTH_COUNT
+                *   - PS_DEPTH_COUNT_UDW
+                */
+               if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
+                       whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+                                         RING_FORCE_TO_NONPRIV_ACCESS_RD |
+                                         RING_FORCE_TO_NONPRIV_RANGE_4);
+
+               break;
+       default:
+               break;
+       }
+}
+
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *i915 = engine->i915;
@@ -1637,7 +1941,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 
        wa_init_start(w, "whitelist", engine->name);
 
-       if (IS_DG1(i915))
+       if (IS_DG2(i915))
+               dg2_whitelist_build(engine);
+       else if (IS_XEHPSDV(i915))
+               xehpsdv_whitelist_build(engine);
+       else if (IS_DG1(i915))
                dg1_whitelist_build(engine);
        else if (GRAPHICS_VER(i915) == 12)
                tgl_whitelist_build(engine);
@@ -1711,13 +2019,119 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                                    CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
        }
 }
+
+static bool needs_wa_1308578152(struct intel_engine_cs *engine)
+{
+       u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0);
+
+       return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0;
+}
+
 static void
 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
        struct drm_i915_private *i915 = engine->i915;
 
-       if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
-           IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+               /* Wa_14013392000:dg2_g11 */
+               wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
+
+               /* Wa_16011620976:dg2_g11 */
+               wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
+           IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+               /* Wa_14012419201:dg2 */
+               wa_masked_en(wal, GEN9_ROW_CHICKEN4,
+                            GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
+           IS_DG2_G11(engine->i915)) {
+               /*
+                * Wa_22012826095:dg2
+                * Wa_22013059131:dg2
+                */
+               wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+                                MAXREQS_PER_BANK,
+                                REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+
+               /* Wa_22013059131:dg2 */
+               wa_write_or(wal, LSC_CHICKEN_BIT_0,
+                           FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+       }
+
+       /* Wa_1308578152:dg2_g10 when first gslice is fused off */
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) &&
+           needs_wa_1308578152(engine)) {
+               wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
+                             GEN12_REPLAY_MODE_GRANULARITY);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
+           IS_DG2_G11(engine->i915)) {
+               /* Wa_22013037850:dg2 */
+               wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+                           DISABLE_128B_EVICTION_COMMAND_UDW);
+
+               /* Wa_22012856258:dg2 */
+               wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+                            GEN12_DISABLE_READ_SUPPRESSION);
+
+               /*
+                * Wa_22010960976:dg2
+                * Wa_14013347512:dg2
+                */
+               wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
+                             LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+               /*
+                * Wa_1608949956:dg2_g10
+                * Wa_14010198302:dg2_g10
+                */
+               wa_masked_en(wal, GEN8_ROW_CHICKEN,
+                            MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+
+               /*
+                * Wa_14010918519:dg2_g10
+                *
+                * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
+                * so ignoring verification.
+                */
+               wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+                      FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+                      0, false);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+               /* Wa_22010430635:dg2 */
+               wa_masked_en(wal,
+                            GEN9_ROW_CHICKEN4,
+                            GEN12_DISABLE_GRF_CLEAR);
+
+               /* Wa_14010648519:dg2 */
+               wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+       }
+
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+           IS_DG2_G11(engine->i915)) {
+               /* Wa_22012654132:dg2 */
+               wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+                      _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+                      0 /* write-only, so skip validation */,
+                      true);
+       }
+
+       /* Wa_14013202645:dg2 */
+       if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
+           IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+               wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+
+       if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
+           IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
                /*
                 * Wa_1607138336:tgl[a0],dg1[a0]
                 * Wa_1607063988:tgl[a0],dg1[a0]
@@ -1727,7 +2141,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                            GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
        }
 
-       if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
+       if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
                /*
                 * Wa_1606679103:tgl
                 * (see also Wa_1606682166:icl)
@@ -1762,7 +2176,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        }
 
        if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
-           IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
+           IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
                wa_masked_en(wal, GEN7_ROW_CHICKEN2,
@@ -1775,8 +2189,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
        }
 
-
-       if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
+       if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /*
                 * Wa_1607030317:tgl
@@ -2138,7 +2551,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        struct drm_i915_private *i915 = engine->i915;
 
        /* WaKBLVECSSemaphoreWaitPoll:kbl */
-       if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) {
+       if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
index 8b89215..bb99fc0 100644 (file)
@@ -35,9 +35,31 @@ static void mock_timeline_unpin(struct intel_timeline *tl)
        atomic_dec(&tl->pin_count);
 }
 
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+       struct i915_address_space *vm = &ggtt->vm;
+       struct drm_i915_private *i915 = vm->i915;
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+
+       obj = i915_gem_object_create_internal(i915, size);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       vma = i915_vma_instance(obj, vm, NULL);
+       if (IS_ERR(vma))
+               goto err;
+
+       return vma;
+
+err:
+       i915_gem_object_put(obj);
+       return vma;
+}
+
 static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 {
-       const unsigned long sz = PAGE_SIZE / 2;
+       const unsigned long sz = PAGE_SIZE;
        struct intel_ring *ring;
 
        ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
@@ -50,15 +72,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
        ring->vaddr = (void *)(ring + 1);
        atomic_set(&ring->pin_count, 1);
 
-       ring->vma = i915_vma_alloc();
-       if (!ring->vma) {
+       ring->vma = create_ring_vma(engine->gt->ggtt, PAGE_SIZE);
+       if (IS_ERR(ring->vma)) {
                kfree(ring);
                return NULL;
        }
-       i915_active_init(&ring->vma->active, NULL, NULL, 0);
-       __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
-       __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
-       ring->vma->node.size = sz;
 
        intel_ring_update_space(ring);
 
@@ -67,8 +85,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 
 static void mock_ring_free(struct intel_ring *ring)
 {
-       i915_active_fini(&ring->vma->active);
-       i915_vma_free(ring->vma);
+       i915_vma_put(ring->vma);
 
        kfree(ring);
 }
@@ -125,6 +142,7 @@ static void mock_context_unpin(struct intel_context *ce)
 
 static void mock_context_post_unpin(struct intel_context *ce)
 {
+       i915_vma_unpin(ce->ring->vma);
 }
 
 static void mock_context_destroy(struct kref *ref)
@@ -169,7 +187,7 @@ static int mock_context_alloc(struct intel_context *ce)
 static int mock_context_pre_pin(struct intel_context *ce,
                                struct i915_gem_ww_ctx *ww, void **unused)
 {
-       return 0;
+       return i915_vma_pin_ww(ce->ring->vma, ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
 }
 
 static int mock_context_pin(struct intel_context *ce, void *unused)
index 7556966..0bfd738 100644 (file)
@@ -214,6 +214,31 @@ static int live_engine_timestamps(void *arg)
        return 0;
 }
 
+static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
+{
+       ktime_t start, unused, dt;
+
+       if (!intel_engine_uses_guc(engine))
+               return 0;
+
+       /*
+        * In GuC mode of submission, the busyness stats may get updated after
+        * the batch starts running. Poll for a change in busyness and time out
+        * after 500 us.
+        */
+       start = ktime_get();
+       while (intel_engine_get_busy_time(engine, &unused) == busyness) {
+               dt = ktime_get() - start;
+               if (dt > 500000) {
+                       pr_err("active wait timed out %lld\n", dt);
+                       ENGINE_TRACE(engine, "active wait timed out %lld\n", dt);
+                       return -ETIME;
+               }
+       }
+
+       return 0;
+}
+
 static int live_engine_busy_stats(void *arg)
 {
        struct intel_gt *gt = arg;
@@ -232,6 +257,7 @@ static int live_engine_busy_stats(void *arg)
        GEM_BUG_ON(intel_gt_pm_is_awake(gt));
        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
+               ktime_t busyness, dummy;
                ktime_t de, dt;
                ktime_t t[2];
 
@@ -274,12 +300,19 @@ static int live_engine_busy_stats(void *arg)
                }
                i915_request_add(rq);
 
+               busyness = intel_engine_get_busy_time(engine, &dummy);
                if (!igt_wait_for_spinner(&spin, rq)) {
                        intel_gt_set_wedged(engine->gt);
                        err = -ETIME;
                        goto end;
                }
 
+               err = __spin_until_busier(engine, busyness);
+               if (err) {
+                       GEM_TRACE_DUMP();
+                       goto end;
+               }
+
                ENGINE_TRACE(engine, "measuring busy time\n");
                preempt_disable();
                de = intel_engine_get_busy_time(engine, &t[0]);
index 7e2d99d..e5ad4d5 100644 (file)
@@ -471,7 +471,8 @@ static int igt_reset_nop_engine(void *arg)
                count = 0;
 
                st_engine_heartbeat_disable(engine);
-               set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+                                           &gt->reset.flags));
                do {
                        int i;
 
@@ -528,7 +529,7 @@ static int igt_reset_nop_engine(void *arg)
                                break;
                        }
                } while (time_before(jiffies, end_time));
-               clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
                st_engine_heartbeat_enable(engine);
 
                pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -582,7 +583,8 @@ static int igt_reset_fail_engine(void *arg)
                }
 
                st_engine_heartbeat_disable(engine);
-               set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+                                           &gt->reset.flags));
 
                force_reset_timeout(engine);
                err = intel_engine_reset(engine, NULL);
@@ -679,7 +681,7 @@ static int igt_reset_fail_engine(void *arg)
 out:
                pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
 skip:
-               clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
                st_engine_heartbeat_enable(engine);
                intel_context_put(ce);
 
@@ -734,7 +736,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
                reset_engine_count = i915_reset_engine_count(global, engine);
 
                st_engine_heartbeat_disable(engine);
-               set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+                                           &gt->reset.flags));
                count = 0;
                do {
                        struct i915_request *rq = NULL;
@@ -824,7 +827,7 @@ restore:
                        if (err)
                                break;
                } while (time_before(jiffies, end_time));
-               clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
                st_engine_heartbeat_enable(engine);
                pr_info("%s: Completed %lu %s resets\n",
                        engine->name, count, active ? "active" : "idle");
@@ -1042,7 +1045,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
                yield(); /* start all threads before we begin */
 
                st_engine_heartbeat_disable_no_pm(engine);
-               set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+                                           &gt->reset.flags));
                do {
                        struct i915_request *rq = NULL;
                        struct intel_selftest_saved_policy saved;
@@ -1165,7 +1169,7 @@ restore:
                        if (err)
                                break;
                } while (time_before(jiffies, end_time));
-               clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
                st_engine_heartbeat_enable_no_pm(engine);
 
                pr_info("i915_reset_engine(%s:%s): %lu resets\n",
index ba10bd3..fe5d7d2 100644 (file)
@@ -144,6 +144,7 @@ enum intel_guc_action {
        INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
        INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
        INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
+       INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
        INTEL_GUC_ACTION_LIMIT
 };
 
index 31cf9fb..1cb4609 100644 (file)
@@ -138,6 +138,8 @@ struct intel_guc {
        u32 ads_regset_size;
        /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
        u32 ads_golden_ctxt_size;
+       /** @ads_engine_usage_size: size of engine usage in the ADS */
+       u32 ads_engine_usage_size;
 
        /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
        struct i915_vma *lrc_desc_pool;
@@ -172,6 +174,34 @@ struct intel_guc {
 
        /** @send_mutex: used to serialize the intel_guc_send actions */
        struct mutex send_mutex;
+
+       /**
+        * @timestamp: GT timestamp object that stores a copy of the timestamp
+        * and adjusts it for overflow using a worker.
+        */
+       struct {
+               /**
+                * @lock: Lock protecting the below fields and the engine stats.
+                */
+               spinlock_t lock;
+
+               /**
+                * @gt_stamp: 64 bit extended value of the GT timestamp.
+                */
+               u64 gt_stamp;
+
+               /**
+                * @ping_delay: Period for polling the GT timestamp for
+                * overflow.
+                */
+               unsigned long ping_delay;
+
+               /**
+                * @work: Periodic work to adjust GT timestamp, engine and
+                * context usage for overflows.
+                */
+               struct delayed_work work;
+       } timestamp;
 };
 
 static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
index 621c893..1a1edae 100644 (file)
@@ -26,6 +26,8 @@
  *      | guc_policies                          |
  *      +---------------------------------------+
  *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_engine_usage                      |
  *      +---------------------------------------+ <== static
  *      | guc_mmio_reg[countA] (engine 0.0)     |
  *      | guc_mmio_reg[countB] (engine 0.1)     |
@@ -47,6 +49,7 @@ struct __guc_ads_blob {
        struct guc_ads ads;
        struct guc_policies policies;
        struct guc_gt_system_info system_info;
+       struct guc_engine_usage engine_usage;
        /* From here on, location is dynamic! Refer to above diagram. */
        struct guc_mmio_reg regset[0];
 } __packed;
@@ -628,3 +631,21 @@ void intel_guc_ads_reset(struct intel_guc *guc)
 
        guc_ads_private_data_reset(guc);
 }
+
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
+{
+       struct __guc_ads_blob *blob = guc->ads_blob;
+       u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
+       u32 offset = base + ptr_offset(blob, engine_usage);
+
+       return offset;
+}
+
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
+{
+       struct intel_guc *guc = &engine->gt->uc.guc;
+       struct __guc_ads_blob *blob = guc->ads_blob;
+       u8 guc_class = engine_class_to_guc_class(engine->class);
+
+       return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
+}
index 3d85051..e74c110 100644 (file)
@@ -6,8 +6,11 @@
 #ifndef _INTEL_GUC_ADS_H_
 #define _INTEL_GUC_ADS_H_
 
+#include <linux/types.h>
+
 struct intel_guc;
 struct drm_printer;
+struct intel_engine_cs;
 
 int intel_guc_ads_create(struct intel_guc *guc);
 void intel_guc_ads_destroy(struct intel_guc *guc);
@@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
 void intel_guc_ads_reset(struct intel_guc *guc);
 void intel_guc_ads_print_policy_info(struct intel_guc *guc,
                                     struct drm_printer *p);
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
 
 #endif
index 722933e..7072e30 100644 (file)
@@ -294,6 +294,19 @@ struct guc_ads {
        u32 reserved[15];
 } __packed;
 
+/* Engine usage stats */
+struct guc_engine_usage_record {
+       u32 current_context_index;
+       u32 last_switch_in_stamp;
+       u32 reserved0;
+       u32 total_runtime;
+       u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+       struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
 /* GuC logging structures */
 
 enum guc_log_buffer_type {
index 65a3e7f..22c1c12 100644 (file)
@@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data,
        slpc_mem_set_param(data, enable_id, 0);
 }
 
-int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
-{
-       struct intel_guc *guc = slpc_to_guc(slpc);
-       struct drm_i915_private *i915 = slpc_to_i915(slpc);
-       u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
-       int err;
-
-       GEM_BUG_ON(slpc->vma);
-
-       err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
-       if (unlikely(err)) {
-               drm_err(&i915->drm,
-                       "Failed to allocate SLPC struct (err=%pe)\n",
-                       ERR_PTR(err));
-               return err;
-       }
-
-       slpc->max_freq_softlimit = 0;
-       slpc->min_freq_softlimit = 0;
-
-       return err;
-}
-
 static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 {
        struct slpc_shared_data *data;
@@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc,
        return guc_action_slpc_unset_param(guc, id);
 }
 
+static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+       struct drm_i915_private *i915 = slpc_to_i915(slpc);
+       struct intel_guc *guc = slpc_to_guc(slpc);
+       intel_wakeref_t wakeref;
+       int ret = 0;
+
+       lockdep_assert_held(&slpc->lock);
+
+       if (!intel_guc_is_ready(guc))
+               return -ENODEV;
+
+       /*
+        * This function is a little different from
+        * intel_guc_slpc_set_min_freq(). Softlimit will not be updated
+        * here since this is used to temporarily change min freq,
+        * for example, during a waitboost. Caller is responsible for
+        * checking bounds.
+        */
+
+       with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+               ret = slpc_set_param(slpc,
+                                    SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+                                    freq);
+               if (ret)
+                       drm_err(&i915->drm, "Unable to force min freq to %u: %d",
+                               freq, ret);
+       }
+
+       return ret;
+}
+
+static void slpc_boost_work(struct work_struct *work)
+{
+       struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+
+       /*
+        * Raise min freq to boost. It's possible that
+        * this is greater than current max. But it will
+        * certainly be limited by RP0. An error setting
+        * the min param is not fatal.
+        */
+       mutex_lock(&slpc->lock);
+       if (atomic_read(&slpc->num_waiters)) {
+               slpc_force_min_freq(slpc, slpc->boost_freq);
+               slpc->num_boosts++;
+       }
+       mutex_unlock(&slpc->lock);
+}
+
+int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
+{
+       struct intel_guc *guc = slpc_to_guc(slpc);
+       struct drm_i915_private *i915 = slpc_to_i915(slpc);
+       u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+       int err;
+
+       GEM_BUG_ON(slpc->vma);
+
+       err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
+       if (unlikely(err)) {
+               drm_err(&i915->drm,
+                       "Failed to allocate SLPC struct (err=%pe)\n",
+                       ERR_PTR(err));
+               return err;
+       }
+
+       slpc->max_freq_softlimit = 0;
+       slpc->min_freq_softlimit = 0;
+
+       slpc->boost_freq = 0;
+       atomic_set(&slpc->num_waiters, 0);
+       slpc->num_boosts = 0;
+
+       mutex_init(&slpc->lock);
+       INIT_WORK(&slpc->boost_work, slpc_boost_work);
+
+       return err;
+}
+
 static const char *slpc_global_state_to_string(enum slpc_global_state state)
 {
        switch (state) {
@@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
            val > slpc->max_freq_softlimit)
                return -EINVAL;
 
+       /* Need a lock now since waitboost can be modifying min as well */
+       mutex_lock(&slpc->lock);
+
        with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+
                ret = slpc_set_param(slpc,
                                     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
                                     val);
@@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
        if (!ret)
                slpc->min_freq_softlimit = val;
 
+       mutex_unlock(&slpc->lock);
+
        return ret;
 }
 
@@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
                                        GT_FREQUENCY_MULTIPLIER;
        slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) *
                                        GT_FREQUENCY_MULTIPLIER;
+
+       if (!slpc->boost_freq)
+               slpc->boost_freq = slpc->rp0_freq;
 }
 
 /*
@@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
        return 0;
 }
 
+int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+       int ret = 0;
+
+       if (val < slpc->min_freq || val > slpc->rp0_freq)
+               return -EINVAL;
+
+       mutex_lock(&slpc->lock);
+
+       if (slpc->boost_freq != val) {
+               /* Apply only if there are active waiters */
+               if (atomic_read(&slpc->num_waiters)) {
+                       ret = slpc_force_min_freq(slpc, val);
+                       if (ret) {
+                               ret = -EIO;
+                               goto done;
+                       }
+               }
+
+               slpc->boost_freq = val;
+       }
+
+done:
+       mutex_unlock(&slpc->lock);
+       return ret;
+}
+
+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
+{
+       /*
+        * Return min back to the softlimit.
+        * This is called during request retire,
+        * so we don't need to fail that if the
+        * set_param fails.
+        */
+       mutex_lock(&slpc->lock);
+       if (atomic_dec_and_test(&slpc->num_waiters))
+               slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
+       mutex_unlock(&slpc->lock);
+}
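The producer side of this waitboost lifecycle lives in intel_rps.c (part of this merge, but not shown in this hunk): roughly, the first waiter on a request bumps num_waiters and schedules boost_work, and request retirement reaches intel_guc_slpc_dec_waiters() via the intel_rps_dec_waiters() call visible further down in i915_request.c. A sketch under those assumptions, with invented example_* helper names:

/* Sketch only: the example_* helpers are illustrative, not part of the patch. */
static void example_boost_on_wait(struct intel_guc_slpc *slpc)
{
	/* Only the 0 -> 1 transition needs to kick the worker. */
	if (!atomic_fetch_inc(&slpc->num_waiters))
		schedule_work(&slpc->boost_work);
}

static void example_unboost_on_retire(struct intel_guc_slpc *slpc)
{
	/* The last waiter drops min frequency back to the softlimit. */
	intel_guc_slpc_dec_waiters(slpc);
}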
+
 int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p)
 {
        struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
                                   slpc_decode_max_freq(slpc));
                        drm_printf(p, "\tMin freq: %u MHz\n",
                                   slpc_decode_min_freq(slpc));
+                       drm_printf(p, "\twaitboosts: %u\n",
+                                  slpc->num_boosts);
                }
        }
 
index e45054d..0caa8fe 100644 (file)
@@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
 void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
 int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val);
 int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val);
 int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
 int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
 int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p);
 void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
+void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
 
 #endif
index 41d1352..bf5b9a5 100644 (file)
@@ -6,6 +6,9 @@
 #ifndef _INTEL_GUC_SLPC_TYPES_H_
 #define _INTEL_GUC_SLPC_TYPES_H_
 
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <linux/types.h>
 
 #define SLPC_RESET_TIMEOUT_MS 5
@@ -20,10 +23,20 @@ struct intel_guc_slpc {
        u32 min_freq;
        u32 rp0_freq;
        u32 rp1_freq;
+       u32 boost_freq;
 
        /* frequency softlimits */
        u32 min_freq_softlimit;
        u32 max_freq_softlimit;
+
+       /* Protects set/reset of boost freq
+        * and value of num_waiters
+        */
+       struct mutex lock;
+
+       struct work_struct boost_work;
+       atomic_t num_waiters;
+       u32 num_boosts;
 };
 
 #endif
index c48557d..a7108b3 100644 (file)
@@ -13,6 +13,7 @@
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_clock_utils.h"
 #include "gt/intel_gt_irq.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_requests.h"
@@ -21,6 +22,7 @@
 #include "gt/intel_mocs.h"
 #include "gt/intel_ring.h"
 
+#include "intel_guc_ads.h"
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
@@ -1077,6 +1079,266 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
        xa_unlock_irqrestore(&guc->context_lookup, flags);
 }
 
+/*
+ * GuC stores busyness stats for each engine at context in/out boundaries. A
+ * context 'in' logs execution start time, 'out' adds in -> out delta to total.
+ * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
+ * GuC.
+ *
+ * __i915_pmu_event_read samples engine busyness. When sampling, if context id
+ * is valid (!= ~0) and start is non-zero, the engine is considered to be
+ * active. For an active engine total busyness = total + (now - start), where
+ * 'now' is the time at which the busyness is sampled. For inactive engine,
+ * total busyness = total.
+ *
+ * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
+ *
+ * The start and total values provided by GuC are 32 bits and wrap around in a
+ * few minutes. Since perf pmu provides busyness as 64 bit monotonically
+ * increasing ns values, there is a need for this implementation to account for
+ * overflows and extend the GuC provided values to 64 bits before returning
+ * busyness to the user. In order to do that, a worker runs periodically at
+ * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
+ * 27 seconds for a gt clock frequency of 19.2 MHz).
+ */
+
+#define WRAP_TIME_CLKS U32_MAX
+#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
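Working through the arithmetic in the comment above (19.2 MHz is the GT clock frequency the comment itself assumes):

/*
 * wrap period = U32_MAX clocks / 19,200,000 Hz ~= 223.7 s
 * poll period = wrap period / 8                ~= 27-28 s
 *
 * which matches what intel_guc_submission_init() programs below:
 *     guc->timestamp.ping_delay =
 *             (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
 */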
+
+static void
+__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
+{
+       u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
+       u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
+
+       if (new_start == lower_32_bits(*prev_start))
+               return;
+
+       if (new_start < gt_stamp_last &&
+           (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
+               gt_stamp_hi++;
+
+       if (new_start > gt_stamp_last &&
+           (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
+               gt_stamp_hi--;
+
+       *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
+       struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+       struct intel_engine_guc_stats *stats = &engine->stats.guc;
+       struct intel_guc *guc = &engine->gt->uc.guc;
+       u32 last_switch = rec->last_switch_in_stamp;
+       u32 ctx_id = rec->current_context_index;
+       u32 total = rec->total_runtime;
+
+       lockdep_assert_held(&guc->timestamp.lock);
+
+       stats->running = ctx_id != ~0U && last_switch;
+       if (stats->running)
+               __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
+
+       /*
+        * Instead of adjusting the total for overflow, just add the
+        * difference from previous sample stats->total_gt_clks
+        */
+       if (total && total != ~0U) {
+               stats->total_gt_clks += (u32)(total - stats->prev_total);
+               stats->prev_total = total;
+       }
+}
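A quick numeric illustration of the wrap-safe delta above (sample values invented):

/*
 * prev_total = 0xfffffff0, new total = 0x00000010
 * (u32)(total - prev_total) = 0x20 -> 32 clocks accumulated
 *
 * Unsigned 32-bit subtraction absorbs a single counter wrap between
 * samples, which the periodic ping worker is there to guarantee.
 */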
+
+static void guc_update_pm_timestamp(struct intel_guc *guc,
+                                   struct intel_engine_cs *engine,
+                                   ktime_t *now)
+{
+       u32 gt_stamp_now, gt_stamp_hi;
+
+       lockdep_assert_held(&guc->timestamp.lock);
+
+       gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
+       gt_stamp_now = intel_uncore_read(engine->uncore,
+                                        RING_TIMESTAMP(engine->mmio_base));
+       *now = ktime_get();
+
+       if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+               gt_stamp_hi++;
+
+       guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+}
+
+/*
+ * Unlike the execlist mode of submission total and active times are in terms of
+ * gt clocks. The *now parameter is retained to return the cpu time at which the
+ * busyness was sampled.
+ */
+static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
+{
+       struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
+       struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
+       struct intel_gt *gt = engine->gt;
+       struct intel_guc *guc = &gt->uc.guc;
+       u64 total, gt_stamp_saved;
+       unsigned long flags;
+       u32 reset_count;
+
+       spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+       /*
+        * If a reset happened, we risk reading partially updated
+        * engine busyness from GuC, so we just use the driver stored
+        * copy of busyness. Synchronize with gt reset using reset_count.
+        */
+       reset_count = i915_reset_count(gpu_error);
+
+       *now = ktime_get();
+
+       /*
+        * The active busyness depends on start_gt_clk and gt_stamp.
+        * gt_stamp is updated by i915 only when gt is awake and the
+        * start_gt_clk is derived from GuC state. To get a consistent
+        * view of activity, we query the GuC state only if gt is awake.
+        */
+       stats_saved = *stats;
+       gt_stamp_saved = guc->timestamp.gt_stamp;
+       if (intel_gt_pm_get_if_awake(gt)) {
+               guc_update_engine_gt_clks(engine);
+               guc_update_pm_timestamp(guc, engine, now);
+               intel_gt_pm_put_async(gt);
+               if (i915_reset_count(gpu_error) != reset_count) {
+                       *stats = stats_saved;
+                       guc->timestamp.gt_stamp = gt_stamp_saved;
+               }
+       }
+
+       total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
+       if (stats->running) {
+               u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+               total += intel_gt_clock_interval_to_ns(gt, clk);
+       }
+
+       spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+
+       return ns_to_ktime(total);
+}
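Callers of this vfunc (e.g. the PMU) only ever see monotonically increasing nanoseconds. A minimal sampling sketch using the intel_engine_get_busy_time() helper already exercised by the selftests above (the 100 ms window and the example_ name are arbitrary; assumes the usual linux/delay.h and linux/math64.h includes):

/* Sketch: per-mille utilisation over an arbitrary 100 ms window. */
static u64 example_engine_util_permille(struct intel_engine_cs *engine)
{
	ktime_t t0, t1, busy0, busy1;

	busy0 = intel_engine_get_busy_time(engine, &t0);
	msleep(100);
	busy1 = intel_engine_get_busy_time(engine, &t1);

	return div64_u64(1000 * ktime_to_ns(ktime_sub(busy1, busy0)),
			 ktime_to_ns(ktime_sub(t1, t0)));
}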
+
+static void __reset_guc_busyness_stats(struct intel_guc *guc)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       unsigned long flags;
+       ktime_t unused;
+
+       cancel_delayed_work_sync(&guc->timestamp.work);
+
+       spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+       for_each_engine(engine, gt, id) {
+               guc_update_pm_timestamp(guc, engine, &unused);
+               guc_update_engine_gt_clks(engine);
+               engine->stats.guc.prev_total = 0;
+       }
+
+       spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
+static void __update_guc_busyness_stats(struct intel_guc *guc)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       unsigned long flags;
+       ktime_t unused;
+
+       spin_lock_irqsave(&guc->timestamp.lock, flags);
+       for_each_engine(engine, gt, id) {
+               guc_update_pm_timestamp(guc, engine, &unused);
+               guc_update_engine_gt_clks(engine);
+       }
+       spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
+static void guc_timestamp_ping(struct work_struct *wrk)
+{
+       struct intel_guc *guc = container_of(wrk, typeof(*guc),
+                                            timestamp.work.work);
+       struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
+       struct intel_gt *gt = guc_to_gt(guc);
+       intel_wakeref_t wakeref;
+       int srcu, ret;
+
+       /*
+        * Synchronize with gt reset to make sure the worker does not
+        * corrupt the engine/guc stats.
+        */
+       ret = intel_gt_reset_trylock(gt, &srcu);
+       if (ret)
+               return;
+
+       with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+               __update_guc_busyness_stats(guc);
+
+       intel_gt_reset_unlock(gt, srcu);
+
+       mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+                        guc->timestamp.ping_delay);
+}
+
+static int guc_action_enable_usage_stats(struct intel_guc *guc)
+{
+       u32 offset = intel_guc_engine_usage_offset(guc);
+       u32 action[] = {
+               INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
+               offset,
+               0,
+       };
+
+       return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static void guc_init_engine_stats(struct intel_guc *guc)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       intel_wakeref_t wakeref;
+
+       mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+                        guc->timestamp.ping_delay);
+
+       with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
+               int ret = guc_action_enable_usage_stats(guc);
+
+               if (ret)
+                       drm_err(&gt->i915->drm,
+                               "Failed to enable usage stats: %d!\n", ret);
+       }
+}
+
+void intel_guc_busyness_park(struct intel_gt *gt)
+{
+       struct intel_guc *guc = &gt->uc.guc;
+
+       if (!guc_submission_initialized(guc))
+               return;
+
+       cancel_delayed_work(&guc->timestamp.work);
+       __update_guc_busyness_stats(guc);
+}
+
+void intel_guc_busyness_unpark(struct intel_gt *gt)
+{
+       struct intel_guc *guc = &gt->uc.guc;
+
+       if (!guc_submission_initialized(guc))
+               return;
+
+       mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+                        guc->timestamp.ping_delay);
+}
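These park/unpark helpers are meant to be driven from the GT power-management code (intel_gt_pm.c is in this merge but not shown here), so the ping worker only runs while the GT is awake. A rough sketch of the expected call sites, with invented example_ wrappers:

/* Sketch only: the real hooks live in intel_gt_pm.c. */
static void example_gt_park(struct intel_gt *gt)
{
	/* Snapshot busyness and stop the ping worker while parked. */
	intel_guc_busyness_park(gt);
}

static void example_gt_unpark(struct intel_gt *gt)
{
	/* Restart the ping worker once the GT is awake again. */
	intel_guc_busyness_unpark(gt);
}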
+
 static inline bool
 submission_disabled(struct intel_guc *guc)
 {
@@ -1138,6 +1400,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
        intel_gt_park_heartbeats(guc_to_gt(guc));
        disable_submission(guc);
        guc->interrupts.disable(guc);
+       __reset_guc_busyness_stats(guc);
 
        /* Flush IRQ handler */
        spin_lock_irq(&guc_to_gt(guc)->irq_lock);
@@ -1484,6 +1747,7 @@ static void destroyed_worker_func(struct work_struct *w);
  */
 int intel_guc_submission_init(struct intel_guc *guc)
 {
+       struct intel_gt *gt = guc_to_gt(guc);
        int ret;
 
        if (guc->lrc_desc_pool)
@@ -1512,6 +1776,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
        if (!guc->submission_state.guc_ids_bitmap)
                return -ENOMEM;
 
+       spin_lock_init(&guc->timestamp.lock);
+       INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
+       guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+
        return 0;
 }
 
@@ -3369,7 +3637,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
                engine->emit_flush = gen12_emit_flush_xcs;
        }
        engine->set_default_submission = guc_set_default_submission;
+       engine->busyness = guc_engine_busyness;
 
+       engine->flags |= I915_ENGINE_SUPPORTS_STATS;
        engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->flags |= I915_ENGINE_HAS_TIMESLICES;
 
@@ -3468,6 +3738,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
 void intel_guc_submission_enable(struct intel_guc *guc)
 {
        guc_init_lrc_mapping(guc);
+       guc_init_engine_stats(guc);
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
@@ -3695,6 +3966,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
                                        const u32 *msg, u32 len)
 {
        struct intel_context *ce;
+       unsigned long flags;
        int desc_idx;
 
        if (unlikely(len != 1)) {
@@ -3703,11 +3975,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
        }
 
        desc_idx = msg[0];
+
+       /*
+        * The context lookup uses the xarray but lookups only require an RCU lock
+        * not the full spinlock. So take the lock explicitly and keep it until the
+        * context has been reference count locked to ensure it can't be destroyed
+        * asynchronously until the reset is done.
+        */
+       xa_lock_irqsave(&guc->context_lookup, flags);
        ce = g2h_context_lookup(guc, desc_idx);
+       if (ce)
+               intel_context_get(ce);
+       xa_unlock_irqrestore(&guc->context_lookup, flags);
+
        if (unlikely(!ce))
                return -EPROTO;
 
        guc_handle_context_reset(guc, ce);
+       intel_context_put(ce);
 
        return 0;
 }
index c7ef44f..5a95a9f 100644 (file)
@@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
                                    struct i915_request *hung_rq,
                                    struct drm_printer *m);
+void intel_guc_busyness_park(struct intel_gt *gt);
+void intel_guc_busyness_unpark(struct intel_gt *gt);
 
 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
 
index b18a250..e140628 100644 (file)
@@ -415,10 +415,14 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
        if (ret < 0)
                return ret;
 
-       ret = intel_uncore_init_mmio(&dev_priv->uncore);
+       ret = intel_uncore_setup_mmio(&dev_priv->uncore);
        if (ret < 0)
                goto err_bridge;
 
+       ret = intel_uncore_init_mmio(&dev_priv->uncore);
+       if (ret)
+               goto err_mmio;
+
        /* Try to make sure MCHBAR is enabled before poking at it */
        intel_setup_mchbar(dev_priv);
        intel_device_info_runtime_init(dev_priv);
@@ -435,6 +439,8 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
 err_uncore:
        intel_teardown_mchbar(dev_priv);
        intel_uncore_fini_mmio(&dev_priv->uncore);
+err_mmio:
+       intel_uncore_cleanup_mmio(&dev_priv->uncore);
 err_bridge:
        pci_dev_put(dev_priv->bridge_dev);
 
@@ -449,6 +455,7 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
 {
        intel_teardown_mchbar(dev_priv);
        intel_uncore_fini_mmio(&dev_priv->uncore);
+       intel_uncore_cleanup_mmio(&dev_priv->uncore);
        pci_dev_put(dev_priv->bridge_dev);
 }
 
@@ -807,7 +814,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                return PTR_ERR(i915);
 
        /* Disable nuclear pageflip by default on pre-ILK */
-       if (!i915->params.nuclear_pageflip && match_info->graphics_ver < 5)
+       if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5)
                i915->drm.driver_features &= ~DRIVER_ATOMIC;
 
        /*
index 1225621..c6473aa 100644 (file)
@@ -191,8 +191,6 @@ struct i915_hotplug {
         I915_GEM_DOMAIN_VERTEX)
 
 struct drm_i915_private;
-struct i915_mm_struct;
-struct i915_mmu_object;
 
 struct drm_i915_file_private {
        struct drm_i915_private *dev_priv;
@@ -1327,15 +1325,15 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
 
 #define IP_VER(ver, rel)               ((ver) << 8 | (rel))
 
-#define GRAPHICS_VER(i915)             (INTEL_INFO(i915)->graphics_ver)
-#define GRAPHICS_VER_FULL(i915)                IP_VER(INTEL_INFO(i915)->graphics_ver, \
-                                              INTEL_INFO(i915)->graphics_rel)
+#define GRAPHICS_VER(i915)             (INTEL_INFO(i915)->graphics.ver)
+#define GRAPHICS_VER_FULL(i915)                IP_VER(INTEL_INFO(i915)->graphics.ver, \
+                                              INTEL_INFO(i915)->graphics.rel)
 #define IS_GRAPHICS_VER(i915, from, until) \
        (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until))
 
-#define MEDIA_VER(i915)                        (INTEL_INFO(i915)->media_ver)
-#define MEDIA_VER_FULL(i915)           IP_VER(INTEL_INFO(i915)->media_ver, \
-                                              INTEL_INFO(i915)->media_rel)
+#define MEDIA_VER(i915)                        (INTEL_INFO(i915)->media.ver)
+#define MEDIA_VER_FULL(i915)           IP_VER(INTEL_INFO(i915)->media.ver, \
+                                              INTEL_INFO(i915)->media.rel)
 #define IS_MEDIA_VER(i915, from, until) \
        (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until))
 
@@ -1348,15 +1346,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
 #define HAS_DSB(dev_priv)      (INTEL_INFO(dev_priv)->display.has_dsb)
 
 #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step)
-#define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step)
+#define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step)
+#define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step)
 
 #define IS_DISPLAY_STEP(__i915, since, until) \
        (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \
         INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until))
 
-#define IS_GT_STEP(__i915, since, until) \
-       (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \
-        INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until))
+#define IS_GRAPHICS_STEP(__i915, since, until) \
+       (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \
+        INTEL_GRAPHICS_STEP(__i915) >= (since) && INTEL_GRAPHICS_STEP(__i915) < (until))
+
+#define IS_MEDIA_STEP(__i915, since, until) \
+       (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \
+        INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until))
 
 static __always_inline unsigned int
 __platform_mask_index(const struct intel_runtime_info *info,
@@ -1530,15 +1533,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define IS_TGL_Y(dev_priv) \
        IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX)
 
-#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until))
+#define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until))
 
-#define IS_KBL_GT_STEP(dev_priv, since, until) \
-       (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until))
+#define IS_KBL_GRAPHICS_STEP(dev_priv, since, until) \
+       (IS_KABYLAKE(dev_priv) && IS_GRAPHICS_STEP(dev_priv, since, until))
 #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \
        (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until))
 
-#define IS_JSL_EHL_GT_STEP(p, since, until) \
-       (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until))
+#define IS_JSL_EHL_GRAPHICS_STEP(p, since, until) \
+       (IS_JSL_EHL(p) && IS_GRAPHICS_STEP(p, since, until))
 #define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \
        (IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until))
 
@@ -1546,19 +1549,19 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
        (IS_TIGERLAKE(__i915) && \
         IS_DISPLAY_STEP(__i915, since, until))
 
-#define IS_TGL_UY_GT_STEP(__i915, since, until) \
+#define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \
        ((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
-        IS_GT_STEP(__i915, since, until))
+        IS_GRAPHICS_STEP(__i915, since, until))
 
-#define IS_TGL_GT_STEP(__i915, since, until) \
+#define IS_TGL_GRAPHICS_STEP(__i915, since, until) \
        (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
-        IS_GT_STEP(__i915, since, until))
+        IS_GRAPHICS_STEP(__i915, since, until))
 
 #define IS_RKL_DISPLAY_STEP(p, since, until) \
        (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until))
 
-#define IS_DG1_GT_STEP(p, since, until) \
-       (IS_DG1(p) && IS_GT_STEP(p, since, until))
+#define IS_DG1_GRAPHICS_STEP(p, since, until) \
+       (IS_DG1(p) && IS_GRAPHICS_STEP(p, since, until))
 #define IS_DG1_DISPLAY_STEP(p, since, until) \
        (IS_DG1(p) && IS_DISPLAY_STEP(p, since, until))
 
@@ -1566,20 +1569,20 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
        (IS_ALDERLAKE_S(__i915) && \
         IS_DISPLAY_STEP(__i915, since, until))
 
-#define IS_ADLS_GT_STEP(__i915, since, until) \
+#define IS_ADLS_GRAPHICS_STEP(__i915, since, until) \
        (IS_ALDERLAKE_S(__i915) && \
-        IS_GT_STEP(__i915, since, until))
+        IS_GRAPHICS_STEP(__i915, since, until))
 
 #define IS_ADLP_DISPLAY_STEP(__i915, since, until) \
        (IS_ALDERLAKE_P(__i915) && \
         IS_DISPLAY_STEP(__i915, since, until))
 
-#define IS_ADLP_GT_STEP(__i915, since, until) \
+#define IS_ADLP_GRAPHICS_STEP(__i915, since, until) \
        (IS_ALDERLAKE_P(__i915) && \
-        IS_GT_STEP(__i915, since, until))
+        IS_GRAPHICS_STEP(__i915, since, until))
 
-#define IS_XEHPSDV_GT_STEP(__i915, since, until) \
-       (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until))
+#define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \
+       (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
 
 /*
  * DG2 hardware steppings are a bit unusual.  The hardware design was forked
@@ -1595,9 +1598,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
  * and stepping-specific logic will be applied with a general DG2-wide stepping
  * number.
  */
-#define IS_DG2_GT_STEP(__i915, variant, since, until) \
+#define IS_DG2_GRAPHICS_STEP(__i915, variant, since, until) \
        (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \
-        IS_GT_STEP(__i915, since, until))
+        IS_GRAPHICS_STEP(__i915, since, until))
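For illustration, a workaround guard keyed on the forked DG2 variants would look roughly like this (the stepping bounds and the register/bit chosen here are placeholders, not taken from this merge's workaround changes):

/* Illustrative only: bounds and register are placeholders. */
static void example_dg2_workarounds(struct drm_i915_private *i915,
				    struct i915_wa_list *wal)
{
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
		wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
}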
 
 #define IS_DG2_DISP_STEP(__i915, since, until) \
        (IS_DG2(__i915) && \
@@ -1819,6 +1822,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
         */
        while (atomic_read(&i915->mm.free_count)) {
                flush_work(&i915->mm.free_work);
+               flush_delayed_work(&i915->bdev.wq);
                rcu_barrier();
        }
 }
@@ -1933,6 +1937,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm);
 struct drm_i915_gem_object *
 i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
                                phys_addr_t size);
+struct drm_i915_gem_object *
+__i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
+                                 const struct drm_i915_gem_object_ops *ops,
+                                 phys_addr_t size);
 
 /* i915_gem_tiling.c */
 static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
index 981e383..527228d 100644 (file)
@@ -764,7 +764,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
         * perspective, requiring manual detiling by the client.
         */
        if (!i915_gem_object_has_struct_page(obj) ||
-           cpu_write_needs_clflush(obj))
+           i915_gem_cpu_write_needs_clflush(obj))
                /* Note that the gtt paths might fail with non-page-backed user
                 * pointers (e.g. gtt mappings when moving data between
                 * textures). Fallback to the shmem path in that case.
@@ -1005,7 +1005,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
                        obj->ops->adjust_lru(obj);
        }
 
-       if (i915_gem_object_has_pages(obj)) {
+       if (i915_gem_object_has_pages(obj) ||
+           i915_gem_object_has_self_managed_shrink_list(obj)) {
                unsigned long flags;
 
                spin_lock_irqsave(&i915->mm.obj_lock, flags);
index 77680bc..038a9ec 100644 (file)
@@ -2772,7 +2772,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 {
        struct drm_i915_private * const i915 = arg;
        struct intel_gt *gt = &i915->gt;
-       void __iomem * const regs = i915->uncore.regs;
+       void __iomem * const regs = gt->uncore->regs;
        u32 master_tile_ctl, master_ctl;
        u32 gu_misc_iir;
 
@@ -3173,11 +3173,12 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
 
 static void gen11_irq_reset(struct drm_i915_private *dev_priv)
 {
-       struct intel_uncore *uncore = &dev_priv->uncore;
+       struct intel_gt *gt = &dev_priv->gt;
+       struct intel_uncore *uncore = gt->uncore;
 
        gen11_master_intr_disable(dev_priv->uncore.regs);
 
-       gen11_gt_irq_reset(&dev_priv->gt);
+       gen11_gt_irq_reset(gt);
        gen11_display_irq_reset(dev_priv);
 
        GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
@@ -3186,11 +3187,12 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv)
 
 static void dg1_irq_reset(struct drm_i915_private *dev_priv)
 {
-       struct intel_uncore *uncore = &dev_priv->uncore;
+       struct intel_gt *gt = &dev_priv->gt;
+       struct intel_uncore *uncore = gt->uncore;
 
        dg1_master_intr_disable(dev_priv->uncore.regs);
 
-       gen11_gt_irq_reset(&dev_priv->gt);
+       gen11_gt_irq_reset(gt);
        gen11_display_irq_reset(dev_priv);
 
        GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
@@ -3869,13 +3871,14 @@ static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv)
 
 static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
 {
-       struct intel_uncore *uncore = &dev_priv->uncore;
+       struct intel_gt *gt = &dev_priv->gt;
+       struct intel_uncore *uncore = gt->uncore;
        u32 gu_misc_masked = GEN11_GU_MISC_GSE;
 
        if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
                icp_irq_postinstall(dev_priv);
 
-       gen11_gt_irq_postinstall(&dev_priv->gt);
+       gen11_gt_irq_postinstall(gt);
        gen11_de_irq_postinstall(dev_priv);
 
        GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked);
@@ -3886,10 +3889,11 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
 
 static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
 {
-       struct intel_uncore *uncore = &dev_priv->uncore;
+       struct intel_gt *gt = &dev_priv->gt;
+       struct intel_uncore *uncore = gt->uncore;
        u32 gu_misc_masked = GEN11_GU_MISC_GSE;
 
-       gen11_gt_irq_postinstall(&dev_priv->gt);
+       gen11_gt_irq_postinstall(gt);
 
        GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked);
 
@@ -3900,8 +3904,8 @@ static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
                                   GEN11_DISPLAY_IRQ_ENABLE);
        }
 
-       dg1_master_intr_enable(dev_priv->uncore.regs);
-       intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR);
+       dg1_master_intr_enable(uncore->regs);
+       intel_uncore_posting_read(uncore, DG1_MSTR_TILE_INTR);
 }
 
 static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv)
index 169837d..5e67958 100644 (file)
@@ -32,8 +32,8 @@
 
 #define PLATFORM(x) .platform = (x)
 #define GEN(x) \
-       .graphics_ver = (x), \
-       .media_ver = (x), \
+       .graphics.ver = (x), \
+       .media.ver = (x), \
        .display.ver = (x)
 
 #define I845_PIPE_OFFSETS \
@@ -899,7 +899,7 @@ static const struct intel_device_info rkl_info = {
 static const struct intel_device_info dg1_info = {
        GEN12_FEATURES,
        DGFX_FEATURES,
-       .graphics_rel = 10,
+       .graphics.rel = 10,
        PLATFORM(INTEL_DG1),
        .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
        .require_force_probe = 1,
@@ -986,8 +986,8 @@ static const struct intel_device_info adl_p_info = {
                      I915_GTT_PAGE_SIZE_2M
 
 #define XE_HP_FEATURES \
-       .graphics_ver = 12, \
-       .graphics_rel = 50, \
+       .graphics.ver = 12, \
+       .graphics.rel = 50, \
        XE_HP_PAGE_SIZES, \
        .dma_mask_size = 46, \
        .has_64bit_reloc = 1, \
@@ -1005,8 +1005,8 @@ static const struct intel_device_info adl_p_info = {
        .ppgtt_type = INTEL_PPGTT_FULL
 
 #define XE_HPM_FEATURES \
-       .media_ver = 12, \
-       .media_rel = 50
+       .media.ver = 12, \
+       .media.rel = 50
 
 __maybe_unused
 static const struct intel_device_info xehpsdv_info = {
@@ -1030,8 +1030,8 @@ static const struct intel_device_info dg2_info = {
        XE_HPM_FEATURES,
        XE_LPD_FEATURES,
        DGFX_FEATURES,
-       .graphics_rel = 55,
-       .media_rel = 55,
+       .graphics.rel = 55,
+       .media.rel = 55,
        PLATFORM(INTEL_DG2),
        .platform_engine_mask =
                BIT(RCS0) | BIT(BCS0) |
index bcee121..1891e7f 100644 (file)
@@ -498,6 +498,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   ECOBITS_PPGTT_CACHE64B       (3 << 8)
 #define   ECOBITS_PPGTT_CACHE4B                (0 << 8)
 
+#define GEN12_GAMCNTRL_CTRL                    _MMIO(0xcf54)
+#define   INVALIDATION_BROADCAST_MODE_DIS      REG_BIT(12)
+#define   GLOBAL_INVALIDATION_MODE             REG_BIT(2)
+
+#define GEN12_GAMSTLB_CTRL             _MMIO(0xcf4c)
+#define   CONTROL_BLOCK_CLKGATE_DIS    REG_BIT(12)
+#define   EGRESS_BLOCK_CLKGATE_DIS     REG_BIT(11)
+#define   TAG_BLOCK_CLKGATE_DIS                REG_BIT(7)
+
+#define GEN12_MERT_MOD_CTRL            _MMIO(0xcf28)
+#define   FORCE_MISS_FTLB              REG_BIT(3)
+
 #define GAB_CTL                                _MMIO(0x24000)
 #define   GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8)
 
@@ -719,6 +731,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define GEN12_OA_TLB_INV_CR _MMIO(0xceec)
 
+#define GEN12_SQCM             _MMIO(0x8724)
+#define   EN_32B_ACCESS                REG_BIT(30)
+
 /* Gen12 OAR unit */
 #define GEN12_OAR_OACONTROL _MMIO(0x2960)
 #define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
@@ -770,6 +785,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define EU_PERF_CNTL5      _MMIO(0xe55c)
 #define EU_PERF_CNTL6      _MMIO(0xe65c)
 
+#define RT_CTRL                        _MMIO(0xe530)
+#define  DIS_NULL_QUERY                REG_BIT(10)
+
 /*
  * OA Boolean state
  */
@@ -2662,6 +2680,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   RING_WAIT            (1 << 11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE  (1 << 10) /* gen6+ */
 
+#define GUCPMTIMESTAMP          _MMIO(0xC3E8)
+
 /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */
 #define GEN8_RING_CS_GPR(base, n)      _MMIO((base) + 0x600 + (n) * 8)
 #define GEN8_RING_CS_GPR_UDW(base, n)  _MMIO((base) + 0x600 + (n) * 8 + 4)
@@ -2772,6 +2792,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VDBOX_CGCTL3F10(base)          _MMIO((base) + 0x3f10)
 #define   IECPUNIT_CLKGATE_DIS         REG_BIT(22)
 
+#define VDBOX_CGCTL3F18(base)          _MMIO((base) + 0x3f18)
+#define   ALNUNIT_CLKGATE_DIS          REG_BIT(13)
+
 #define ERROR_GEN6     _MMIO(0x40a0)
 #define GEN7_ERR_INT   _MMIO(0x44040)
 #define   ERR_INT_POISON               (1 << 31)
@@ -2870,6 +2893,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
 #define   GEN11_ENABLE_32_PLANE_MODE (1 << 7)
 
+#define SCCGCTL94DC            _MMIO(0x94dc)
+#define   CG3DDISURB           REG_BIT(14)
+
+#define MLTICTXCTL             _MMIO(0xb170)
+#define   TDONRENDER           REG_BIT(2)
+
+#define L3SQCREG1_CCS0         _MMIO(0xb200)
+#define   FLUSHALLNONCOH       REG_BIT(5)
+
 /* WaClearTdlStateAckDirtyBits */
 #define GEN8_STATE_ACK         _MMIO(0x20F0)
 #define GEN9_STATE_ACK_SLICE1  _MMIO(0x20F8)
@@ -3106,7 +3138,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
 
 #define GEN10_CACHE_MODE_SS                    _MMIO(0xe420)
-#define   FLOAT_BLEND_OPTIMIZATION_ENABLE      (1 << 4)
+#define   ENABLE_PREFETCH_INTO_IC              REG_BIT(3)
+#define   FLOAT_BLEND_OPTIMIZATION_ENABLE      REG_BIT(4)
 
 /* Fuse readout registers for GT */
 #define HSW_PAVP_FUSE1                 _MMIO(0x911C)
@@ -4278,21 +4311,62 @@ enum {
 /*
  * GEN10 clock gating regs
  */
+
+#define UNSLCGCTL9440                  _MMIO(0x9440)
+#define   GAMTLBOACS_CLKGATE_DIS       REG_BIT(28)
+#define   GAMTLBVDBOX5_CLKGATE_DIS     REG_BIT(27)
+#define   GAMTLBVDBOX6_CLKGATE_DIS     REG_BIT(26)
+#define   GAMTLBVDBOX3_CLKGATE_DIS     REG_BIT(24)
+#define   GAMTLBVDBOX4_CLKGATE_DIS     REG_BIT(23)
+#define   GAMTLBVDBOX7_CLKGATE_DIS     REG_BIT(22)
+#define   GAMTLBVDBOX2_CLKGATE_DIS     REG_BIT(21)
+#define   GAMTLBVDBOX0_CLKGATE_DIS     REG_BIT(17)
+#define   GAMTLBKCR_CLKGATE_DIS                REG_BIT(16)
+#define   GAMTLBGUC_CLKGATE_DIS                REG_BIT(15)
+#define   GAMTLBBLT_CLKGATE_DIS                REG_BIT(14)
+#define   GAMTLBVDBOX1_CLKGATE_DIS     REG_BIT(6)
+
+#define UNSLCGCTL9444                  _MMIO(0x9444)
+#define   GAMTLBGFXA0_CLKGATE_DIS      REG_BIT(30)
+#define   GAMTLBGFXA1_CLKGATE_DIS      REG_BIT(29)
+#define   GAMTLBCOMPA0_CLKGATE_DIS     REG_BIT(28)
+#define   GAMTLBCOMPA1_CLKGATE_DIS     REG_BIT(27)
+#define   GAMTLBCOMPB0_CLKGATE_DIS     REG_BIT(26)
+#define   GAMTLBCOMPB1_CLKGATE_DIS     REG_BIT(25)
+#define   GAMTLBCOMPC0_CLKGATE_DIS     REG_BIT(24)
+#define   GAMTLBCOMPC1_CLKGATE_DIS     REG_BIT(23)
+#define   GAMTLBCOMPD0_CLKGATE_DIS     REG_BIT(22)
+#define   GAMTLBCOMPD1_CLKGATE_DIS     REG_BIT(21)
+#define   GAMTLBMERT_CLKGATE_DIS       REG_BIT(20)
+#define   GAMTLBVEBOX3_CLKGATE_DIS     REG_BIT(19)
+#define   GAMTLBVEBOX2_CLKGATE_DIS     REG_BIT(18)
+#define   GAMTLBVEBOX1_CLKGATE_DIS     REG_BIT(17)
+#define   GAMTLBVEBOX0_CLKGATE_DIS     REG_BIT(16)
+#define   LTCDD_CLKGATE_DIS            REG_BIT(10)
+
 #define SLICE_UNIT_LEVEL_CLKGATE       _MMIO(0x94d4)
 #define  SARBUNIT_CLKGATE_DIS          (1 << 5)
 #define  RCCUNIT_CLKGATE_DIS           (1 << 7)
 #define  MSCUNIT_CLKGATE_DIS           (1 << 10)
+#define  NODEDSS_CLKGATE_DIS           REG_BIT(12)
 #define  L3_CLKGATE_DIS                        REG_BIT(16)
 #define  L3_CR2X_CLKGATE_DIS           REG_BIT(17)
 
 #define SUBSLICE_UNIT_LEVEL_CLKGATE    _MMIO(0x9524)
-#define  GWUNIT_CLKGATE_DIS            (1 << 16)
+#define   DSS_ROUTER_CLKGATE_DIS       REG_BIT(28)
+#define   GWUNIT_CLKGATE_DIS           REG_BIT(16)
 
 #define SUBSLICE_UNIT_LEVEL_CLKGATE2   _MMIO(0x9528)
 #define  CPSSUNIT_CLKGATE_DIS          REG_BIT(9)
 
+#define SSMCGCTL9530                   _MMIO(0x9530)
+#define   RTFUNIT_CLKGATE_DIS          REG_BIT(18)
+
 #define UNSLICE_UNIT_LEVEL_CLKGATE     _MMIO(0x9434)
 #define   VFUNIT_CLKGATE_DIS           REG_BIT(20)
+#define   TSGUNIT_CLKGATE_DIS          REG_BIT(17) /* XEHPSDV */
+#define   CG3DDISCFEG_CLKGATE_DIS      REG_BIT(17) /* DG2 */
+#define   GAMEDIA_CLKGATE_DIS          REG_BIT(11)
 #define   HSUNIT_CLKGATE_DIS           REG_BIT(8)
 #define   VSUNIT_CLKGATE_DIS           REG_BIT(3)
 
@@ -8351,6 +8425,9 @@ enum {
 #define GEN9_CTX_PREEMPT_REG           _MMIO(0x2248)
 #define   GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11)
 
+#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON                _MMIO(0x20EC)
+#define   GEN12_REPLAY_MODE_GRANULARITY                        REG_BIT(0)
+
 #define GEN8_CS_CHICKEN1               _MMIO(0x2580)
 #define GEN9_PREEMPT_3D_OBJECT_LEVEL           (1 << 0)
 #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo)       (((hi) << 2) | ((lo) << 1))
@@ -8374,9 +8451,10 @@ enum {
   #define GEN8_ERRDETBCTRL (1 << 9)
 
 #define GEN11_COMMON_SLICE_CHICKEN3                    _MMIO(0x7304)
-  #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN     REG_BIT(12)
-  #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC           REG_BIT(11)
-  #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE           REG_BIT(9)
+#define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN     REG_BIT(12)
+#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE            REG_BIT(12)
+#define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC           REG_BIT(11)
+#define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE           REG_BIT(9)
 
 #define HIZ_CHICKEN                                    _MMIO(0x7018)
 # define CHV_HZ_8X8_MODE_IN_1X                         REG_BIT(15)
@@ -8430,6 +8508,12 @@ enum {
 #define  GEN8_LQSC_FLUSH_COHERENT_LINES                (1 << 21)
 #define  GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
 
+#define GEN11_L3SQCREG5                                _MMIO(0xb158)
+#define   L3_PWM_TIMER_INIT_VAL_MASK           REG_GENMASK(9, 0)
+
+#define XEHP_L3SCQREG7                         _MMIO(0xb188)
+#define   BLEND_FILL_CACHING_OPT_DIS           REG_BIT(3)
+
 /* GEN8 chicken */
 #define HDC_CHICKEN0                           _MMIO(0x7300)
 #define ICL_HDC_MODE                           _MMIO(0xE5F4)
@@ -8440,6 +8524,12 @@ enum {
 #define  HDC_FORCE_NON_COHERENT                        (1 << 4)
 #define  HDC_BARRIER_PERFORMANCE_DISABLE       (1 << 10)
 
+#define GEN12_HDC_CHICKEN0                                     _MMIO(0xE5F0)
+#define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK       REG_GENMASK(13, 11)
+
+#define SARB_CHICKEN1                          _MMIO(0xe90c)
+#define   COMP_CKN_IN                          REG_GENMASK(30, 29)
+
 #define GEN8_HDC_CHICKEN1                      _MMIO(0x7304)
 
 /* GEN9 chicken */
@@ -8470,6 +8560,10 @@ enum {
 #define   PIXEL_ROUNDING_TRUNC_FB_PASSTHRU     (1 << 15)
 #define   PER_PIXEL_ALPHA_BYPASS_EN            (1 << 7)
 
+#define VFLSKPD                                _MMIO(0x62a8)
+#define   DIS_OVER_FETCH_CACHE         REG_BIT(1)
+#define   DIS_MULT_MISS_RD_SQUASH      REG_BIT(0)
+
 #define FF_MODE2                       _MMIO(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK       REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224                REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
@@ -9293,6 +9387,9 @@ enum {
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE      (1 << 14)
 #define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1 << 28)
 
+#define UNSLCGCTL9430                          _MMIO(0x9430)
+#define   MSQDUNIT_CLKGATE_DIS                 REG_BIT(3)
+
 #define GEN6_GFXPAUSE                          _MMIO(0xA000)
 #define GEN6_RPNSWREQ                          _MMIO(0xA008)
 #define   GEN6_TURBO_DISABLE                   (1 << 31)
@@ -9608,24 +9705,39 @@ enum {
 #define   GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3)
 
 #define GEN8_ROW_CHICKEN               _MMIO(0xe4f0)
-#define   FLOW_CONTROL_ENABLE          (1 << 15)
-#define   PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE        (1 << 8)
-#define   STALL_DOP_GATING_DISABLE             (1 << 5)
-#define   THROTTLE_12_5                                (7 << 2)
-#define   DISABLE_EARLY_EOT                    (1 << 1)
+#define   FLOW_CONTROL_ENABLE                  REG_BIT(15)
+#define   UGM_BACKUP_MODE                      REG_BIT(13)
+#define   MDQ_ARBITRATION_MODE                 REG_BIT(12)
+#define   PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE        REG_BIT(8)
+#define   STALL_DOP_GATING_DISABLE             REG_BIT(5)
+#define   THROTTLE_12_5                                REG_GENMASK(4, 2)
+#define   DISABLE_EARLY_EOT                    REG_BIT(1)
 
 #define GEN7_ROW_CHICKEN2                      _MMIO(0xe4f4)
+#define   GEN12_DISABLE_READ_SUPPRESSION       REG_BIT(15)
 #define   GEN12_DISABLE_EARLY_READ             REG_BIT(14)
+#define   GEN12_ENABLE_LARGE_GRF_MODE          REG_BIT(12)
 #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS      REG_BIT(8)
 
+#define LSC_CHICKEN_BIT_0                      _MMIO(0xe7c8)
+#define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT     REG_BIT(15)
+#define LSC_CHICKEN_BIT_0_UDW                  _MMIO(0xe7c8 + 4)
+#define   DIS_CHAIN_2XSIMD8                    REG_BIT(55 - 32)
+#define   FORCE_SLM_FENCE_SCOPE_TO_TILE                REG_BIT(42 - 32)
+#define   FORCE_UGM_FENCE_SCOPE_TO_TILE                REG_BIT(41 - 32)
+#define   MAXREQS_PER_BANK                     REG_GENMASK(39 - 32, 37 - 32)
+#define   DISABLE_128B_EVICTION_COMMAND_UDW    REG_BIT(36 - 32)
+
 #define GEN7_ROW_CHICKEN2_GT2          _MMIO(0xf4f4)
 #define   DOP_CLOCK_GATING_DISABLE     (1 << 0)
 #define   PUSH_CONSTANT_DEREF_DISABLE  (1 << 8)
 #define   GEN11_TDL_CLOCK_GATING_FIX_DISABLE   (1 << 1)
 
-#define GEN9_ROW_CHICKEN4              _MMIO(0xe48c)
-#define   GEN12_DISABLE_TDL_PUSH       REG_BIT(9)
-#define   GEN11_DIS_PICK_2ND_EU                REG_BIT(7)
+#define GEN9_ROW_CHICKEN4                              _MMIO(0xe48c)
+#define   GEN12_DISABLE_GRF_CLEAR                      REG_BIT(13)
+#define   GEN12_DISABLE_TDL_PUSH                       REG_BIT(9)
+#define   GEN11_DIS_PICK_2ND_EU                                REG_BIT(7)
+#define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX      REG_BIT(4)
 
 #define HSW_ROW_CHICKEN3               _MMIO(0xe49c)
 #define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
@@ -9640,9 +9752,10 @@ enum {
 #define   GEN8_SAMPLER_POWER_BYPASS_DIS        (1 << 1)
 
 #define GEN9_HALF_SLICE_CHICKEN7       _MMIO(0xe194)
-#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR       (1 << 8)
-#define   GEN9_ENABLE_YV12_BUGFIX      (1 << 4)
-#define   GEN9_ENABLE_GPGPU_PREEMPTION (1 << 2)
+#define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA      REG_BIT(15)
+#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR       REG_BIT(8)
+#define   GEN9_ENABLE_YV12_BUGFIX                      REG_BIT(4)
+#define   GEN9_ENABLE_GPGPU_PREEMPTION                 REG_BIT(2)
 
 /* Audio */
 #define G4X_AUD_VID_DID                        _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x62020)
@@ -12466,11 +12579,19 @@ enum skl_power_gate {
 #define   PMFLUSH_GAPL3UNBLOCK         (1 << 21)
 #define   PMFLUSHDONE_LNEBLK           (1 << 22)
 
+#define XEHP_L3NODEARBCFG              _MMIO(0xb0b4)
+#define   XEHP_LNESPARE                        REG_BIT(19)
+
 #define GEN12_GLOBAL_MOCS(i)   _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
 
 #define GEN12_GSMBASE                  _MMIO(0x108100)
 #define GEN12_DSMBASE                  _MMIO(0x1080C0)
 
+#define XEHP_CLOCK_GATE_DIS            _MMIO(0x101014)
+#define   SGSI_SIDECLK_DIS             REG_BIT(17)
+#define   SGGI_DIS                     REG_BIT(15)
+#define   SGR_DIS                      REG_BIT(13)
+
 /* gamt regs */
 #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
 #define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for LRA1/2 */
@@ -12847,4 +12968,7 @@ enum skl_power_gate {
 #define CLKGATE_DIS_MISC                       _MMIO(0x46534)
 #define  CLKGATE_DIS_MISC_DMASC_GATING_DIS     REG_BIT(21)
 
+#define SLICE_COMMON_ECO_CHICKEN1              _MMIO(0x731C)
+#define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
+
 #endif /* _I915_REG_H_ */
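
The conversions above from literal shifts to REG_BIT()/REG_GENMASK() are behaviour-preserving: the helpers expand to BIT()/GENMASK() plus compile-time range checking, and REG_FIELD_PREP() shifts a value into such a mask. A minimal sketch using the FF_MODE2 definitions above (illustration only):

	/* Equivalent by the definitions above: FF_MODE2_GS_TIMER_224 is the
	 * value 224 shifted into bits 31:24 of FF_MODE2. */
	u32 gs_timer_224 = REG_FIELD_PREP(REG_GENMASK(31, 24), 224);
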
index 820a1f3..623273a 100644 (file)
@@ -339,7 +339,7 @@ bool i915_request_retire(struct i915_request *rq)
        }
 
        if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags))
-               atomic_dec(&rq->engine->gt->rps.num_waiters);
+               intel_rps_dec_waiters(&rq->engine->gt->rps);
 
        /*
         * We only loosely track inflight requests across preemption,
index 4a6712d..41f2adb 100644 (file)
@@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st)
        return true;
 }
 
+static void i915_refct_sgt_release(struct kref *ref)
+{
+       struct i915_refct_sgt *rsgt =
+               container_of(ref, typeof(*rsgt), kref);
+
+       sg_free_table(&rsgt->table);
+       kfree(rsgt);
+}
+
+static const struct i915_refct_sgt_ops rsgt_ops = {
+       .release = i915_refct_sgt_release
+};
+
+/**
+ * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * @size: The size of the underlying memory buffer.
+ */
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
+{
+       __i915_refct_sgt_init(rsgt, size, &rsgt_ops);
+}
+
 /**
- * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node
+ * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct
+ * drm_mm_node
  * @node: The drm_mm_node.
  * @region_start: An offset to add to the dma addresses of the sg list.
  *
@@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st)
  * taking a maximum segment length into account, splitting into segments
  * if necessary.
  *
- * Return: A pointer to a kmalloced struct sg_table on success, negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
  * error code cast to an error pointer on failure.
  */
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
-                                     u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
+                                             u64 region_start)
 {
        const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
        u64 segment_pages = max_segment >> PAGE_SHIFT;
        u64 block_size, offset, prev_end;
+       struct i915_refct_sgt *rsgt;
        struct sg_table *st;
        struct scatterlist *sg;
 
-       st = kmalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+       if (!rsgt)
                return ERR_PTR(-ENOMEM);
 
+       i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
+       st = &rsgt->table;
        if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
                           GFP_KERNEL)) {
-               kfree(st);
+               i915_refct_sgt_put(rsgt);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
        sg_mark_end(sg);
        i915_sg_trim(st);
 
-       return st;
+       return rsgt;
 }
 
 /**
- * i915_sg_from_buddy_resource - Create an sg_table from a struct
+ * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct
  * i915_buddy_block list
  * @res: The struct i915_ttm_buddy_resource.
  * @region_start: An offset to add to the dma addresses of the sg list.
@@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
  * taking a maximum segment length into account, splitting into segments
  * if necessary.
  *
- * Return: A pointer to a kmalloced struct sg_table on success, negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
  * error code cast to an error pointer on failure.
  */
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
-                                            u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
+                                                    u64 region_start)
 {
        struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
        const u64 size = res->num_pages << PAGE_SHIFT;
@@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
        struct i915_buddy_mm *mm = bman_res->mm;
        struct list_head *blocks = &bman_res->blocks;
        struct i915_buddy_block *block;
+       struct i915_refct_sgt *rsgt;
        struct scatterlist *sg;
        struct sg_table *st;
        resource_size_t prev_end;
 
        GEM_BUG_ON(list_empty(blocks));
 
-       st = kmalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+       if (!rsgt)
                return ERR_PTR(-ENOMEM);
 
+       i915_refct_sgt_init(rsgt, size);
+       st = &rsgt->table;
        if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
-               kfree(st);
+               i915_refct_sgt_put(rsgt);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
        sg_mark_end(sg);
        i915_sg_trim(st);
 
-       return st;
+       return rsgt;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
index b8bd592..12c6a16 100644 (file)
@@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void)
 
 bool i915_sg_trim(struct sg_table *orig_st);
 
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
-                                     u64 region_start);
+/**
+ * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt
+ */
+struct i915_refct_sgt_ops {
+       /**
+        * release() - Free the memory of the struct i915_refct_sgt
+        * @ref: struct kref that is embedded in the struct i915_refct_sgt
+        */
+       void (*release)(struct kref *ref);
+};
+
+/**
+ * struct i915_refct_sgt - A refcounted scatter-gather table
+ * @kref: struct kref for refcounting
+ * @table: struct sg_table holding the scatter-gather table itself. Note that
+ * @table->sgl = NULL can be used to determine whether a scatter-gather table
+ * is present or not.
+ * @size: The size in bytes of the underlying memory buffer
+ * @ops: The operations structure.
+ */
+struct i915_refct_sgt {
+       struct kref kref;
+       struct sg_table table;
+       size_t size;
+       const struct i915_refct_sgt_ops *ops;
+};
+
+/**
+ * i915_refct_sgt_put - Put a refcounted sg-table
+ * @rsgt: The struct i915_refct_sgt to put.
+ */
+static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
+{
+       if (rsgt)
+               kref_put(&rsgt->kref, rsgt->ops->release);
+}
+
+/**
+ * i915_refct_sgt_get - Get a refcounted sg-table
+ * @rsgt: The struct i915_refct_sgt to get.
+ */
+static inline struct i915_refct_sgt *
+i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
+{
+       kref_get(&rsgt->kref);
+       return rsgt;
+}
+
+/**
+ * __i915_refct_sgt_init - Initialize a refcounted sg-table with a custom
+ * operations structure
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * @size: Size in bytes of the underlying memory buffer.
+ * @ops: A customized operations structure in case the refcounted sg-table
+ * is embedded into another structure.
+ */
+static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
+                                        size_t size,
+                                        const struct i915_refct_sgt_ops *ops)
+{
+       kref_init(&rsgt->kref);
+       rsgt->table.sgl = NULL;
+       rsgt->size = size;
+       rsgt->ops = ops;
+}
+
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
+
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
+                                             u64 region_start);
 
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
-                                            u64 region_start);
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
+                                                    u64 region_start);
 
 #endif
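
A minimal usage sketch of the refcounted sg-table API added above; the drm_mm_node, region_start argument and the second owner are illustrative only:

	struct i915_refct_sgt *rsgt, *cached;

	rsgt = i915_rsgt_from_mm_node(node, region_start);
	if (IS_ERR(rsgt))
		return PTR_ERR(rsgt);

	cached = i915_refct_sgt_get(rsgt);	/* a second owner takes its own reference */

	i915_refct_sgt_put(rsgt);		/* owners drop independently... */
	i915_refct_sgt_put(cached);		/* ...the final put calls ops->release() */
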
index 1804f41..59d441c 100644 (file)
@@ -279,7 +279,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu
        struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
        struct intel_rps *rps = &i915->gt.rps;
 
-       return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->boost_freq));
+       return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps));
 }
 
 static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
@@ -288,7 +288,6 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 {
        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
        struct intel_rps *rps = &dev_priv->gt.rps;
-       bool boost = false;
        ssize_t ret;
        u32 val;
 
@@ -296,21 +295,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
        if (ret)
                return ret;
 
-       /* Validate against (static) hardware limits */
-       val = intel_freq_opcode(rps, val);
-       if (val < rps->min_freq || val > rps->max_freq)
-               return -EINVAL;
-
-       mutex_lock(&rps->lock);
-       if (val != rps->boost_freq) {
-               rps->boost_freq = val;
-               boost = atomic_read(&rps->num_waiters);
-       }
-       mutex_unlock(&rps->lock);
-       if (boost)
-               schedule_work(&rps->work);
+       ret = intel_rps_set_boost_frequency(rps, val);
 
-       return count;
+       return ret ?: count;
 }
 
 static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
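
The validation logic removed from gt_boost_freq_mhz_store() above now sits behind intel_rps_get_boost_frequency()/intel_rps_set_boost_frequency() in intel_rps.c, which is outside this excerpt. A sketch of what the setter plausibly looks like, assuming it simply relocates the deleted logic:

	int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
	{
		bool boost = false;

		/* Validate against (static) hardware limits */
		freq = intel_freq_opcode(rps, freq);
		if (freq < rps->min_freq || freq > rps->max_freq)
			return -EINVAL;

		mutex_lock(&rps->lock);
		if (freq != rps->boost_freq) {
			rps->boost_freq = freq;
			boost = atomic_read(&rps->num_waiters);
		}
		mutex_unlock(&rps->lock);

		if (boost)
			schedule_work(&rps->work);

		return 0;
	}
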
index bef795e..e2f2c4c 100644 (file)
 
 static struct kmem_cache *slab_vmas;
 
-struct i915_vma *i915_vma_alloc(void)
+static struct i915_vma *i915_vma_alloc(void)
 {
        return kmem_cache_zalloc(slab_vmas, GFP_KERNEL);
 }
 
-void i915_vma_free(struct i915_vma *vma)
+static void i915_vma_free(struct i915_vma *vma)
 {
        return kmem_cache_free(slab_vmas, vma);
 }
@@ -113,7 +113,6 @@ vma_create(struct drm_i915_gem_object *obj,
        vma->vm = i915_vm_get(vm);
        vma->ops = &vm->vma_ops;
        vma->obj = obj;
-       vma->resv = obj->base.resv;
        vma->size = obj->base.size;
        vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
@@ -346,7 +345,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
                fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
                rcu_read_unlock();
                if (fence) {
-                       err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+                       err = dma_fence_wait(fence, true);
                        dma_fence_put(fence);
                }
        }
@@ -423,10 +422,8 @@ int i915_vma_bind(struct i915_vma *vma,
 
                work->base.dma.error = 0; /* enable the queue_work() */
 
-               if (vma->obj) {
-                       __i915_gem_object_pin_pages(vma->obj);
-                       work->pinned = i915_gem_object_get(vma->obj);
-               }
+               __i915_gem_object_pin_pages(vma->obj);
+               work->pinned = i915_gem_object_get(vma->obj);
        } else {
                vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
        }
@@ -667,7 +664,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
        }
 
        color = 0;
-       if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
+       if (i915_vm_has_cache_coloring(vma->vm))
                color = vma->obj->cache_level;
 
        if (flags & PIN_OFFSET_FIXED) {
@@ -792,17 +789,14 @@ unpinned:
 static int vma_get_pages(struct i915_vma *vma)
 {
        int err = 0;
-       bool pinned_pages = false;
+       bool pinned_pages = true;
 
        if (atomic_add_unless(&vma->pages_count, 1, 0))
                return 0;
 
-       if (vma->obj) {
-               err = i915_gem_object_pin_pages(vma->obj);
-               if (err)
-                       return err;
-               pinned_pages = true;
-       }
+       err = i915_gem_object_pin_pages(vma->obj);
+       if (err)
+               return err;
 
        /* Allocations ahoy! */
        if (mutex_lock_interruptible(&vma->pages_mutex)) {
@@ -835,8 +829,8 @@ static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
        if (atomic_sub_return(count, &vma->pages_count) == 0) {
                vma->ops->clear_pages(vma);
                GEM_BUG_ON(vma->pages);
-               if (vma->obj)
-                       i915_gem_object_unpin_pages(vma->obj);
+
+               i915_gem_object_unpin_pages(vma->obj);
        }
        mutex_unlock(&vma->pages_mutex);
 }
@@ -872,7 +866,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        int err;
 
 #ifdef CONFIG_PROVE_LOCKING
-       if (debug_locks && !WARN_ON(!ww) && vma->resv)
+       if (debug_locks && !WARN_ON(!ww))
                assert_vma_held(vma);
 #endif
 
@@ -980,7 +974,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
        GEM_BUG_ON(!vma->pages);
        err = i915_vma_bind(vma,
-                           vma->obj ? vma->obj->cache_level : 0,
+                           vma->obj->cache_level,
                            flags, work);
        if (err)
                goto err_remove;
@@ -1034,7 +1028,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
 #ifdef CONFIG_LOCKDEP
-       WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
+       WARN_ON(!ww && dma_resv_held(vma->obj->base.resv));
 #endif
 
        do {
@@ -1113,6 +1107,7 @@ void i915_vma_reopen(struct i915_vma *vma)
 void i915_vma_release(struct kref *ref)
 {
        struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
+       struct drm_i915_gem_object *obj = vma->obj;
 
        if (drm_mm_node_allocated(&vma->node)) {
                mutex_lock(&vma->vm->mutex);
@@ -1123,15 +1118,11 @@ void i915_vma_release(struct kref *ref)
        }
        GEM_BUG_ON(i915_vma_is_active(vma));
 
-       if (vma->obj) {
-               struct drm_i915_gem_object *obj = vma->obj;
-
-               spin_lock(&obj->vma.lock);
-               list_del(&vma->obj_link);
-               if (!RB_EMPTY_NODE(&vma->obj_node))
-                       rb_erase(&vma->obj_node, &obj->vma.tree);
-               spin_unlock(&obj->vma.lock);
-       }
+       spin_lock(&obj->vma.lock);
+       list_del(&vma->obj_link);
+       if (!RB_EMPTY_NODE(&vma->obj_node))
+               rb_erase(&vma->obj_node, &obj->vma.tree);
+       spin_unlock(&obj->vma.lock);
 
        __i915_vma_remove_closed(vma);
        i915_vm_put(vma->vm);
@@ -1256,19 +1247,19 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
                }
 
                if (fence) {
-                       dma_resv_add_excl_fence(vma->resv, fence);
+                       dma_resv_add_excl_fence(vma->obj->base.resv, fence);
                        obj->write_domain = I915_GEM_DOMAIN_RENDER;
                        obj->read_domains = 0;
                }
        } else {
                if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
-                       err = dma_resv_reserve_shared(vma->resv, 1);
+                       err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
                        if (unlikely(err))
                                return err;
                }
 
                if (fence) {
-                       dma_resv_add_shared_fence(vma->resv, fence);
+                       dma_resv_add_shared_fence(vma->obj->base.resv, fence);
                        obj->write_domain = 0;
                }
        }
index 648dbe7..4033aa0 100644 (file)
@@ -234,16 +234,16 @@ static inline void __i915_vma_put(struct i915_vma *vma)
        kref_put(&vma->ref, i915_vma_release);
 }
 
-#define assert_vma_held(vma) dma_resv_assert_held((vma)->resv)
+#define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)
 
 static inline void i915_vma_lock(struct i915_vma *vma)
 {
-       dma_resv_lock(vma->resv, NULL);
+       dma_resv_lock(vma->obj->base.resv, NULL);
 }
 
 static inline void i915_vma_unlock(struct i915_vma *vma)
 {
-       dma_resv_unlock(vma->resv);
+       dma_resv_unlock(vma->obj->base.resv);
 }
 
 int __must_check
@@ -418,9 +418,6 @@ static inline void i915_vma_clear_scanout(struct i915_vma *vma)
        list_for_each_entry(V, &(OBJ)->vma.list, obj_link)              \
                for_each_until(!i915_vma_is_ggtt(V))
 
-struct i915_vma *i915_vma_alloc(void);
-void i915_vma_free(struct i915_vma *vma);
-
 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
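
With the vma->resv alias removed, the lock helpers above resolve the reservation through vma->obj, so the caller-side pattern is unchanged; a sketch (the request and write flag are illustrative):

	i915_vma_lock(vma);	/* dma_resv_lock(vma->obj->base.resv, NULL) */
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);	/* dma_resv_unlock(vma->obj->base.resv) */
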
index 80e93bf..8a0decb 100644 (file)
@@ -178,7 +178,6 @@ struct i915_vma {
        const struct i915_vma_ops *ops;
 
        struct drm_i915_gem_object *obj;
-       struct dma_resv *resv; /** Alias of obj->resv */
 
        struct sg_table *pages;
        void __iomem *iomap;
index 305face..6e6b317 100644 (file)
@@ -97,17 +97,22 @@ static const char *iommu_name(void)
 void intel_device_info_print_static(const struct intel_device_info *info,
                                    struct drm_printer *p)
 {
-       if (info->graphics_rel)
-               drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel);
+       if (info->graphics.rel)
+               drm_printf(p, "graphics version: %u.%02u\n", info->graphics.ver,
+                          info->graphics.rel);
        else
-               drm_printf(p, "graphics version: %u\n", info->graphics_ver);
+               drm_printf(p, "graphics version: %u\n", info->graphics.ver);
 
-       if (info->media_rel)
-               drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel);
+       if (info->media.rel)
+               drm_printf(p, "media version: %u.%02u\n", info->media.ver, info->media.rel);
        else
-               drm_printf(p, "media version: %u\n", info->media_ver);
+               drm_printf(p, "media version: %u\n", info->media.ver);
+
+       if (info->display.rel)
+               drm_printf(p, "display version: %u.%02u\n", info->display.ver, info->display.rel);
+       else
+               drm_printf(p, "display version: %u\n", info->display.ver);
 
-       drm_printf(p, "display version: %u\n", info->display.ver);
        drm_printf(p, "gt: %d\n", info->gt);
        drm_printf(p, "iommu: %s\n", iommu_name());
        drm_printf(p, "memory-regions: %x\n", info->memory_regions);
index 8e6f48d..669f0d2 100644 (file)
@@ -166,11 +166,14 @@ enum intel_ppgtt_type {
        func(overlay_needs_physical); \
        func(supports_tv);
 
+struct ip_version {
+       u8 ver;
+       u8 rel;
+};
+
 struct intel_device_info {
-       u8 graphics_ver;
-       u8 graphics_rel;
-       u8 media_ver;
-       u8 media_rel;
+       struct ip_version graphics;
+       struct ip_version media;
 
        intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */
 
@@ -200,6 +203,7 @@ struct intel_device_info {
 
        struct {
                u8 ver;
+               u8 rel;
 
 #define DEFINE_FLAG(name) u8 name:1
                DEV_INFO_DISPLAY_FOR_EACH_FLAG(DEFINE_FLAG);
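
A hypothetical platform-descriptor fragment showing how the nested ip_version fields replace the flat *_ver/*_rel members (the descriptor name and values are invented; the real tables live in i915_pci.c):

	static const struct intel_device_info example_xehp_info = {
		.graphics = { .ver = 12, .rel = 50 },
		.media    = { .ver = 12, .rel = 50 },
		/* .display.ver/.display.rel follow the same pattern */
	};
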
index ecbb3d1..1f17ca5 100644 (file)
@@ -7473,11 +7473,34 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv)
        gen12lp_init_clock_gating(dev_priv);
 
        /* Wa_1409836686:dg1[a0] */
-       if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0))
+       if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
                intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
                           DPT_GATING_DIS);
 }
 
+static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+       /* Wa_22010146351:xehpsdv */
+       if (IS_XEHPSDV_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
+               intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS);
+}
+
+static void dg2_init_clock_gating(struct drm_i915_private *i915)
+{
+       /* Wa_22010954014:dg2_g10 */
+       if (IS_DG2_G10(i915))
+               intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
+                                SGSI_SIDECLK_DIS);
+
+       /*
+        * Wa_14010733611:dg2_g10
+        * Wa_22010146351:dg2_g10
+        */
+       if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
+               intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
+                                SGR_DIS | SGGI_DIS);
+}
+
 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
 {
        if (!HAS_PCH_CNP(dev_priv))
@@ -7521,12 +7544,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
                   FBC_LLC_FULLY_OPEN);
 
        /* WaDisableSDEUnitClockGating:kbl */
-       if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0))
+       if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
                intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
                           GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
        /* WaDisableGamClockGating:kbl */
-       if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0))
+       if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
                intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
                           GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
 
@@ -7888,6 +7911,8 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs =
        .init_clock_gating = platform##_init_clock_gating,              \
 }
 
+CG_FUNCS(dg2);
+CG_FUNCS(xehpsdv);
 CG_FUNCS(adlp);
 CG_FUNCS(dg1);
 CG_FUNCS(gen12lp);
@@ -7924,7 +7949,11 @@ CG_FUNCS(nop);
  */
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
-       if (IS_ALDERLAKE_P(dev_priv))
+       if (IS_DG2(dev_priv))
+               dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs;
+       else if (IS_XEHPSDV(dev_priv))
+               dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs;
+       else if (IS_ALDERLAKE_P(dev_priv))
                dev_priv->clock_gating_funcs = &adlp_clock_gating_funcs;
        else if (IS_DG1(dev_priv))
                dev_priv->clock_gating_funcs = &dg1_clock_gating_funcs;
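
For reference, the intel_uncore_rmw(uncore, reg, 0, bits) calls in the new xehpsdv/dg2 hooks are read-modify-write with an empty clear mask, i.e. roughly the open-coded pattern still used in the dg1 hook above:

	u32 val = intel_uncore_read(&i915->uncore, XEHP_CLOCK_GATE_DIS);
	intel_uncore_write(&i915->uncore, XEHP_CLOCK_GATE_DIS, val | SGSI_SIDECLK_DIS);
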
index 98c7339..2e901a2 100644 (file)
@@ -115,8 +115,8 @@ void intel_region_ttm_fini(struct intel_memory_region *mem)
 }
 
 /**
- * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource
- * to an sg_table.
+ * intel_region_ttm_resource_to_rsgt -
+ * Convert an opaque TTM resource manager resource to a refcounted sg_table.
  * @mem: The memory region.
  * @res: The resource manager resource obtained from the TTM resource manager.
  *
@@ -126,17 +126,18 @@ void intel_region_ttm_fini(struct intel_memory_region *mem)
  *
  * Return: A malloced sg_table on success, an error pointer on failure.
  */
-struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
-                                                struct ttm_resource *res)
+struct i915_refct_sgt *
+intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
+                                 struct ttm_resource *res)
 {
        if (mem->is_range_manager) {
                struct ttm_range_mgr_node *range_node =
                        to_ttm_range_mgr_node(res);
 
-               return i915_sg_from_mm_node(&range_node->mm_nodes[0],
-                                           mem->region.start);
+               return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
+                                             mem->region.start);
        } else {
-               return i915_sg_from_buddy_resource(res, mem->region.start);
+               return i915_rsgt_from_buddy_resource(res, mem->region.start);
        }
 }
 
index 6f44075..7bbe2b4 100644 (file)
@@ -22,8 +22,9 @@ int intel_region_ttm_init(struct intel_memory_region *mem);
 
 void intel_region_ttm_fini(struct intel_memory_region *mem);
 
-struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
-                                                struct ttm_resource *res);
+struct i915_refct_sgt *
+intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
+                                 struct ttm_resource *res);
 
 void intel_region_ttm_resource_free(struct intel_memory_region *mem,
                                    struct ttm_resource *res);
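
Caller-side, the renamed helper hands back a refcounted sg-table whose embedded table feeds __i915_gem_object_set_pages(); this mirrors the mock_region_get_pages() hunk at the end of this diff:

	obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, obj->mm.res);
	if (IS_ERR(obj->mm.rsgt))
		return PTR_ERR(obj->mm.rsgt);

	__i915_gem_object_set_pages(obj, &obj->mm.rsgt->table,
				    i915_sg_dma_sizes(obj->mm.rsgt->table.sgl));
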
index 6cf9676..a4b16b9 100644 (file)
@@ -23,7 +23,8 @@
  * use a macro to define these to make it easier to identify the platforms
  * where the two steppings can deviate.
  */
-#define COMMON_STEP(x)  .gt_step = STEP_##x, .display_step = STEP_##x
+#define COMMON_STEP(x)  .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x
+#define COMMON_GT_MEDIA_STEP(x)  .graphics_step = STEP_##x, .media_step = STEP_##x
 
 static const struct intel_step_info skl_revids[] = {
        [0x6] = { COMMON_STEP(G0) },
@@ -33,13 +34,13 @@ static const struct intel_step_info skl_revids[] = {
 };
 
 static const struct intel_step_info kbl_revids[] = {
-       [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
-       [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 },
-       [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 },
-       [4] = { .gt_step = STEP_F0, .display_step = STEP_C0 },
-       [5] = { .gt_step = STEP_C0, .display_step = STEP_B1 },
-       [6] = { .gt_step = STEP_D1, .display_step = STEP_B1 },
-       [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 },
+       [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+       [2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 },
+       [3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 },
+       [4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 },
+       [5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 },
+       [6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 },
+       [7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 },
 };
 
 static const struct intel_step_info bxt_revids[] = {
@@ -63,16 +64,16 @@ static const struct intel_step_info jsl_ehl_revids[] = {
 };
 
 static const struct intel_step_info tgl_uy_revids[] = {
-       [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
-       [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 },
-       [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 },
-       [3] = { .gt_step = STEP_C0, .display_step = STEP_D0 },
+       [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+       [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 },
+       [2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
+       [3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 },
 };
 
 /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
 static const struct intel_step_info tgl_revids[] = {
-       [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 },
-       [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 },
+       [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 },
+       [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 },
 };
 
 static const struct intel_step_info rkl_revids[] = {
@@ -87,38 +88,38 @@ static const struct intel_step_info dg1_revids[] = {
 };
 
 static const struct intel_step_info adls_revids[] = {
-       [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
-       [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 },
-       [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
-       [0x8] = { .gt_step = STEP_C0, .display_step = STEP_B0 },
-       [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 },
+       [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+       [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 },
+       [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+       [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 },
+       [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 },
 };
 
 static const struct intel_step_info adlp_revids[] = {
-       [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
-       [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
-       [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 },
-       [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 },
+       [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+       [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+       [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 },
+       [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 },
 };
 
 static const struct intel_step_info xehpsdv_revids[] = {
-       [0x0] = { .gt_step = STEP_A0 },
-       [0x1] = { .gt_step = STEP_A1 },
-       [0x4] = { .gt_step = STEP_B0 },
-       [0x8] = { .gt_step = STEP_C0 },
+       [0x0] = { COMMON_GT_MEDIA_STEP(A0) },
+       [0x1] = { COMMON_GT_MEDIA_STEP(A1) },
+       [0x4] = { COMMON_GT_MEDIA_STEP(B0) },
+       [0x8] = { COMMON_GT_MEDIA_STEP(C0) },
 };
 
 static const struct intel_step_info dg2_g10_revid_step_tbl[] = {
-       [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
-       [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 },
-       [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
-       [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 },
+       [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+       [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 },
+       [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+       [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 },
 };
 
 static const struct intel_step_info dg2_g11_revid_step_tbl[] = {
-       [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 },
-       [0x4] = { .gt_step = STEP_B0, .display_step = STEP_C0 },
-       [0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 },
+       [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 },
+       [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 },
+       [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
 };
 
 void intel_step_init(struct drm_i915_private *i915)
@@ -179,7 +180,7 @@ void intel_step_init(struct drm_i915_private *i915)
        if (!revids)
                return;
 
-       if (revid < size && revids[revid].gt_step != STEP_NONE) {
+       if (revid < size && revids[revid].graphics_step != STEP_NONE) {
                step = revids[revid];
        } else {
                drm_warn(&i915->drm, "Unknown revid 0x%02x\n", revid);
@@ -192,7 +193,7 @@ void intel_step_init(struct drm_i915_private *i915)
                 * steppings in the array are not monotonically increasing, but
                 * it's better than defaulting to 0.
                 */
-               while (revid < size && revids[revid].gt_step == STEP_NONE)
+               while (revid < size && revids[revid].graphics_step == STEP_NONE)
                        revid++;
 
                if (revid < size) {
@@ -201,12 +202,12 @@ void intel_step_init(struct drm_i915_private *i915)
                        step = revids[revid];
                } else {
                        drm_dbg(&i915->drm, "Using future steppings\n");
-                       step.gt_step = STEP_FUTURE;
+                       step.graphics_step = STEP_FUTURE;
                        step.display_step = STEP_FUTURE;
                }
        }
 
-       if (drm_WARN_ON(&i915->drm, step.gt_step == STEP_NONE))
+       if (drm_WARN_ON(&i915->drm, step.graphics_step == STEP_NONE))
                return;
 
        RUNTIME_INFO(i915)->step = step;
index f6641e2..d71a99b 100644 (file)
@@ -11,8 +11,9 @@
 struct drm_i915_private;
 
 struct intel_step_info {
-       u8 gt_step;
+       u8 graphics_step;
        u8 display_step;
+       u8 media_step;
 };
 
 #define STEP_ENUM_VAL(name)  STEP_##name,
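
With separate graphics/media/display steppings, workaround checks can key on the block they actually touch; a sketch using the RUNTIME_INFO() step recorded by intel_step_init() above (apply_media_fix() is an invented placeholder):

	if (RUNTIME_INFO(i915)->step.media_step >= STEP_B0)
		apply_media_fix(i915);	/* invented helper, illustration only */
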
index e072054..a308e86 100644 (file)
@@ -2020,7 +2020,7 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-static int uncore_mmio_setup(struct intel_uncore *uncore)
+int intel_uncore_setup_mmio(struct intel_uncore *uncore)
 {
        struct drm_i915_private *i915 = uncore->i915;
        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
@@ -2053,7 +2053,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore)
        return 0;
 }
 
-static void uncore_mmio_cleanup(struct intel_uncore *uncore)
+void intel_uncore_cleanup_mmio(struct intel_uncore *uncore)
 {
        struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev);
 
@@ -2146,10 +2146,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
        struct drm_i915_private *i915 = uncore->i915;
        int ret;
 
-       ret = uncore_mmio_setup(uncore);
-       if (ret)
-               return ret;
-
        /*
         * The boot firmware initializes local memory and assesses its health.
         * If memory training fails, the punit will have been instructed to
@@ -2170,7 +2166,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
        } else {
                ret = uncore_forcewake_init(uncore);
                if (ret)
-                       goto out_mmio_cleanup;
+                       return ret;
        }
 
        /* make sure fw funcs are set if and only if we have fw*/
@@ -2192,11 +2188,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
                drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n");
 
        return 0;
-
-out_mmio_cleanup:
-       uncore_mmio_cleanup(uncore);
-
-       return ret;
 }
 
 /*
@@ -2261,8 +2252,6 @@ void intel_uncore_fini_mmio(struct intel_uncore *uncore)
                intel_uncore_fw_domains_fini(uncore);
                iosf_mbi_punit_release();
        }
-
-       uncore_mmio_cleanup(uncore);
 }
 
 static const struct reg_whitelist {
index 3248e4e..d1d17b0 100644 (file)
@@ -218,11 +218,13 @@ void
 intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug);
 void intel_uncore_init_early(struct intel_uncore *uncore,
                             struct drm_i915_private *i915);
+int intel_uncore_setup_mmio(struct intel_uncore *uncore);
 int intel_uncore_init_mmio(struct intel_uncore *uncore);
 void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
                                          struct intel_gt *gt);
 bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore);
 bool intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore);
+void intel_uncore_cleanup_mmio(struct intel_uncore *uncore);
 void intel_uncore_fini_mmio(struct intel_uncore *uncore);
 void intel_uncore_suspend(struct intel_uncore *uncore);
 void intel_uncore_resume_early(struct intel_uncore *uncore);
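
With MMIO BAR setup split out of intel_uncore_init_mmio(), the expected caller-side ordering is roughly the following sketch (error handling abbreviated; the real wiring is in i915_drv.c, outside this excerpt):

	ret = intel_uncore_setup_mmio(&i915->uncore);	/* map the register BAR */
	if (ret)
		return ret;

	ret = intel_uncore_init_mmio(&i915->uncore);	/* forcewake etc.; no longer maps */
	if (ret)
		intel_uncore_cleanup_mmio(&i915->uncore);	/* unmap on failure/teardown */
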
index f99bb01..7e0658a 100644 (file)
@@ -442,6 +442,7 @@ static int igt_evict_contexts(void *arg)
        /* Overfill the GGTT with context objects and so try to evict one. */
        for_each_engine(engine, gt, id) {
                struct i915_sw_fence fence;
+               struct i915_request *last = NULL;
 
                count = 0;
                onstack_fence_init(&fence);
@@ -479,6 +480,9 @@ static int igt_evict_contexts(void *arg)
 
                        i915_request_add(rq);
                        count++;
+                       if (last)
+                               i915_request_put(last);
+                       last = i915_request_get(rq);
                        err = 0;
                } while(1);
                onstack_fence_fini(&fence);
@@ -486,6 +490,21 @@ static int igt_evict_contexts(void *arg)
                        count, engine->name);
                if (err)
                        break;
+               if (last) {
+                       if (i915_request_wait(last, 0, HZ) < 0) {
+                               err = -EIO;
+                               i915_request_put(last);
+                               pr_err("Failed waiting for last request (on %s)",
+                                      engine->name);
+                               break;
+                       }
+                       i915_request_put(last);
+               }
+               err = intel_gt_wait_for_idle(engine->gt, HZ * 3);
+               if (err) {
+                       pr_err("Failed to idle GT (on %s)", engine->name);
+                       break;
+               }
        }
 
        mutex_lock(&ggtt->vm.mutex);
index d67710d..6496671 100644 (file)
@@ -2805,7 +2805,7 @@ static int p_sync0(void *arg)
                i915_request_add(rq);
 
                err = 0;
-               if (i915_request_wait(rq, 0, HZ / 5) < 0)
+               if (i915_request_wait(rq, 0, HZ) < 0)
                        err = -ETIME;
                i915_request_put(rq);
                if (err)
@@ -2876,7 +2876,7 @@ static int p_sync1(void *arg)
                i915_request_add(rq);
 
                err = 0;
-               if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+               if (prev && i915_request_wait(prev, 0, HZ) < 0)
                        err = -ETIME;
                i915_request_put(prev);
                prev = rq;
index 9f8590b..a2838c6 100644 (file)
@@ -36,7 +36,7 @@ void igt_global_reset_unlock(struct intel_gt *gt)
        enum intel_engine_id id;
 
        for_each_engine(engine, gt, id)
-               clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+               clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 
        clear_bit(I915_RESET_BACKOFF, &gt->reset.flags);
        wake_up_all(&gt->reset.queue);
index 4f81801..d0e2e61 100644 (file)
@@ -165,7 +165,7 @@ struct drm_i915_private *mock_gem_device(void)
        /* Using the global GTT may ask questions about KMS users, so prepare */
        drm_mode_config_init(&i915->drm);
 
-       mkwrite_device_info(i915)->graphics_ver = -1;
+       mkwrite_device_info(i915)->graphics.ver = -1;
 
        mkwrite_device_info(i915)->page_sizes =
                I915_GTT_PAGE_SIZE_4K |
@@ -177,6 +177,8 @@ struct drm_i915_private *mock_gem_device(void)
 
        mock_uncore_init(&i915->uncore, i915);
 
+       spin_lock_init(&i915->gpu_error.lock);
+
        i915_gem_init__mm(i915);
        intel_gt_init_early(&i915->gt, i915);
        atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
index 7579300..7ec5037 100644 (file)
@@ -15,9 +15,9 @@
 static void mock_region_put_pages(struct drm_i915_gem_object *obj,
                                  struct sg_table *pages)
 {
+       i915_refct_sgt_put(obj->mm.rsgt);
+       obj->mm.rsgt = NULL;
        intel_region_ttm_resource_free(obj->mm.region, obj->mm.res);
-       sg_free_table(pages);
-       kfree(pages);
 }
 
 static int mock_region_get_pages(struct drm_i915_gem_object *obj)
@@ -36,12 +36,14 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
        if (IS_ERR(obj->mm.res))
                return PTR_ERR(obj->mm.res);
 
-       pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res);
-       if (IS_ERR(pages)) {
-               err = PTR_ERR(pages);
+       obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+                                                        obj->mm.res);
+       if (IS_ERR(obj->mm.rsgt)) {
+               err = PTR_ERR(obj->mm.rsgt);
                goto err_free_resource;
        }
 
+       pages = &obj->mm.rsgt->table;
        __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl));
 
        return 0;