Merge tag 'drm-intel-gt-next-2021-05-28' of git://anongit.freedesktop.org/drm/drm...
author Dave Airlie <airlied@redhat.com>
Wed, 2 Jun 2021 04:04:29 +0000 (14:04 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 2 Jun 2021 04:15:54 +0000 (14:15 +1000)
UAPI Changes:
- Add reworked uAPI for DG1 behind CONFIG_BROKEN (Matt A, Abdiel)

Driver Changes:

- Fix for Gitlab issues #3293 and #3450:
  Avoid kernel crash on older L-shape memory machines

- Add Wa_14010733141 (VDBox SFC reset) for Gen11+ (Aditya)
- Fix crash in auto_retire active retire callback due to
  misalignment (Stephane)
- Fix overlay active retire callback alignment (Tvrtko)
- Eliminate need to align active retire callbacks (Matt A, Ville,
  Daniel)
- Program FF_MODE2 tuning value for all Gen12 platforms (Caz)
- Add Wa_14011060649 for TGL,RKL,DG1 and ADLS (Swathi)
- Create stolen memory region from local memory on DG1 (CQ)
- Place PD in LMEM on dGFX (Matt A)
- Use WC when default state object is allocated in LMEM (Venkata)
- Determine the coherent map type based on object location (Venkata)
- Use lmem physical addresses for fb_mmap() on discrete (Mohammed)
- Bypass aperture on fbdev when LMEM is available (Anusha)
- Return error value when displayable BO not in LMEM for dGFX (Mohammed)
- Do release kernel context if breadcrumb measure fails (Janusz)
- Hide modparams for compiled-out features (Tvrtko)
- Apply Wa_22010271021 for all Gen11 platforms (Caz)
- Fix unlikely ref count race in arming the watchdog timer (Tvrtko)
- Check actual RC6 enable status in PMU (Tvrtko)
- Fix a double free in gen8_preallocate_top_level_pdp (Lv)
- Use trylock in shrinker for GGTT on BSW VT-d and BXT (Maarten)
- Remove erroneous i915_is_ggtt check for
  I915_GEM_OBJECT_UNBIND_VM_TRYLOCK (Maarten)

- Convert uAPI headers to real kerneldoc (Matt A)
- Clean up kerneldoc warnings in headers (Matt A, Maarten)
- Fail driver if LMEM training failed (Matt R)
- Avoid div-by-zero on Gen2 (Ville)
- Read C0DRB3/C1DRB3 as 16 bits again and add _BW suffix (Ville)
- Remove reference to struct drm_device.pdev (Thomas)
- Increase separation between GuC and execlists code (Chris, Matt B)

- Use might_alloc() (Bernard)
- Split DGFX_FEATURES from GEN12_FEATURES (Lucas)
- Deduplicate Wa_22010271021 programming (Jose)
- Drop duplicate WaDisable4x2SubspanOptimization:hsw (Tvrtko)
- Selftest improvements (Chris, Hsin-Yi, Tvrtko)
- Shuffle around init_memory_region for stolen (Matt)
- Typo fixes (wengjianfeng)

[airlied: fix conflict with fixes in i915_active.c]
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YLCbBR22BsQ/dpJB@jlahtine-mobl.ger.corp.intel.com
90 files changed:
Documentation/gpu/driver-uapi.rst [new file with mode: 0644]
Documentation/gpu/index.rst
Documentation/gpu/rfc/i915_gem_lmem.rst [new file with mode: 0644]
Documentation/gpu/rfc/index.rst
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_fbdev.c
drivers/gpu/drm/i915/display/intel_frontbuffer.c
drivers/gpu/drm/i915/display/intel_overlay.c
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_create.c
drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
drivers/gpu/drm/i915/gem/i915_gem_lmem.c
drivers/gpu/drm/i915/gem/i915_gem_lmem.h
drivers/gpu/drm/i915/gem/i915_gem_object.c
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
drivers/gpu/drm/i915/gem/i915_gem_region.c
drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
drivers/gpu/drm/i915/gem/i915_gem_stolen.c
drivers/gpu/drm/i915/gem/i915_gem_stolen.h
drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
drivers/gpu/drm/i915/gt/gen6_ppgtt.c
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/intel_context.c
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_pm.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
drivers/gpu/drm/i915/gt/intel_execlists_submission.h
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
drivers/gpu/drm/i915/gt/intel_gt_irq.c
drivers/gpu/drm/i915/gt/intel_gt_irq.h
drivers/gpu/drm/i915/gt/intel_gt_types.h
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_gtt.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_ppgtt.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_ring.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
drivers/gpu/drm/i915/gt/intel_rps.c
drivers/gpu/drm/i915/gt/intel_timeline.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/mock_engine.c
drivers/gpu/drm/i915/gt/selftest_context.c
drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
drivers/gpu/drm/i915/gt/selftest_execlists.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/gt/selftest_lrc.c
drivers/gpu/drm/i915/gt/selftest_rc6.c
drivers/gpu/drm/i915/gt/selftest_ring_submission.c
drivers/gpu/drm/i915/gt/selftest_rps.c
drivers/gpu/drm/i915/gt/shmem_utils.c
drivers/gpu/drm/i915/gt/uc/intel_guc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
drivers/gpu/drm/i915/gt/uc/intel_huc.c
drivers/gpu/drm/i915/i915_active.c
drivers/gpu/drm/i915/i915_active.h
drivers/gpu/drm/i915/i915_active_types.h
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_params.h
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_query.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/intel_memory_region.c
drivers/gpu/drm/i915/intel_memory_region.h
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/i915/selftests/i915_active.c
drivers/gpu/drm/i915/selftests/i915_gem.c
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
drivers/gpu/drm/i915/selftests/i915_perf.c
drivers/gpu/drm/i915/selftests/i915_vma.c
drivers/gpu/drm/i915/selftests/igt_spinner.c
drivers/gpu/drm/i915/selftests/intel_memory_region.c
drivers/gpu/drm/i915/selftests/librapl.c
drivers/gpu/drm/i915/selftests/librapl.h
include/uapi/drm/i915_drm.h

diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst
new file mode 100644 (file)
index 0000000..4411e69
--- /dev/null
@@ -0,0 +1,8 @@
+===============
+DRM Driver uAPI
+===============
+
+drm/i915 uAPI
+=============
+
+.. kernel-doc:: include/uapi/drm/i915_drm.h
index ec4bc72..b9c1214 100644 (file)
@@ -10,6 +10,7 @@ Linux GPU Driver Developer's Guide
    drm-kms
    drm-kms-helpers
    drm-uapi
+   driver-uapi
    drm-client
    drivers
    backlight
diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst
new file mode 100644 (file)
index 0000000..675ba86
--- /dev/null
@@ -0,0 +1,131 @@
+=========================
+I915 DG1/LMEM RFC Section
+=========================
+
+Upstream plan
+=============
+For upstream, the overall plan for landing all the DG1 stuff and turning it on
+for real, with all the uAPI bits, is:
+
+* Merge basic HW enabling of DG1 (still without the pciid)
+* Merge the uAPI bits behind a special CONFIG_BROKEN (or similar) flag
+        * At this point we can still make changes, but importantly this lets us
+          start running IGTs which can utilize local-memory in CI
+* Convert over to TTM and make sure it all keeps working. Some of the work items:
+        * TTM shrinker for discrete
+        * dma_resv_lockitem for full dma_resv_lock, i.e. not just trylock
+        * Use TTM CPU pagefault handler
+        * Route shmem backend over to TTM SYSTEM for discrete
+        * TTM purgeable object support
+        * Move i915 buddy allocator over to TTM
+        * MMAP ioctl mode (see `I915 MMAP`_)
+        * SET/GET ioctl caching (see `I915 SET/GET CACHING`_)
+* Send RFC (with mesa-dev on cc) for final sign-off on the uAPI
+* Add the pciid for DG1 and turn on the uAPI for real
+
+New object placement and region query uAPI
+==========================================
+Starting from DG1 we need to give userspace the ability to allocate buffers from
+device local-memory. Currently the driver supports gem_create, which can place
+buffers in system memory via shmem, and the usual assortment of other
+interfaces, like dumb buffers and userptr.
+
+To support this new capability, while also providing a uAPI which will work
+beyond just DG1, we propose to offer three new bits of uAPI:
+
+DRM_I915_QUERY_MEMORY_REGIONS
+-----------------------------
+New query ID which allows userspace to discover the list of supported memory
+regions (like system-memory and local-memory) for a given device. We identify
+each region with a class and instance pair, which should be unique. The class
+here would be DEVICE or SYSTEM, and the instance would be zero, on platforms
+like DG1.
+
+Side note: The class/instance design is borrowed from our existing engine uAPI,
+where we describe every physical engine in terms of its class, and the
+particular instance, since we can have more than one per class.
+
+In the future we also want to expose more information which can further
+describe the capabilities of a region.
+
+.. kernel-doc:: include/uapi/drm/i915_drm.h
+        :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions
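+
+A rough sketch of how userspace might consume this query, using the two-pass
+pattern of DRM_IOCTL_I915_QUERY (first probe the required buffer size, then
+fetch the data). Error handling is elided for brevity:
+
+.. code-block:: C
+
+        struct drm_i915_query_item item = {
+                .query_id = DRM_I915_QUERY_MEMORY_REGIONS,
+        };
+        struct drm_i915_query query = {
+                .num_items = 1,
+                .items_ptr = (uintptr_t)&item,
+        };
+        struct drm_i915_query_memory_regions *info;
+        unsigned int i;
+
+        /* First pass: with length == 0 the kernel reports the required size */
+        ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+
+        info = malloc(item.length);
+        item.data_ptr = (uintptr_t)info;
+
+        /* Second pass: the kernel now copies out the region list */
+        ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+
+        for (i = 0; i < info->num_regions; i++)
+                printf("region { class: %d, inst: %d }\n",
+                       info->regions[i].region.memory_class,
+                       info->regions[i].region.memory_instance);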
+
+GEM_CREATE_EXT
+--------------
+New ioctl which is basically just gem_create but now allows userspace to provide
+a chain of possible extensions. Note that if we don't provide any extensions and
+set flags=0 then we get the exact same behaviour as gem_create.
+
+Side note: We also need to support PXP[1] in the near future, which is also
+applicable to integrated platforms, and adds its own gem_create_ext extension,
+which basically lets userspace mark a buffer as "protected".
+
+.. kernel-doc:: include/uapi/drm/i915_drm.h
+        :functions: drm_i915_gem_create_ext
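+
+A minimal sketch of calling the new ioctl with no extensions chained, which
+should behave exactly like gem_create (error handling elided):
+
+.. code-block:: C
+
+        struct drm_i915_gem_create_ext create = {
+                .size = 2 * 1024 * 1024, /* rounded up to the region min page size */
+                .flags = 0,              /* must currently be zero */
+                .extensions = 0,         /* empty extension chain */
+        };
+
+        ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create);
+        /* create.handle is the new object, create.size the final object size */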
+
+I915_GEM_CREATE_EXT_MEMORY_REGIONS
+----------------------------------
+Implemented as an extension to gem_create_ext, this would allow userspace to
+optionally provide an immutable list of preferred placements at creation time,
+in priority order, for a given buffer object. Each placement is expected to use
+the class/instance encoding, as per the output of the regions query. Having the
+list in priority order will be useful in the future when placing an object, say
+during eviction.
+
+.. kernel-doc:: include/uapi/drm/i915_drm.h
+        :functions: drm_i915_gem_create_ext_memory_regions
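+
+Continuing the sketch above, asking for an object that prefers local-memory but
+can fall back to system memory might look like the following, with the
+class/instance values taken from the regions query:
+
+.. code-block:: C
+
+        struct drm_i915_gem_memory_class_instance regions[] = {
+                { I915_MEMORY_CLASS_DEVICE, 0 }, /* preferred placement */
+                { I915_MEMORY_CLASS_SYSTEM, 0 }, /* fallback placement */
+        };
+        struct drm_i915_gem_create_ext_memory_regions ext = {
+                .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
+                .num_regions = 2,
+                .regions = (uintptr_t)regions,
+        };
+        struct drm_i915_gem_create_ext create = {
+                .size = 2 * 1024 * 1024,
+                .extensions = (uintptr_t)&ext,
+        };
+
+        ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create);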
+
+One fair criticism here is that this seems a little over-engineered[2]. If we
+just consider DG1, then yes, a simple gem_create.flags or something similar
+would be all that's needed to tell the kernel to allocate the buffer in
+local-memory. However, looking to the future, we need uAPI which can also
+support the upcoming Xe HP multi-tile architecture in a sane way, where there
+can be multiple local-memory instances for a given device. Using both class and
+instance in our uAPI to describe regions is therefore desirable, although
+specifically for DG1 it's uninteresting, since we only have a single
+local-memory instance.
+
+Existing uAPI issues
+====================
+Some potential issues we still need to resolve.
+
+I915 MMAP
+---------
+In i915 there are multiple ways to MMAP a GEM object, including mapping the
+same object using different mapping types (WC vs WB), i.e. multiple active
+mmaps per object. TTM expects at most one MMAP for the lifetime of the object.
+If it turns out that we have to backpedal here, there might be some potential
+userspace fallout.
+
+I915 SET/GET CACHING
+--------------------
+In i915 we have the set/get_caching ioctls. TTM doesn't let us change this,
+but DG1 doesn't support non-snooped PCIe transactions, so we can just always
+allocate as WB for smem-only buffers. If/when our hw gains support for
+non-snooped PCIe transactions then we must fix this mode at allocation time as
+a new GEM extension.
+
+This is related to the mmap problem, because in general (meaning, when we're
+not running on Intel CPUs) the CPU mmap must not, ever, be inconsistent with
+the allocation mode.
+
+One possible idea is to let the kernel pick the mmap mode for userspace from
+the following table:
+
+smem-only: WB. Userspace does not need to call clflush.
+
+smem+lmem: We only ever allow a single mode, so simply allocate this as uncached
+memory, and always give userspace a WC mapping. The GPU still does snooped
+access here (assuming we can't turn it off like on DG1), which is a bit
+inefficient.
+
+lmem-only: always WC
+
+This means on discrete you only get a single mmap mode; all others must be
+rejected. That's probably going to be a new default mode or something like
+that.
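+
+As a hypothetical helper (names invented purely for illustration), the policy
+from the table above collapses to:
+
+.. code-block:: C
+
+        /* smem-only objects get a WB mmap; anything that can live in lmem
+         * must be WC, since that is the single mode we can honour for every
+         * placement. */
+        static bool object_needs_wc_mmap(struct drm_i915_gem_object *obj)
+        {
+                return object_has_lmem_placement(obj);
+        }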
+
+Links
+=====
+[1] https://patchwork.freedesktop.org/series/86798/
+
+[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791
index a8621f7..0567044 100644 (file)
@@ -15,3 +15,7 @@ host such documentation:
 
 * Once the code has landed move all the documentation to the right places in
   the main core, helper or driver sections.
+
+.. toctree::
+
+    i915_gem_lmem.rst
index 384ff0b..422b59e 100644 (file)
@@ -11660,11 +11660,20 @@ intel_user_framebuffer_create(struct drm_device *dev,
        struct drm_framebuffer *fb;
        struct drm_i915_gem_object *obj;
        struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
+       struct drm_i915_private *i915;
 
        obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
        if (!obj)
                return ERR_PTR(-ENOENT);
 
+       /* object is backed with LMEM for discrete */
+       i915 = to_i915(obj->base.dev);
+       if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) {
+               /* object is "remote", not in local memory */
+               i915_gem_object_put(obj);
+               return ERR_PTR(-EREMOTE);
+       }
+
        fb = intel_framebuffer_create(obj, &mode_cmd);
        i915_gem_object_put(obj);
 
index ccd00e6..4af4022 100644 (file)
@@ -41,6 +41,8 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 
+#include "gem/i915_gem_lmem.h"
+
 #include "i915_drv.h"
 #include "intel_display_types.h"
 #include "intel_fbdev.h"
@@ -137,14 +139,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
        size = mode_cmd.pitches[0] * mode_cmd.height;
        size = PAGE_ALIGN(size);
 
-       /* If the FB is too big, just don't use it since fbdev is not very
-        * important and we should probably use that space with FBC or other
-        * features. */
        obj = ERR_PTR(-ENODEV);
-       if (size * 2 < dev_priv->stolen_usable_size)
-               obj = i915_gem_object_create_stolen(dev_priv, size);
-       if (IS_ERR(obj))
-               obj = i915_gem_object_create_shmem(dev_priv, size);
+       if (HAS_LMEM(dev_priv)) {
+               obj = i915_gem_object_create_lmem(dev_priv, size,
+                                                 I915_BO_ALLOC_CONTIGUOUS);
+       } else {
+               /*
+                * If the FB is too big, just don't use it since fbdev is not very
+                * important and we should probably use that space with FBC or other
+                * features.
+                */
+               if (size * 2 < dev_priv->stolen_usable_size)
+                       obj = i915_gem_object_create_stolen(dev_priv, size);
+               if (IS_ERR(obj))
+                       obj = i915_gem_object_create_shmem(dev_priv, size);
+       }
+
        if (IS_ERR(obj)) {
                drm_err(&dev_priv->drm, "failed to allocate framebuffer\n");
                return PTR_ERR(obj);
@@ -178,6 +188,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
        unsigned long flags = 0;
        bool prealloc = false;
        void __iomem *vaddr;
+       struct drm_i915_gem_object *obj;
        int ret;
 
        if (intel_fb &&
@@ -232,13 +243,27 @@ static int intelfb_create(struct drm_fb_helper *helper,
        info->fbops = &intelfb_ops;
 
        /* setup aperture base/size for vesafb takeover */
-       info->apertures->ranges[0].base = ggtt->gmadr.start;
-       info->apertures->ranges[0].size = ggtt->mappable_end;
+       obj = intel_fb_obj(&intel_fb->base);
+       if (i915_gem_object_is_lmem(obj)) {
+               struct intel_memory_region *mem = obj->mm.region;
+
+               info->apertures->ranges[0].base = mem->io_start;
+               info->apertures->ranges[0].size = mem->total;
+
+               /* Use fbdev's framebuffer from lmem for discrete */
+               info->fix.smem_start =
+                       (unsigned long)(mem->io_start +
+                                       i915_gem_object_get_dma_address(obj, 0));
+               info->fix.smem_len = obj->base.size;
+       } else {
+               info->apertures->ranges[0].base = ggtt->gmadr.start;
+               info->apertures->ranges[0].size = ggtt->mappable_end;
 
-       /* Our framebuffer is the entirety of fbdev's system memory */
-       info->fix.smem_start =
-               (unsigned long)(ggtt->gmadr.start + vma->node.start);
-       info->fix.smem_len = vma->node.size;
+               /* Our framebuffer is the entirety of fbdev's system memory */
+               info->fix.smem_start =
+                       (unsigned long)(ggtt->gmadr.start + vma->node.start);
+               info->fix.smem_len = vma->node.size;
+       }
 
        vaddr = i915_vma_pin_iomap(vma);
        if (IS_ERR(vaddr)) {
index 8161d49..8e75deb 100644 (file)
@@ -211,7 +211,6 @@ static int frontbuffer_active(struct i915_active *ref)
        return 0;
 }
 
-__i915_active_call
 static void frontbuffer_retire(struct i915_active *ref)
 {
        struct intel_frontbuffer *front =
@@ -266,7 +265,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
        atomic_set(&front->bits, 0);
        i915_active_init(&front->write,
                         frontbuffer_active,
-                        i915_active_may_sleep(frontbuffer_retire));
+                        frontbuffer_retire,
+                        I915_ACTIVE_RETIRE_SLEEPS);
 
        spin_lock(&i915->fb_tracking.lock);
        if (rcu_access_pointer(obj->frontbuffer)) {
index 46cba12..7e3f5c6 100644 (file)
@@ -384,8 +384,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
                i830_overlay_clock_gating(dev_priv, true);
 }
 
-__i915_active_call static void
-intel_overlay_last_flip_retire(struct i915_active *active)
+static void intel_overlay_last_flip_retire(struct i915_active *active)
 {
        struct intel_overlay *overlay =
                container_of(active, typeof(*overlay), last_flip);
@@ -1402,7 +1401,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
        overlay->saturation = 146;
 
        i915_active_init(&overlay->last_flip,
-                        NULL, intel_overlay_last_flip_retire);
+                        NULL, intel_overlay_last_flip_retire, 0);
 
        ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
        if (ret)
index fd8ee52..188dee1 100644 (file)
@@ -1046,7 +1046,6 @@ struct context_barrier_task {
        void *data;
 };
 
-__i915_active_call
 static void cb_retire(struct i915_active *base)
 {
        struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
@@ -1080,7 +1079,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
        if (!cb)
                return -ENOMEM;
 
-       i915_active_init(&cb->base, NULL, cb_retire);
+       i915_active_init(&cb->base, NULL, cb_retire, 0);
        err = i915_active_acquire(&cb->base);
        if (err) {
                kfree(cb);
index 45d60e3..548ddf3 100644 (file)
  */
 
 #include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 
 #include "i915_drv.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
+
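+/*
+ * An object can have several possible placements, each with its own minimum
+ * page size; round the object size up to the largest of them so the object
+ * fits whichever placement it finally lands in.
+ */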
+static u32 object_max_page_size(struct drm_i915_gem_object *obj)
+{
+       u32 max_page_size = 0;
+       int i;
+
+       for (i = 0; i < obj->mm.n_placements; i++) {
+               struct intel_memory_region *mr = obj->mm.placements[i];
+
+               GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
+               max_page_size = max_t(u32, max_page_size, mr->min_page_size);
+       }
+
+       GEM_BUG_ON(!max_page_size);
+       return max_page_size;
+}
+
+static void object_set_placements(struct drm_i915_gem_object *obj,
+                                 struct intel_memory_region **placements,
+                                 unsigned int n_placements)
+{
+       GEM_BUG_ON(!n_placements);
+
+       /*
+        * For the common case of one memory region, skip storing an
+        * allocated array and just point at the region directly.
+        */
+       if (n_placements == 1) {
+               struct intel_memory_region *mr = placements[0];
+               struct drm_i915_private *i915 = mr->i915;
+
+               obj->mm.placements = &i915->mm.regions[mr->id];
+               obj->mm.n_placements = 1;
+       } else {
+               obj->mm.placements = placements;
+               obj->mm.n_placements = n_placements;
+       }
+}
+
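+/*
+ * Expose the object to userspace: create a handle (which now owns our
+ * creation reference) and report the final size back to the caller.
+ */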
+static int i915_gem_publish(struct drm_i915_gem_object *obj,
+                           struct drm_file *file,
+                           u64 *size_p,
+                           u32 *handle_p)
+{
+       u64 size = obj->base.size;
+       int ret;
+
+       ret = drm_gem_handle_create(file, &obj->base, handle_p);
+       /* drop reference from allocate - handle holds it now */
+       i915_gem_object_put(obj);
+       if (ret)
+               return ret;
+
+       *size_p = size;
+       return 0;
+}
 
 static int
-i915_gem_create(struct drm_file *file,
-               struct intel_memory_region *mr,
-               u64 *size_p,
-               u32 *handle_p)
+i915_gem_setup(struct drm_i915_gem_object *obj, u64 size)
 {
-       struct drm_i915_gem_object *obj;
-       u32 handle;
-       u64 size;
+       struct intel_memory_region *mr = obj->mm.placements[0];
+       unsigned int flags;
        int ret;
 
-       GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
-       size = round_up(*size_p, mr->min_page_size);
+       size = round_up(size, object_max_page_size(obj));
        if (size == 0)
                return -EINVAL;
 
        /* For most of the ABI (e.g. mmap) we think in system pages */
        GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
 
-       /* Allocate the new object */
-       obj = i915_gem_object_create_region(mr, size, 0);
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
+       if (i915_gem_object_size_2big(size))
+               return -E2BIG;
 
-       GEM_BUG_ON(size != obj->base.size);
+       /*
+        * For now resort to CPU based clearing for device local-memory, in the
+        * near future this will use the blitter engine for accelerated, GPU
+        * based clearing.
+        */
+       flags = 0;
+       if (mr->type == INTEL_MEMORY_LOCAL)
+               flags = I915_BO_ALLOC_CPU_CLEAR;
 
-       ret = drm_gem_handle_create(file, &obj->base, &handle);
-       /* drop reference from allocate - handle holds it now */
-       i915_gem_object_put(obj);
+       ret = mr->ops->init_object(mr, obj, size, flags);
        if (ret)
                return ret;
 
-       *handle_p = handle;
-       *size_p = size;
+       GEM_BUG_ON(size != obj->base.size);
+
+       trace_i915_gem_object_create(obj);
        return 0;
 }
 
@@ -50,9 +108,12 @@ i915_gem_dumb_create(struct drm_file *file,
                     struct drm_device *dev,
                     struct drm_mode_create_dumb *args)
 {
+       struct drm_i915_gem_object *obj;
+       struct intel_memory_region *mr;
        enum intel_memory_type mem_type;
        int cpp = DIV_ROUND_UP(args->bpp, 8);
        u32 format;
+       int ret;
 
        switch (cpp) {
        case 1:
@@ -85,10 +146,22 @@ i915_gem_dumb_create(struct drm_file *file,
        if (HAS_LMEM(to_i915(dev)))
                mem_type = INTEL_MEMORY_LOCAL;
 
-       return i915_gem_create(file,
-                              intel_memory_region_by_type(to_i915(dev),
-                                                          mem_type),
-                              &args->size, &args->handle);
+       obj = i915_gem_object_alloc();
+       if (!obj)
+               return -ENOMEM;
+
+       mr = intel_memory_region_by_type(to_i915(dev), mem_type);
+       object_set_placements(obj, &mr, 1);
+
+       ret = i915_gem_setup(obj, args->size);
+       if (ret)
+               goto object_free;
+
+       return i915_gem_publish(obj, file, &args->size, &args->handle);
+
+object_free:
+       i915_gem_object_free(obj);
+       return ret;
 }
 
 /**
@@ -103,11 +176,229 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 {
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_create *args = data;
+       struct drm_i915_gem_object *obj;
+       struct intel_memory_region *mr;
+       int ret;
 
        i915_gem_flush_free_objects(i915);
 
-       return i915_gem_create(file,
-                              intel_memory_region_by_type(i915,
-                                                          INTEL_MEMORY_SYSTEM),
-                              &args->size, &args->handle);
+       obj = i915_gem_object_alloc();
+       if (!obj)
+               return -ENOMEM;
+
+       mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
+       object_set_placements(obj, &mr, 1);
+
+       ret = i915_gem_setup(obj, args->size);
+       if (ret)
+               goto object_free;
+
+       return i915_gem_publish(obj, file, &args->size, &args->handle);
+
+object_free:
+       i915_gem_object_free(obj);
+       return ret;
+}
+
+struct create_ext {
+       struct drm_i915_private *i915;
+       struct drm_i915_gem_object *vanilla_object;
+};
+
+static void repr_placements(char *buf, size_t size,
+                           struct intel_memory_region **placements,
+                           int n_placements)
+{
+       int i;
+
+       buf[0] = '\0';
+
+       for (i = 0; i < n_placements; i++) {
+               struct intel_memory_region *mr = placements[i];
+               int r;
+
+               r = snprintf(buf, size, "\n  %s -> { class: %d, inst: %d }",
+                            mr->name, mr->type, mr->instance);
+               if (r >= size)
+                       return;
+
+               buf += r;
+               size -= r;
+       }
+}
+
+static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
+                         struct create_ext *ext_data)
+{
+       struct drm_i915_private *i915 = ext_data->i915;
+       struct drm_i915_gem_memory_class_instance __user *uregions =
+               u64_to_user_ptr(args->regions);
+       struct drm_i915_gem_object *obj = ext_data->vanilla_object;
+       struct intel_memory_region **placements;
+       u32 mask;
+       int i, ret = 0;
+
+       if (args->pad) {
+               drm_dbg(&i915->drm, "pad should be zero\n");
+               ret = -EINVAL;
+       }
+
+       if (!args->num_regions) {
+               drm_dbg(&i915->drm, "num_regions is zero\n");
+               ret = -EINVAL;
+       }
+
+       if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) {
+               drm_dbg(&i915->drm, "num_regions is too large\n");
+               ret = -EINVAL;
+       }
+
+       if (ret)
+               return ret;
+
+       placements = kmalloc_array(args->num_regions,
+                                  sizeof(struct intel_memory_region *),
+                                  GFP_KERNEL);
+       if (!placements)
+               return -ENOMEM;
+
+       mask = 0;
+       for (i = 0; i < args->num_regions; i++) {
+               struct drm_i915_gem_memory_class_instance region;
+               struct intel_memory_region *mr;
+
+               if (copy_from_user(&region, uregions, sizeof(region))) {
+                       ret = -EFAULT;
+                       goto out_free;
+               }
+
+               mr = intel_memory_region_lookup(i915,
+                                               region.memory_class,
+                                               region.memory_instance);
+               if (!mr || mr->private) {
+                       drm_dbg(&i915->drm, "Device is missing region { class: %d, inst: %d } at index = %d\n",
+                               region.memory_class, region.memory_instance, i);
+                       ret = -EINVAL;
+                       goto out_dump;
+               }
+
+               if (mask & BIT(mr->id)) {
+                       drm_dbg(&i915->drm, "Found duplicate placement %s -> { class: %d, inst: %d } at index = %d\n",
+                               mr->name, region.memory_class,
+                               region.memory_instance, i);
+                       ret = -EINVAL;
+                       goto out_dump;
+               }
+
+               placements[i] = mr;
+               mask |= BIT(mr->id);
+
+               ++uregions;
+       }
+
+       if (obj->mm.placements) {
+               ret = -EINVAL;
+               goto out_dump;
+       }
+
+       object_set_placements(obj, placements, args->num_regions);
+       if (args->num_regions == 1)
+               kfree(placements);
+
+       return 0;
+
+out_dump:
+       if (1) {
+               char buf[256];
+
+               if (obj->mm.placements) {
+                       repr_placements(buf,
+                                       sizeof(buf),
+                                       obj->mm.placements,
+                                       obj->mm.n_placements);
+                       drm_dbg(&i915->drm,
+                               "Placements were already set in previous EXT. Existing placements: %s\n",
+                               buf);
+               }
+
+               repr_placements(buf, sizeof(buf), placements, i);
+               drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf);
+       }
+
+out_free:
+       kfree(placements);
+       return ret;
+}
+
+static int ext_set_placements(struct i915_user_extension __user *base,
+                             void *data)
+{
+       struct drm_i915_gem_create_ext_memory_regions ext;
+
+       if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM))
+               return -ENODEV;
+
+       if (copy_from_user(&ext, base, sizeof(ext)))
+               return -EFAULT;
+
+       return set_placements(&ext, data);
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+       [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
+};
+
+/**
+ * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ */
+int
+i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
+                         struct drm_file *file)
+{
+       struct drm_i915_private *i915 = to_i915(dev);
+       struct drm_i915_gem_create_ext *args = data;
+       struct create_ext ext_data = { .i915 = i915 };
+       struct intel_memory_region **placements_ext;
+       struct drm_i915_gem_object *obj;
+       int ret;
+
+       if (args->flags)
+               return -EINVAL;
+
+       i915_gem_flush_free_objects(i915);
+
+       obj = i915_gem_object_alloc();
+       if (!obj)
+               return -ENOMEM;
+
+       ext_data.vanilla_object = obj;
+       ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+                                  create_extensions,
+                                  ARRAY_SIZE(create_extensions),
+                                  &ext_data);
+       placements_ext = obj->mm.placements;
+       if (ret)
+               goto object_free;
+
+       if (!placements_ext) {
+               struct intel_memory_region *mr =
+                       intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
+
+               object_set_placements(obj, &mr, 1);
+       }
+
+       ret = i915_gem_setup(obj, args->size);
+       if (ret)
+               goto object_free;
+
+       return i915_gem_publish(obj, file, &args->size, &args->handle);
+
+object_free:
+       if (obj->mm.n_placements > 1)
+               kfree(placements_ext);
+       i915_gem_object_free(obj);
+       return ret;
 }
index 7fd22f3..28d6526 100644 (file)
@@ -14,6 +14,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file);
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file);
+int i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
+                             struct drm_file *file);
 int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
index ce1c83c..f44bdd0 100644 (file)
@@ -17,9 +17,27 @@ const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = {
        .release = i915_gem_object_release_memory_region,
 };
 
+void __iomem *
+i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
+                           unsigned long n,
+                           unsigned long size)
+{
+       resource_size_t offset;
+
+       GEM_BUG_ON(!i915_gem_object_is_contiguous(obj));
+
+       offset = i915_gem_object_get_dma_address(obj, n);
+       offset -= obj->mm.region->region.start;
+
+       return io_mapping_map_wc(&obj->mm.region->iomap, offset, size);
+}
+
 bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
 {
-       return obj->ops == &i915_gem_lmem_obj_ops;
+       struct intel_memory_region *mr = obj->mm.region;
+
+       return mr && (mr->type == INTEL_MEMORY_LOCAL ||
+                     mr->type == INTEL_MEMORY_STOLEN_LOCAL);
 }
 
 struct drm_i915_gem_object *
index 036d53c..fac6bc5 100644 (file)
@@ -14,6 +14,11 @@ struct intel_memory_region;
 
 extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops;
 
+void __iomem *
+i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
+                           unsigned long n,
+                           unsigned long size);
+
 bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
 
 struct drm_i915_gem_object *
index ea74cbc..2814441 100644 (file)
@@ -249,6 +249,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
                if (obj->ops->release)
                        obj->ops->release(obj);
 
+               if (obj->mm.n_placements > 1)
+                       kfree(obj->mm.placements);
+
                /* But keep the pointer alive for RCU-protected lookups */
                call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
                cond_resched();
index 8e485cb..0727d0c 100644 (file)
@@ -172,11 +172,13 @@ struct drm_i915_gem_object {
 #define I915_BO_ALLOC_CONTIGUOUS BIT(0)
 #define I915_BO_ALLOC_VOLATILE   BIT(1)
 #define I915_BO_ALLOC_STRUCT_PAGE BIT(2)
+#define I915_BO_ALLOC_CPU_CLEAR  BIT(3)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
                             I915_BO_ALLOC_VOLATILE | \
-                            I915_BO_ALLOC_STRUCT_PAGE)
-#define I915_BO_READONLY         BIT(3)
-#define I915_TILING_QUIRK_BIT    4 /* unknown swizzling; do not release! */
+                            I915_BO_ALLOC_STRUCT_PAGE | \
+                            I915_BO_ALLOC_CPU_CLEAR)
+#define I915_BO_READONLY         BIT(4)
+#define I915_TILING_QUIRK_BIT    5 /* unknown swizzling; do not release! */
 
        /*
         * Is the object to be mapped as read-only to the GPU
@@ -219,6 +221,12 @@ struct drm_i915_gem_object {
                atomic_t pages_pin_count;
                atomic_t shrink_pin;
 
+               /**
+                * Priority list of potential placements for this object.
+                */
+               struct intel_memory_region **placements;
+               int n_placements;
+
                /**
                 * Memory region for this object.
                 */
index 6a84fb6..ce8fcfc 100644 (file)
@@ -95,6 +95,28 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
        sg_mark_end(sg);
        i915_sg_trim(st);
 
+       /* Intended for kernel internal use only */
+       if (obj->flags & I915_BO_ALLOC_CPU_CLEAR) {
+               struct scatterlist *sg;
+               unsigned long i;
+
+               for_each_sg(st->sgl, sg, st->nents, i) {
+                       unsigned int length;
+                       void __iomem *vaddr;
+                       dma_addr_t daddr;
+
+                       daddr = sg_dma_address(sg);
+                       daddr -= mem->region.start;
+                       length = sg_dma_len(sg);
+
+                       vaddr = io_mapping_map_wc(&mem->iomap, daddr, length);
+                       memset64((void __force *)vaddr, 0, length / sizeof(u64));
+                       io_mapping_unmap(vaddr);
+               }
+
+               wmb();
+       }
+
        __i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
        return 0;
index 4f9c8d3..f4fb68e 100644 (file)
@@ -38,15 +38,17 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 }
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
-                             unsigned long shrink)
+                             unsigned long shrink, bool trylock_vm)
 {
        unsigned long flags;
 
        flags = 0;
        if (shrink & I915_SHRINK_ACTIVE)
-               flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
+               flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
        if (!(shrink & I915_SHRINK_BOUND))
-               flags = I915_GEM_OBJECT_UNBIND_TEST;
+               flags |= I915_GEM_OBJECT_UNBIND_TEST;
+       if (trylock_vm)
+               flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;
 
        if (i915_gem_object_unbind(obj, flags) == 0)
                return true;
@@ -117,6 +119,9 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
        unsigned long scanned = 0;
        int err;
 
+       /* CHV + VTD workaround uses stop_machine(); need to trylock vm->mutex */
+       bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);
+
        trace_i915_gem_shrink(i915, target, shrink);
 
        /*
@@ -204,7 +209,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
                        spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
                        err = 0;
-                       if (unsafe_drop_pages(obj, shrink)) {
+                       if (unsafe_drop_pages(obj, shrink, trylock_vm)) {
                                /* May arrive from get_pages on another bo */
                                if (!ww) {
                                        if (!i915_gem_object_trylock(obj))
index b0597de..b5553fc 100644 (file)
@@ -10,6 +10,7 @@
 #include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
@@ -121,6 +122,14 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
                }
        }
 
+       /*
+        * With stolen lmem, we don't need to check if the address range
+        * overlaps with the non-stolen system memory range, since lmem is local
+        * to the gpu.
+        */
+       if (HAS_LMEM(i915))
+               return 0;
+
        /*
         * Verify that nothing else uses this physical address. Stolen
         * memory should be reserved by the BIOS and hidden from the
@@ -374,8 +383,9 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
        }
 }
 
-static int i915_gem_init_stolen(struct drm_i915_private *i915)
+static int i915_gem_init_stolen(struct intel_memory_region *mem)
 {
+       struct drm_i915_private *i915 = mem->i915;
        struct intel_uncore *uncore = &i915->uncore;
        resource_size_t reserved_base, stolen_top;
        resource_size_t reserved_total, reserved_size;
@@ -396,10 +406,10 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915)
                return 0;
        }
 
-       if (resource_size(&intel_graphics_stolen_res) == 0)
+       if (resource_size(&mem->region) == 0)
                return 0;
 
-       i915->dsm = intel_graphics_stolen_res;
+       i915->dsm = mem->region;
 
        if (i915_adjust_stolen(i915, &i915->dsm))
                return 0;
@@ -627,10 +637,17 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
 {
        static struct lock_class_key lock_class;
        unsigned int cache_level;
+       unsigned int flags;
        int err;
 
+       /*
+        * Stolen objects are always physically contiguous since we just
+        * allocate one big block underneath using the drm_mm range allocator.
+        */
+       flags = I915_BO_ALLOC_CONTIGUOUS;
+
        drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size);
-       i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, 0);
+       i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, flags);
 
        obj->stolen = stolen;
        obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
@@ -640,9 +657,11 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
        if (WARN_ON(!i915_gem_object_trylock(obj)))
                return -EBUSY;
 
+       i915_gem_object_init_memory_region(obj, mem);
+
        err = i915_gem_object_pin_pages(obj);
-       if (!err)
-               i915_gem_object_init_memory_region(obj, mem);
+       if (err)
+               i915_gem_object_release_memory_region(obj);
        i915_gem_object_unlock(obj);
 
        return err;
@@ -667,7 +686,8 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
        if (!stolen)
                return -ENOMEM;
 
-       ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096);
+       ret = i915_gem_stolen_insert_node(i915, stolen, size,
+                                         mem->min_page_size);
        if (ret)
                goto err_free;
 
@@ -688,39 +708,126 @@ struct drm_i915_gem_object *
 i915_gem_object_create_stolen(struct drm_i915_private *i915,
                              resource_size_t size)
 {
-       return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN_SMEM],
-                                            size, I915_BO_ALLOC_CONTIGUOUS);
+       return i915_gem_object_create_region(i915->mm.stolen_region, size, 0);
 }
 
-static int init_stolen(struct intel_memory_region *mem)
+static int init_stolen_smem(struct intel_memory_region *mem)
 {
-       intel_memory_region_set_name(mem, "stolen");
-
        /*
         * Initialise stolen early so that we may reserve preallocated
         * objects for the BIOS to KMS transition.
         */
-       return i915_gem_init_stolen(mem->i915);
+       return i915_gem_init_stolen(mem);
 }
 
-static void release_stolen(struct intel_memory_region *mem)
+static void release_stolen_smem(struct intel_memory_region *mem)
 {
        i915_gem_cleanup_stolen(mem->i915);
 }
 
-static const struct intel_memory_region_ops i915_region_stolen_ops = {
-       .init = init_stolen,
-       .release = release_stolen,
+static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
+       .init = init_stolen_smem,
+       .release = release_stolen_smem,
        .init_object = _i915_gem_object_stolen_init,
 };
 
-struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915)
+static int init_stolen_lmem(struct intel_memory_region *mem)
 {
-       return intel_memory_region_create(i915,
-                                         intel_graphics_stolen_res.start,
-                                         resource_size(&intel_graphics_stolen_res),
-                                         PAGE_SIZE, 0,
-                                         &i915_region_stolen_ops);
+       int err;
+
+       if (GEM_WARN_ON(resource_size(&mem->region) == 0))
+               return -ENODEV;
+
+       if (!io_mapping_init_wc(&mem->iomap,
+                               mem->io_start,
+                               resource_size(&mem->region)))
+               return -EIO;
+
+       /*
+        * TODO: For stolen lmem we mostly just care about populating the dsm
+        * related bits and setting up the drm_mm allocator for the range.
+        * Perhaps split up i915_gem_init_stolen() for this.
+        */
+       err = i915_gem_init_stolen(mem);
+       if (err)
+               goto err_fini;
+
+       return 0;
+
+err_fini:
+       io_mapping_fini(&mem->iomap);
+       return err;
+}
+
+static void release_stolen_lmem(struct intel_memory_region *mem)
+{
+       io_mapping_fini(&mem->iomap);
+       i915_gem_cleanup_stolen(mem->i915);
+}
+
+static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
+       .init = init_stolen_lmem,
+       .release = release_stolen_lmem,
+       .init_object = _i915_gem_object_stolen_init,
+};
+
+struct intel_memory_region *
+i915_gem_stolen_lmem_setup(struct drm_i915_private *i915)
+{
+       struct intel_uncore *uncore = &i915->uncore;
+       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       struct intel_memory_region *mem;
+       resource_size_t io_start;
+       resource_size_t lmem_size;
+       u64 lmem_base;
+
+       lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
+       if (GEM_WARN_ON(lmem_base >= pci_resource_len(pdev, 2)))
+               return ERR_PTR(-ENODEV);
+
+       lmem_size = pci_resource_len(pdev, 2) - lmem_base;
+       io_start = pci_resource_start(pdev, 2) + lmem_base;
+
+       mem = intel_memory_region_create(i915, lmem_base, lmem_size,
+                                        I915_GTT_PAGE_SIZE_4K, io_start,
+                                        &i915_region_stolen_lmem_ops);
+       if (IS_ERR(mem))
+               return mem;
+
+       /*
+        * TODO: consider creating common helper to just print all the
+        * interesting stuff from intel_memory_region, which we can use for all
+        * our probed regions.
+        */
+
+       drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
+               &mem->io_start);
+
+       intel_memory_region_set_name(mem, "stolen-local");
+
+       mem->private = true;
+
+       return mem;
+}
+
+struct intel_memory_region*
+i915_gem_stolen_smem_setup(struct drm_i915_private *i915)
+{
+       struct intel_memory_region *mem;
+
+       mem = intel_memory_region_create(i915,
+                                        intel_graphics_stolen_res.start,
+                                        resource_size(&intel_graphics_stolen_res),
+                                        PAGE_SIZE, 0,
+                                        &i915_region_stolen_smem_ops);
+       if (IS_ERR(mem))
+               return mem;
+
+       intel_memory_region_set_name(mem, "stolen-system");
+
+       mem->private = true;
+
+       return mem;
 }
 
 struct drm_i915_gem_object *
@@ -728,7 +835,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915,
                                               resource_size_t stolen_offset,
                                               resource_size_t size)
 {
-       struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN_SMEM];
+       struct intel_memory_region *mem = i915->mm.stolen_region;
        struct drm_i915_gem_object *obj;
        struct drm_mm_node *stolen;
        int ret;
@@ -742,8 +849,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915,
 
        /* KISS and expect everything to be page-aligned */
        if (GEM_WARN_ON(size == 0) ||
-           GEM_WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) ||
-           GEM_WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT)))
+           GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) ||
+           GEM_WARN_ON(!IS_ALIGNED(stolen_offset, mem->min_page_size)))
                return ERR_PTR(-EINVAL);
 
        stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
index b034897..2bec6c3 100644 (file)
@@ -21,7 +21,8 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
                                         u64 end);
 void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
                                 struct drm_mm_node *node);
-struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915);
+struct intel_memory_region *i915_gem_stolen_smem_setup(struct drm_i915_private *i915);
+struct intel_memory_region *i915_gem_stolen_lmem_setup(struct drm_i915_private *i915);
 struct drm_i915_gem_object *
 i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
                              resource_size_t size);
index 5fef592..ce70d0a 100644 (file)
@@ -1740,7 +1740,6 @@ out:
 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
 {
        struct i915_address_space *vm;
-       struct page *page;
        u32 *vaddr;
        int err = 0;
 
@@ -1748,24 +1747,18 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
        if (!vm)
                return -ENODEV;
 
-       page = __px_page(vm->scratch[0]);
-       if (!page) {
+       if (!vm->scratch[0]) {
                pr_err("No scratch page!\n");
                return -EINVAL;
        }
 
-       vaddr = kmap(page);
-       if (!vaddr) {
-               pr_err("No (mappable) scratch page!\n");
-               return -EINVAL;
-       }
+       vaddr = __px_vaddr(vm->scratch[0]);
 
        memcpy(out, vaddr, sizeof(*out));
        if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
                pr_err("Inconsistent initial state of scratch page!\n");
                err = -EINVAL;
        }
-       kunmap(page);
 
        return err;
 }
index 5cf6df4..05a3b29 100644 (file)
@@ -842,6 +842,24 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
        return true;
 }
 
+static void object_set_placements(struct drm_i915_gem_object *obj,
+                                 struct intel_memory_region **placements,
+                                 unsigned int n_placements)
+{
+       GEM_BUG_ON(!n_placements);
+
+       if (n_placements == 1) {
+               struct drm_i915_private *i915 = to_i915(obj->base.dev);
+               struct intel_memory_region *mr = placements[0];
+
+               obj->mm.placements = &i915->mm.regions[mr->id];
+               obj->mm.n_placements = 1;
+       } else {
+               obj->mm.placements = placements;
+               obj->mm.n_placements = n_placements;
+       }
+}
+
 #define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
 static int __igt_mmap(struct drm_i915_private *i915,
                      struct drm_i915_gem_object *obj,
@@ -950,6 +968,8 @@ static int igt_mmap(void *arg)
                        if (IS_ERR(obj))
                                return PTR_ERR(obj);
 
+                       object_set_placements(obj, &mr, 1);
+
                        err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT);
                        if (err == 0)
                                err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC);
@@ -1068,6 +1088,8 @@ static int igt_mmap_access(void *arg)
                if (IS_ERR(obj))
                        return PTR_ERR(obj);
 
+               object_set_placements(obj, &mr, 1);
+
                err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT);
                if (err == 0)
                        err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB);
@@ -1211,6 +1233,8 @@ static int igt_mmap_gpu(void *arg)
                if (IS_ERR(obj))
                        return PTR_ERR(obj);
 
+               object_set_placements(obj, &mr, 1);
+
                err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT);
                if (err == 0)
                        err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC);
@@ -1354,6 +1378,8 @@ static int igt_mmap_revoke(void *arg)
                if (IS_ERR(obj))
                        return PTR_ERR(obj);
 
+               object_set_placements(obj, &mr, 1);
+
                err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT);
                if (err == 0)
                        err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC);
index e08dff3..1aee5e6 100644 (file)
@@ -96,9 +96,8 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
                 * entries back to scratch.
                 */
 
-               vaddr = kmap_atomic_px(pt);
+               vaddr = px_vaddr(pt);
                memset32(vaddr + pte, scratch_pte, count);
-               kunmap_atomic(vaddr);
 
                pte = 0;
        }
@@ -120,7 +119,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 
        GEM_BUG_ON(!pd->entry[act_pt]);
 
-       vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
+       vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
        do {
                GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
                vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
@@ -136,12 +135,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                }
 
                if (++act_pte == GEN6_PTES) {
-                       kunmap_atomic(vaddr);
-                       vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
+                       vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
                        act_pte = 0;
                }
        } while (1);
-       kunmap_atomic(vaddr);
 
        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 }
@@ -235,7 +232,7 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
                goto err_scratch0;
        }
 
-       ret = pin_pt_dma(vm, vm->scratch[1]);
+       ret = map_pt_dma(vm, vm->scratch[1]);
        if (ret)
                goto err_scratch1;
 
@@ -346,7 +343,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
        if (!vma)
                return ERR_PTR(-ENOMEM);
 
-       i915_active_init(&vma->active, NULL, NULL);
+       i915_active_init(&vma->active, NULL, NULL, 0);
 
        kref_init(&vma->ref);
        mutex_init(&vma->pages_mutex);
index 74bf6fc..e3a8924 100644 (file)
@@ -242,11 +242,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
                            atomic_read(&pt->used));
                        GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
 
-                       vaddr = kmap_atomic_px(pt);
+                       vaddr = px_vaddr(pt);
                        memset64(vaddr + gen8_pd_index(start, 0),
                                 vm->scratch[0]->encode,
                                 count);
-                       kunmap_atomic(vaddr);
 
                        atomic_sub(count, &pt->used);
                        start += count;
@@ -375,7 +374,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
        gen8_pte_t *vaddr;
 
        pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
-       vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+       vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
        do {
                GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
                vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
@@ -402,12 +401,10 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
                        }
 
                        clflush_cache_range(vaddr, PAGE_SIZE);
-                       kunmap_atomic(vaddr);
-                       vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+                       vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
                }
        } while (1);
        clflush_cache_range(vaddr, PAGE_SIZE);
-       kunmap_atomic(vaddr);
 
        return idx;
 }
@@ -442,7 +439,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
                        encode |= GEN8_PDE_PS_2M;
                        page_size = I915_GTT_PAGE_SIZE_2M;
 
-                       vaddr = kmap_atomic_px(pd);
+                       vaddr = px_vaddr(pd);
                } else {
                        struct i915_page_table *pt =
                                i915_pt_entry(pd, __gen8_pte_index(start, 1));
@@ -457,7 +454,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
                             rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
                                maybe_64K = __gen8_pte_index(start, 1);
 
-                       vaddr = kmap_atomic_px(pt);
+                       vaddr = px_vaddr(pt);
                }
 
                do {
@@ -491,7 +488,6 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
                } while (rem >= page_size && index < I915_PDES);
 
                clflush_cache_range(vaddr, PAGE_SIZE);
-               kunmap_atomic(vaddr);
 
                /*
                 * Is it safe to mark the 2M block as 64K? -- Either we have
@@ -505,9 +501,8 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
                      !iter->sg && IS_ALIGNED(vma->node.start +
                                              vma->node.size,
                                              I915_GTT_PAGE_SIZE_2M)))) {
-                       vaddr = kmap_atomic_px(pd);
+                       vaddr = px_vaddr(pd);
                        vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
-                       kunmap_atomic(vaddr);
                        page_size = I915_GTT_PAGE_SIZE_64K;
 
                        /*
@@ -523,12 +518,11 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
                                u16 i;
 
                                encode = vma->vm->scratch[0]->encode;
-                               vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
+                               vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));
 
                                for (i = 1; i < index; i += 16)
                                        memset64(vaddr + i, encode, 15);
 
-                               kunmap_atomic(vaddr);
                        }
                }
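The fill above deserves a gloss: once the PDE is marked GEN8_PDE_IPS_64K, the hardware consults only every sixteenth PTE, so the loop points the fifteen shadow entries of each group at scratch while entries 0, 16, 32, ... keep their real addresses. Worked through for index = 32 (an editorial example, not from the diff): two memset64() calls poison entries 1-15 and 17-31, and only entries 0 and 16 remain live 64K PTEs.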
 
@@ -602,7 +596,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
                if (IS_ERR(obj))
                        goto free_scratch;
 
-               ret = pin_pt_dma(vm, obj);
+               ret = map_pt_dma(vm, obj);
                if (ret) {
                        i915_gem_object_put(obj);
                        goto free_scratch;
@@ -639,7 +633,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
                if (IS_ERR(pde))
                        return PTR_ERR(pde);
 
-               err = pin_pt_dma(vm, pde->pt.base);
+               err = map_pt_dma(vm, pde->pt.base);
                if (err) {
                        free_pd(vm, pde);
                        return err;
@@ -674,7 +668,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
                goto err_pd;
        }
 
-       err = pin_pt_dma(vm, pd->pt.base);
+       err = map_pt_dma(vm, pd->pt.base);
        if (err)
                goto err_pd;
 
@@ -717,7 +711,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
         */
        ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
 
-       ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
+       if (HAS_LMEM(gt->i915))
+               ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
+       else
+               ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
 
        err = gen8_init_scratch(&ppgtt->vm);
        if (err)
index 17cf264..4033184 100644 (file)
@@ -326,7 +326,6 @@ void intel_context_unpin(struct intel_context *ce)
        intel_context_put(ce);
 }
 
-__i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
        struct intel_context *ce = container_of(active, typeof(*ce), active);
@@ -385,7 +384,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
        mutex_init(&ce->pin_mutex);
 
        i915_active_init(&ce->active,
-                        __intel_context_active, __intel_context_retire);
+                        __intel_context_active, __intel_context_retire, 0);
 }
 
 void intel_context_fini(struct intel_context *ce)
index 47ee857..8d91849 100644 (file)
@@ -13,8 +13,9 @@
 #include "i915_reg.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
-#include "gt/intel_timeline.h"
 #include "intel_engine_types.h"
+#include "intel_gt_types.h"
+#include "intel_timeline.h"
 #include "intel_workarounds.h"
 
 struct drm_printer;
@@ -262,6 +263,11 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
 #define ENGINE_MOCK    1
 #define ENGINE_VIRTUAL 2
 
+static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
+{
+       return engine->gt->submission_method >= INTEL_SUBMISSION_GUC;
+}
+
 static inline bool
 intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
 {
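intel_engine_uses_guc() leans on the declaration order of intel_submission_method (ring < execlists < GuC, defined in intel_gt_types.h below), so the >= compare reads as "GuC-based or anything later". A self-contained model of the idiom (plain C, structures cut down to the one field that matters):

	#include <stdbool.h>
	#include <stdio.h>

	enum submission_method {
		SUBMISSION_RING,
		SUBMISSION_ELSP,
		SUBMISSION_GUC,
	};

	struct gt { enum submission_method submission_method; };
	struct engine { struct gt *gt; };

	static bool engine_uses_guc(const struct engine *e)
	{
		/* ordering makes "GuC or any future scheduler" one compare */
		return e->gt->submission_method >= SUBMISSION_GUC;
	}

	int main(void)
	{
		struct gt gt = { .submission_method = SUBMISSION_ELSP };
		struct engine e = { .gt = &gt };

		printf("uses guc: %d\n", engine_uses_guc(&e)); /* 0 */
		gt.submission_method = SUBMISSION_GUC;
		printf("uses guc: %d\n", engine_uses_guc(&e)); /* 1 */
		return 0;
	}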
index 6dbdbde..3f9a811 100644 (file)
@@ -255,6 +255,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
        intel_engine_set_hwsp_writemask(engine, ~0u);
 }
 
+static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+       GEM_DEBUG_WARN_ON(iir);
+}
+
 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
        const struct engine_info *info = &intel_engines[id];
@@ -292,6 +297,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
        engine->hw_id = info->hw_id;
        engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
 
+       engine->irq_handler = nop_irq_handler;
+
        engine->class = info->class;
        engine->instance = info->instance;
        __sprint_engine_name(engine);
@@ -898,7 +905,7 @@ static int engine_init_common(struct intel_engine_cs *engine)
        return 0;
 
 err_context:
-       intel_context_put(ce);
+       destroy_pinned_context(ce);
        return ret;
 }
 
@@ -909,12 +916,16 @@ int intel_engines_init(struct intel_gt *gt)
        enum intel_engine_id id;
        int err;
 
-       if (intel_uc_uses_guc_submission(&gt->uc))
+       if (intel_uc_uses_guc_submission(&gt->uc)) {
+               gt->submission_method = INTEL_SUBMISSION_GUC;
                setup = intel_guc_submission_setup;
-       else if (HAS_EXECLISTS(gt->i915))
+       } else if (HAS_EXECLISTS(gt->i915)) {
+               gt->submission_method = INTEL_SUBMISSION_ELSP;
                setup = intel_execlists_submission_setup;
-       else
+       } else {
+               gt->submission_method = INTEL_SUBMISSION_RING;
                setup = intel_ring_submission_setup;
+       }
 
        for_each_engine(engine, gt, id) {
                err = engine_setup_common(engine);
@@ -1479,7 +1490,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
                drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
        }
 
-       if (intel_engine_in_guc_submission_mode(engine)) {
+       if (intel_engine_uses_guc(engine)) {
                /* nothing to print yet */
        } else if (HAS_EXECLISTS(dev_priv)) {
                struct i915_request * const *port, *rq;
index 7c9af86..47f4397 100644 (file)
@@ -23,7 +23,7 @@ static void dbg_poison_ce(struct intel_context *ce)
 
        if (ce->state) {
                struct drm_i915_gem_object *obj = ce->state->obj;
-               int type = i915_coherent_map_type(ce->engine->i915);
+               int type = i915_coherent_map_type(ce->engine->i915, obj, true);
                void *map;
 
                if (!i915_gem_object_trylock(obj))
index 883bafc..9ef349c 100644 (file)
@@ -402,6 +402,7 @@ struct intel_engine_cs {
        u32             irq_enable_mask; /* bitmask to enable ring interrupt */
        void            (*irq_enable)(struct intel_engine_cs *engine);
        void            (*irq_disable)(struct intel_engine_cs *engine);
+       void            (*irq_handler)(struct intel_engine_cs *engine, u16 iir);
 
        void            (*sanitize)(struct intel_engine_cs *engine);
        int             (*resume)(struct intel_engine_cs *engine);
@@ -481,10 +482,9 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
-#define I915_ENGINE_IS_VIRTUAL       BIT(6)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
        unsigned int flags;
 
        /*
@@ -593,12 +593,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
        return engine->flags & I915_ENGINE_HAS_TIMESLICES;
 }
 
-static inline bool
-intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
-{
-       return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-}
-
 static inline bool
 intel_engine_is_virtual(const struct intel_engine_cs *engine)
 {
index de12487..8db2004 100644 (file)
 #include "intel_engine_stats.h"
 #include "intel_execlists_submission.h"
 #include "intel_gt.h"
+#include "intel_gt_irq.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 #include "intel_lrc.h"
@@ -1768,7 +1769,6 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
         */
        GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
                   !reset_in_progress(execlists));
-       GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
 
        /*
         * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -2385,6 +2385,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
        rcu_read_unlock();
 }
 
+static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+       bool tasklet = false;
+
+       if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
+               u32 eir;
+
+               /* Upper 16b are the enabling mask, rsvd for internal errors */
+               eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
+               ENGINE_TRACE(engine, "CS error: %x\n", eir);
+
+               /* Disable the error interrupt until after the reset */
+               if (likely(eir)) {
+                       ENGINE_WRITE(engine, RING_EMR, ~0u);
+                       ENGINE_WRITE(engine, RING_EIR, eir);
+                       WRITE_ONCE(engine->execlists.error_interrupt, eir);
+                       tasklet = true;
+               }
+       }
+
+       if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+               WRITE_ONCE(engine->execlists.yield,
+                          ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+               ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+                            engine->execlists.yield);
+               if (del_timer(&engine->execlists.timer))
+                       tasklet = true;
+       }
+
+       if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+               tasklet = true;
+
+       if (iir & GT_RENDER_USER_INTERRUPT)
+               intel_engine_signal_breadcrumbs(engine);
+
+       if (tasklet)
+               tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
        /* Kick the tasklet for some interrupt coalescing and reset handling */
@@ -3076,29 +3115,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->submit_request = execlists_submit_request;
        engine->schedule = i915_schedule;
        engine->execlists.tasklet.callback = execlists_submission_tasklet;
-
-       engine->reset.prepare = execlists_reset_prepare;
-       engine->reset.rewind = execlists_reset_rewind;
-       engine->reset.cancel = execlists_reset_cancel;
-       engine->reset.finish = execlists_reset_finish;
-
-       engine->park = execlists_park;
-       engine->unpark = NULL;
-
-       engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-       if (!intel_vgpu_active(engine->i915)) {
-               engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-               if (can_preempt(engine)) {
-                       engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-                       if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-                               engine->flags |= I915_ENGINE_HAS_TIMESLICES;
-               }
-       }
-
-       if (intel_engine_has_preemption(engine))
-               engine->emit_bb_start = gen8_emit_bb_start;
-       else
-               engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
 static void execlists_shutdown(struct intel_engine_cs *engine)
@@ -3129,6 +3145,14 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
        engine->cops = &execlists_context_ops;
        engine->request_alloc = execlists_request_alloc;
 
+       engine->reset.prepare = execlists_reset_prepare;
+       engine->reset.rewind = execlists_reset_rewind;
+       engine->reset.cancel = execlists_reset_cancel;
+       engine->reset.finish = execlists_reset_finish;
+
+       engine->park = execlists_park;
+       engine->unpark = NULL;
+
        engine->emit_flush = gen8_emit_flush_xcs;
        engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
        engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
@@ -3149,6 +3173,22 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
                 * until a more refined solution exists.
                 */
        }
+       intel_engine_set_irq_handler(engine, execlists_irq_handler);
+
+       engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+       if (!intel_vgpu_active(engine->i915)) {
+               engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+               if (can_preempt(engine)) {
+                       engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+                       if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+                               engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+               }
+       }
+
+       if (intel_engine_has_preemption(engine))
+               engine->emit_bb_start = gen8_emit_bb_start;
+       else
+               engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
 static void logical_ring_default_irqs(struct intel_engine_cs *engine)
@@ -3884,13 +3924,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
-{
-       return engine->set_default_submission ==
-              execlists_set_default_submission;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_execlists.c"
 #endif
index fd61dae..4ca9b47 100644 (file)
@@ -43,7 +43,4 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
                                     const struct intel_engine_cs *master,
                                     const struct intel_engine_cs *sibling);
 
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
-
 #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */
index 38742bf..35069ca 100644 (file)
@@ -658,7 +658,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
                goto err_ppgtt;
 
        i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
-       err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
+       err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
        i915_gem_object_unlock(ppgtt->vm.scratch[0]);
        if (err)
                goto err_stash;
@@ -907,9 +907,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
        ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 
-       /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
-       if (intel_ggtt_update_needs_vtd_wa(i915) ||
-           IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+       /*
+        * Serialize GTT updates with aperture access on BXT if VT-d is on,
+        * and always on CHV.
+        */
+       if (intel_vm_no_concurrent_access_wa(i915)) {
                ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
                ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
                ggtt->vm.bind_async_flags =
index 8a32259..7bf84cd 100644 (file)
@@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
                 * banks of memory are paired and unswizzled on the
                 * uneven portion, so leave that as unknown.
                 */
-               if (intel_uncore_read16(uncore, C0DRB3) ==
-                   intel_uncore_read16(uncore, C1DRB3)) {
+               if (intel_uncore_read16(uncore, C0DRB3_BW) ==
+                   intel_uncore_read16(uncore, C1DRB3_BW)) {
                        swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                        swizzle_y = I915_BIT_6_SWIZZLE_9;
                }
@@ -867,7 +867,7 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
        for (i = 0; i < num_fences; i++) {
                struct i915_fence_reg *fence = &ggtt->fence_regs[i];
 
-               i915_active_init(&fence->active, NULL, NULL);
+               i915_active_init(&fence->active, NULL, NULL, 0);
                fence->ggtt = ggtt;
                fence->id = i;
                list_add_tail(&fence->link, &ggtt->fence_list);
index c594681..aa0a59c 100644 (file)
@@ -98,7 +98,6 @@ static void pool_free_work(struct work_struct *wrk)
                                      round_jiffies_up_relative(HZ));
 }
 
-__i915_active_call
 static void pool_retire(struct i915_active *ref)
 {
        struct intel_gt_buffer_pool_node *node =
@@ -154,7 +153,7 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz,
        node->age = 0;
        node->pool = pool;
        node->pinned = false;
-       i915_active_init(&node->active, NULL, pool_retire);
+       i915_active_init(&node->active, NULL, pool_retire, 0);
 
        obj = i915_gem_object_create_internal(gt->i915, sz);
        if (IS_ERR(obj)) {
index 9fc6c91..d29126c 100644 (file)
@@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir)
                intel_guc_to_host_event_handler(guc);
 }
 
-static void
-cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
-{
-       bool tasklet = false;
-
-       if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
-               u32 eir;
-
-               /* Upper 16b are the enabling mask, rsvd for internal errors */
-               eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
-               ENGINE_TRACE(engine, "CS error: %x\n", eir);
-
-               /* Disable the error interrupt until after the reset */
-               if (likely(eir)) {
-                       ENGINE_WRITE(engine, RING_EMR, ~0u);
-                       ENGINE_WRITE(engine, RING_EIR, eir);
-                       WRITE_ONCE(engine->execlists.error_interrupt, eir);
-                       tasklet = true;
-               }
-       }
-
-       if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
-               WRITE_ONCE(engine->execlists.yield,
-                          ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
-               ENGINE_TRACE(engine, "semaphore yield: %08x\n",
-                            engine->execlists.yield);
-               if (del_timer(&engine->execlists.timer))
-                       tasklet = true;
-       }
-
-       if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
-               tasklet = true;
-
-       if (iir & GT_RENDER_USER_INTERRUPT) {
-               intel_engine_signal_breadcrumbs(engine);
-               tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
-       }
-
-       if (tasklet)
-               tasklet_hi_schedule(&engine->execlists.tasklet);
-}
-
 static u32
 gen11_gt_engine_identity(struct intel_gt *gt,
                         const unsigned int bank, const unsigned int bit)
@@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
                engine = NULL;
 
        if (likely(engine))
-               return cs_irq_handler(engine, iir);
+               return intel_engine_cs_irq(engine, iir);
 
        WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
                  class, instance);
@@ -275,9 +233,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
        if (gt_iir & GT_RENDER_USER_INTERRUPT)
-               intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+               intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+                                   gt_iir);
+
        if (gt_iir & ILK_BSD_USER_INTERRUPT)
-               intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+               intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+                                   gt_iir);
 }
 
 static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -301,11 +262,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
        if (gt_iir & GT_RENDER_USER_INTERRUPT)
-               intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+               intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+                                   gt_iir);
+
        if (gt_iir & GT_BSD_USER_INTERRUPT)
-               intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+               intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+                                   gt_iir >> 12);
+
        if (gt_iir & GT_BLT_USER_INTERRUPT)
-               intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
+               intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+                                   gt_iir >> 22);
 
        if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
                      GT_BSD_CS_ERROR_INTERRUPT |
@@ -324,10 +290,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
        if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
                iir = raw_reg_read(regs, GEN8_GT_IIR(0));
                if (likely(iir)) {
-                       cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
-                                      iir >> GEN8_RCS_IRQ_SHIFT);
-                       cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
-                                      iir >> GEN8_BCS_IRQ_SHIFT);
+                       intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+                                           iir >> GEN8_RCS_IRQ_SHIFT);
+                       intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+                                           iir >> GEN8_BCS_IRQ_SHIFT);
                        raw_reg_write(regs, GEN8_GT_IIR(0), iir);
                }
        }
@@ -335,10 +301,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
        if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
                iir = raw_reg_read(regs, GEN8_GT_IIR(1));
                if (likely(iir)) {
-                       cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
-                                      iir >> GEN8_VCS0_IRQ_SHIFT);
-                       cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
-                                      iir >> GEN8_VCS1_IRQ_SHIFT);
+                       intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+                                           iir >> GEN8_VCS0_IRQ_SHIFT);
+                       intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1],
+                                           iir >> GEN8_VCS1_IRQ_SHIFT);
                        raw_reg_write(regs, GEN8_GT_IIR(1), iir);
                }
        }
@@ -346,8 +312,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
        if (master_ctl & GEN8_GT_VECS_IRQ) {
                iir = raw_reg_read(regs, GEN8_GT_IIR(3));
                if (likely(iir)) {
-                       cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
-                                      iir >> GEN8_VECS_IRQ_SHIFT);
+                       intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+                                           iir >> GEN8_VECS_IRQ_SHIFT);
                        raw_reg_write(regs, GEN8_GT_IIR(3), iir);
                }
        }
index f667e97..41cad38 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#include "intel_engine_types.h"
+
 struct intel_gt;
 
 #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
@@ -39,4 +41,25 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
 void gen8_gt_irq_reset(struct intel_gt *gt);
 void gen8_gt_irq_postinstall(struct intel_gt *gt);
 
+static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u16 iir)
+{
+       if (iir)
+               engine->irq_handler(engine, iir);
+}
+
+static inline void
+intel_engine_set_irq_handler(struct intel_engine_cs *engine,
+                            void (*fn)(struct intel_engine_cs *engine,
+                                       u16 iir))
+{
+       /*
+        * As the interrupt is live while we allocate and set up the
+        * engines, err on the side of caution and apply barriers when
+        * updating the irq handler callback. This ensures that once we
+        * do use the engine, we will receive interrupts only for
+        * ourselves, and not lose any.
+        */
+       smp_store_mb(engine->irq_handler, fn);
+}
+
 #endif /* INTEL_GT_IRQ_H */
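Taken together, intel_engine_cs_irq() and the per-engine irq_handler replace the direct cs_irq_handler() calls removed from intel_gt_irq.c: the top-level handlers shift each engine's 16-bit slot out of the IIR register, and the helper drops empty slots before indirecting. A reduced, runnable model of that dispatch (register layout, shifts and names here are illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	#define RCS_IRQ_SHIFT  0	/* render engine's 16-bit slot */
	#define BCS_IRQ_SHIFT 16	/* copy engine's 16-bit slot */

	struct engine {
		const char *name;
		void (*irq_handler)(struct engine *engine, uint16_t iir);
	};

	/* setup-time default: any bit arriving here is unexpected */
	static void nop_irq_handler(struct engine *engine, uint16_t iir)
	{
		if (iir)
			fprintf(stderr, "%s: spurious iir %#x\n", engine->name, iir);
	}

	static void print_irq_handler(struct engine *engine, uint16_t iir)
	{
		printf("%s: handling iir %#x\n", engine->name, iir);
	}

	static void engine_cs_irq(struct engine *engine, uint16_t iir)
	{
		if (iir)	/* cheap gate: skip engines with no pending bits */
			engine->irq_handler(engine, iir);
	}

	int main(void)
	{
		struct engine rcs = { "rcs0", nop_irq_handler };
		struct engine bcs = { "bcs0", nop_irq_handler };
		uint32_t iir = 0x0001 << BCS_IRQ_SHIFT;	/* only bcs0 has bits */

		/* the driver publishes the real handler with smp_store_mb();
		 * plain assignment stands in for it in this single-threaded model */
		rcs.irq_handler = print_irq_handler;
		bcs.irq_handler = print_irq_handler;

		engine_cs_irq(&rcs, iir >> RCS_IRQ_SHIFT);	/* truncates to 0, filtered */
		engine_cs_irq(&bcs, iir >> BCS_IRQ_SHIFT);	/* dispatched */
		return 0;
	}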
index 0caf6ca..fecfacf 100644 (file)
@@ -31,6 +31,12 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
+enum intel_submission_method {
+       INTEL_SUBMISSION_RING,
+       INTEL_SUBMISSION_ELSP,
+       INTEL_SUBMISSION_GUC,
+};
+
 struct intel_gt {
        struct drm_i915_private *i915;
        struct intel_uncore *uncore;
@@ -118,6 +124,7 @@ struct intel_gt {
        struct intel_engine_cs *engine[I915_NUM_ENGINES];
        struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
                                            [MAX_ENGINE_INSTANCE + 1];
+       enum intel_submission_method submission_method;
 
        /*
         * Default address space (either GGTT or ppGTT depending on arch).
index 941f8af..9b98f9d 100644 (file)
@@ -7,10 +7,26 @@
 
 #include <linux/fault-inject.h>
 
+#include "gem/i915_gem_lmem.h"
 #include "i915_trace.h"
 #include "intel_gt.h"
 #include "intel_gtt.h"
 
+struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
+{
+       struct drm_i915_gem_object *obj;
+
+       obj = i915_gem_object_create_lmem(vm->i915, sz, 0);
+       /*
+        * Ensure all paging structures for this vm share the same dma-resv
+        * object underneath, with the idea that one object_lock() will lock
+        * them all at once.
+        */
+       if (!IS_ERR(obj))
+               obj->base.resv = &vm->resv;
+       return obj;
+}
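Giving every paging structure the same dma-resv is what lets callers take a single lock for the whole address space; the aliasing-ppgtt hunk earlier in this diff relies on exactly that. The idiom as used there:

	/* all PT objects alias one dma-resv, so locking any page-table
	 * object (scratch[0] here) covers every structure in the vm */
	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);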
+
 struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
 {
        struct drm_i915_gem_object *obj;
@@ -19,33 +35,39 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
                i915_gem_shrink_all(vm->i915);
 
        obj = i915_gem_object_create_internal(vm->i915, sz);
-       /* ensure all dma objects have the same reservation class */
+       /*
+        * Ensure all paging structures for this vm share the same dma-resv
+        * object underneath, with the idea that one object_lock() will lock
+        * them all at once.
+        */
        if (!IS_ERR(obj))
                obj->base.resv = &vm->resv;
        return obj;
 }
 
-int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
+int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 {
-       int err;
+       enum i915_map_type type;
+       void *vaddr;
 
-       i915_gem_object_lock(obj, NULL);
-       err = i915_gem_object_pin_pages(obj);
-       i915_gem_object_unlock(obj);
-       if (err)
-               return err;
+       type = i915_coherent_map_type(vm->i915, obj, true);
+       vaddr = i915_gem_object_pin_map_unlocked(obj, type);
+       if (IS_ERR(vaddr))
+               return PTR_ERR(vaddr);
 
        i915_gem_object_make_unshrinkable(obj);
        return 0;
 }
 
-int pin_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
+int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 {
-       int err;
+       enum i915_map_type type;
+       void *vaddr;
 
-       err = i915_gem_object_pin_pages(obj);
-       if (err)
-               return err;
+       type = i915_coherent_map_type(vm->i915, obj, true);
+       vaddr = i915_gem_object_pin_map(obj, type);
+       if (IS_ERR(vaddr))
+               return PTR_ERR(vaddr);
 
        i915_gem_object_make_unshrinkable(obj);
        return 0;
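Where pin_pt_dma() only pinned the backing pages, both map helpers now also establish the CPU mapping up front, with the caching mode chosen from the object's placement via the extended i915_coherent_map_type(). A reduced model of that selection, matching the behaviour this series introduces (treat it as a sketch; the real helper takes the full device and object structures):

	enum map_type { MAP_WB, MAP_WC };

	static enum map_type
	coherent_map_type(bool obj_in_lmem, bool has_llc, bool always_coherent)
	{
		/* device-local memory wants write-combined CPU access */
		if (obj_in_lmem)
			return MAP_WC;
		/* system memory stays write-back when the LLC keeps it
		 * coherent, or when the caller demands coherency anyway */
		return (has_llc || always_coherent) ? MAP_WB : MAP_WC;
	}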
@@ -132,7 +154,22 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
         */
        mutex_init(&vm->mutex);
        lockdep_set_subclass(&vm->mutex, subclass);
-       i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+       if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
+               i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+       } else {
+               /*
+                * The CHV + BXT VT-d workaround uses stop_machine(),
+                * which is allowed to allocate memory. This means &vm->mutex
+                * is the outer lock, and in theory we can allocate memory
+                * inside it through stop_machine().
+                *
+                * Add the lockdep annotation for this; the shrinker uses
+                * trylock instead.
+                */
+               mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
+               might_alloc(GFP_KERNEL);
+               mutex_release(&vm->mutex.dep_map, _THIS_IP_);
+       }
        dma_resv_init(&vm->resv);
 
        GEM_BUG_ON(!vm->total);
@@ -155,6 +192,14 @@ void clear_pages(struct i915_vma *vma)
        memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
 }
 
+void *__px_vaddr(struct drm_i915_gem_object *p)
+{
+       enum i915_map_type type;
+
+       GEM_BUG_ON(!i915_gem_object_has_pages(p));
+       return page_unpack_bits(p->mm.mapping, &type);
+}
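page_unpack_bits() works because the pin_map machinery stores the map type in the low bits of the (page-aligned) mapping pointer kept in obj->mm.mapping. A self-contained model of the pack/unpack idiom (the names and the two-bit width are assumptions for illustration):

	#include <assert.h>
	#include <stdint.h>

	#define TYPE_MASK 0x3UL	/* low bits free: mappings are page aligned */

	static void *pack_bits(void *vaddr, unsigned int type)
	{
		return (void *)((uintptr_t)vaddr | (type & TYPE_MASK));
	}

	static void *unpack_bits(void *packed, unsigned int *type)
	{
		*type = (uintptr_t)packed & TYPE_MASK;
		return (void *)((uintptr_t)packed & ~TYPE_MASK);
	}

	int main(void)
	{
		unsigned int type;
		void *vaddr = (void *)0x7f0000001000UL;	/* page aligned */

		assert(unpack_bits(pack_bits(vaddr, 1), &type) == vaddr);
		assert(type == 1);
		return 0;
	}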
+
 dma_addr_t __px_dma(struct drm_i915_gem_object *p)
 {
        GEM_BUG_ON(!i915_gem_object_has_pages(p));
@@ -170,32 +215,22 @@ struct page *__px_page(struct drm_i915_gem_object *p)
 void
 fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
 {
-       struct page *page = __px_page(p);
-       void *vaddr;
+       void *vaddr = __px_vaddr(p);
 
-       vaddr = kmap(page);
        memset64(vaddr, val, count);
        clflush_cache_range(vaddr, PAGE_SIZE);
-       kunmap(page);
 }
 
 static void poison_scratch_page(struct drm_i915_gem_object *scratch)
 {
-       struct sgt_iter sgt;
-       struct page *page;
+       void *vaddr = __px_vaddr(scratch);
        u8 val;
 
        val = 0;
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                val = POISON_FREE;
 
-       for_each_sgt_page(page, sgt, scratch->mm.pages) {
-               void *vaddr;
-
-               vaddr = kmap(page);
-               memset(vaddr, val, PAGE_SIZE);
-               kunmap(page);
-       }
+       memset(vaddr, val, scratch->base.size);
 }
 
 int setup_scratch_page(struct i915_address_space *vm)
@@ -225,7 +260,7 @@ int setup_scratch_page(struct i915_address_space *vm)
                if (IS_ERR(obj))
                        goto skip;
 
-               if (pin_pt_dma(vm, obj))
+               if (map_pt_dma(vm, obj))
                        goto skip_obj;
 
                /* We need a single contiguous page for our scratch */
index 79f565a..ca00b45 100644 (file)
@@ -180,6 +180,9 @@ struct page *__px_page(struct drm_i915_gem_object *p);
 dma_addr_t __px_dma(struct drm_i915_gem_object *p);
 #define px_dma(px) (__px_dma(px_base(px)))
 
+void *__px_vaddr(struct drm_i915_gem_object *p);
+#define px_vaddr(px) (__px_vaddr(px_base(px)))
+
 #define px_pt(px) \
        __px_choose_expr(px, struct i915_page_table *, __x, \
        __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
@@ -516,8 +519,6 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
 void i915_ggtt_suspend(struct i915_ggtt *gtt);
 void i915_ggtt_resume(struct i915_ggtt *ggtt);
 
-#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px)))
-
 void
 fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
 
@@ -531,12 +532,13 @@ int setup_scratch_page(struct i915_address_space *vm);
 void free_scratch(struct i915_address_space *vm);
 
 struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
+struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz);
 struct i915_page_table *alloc_pt(struct i915_address_space *vm);
 struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
 struct i915_page_directory *__alloc_pd(int npde);
 
-int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
-int pin_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
+int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
+int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
 
 void free_px(struct i915_address_space *vm,
             struct i915_page_table *pt, int lvl);
@@ -583,7 +585,7 @@ void setup_private_pat(struct intel_uncore *uncore);
 int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
                           struct i915_vm_pt_stash *stash,
                           u64 size);
-int i915_vm_pin_pt_stash(struct i915_address_space *vm,
+int i915_vm_map_pt_stash(struct i915_address_space *vm,
                         struct i915_vm_pt_stash *stash);
 void i915_vm_free_pt_stash(struct i915_address_space *vm,
                           struct i915_vm_pt_stash *stash);
index e86897c..aafe2a4 100644 (file)
@@ -903,7 +903,9 @@ lrc_pre_pin(struct intel_context *ce,
        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
        *vaddr = i915_gem_object_pin_map(ce->state->obj,
-                                        i915_coherent_map_type(ce->engine->i915) |
+                                        i915_coherent_map_type(ce->engine->i915,
+                                                               ce->state->obj,
+                                                               false) |
                                         I915_MAP_OVERRIDE);
 
        return PTR_ERR_OR_ZERO(*vaddr);
index 014ae8a..4e3d80c 100644 (file)
@@ -87,11 +87,10 @@ write_dma_entry(struct drm_i915_gem_object * const pdma,
                const unsigned short idx,
                const u64 encoded_entry)
 {
-       u64 * const vaddr = kmap_atomic(__px_page(pdma));
+       u64 * const vaddr = __px_vaddr(pdma);
 
        vaddr[idx] = encoded_entry;
        clflush_cache_range(&vaddr[idx], sizeof(u64));
-       kunmap_atomic(vaddr);
 }
 
 void
@@ -258,7 +257,7 @@ int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
        return 0;
 }
 
-int i915_vm_pin_pt_stash(struct i915_address_space *vm,
+int i915_vm_map_pt_stash(struct i915_address_space *vm,
                         struct i915_vm_pt_stash *stash)
 {
        struct i915_page_table *pt;
@@ -266,7 +265,7 @@ int i915_vm_pin_pt_stash(struct i915_address_space *vm,
 
        for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
                for (pt = stash->pt[n]; pt; pt = pt->stash) {
-                       err = pin_pt_dma_locked(vm, pt->base);
+                       err = map_pt_dma_locked(vm, pt->base);
                        if (err)
                                return err;
                }
index a377c45..8091846 100644 (file)
@@ -338,15 +338,69 @@ static int gen6_reset_engines(struct intel_gt *gt,
        return gen6_hw_domain_reset(gt, hw_mask);
 }
 
-static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
+static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
+{
+       int vecs_id;
+
+       GEM_BUG_ON(engine->class != VIDEO_DECODE_CLASS);
+
+       vecs_id = _VECS((engine->instance) / 2);
+
+       return engine->gt->engine[vecs_id];
+}
+
+struct sfc_lock_data {
+       i915_reg_t lock_reg;
+       i915_reg_t ack_reg;
+       i915_reg_t usage_reg;
+       u32 lock_bit;
+       u32 ack_bit;
+       u32 usage_bit;
+       u32 reset_bit;
+};
+
+static void get_sfc_forced_lock_data(struct intel_engine_cs *engine,
+                                    struct sfc_lock_data *sfc_lock)
+{
+       switch (engine->class) {
+       default:
+               MISSING_CASE(engine->class);
+               fallthrough;
+       case VIDEO_DECODE_CLASS:
+               sfc_lock->lock_reg = GEN11_VCS_SFC_FORCED_LOCK(engine);
+               sfc_lock->lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
+
+               sfc_lock->ack_reg = GEN11_VCS_SFC_LOCK_STATUS(engine);
+               sfc_lock->ack_bit  = GEN11_VCS_SFC_LOCK_ACK_BIT;
+
+               sfc_lock->usage_reg = GEN11_VCS_SFC_LOCK_STATUS(engine);
+               sfc_lock->usage_bit = GEN11_VCS_SFC_USAGE_BIT;
+               sfc_lock->reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
+
+               break;
+       case VIDEO_ENHANCEMENT_CLASS:
+               sfc_lock->lock_reg = GEN11_VECS_SFC_FORCED_LOCK(engine);
+               sfc_lock->lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+
+               sfc_lock->ack_reg = GEN11_VECS_SFC_LOCK_ACK(engine);
+               sfc_lock->ack_bit  = GEN11_VECS_SFC_LOCK_ACK_BIT;
+
+               sfc_lock->usage_reg = GEN11_VECS_SFC_USAGE(engine);
+               sfc_lock->usage_bit = GEN11_VECS_SFC_USAGE_BIT;
+               sfc_lock->reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
+
+               break;
+       }
+}
+
+static int gen11_lock_sfc(struct intel_engine_cs *engine,
+                         u32 *reset_mask,
+                         u32 *unlock_mask)
 {
        struct intel_uncore *uncore = engine->uncore;
        u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access;
-       i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
-       u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
-       i915_reg_t sfc_usage;
-       u32 sfc_usage_bit;
-       u32 sfc_reset_bit;
+       struct sfc_lock_data sfc_lock;
+       bool lock_obtained, lock_to_other = false;
        int ret;
 
        switch (engine->class) {
@@ -354,53 +408,72 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
                if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
                        return 0;
 
-               sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
-               sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
-
-               sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
-               sfc_forced_lock_ack_bit  = GEN11_VCS_SFC_LOCK_ACK_BIT;
+               fallthrough;
+       case VIDEO_ENHANCEMENT_CLASS:
+               get_sfc_forced_lock_data(engine, &sfc_lock);
 
-               sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
-               sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
-               sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
                break;
+       default:
+               return 0;
+       }
 
-       case VIDEO_ENHANCEMENT_CLASS:
-               sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
-               sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+       if (!(intel_uncore_read_fw(uncore, sfc_lock.usage_reg) & sfc_lock.usage_bit)) {
+               struct intel_engine_cs *paired_vecs;
 
-               sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
-               sfc_forced_lock_ack_bit  = GEN11_VECS_SFC_LOCK_ACK_BIT;
+               if (engine->class != VIDEO_DECODE_CLASS ||
+                   !IS_GEN(engine->i915, 12))
+                       return 0;
 
-               sfc_usage = GEN11_VECS_SFC_USAGE(engine);
-               sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
-               sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
-               break;
+               /*
+                * Wa_14010733141
+                *
+                * If the VCS-MFX isn't using the SFC, we also need to check
+                * whether VCS-HCP is using it.  If so, we need to issue a *VE*
+                * forced lock on the VE engine that shares the same SFC.
+                */
+               if (!(intel_uncore_read_fw(uncore,
+                                          GEN12_HCP_SFC_LOCK_STATUS(engine)) &
+                     GEN12_HCP_SFC_USAGE_BIT))
+                       return 0;
 
-       default:
-               return 0;
+               paired_vecs = find_sfc_paired_vecs_engine(engine);
+               get_sfc_forced_lock_data(paired_vecs, &sfc_lock);
+               lock_to_other = true;
+               *unlock_mask |= paired_vecs->mask;
+       } else {
+               *unlock_mask |= engine->mask;
        }
 
        /*
-        * If the engine is using a SFC, tell the engine that a software reset
+        * If the engine is using an SFC, tell the engine that a software reset
         * is going to happen. The engine will then try to force lock the SFC.
         * If SFC ends up being locked to the engine we want to reset, we have
         * to reset it as well (we will unlock it once the reset sequence is
         * completed).
         */
-       if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
-               return 0;
-
-       rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
+       rmw_set_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
 
        ret = __intel_wait_for_register_fw(uncore,
-                                          sfc_forced_lock_ack,
-                                          sfc_forced_lock_ack_bit,
-                                          sfc_forced_lock_ack_bit,
+                                          sfc_lock.ack_reg,
+                                          sfc_lock.ack_bit,
+                                          sfc_lock.ack_bit,
                                           1000, 0, NULL);
 
-       /* Was the SFC released while we were trying to lock it? */
-       if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
+       /*
+        * Was the SFC released while we were trying to lock it?
+        *
+        * We should reset both the engine and the SFC if:
+        *  - We were locking the SFC to this engine and the lock succeeded
+        *       OR
+        *  - We were locking the SFC to a different engine (Wa_14010733141)
+        *    but the SFC was released before the lock was obtained.
+        *
+        * Otherwise we need only reset the engine by itself and we can
+        * leave the SFC alone.
+        */
+       lock_obtained = (intel_uncore_read_fw(uncore, sfc_lock.usage_reg) &
+                       sfc_lock.usage_bit) != 0;
+       if (lock_obtained == lock_to_other)
                return 0;
 
        if (ret) {
@@ -408,7 +481,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
                return ret;
        }
 
-       *hw_mask |= sfc_reset_bit;
+       *reset_mask |= sfc_lock.reset_bit;
        return 0;
 }
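The single lock_obtained == lock_to_other comparison above compresses a four-case decision; spelled out as a table (editorial, derived from the comment and code):

	lock_to_other  lock_obtained  outcome
	false          false          SFC released meanwhile: reset engine only
	false          true           SFC locked to this engine: reset engine + SFC
	true           false          SFC freed before the cross-engine lock landed:
	                              reset engine + SFC (Wa_14010733141 case)
	true           true           SFC still held by the paired engine: leave it,
	                              reset engine only

Only the two mismatched rows fall through to set sfc_lock.reset_bit in *reset_mask.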
 
@@ -416,28 +489,19 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
 {
        struct intel_uncore *uncore = engine->uncore;
        u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access;
-       i915_reg_t sfc_forced_lock;
-       u32 sfc_forced_lock_bit;
-
-       switch (engine->class) {
-       case VIDEO_DECODE_CLASS:
-               if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
-                       return;
+       struct sfc_lock_data sfc_lock = {};
 
-               sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
-               sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
-               break;
-
-       case VIDEO_ENHANCEMENT_CLASS:
-               sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
-               sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
-               break;
+       if (engine->class != VIDEO_DECODE_CLASS &&
+           engine->class != VIDEO_ENHANCEMENT_CLASS)
+               return;
 
-       default:
+       if (engine->class == VIDEO_DECODE_CLASS &&
+           (BIT(engine->instance) & vdbox_sfc_access) == 0)
                return;
-       }
 
-       rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
+       get_sfc_forced_lock_data(engine, &sfc_lock);
+
+       rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
 }
 
 static int gen11_reset_engines(struct intel_gt *gt,
@@ -456,23 +520,23 @@ static int gen11_reset_engines(struct intel_gt *gt,
        };
        struct intel_engine_cs *engine;
        intel_engine_mask_t tmp;
-       u32 hw_mask;
+       u32 reset_mask, unlock_mask = 0;
        int ret;
 
        if (engine_mask == ALL_ENGINES) {
-               hw_mask = GEN11_GRDOM_FULL;
+               reset_mask = GEN11_GRDOM_FULL;
        } else {
-               hw_mask = 0;
+               reset_mask = 0;
                for_each_engine_masked(engine, gt, engine_mask, tmp) {
                        GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
-                       hw_mask |= hw_engine_mask[engine->id];
-                       ret = gen11_lock_sfc(engine, &hw_mask);
+                       reset_mask |= hw_engine_mask[engine->id];
+                       ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask);
                        if (ret)
                                goto sfc_unlock;
                }
        }
 
-       ret = gen6_hw_domain_reset(gt, hw_mask);
+       ret = gen6_hw_domain_reset(gt, reset_mask);
 
 sfc_unlock:
        /*
@@ -480,10 +544,14 @@ sfc_unlock:
         * gen11_lock_sfc to make sure that we clean up properly if something
         * went wrong during the lock (e.g. lock acquired after timeout
         * expiration).
+        *
+        * Due to Wa_14010733141, we may have locked an SFC to an engine that
+        * wasn't being reset.  So instead of calling gen11_unlock_sfc()
+        * on engine_mask, we call it on the mask of engines that our
+        * gen11_lock_sfc() calls told us actually had locks attempted.
         */
-       if (engine_mask != ALL_ENGINES)
-               for_each_engine_masked(engine, gt, engine_mask, tmp)
-                       gen11_unlock_sfc(engine);
+       for_each_engine_masked(engine, gt, unlock_mask, tmp)
+               gen11_unlock_sfc(engine);
 
        return ret;
 }
@@ -1118,7 +1186,6 @@ static int intel_gt_reset_engine(struct intel_engine_cs *engine)
 int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 {
        struct intel_gt *gt = engine->gt;
-       bool uses_guc = intel_engine_in_guc_submission_mode(engine);
        int ret;
 
        ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
@@ -1134,10 +1201,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
                           "Resetting %s for %s\n", engine->name, msg);
        atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
 
-       if (!uses_guc)
-               ret = intel_gt_reset_engine(engine);
-       else
+       if (intel_engine_uses_guc(engine))
                ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
+       else
+               ret = intel_gt_reset_engine(engine);
        if (ret) {
                /* If we fail here, we expect to fallback to a global reset */
                ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
index aee0a77..7c4d515 100644 (file)
@@ -51,11 +51,14 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
        if (unlikely(ret))
                goto err_unpin;
 
-       if (i915_vma_is_map_and_fenceable(vma))
+       if (i915_vma_is_map_and_fenceable(vma)) {
                addr = (void __force *)i915_vma_pin_iomap(vma);
-       else
-               addr = i915_gem_object_pin_map(vma->obj,
-                                              i915_coherent_map_type(vma->vm->i915));
+       } else {
+               int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
+
+               addr = i915_gem_object_pin_map(vma->obj, type);
+       }
+
        if (IS_ERR(addr)) {
                ret = PTR_ERR(addr);
                goto err_ring;
index 9585546..2b6dffc 100644 (file)
@@ -12,6 +12,7 @@
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_gt.h"
+#include "intel_gt_irq.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
 #include "shmem_utils.h"
@@ -989,14 +990,10 @@ static void gen6_bsd_submit_request(struct i915_request *request)
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
        engine->submit_request = i9xx_submit_request;
-
-       engine->park = NULL;
-       engine->unpark = NULL;
 }
 
 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 {
-       i9xx_set_default_submission(engine);
        engine->submit_request = gen6_bsd_submit_request;
 }
 
@@ -1021,10 +1018,17 @@ static void ring_release(struct intel_engine_cs *engine)
        intel_timeline_put(engine->legacy.timeline);
 }
 
+static void irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+       intel_engine_signal_breadcrumbs(engine);
+}
+
 static void setup_irq(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *i915 = engine->i915;
 
+       intel_engine_set_irq_handler(engine, irq_handler);
+
        if (INTEL_GEN(i915) >= 6) {
                engine->irq_enable = gen6_irq_enable;
                engine->irq_disable = gen6_irq_disable;
index 405d814..97cab1b 100644 (file)
@@ -1774,7 +1774,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
                return;
 
        if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-               intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+               intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
 
        if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
                DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
index f19cf6d..c4a126c 100644 (file)
@@ -32,7 +32,6 @@ static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
        return vma;
 }
 
-__i915_active_call
 static void __timeline_retire(struct i915_active *active)
 {
        struct intel_timeline *tl =
@@ -104,7 +103,8 @@ static int intel_timeline_init(struct intel_timeline *timeline,
        INIT_LIST_HEAD(&timeline->requests);
 
        i915_syncmap_init(&timeline->sync);
-       i915_active_init(&timeline->active, __timeline_active, __timeline_retire);
+       i915_active_init(&timeline->active, __timeline_active,
+                        __timeline_retire, 0);
 
        return 0;
 }
index 2c6f721..62cb9ee 100644 (file)
@@ -607,9 +607,38 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
        wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 }
 
+/*
+ * These settings aren't actually workarounds, but general tuning settings that
+ * need to be programmed on several platforms.
+ */
+static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+                                    struct i915_wa_list *wal)
+{
+       /*
+        * Although some platforms refer to it as Wa_1604555607, we need to
+        * program it even on those that don't explicitly list that
+        * workaround.
+        *
+        * Note that the programming of this register is further modified
+        * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
+        * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
+        * value when read. The default value for this register is zero for all
+        * fields and there are no bit masks. So instead of doing a RMW we
+        * should just write the TDS timer value. For the same reason read
+        * verification is ignored.
+        */
+       wa_add(wal,
+              FF_MODE2,
+              FF_MODE2_TDS_TIMER_MASK,
+              FF_MODE2_TDS_TIMER_128,
+              0);
+}
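The trailing 0 in the wa_add() call is the read-back mask: with no bits set, the workaround framework skips verifying the value it wrote, which is exactly what the broken FF_MODE2 reads (Wa_1608008084) require. Annotated call shape, with argument roles inferred from the call sites in this file (treat the parameter names as descriptive guesses):

	wa_add(wal,
	       FF_MODE2,			/* register */
	       FF_MODE2_TDS_TIMER_MASK,		/* bits this entry owns */
	       FF_MODE2_TDS_TIMER_128,		/* value to program */
	       0);				/* read mask: 0 = skip verification */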
+
 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
                                       struct i915_wa_list *wal)
 {
+       gen12_ctx_gt_tuning_init(engine, wal);
+
        /*
         * Wa_1409142259:tgl
         * Wa_1409347922:tgl
@@ -628,27 +657,17 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
-}
-
-static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
-                                    struct i915_wa_list *wal)
-{
-       gen12_ctx_workarounds_init(engine, wal);
 
        /*
-        * Wa_1604555607:tgl,rkl
+        * Wa_16011163337
         *
-        * Note that the implementation of this workaround is further modified
-        * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
-        * FF_MODE2 register will return the wrong value when read. The default
-        * value for this register is zero for all fields and there are no bit
-        * masks. So instead of doing a RMW we should just write the GS Timer
-        * and TDS timer values for Wa_1604555607 and Wa_16011163337.
+        * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
+        * to Wa_1608008084.
         */
        wa_add(wal,
               FF_MODE2,
-              FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
-              FF_MODE2_GS_TIMER_224  | FF_MODE2_TDS_TIMER_128,
+              FF_MODE2_GS_TIMER_MASK,
+              FF_MODE2_GS_TIMER_224,
               0);
 }
 
@@ -664,16 +683,6 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
        /* Wa_22010493298 */
        wa_masked_en(wal, HIZ_CHICKEN,
                     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
-
-       /*
-        * Wa_16011163337
-        *
-        * Like in tgl_ctx_workarounds_init(), read verification is ignored due
-        * to Wa_1608008084.
-        */
-       wa_add(wal,
-              FF_MODE2,
-              FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0);
 }
 
 static void
@@ -690,9 +699,6 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 
        if (IS_DG1(i915))
                dg1_ctx_workarounds_init(engine, wal);
-       else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) ||
-                IS_TIGERLAKE(i915))
-               tgl_ctx_workarounds_init(engine, wal);
        else if (IS_GEN(i915, 12))
                gen12_ctx_workarounds_init(engine, wal);
        else if (IS_GEN(i915, 11))
@@ -1078,11 +1084,37 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 }
 
+/*
+ * Though there are per-engine instances of these registers,
+ * they retain their value through engine resets and should
+ * only be provided on the GT workaround list rather than
+ * the engine-specific workaround list.
+ */
+static void
+wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+       struct intel_engine_cs *engine;
+       struct intel_gt *gt = &i915->gt;
+       int id;
+
+       for_each_engine(engine, gt, id) {
+               if (engine->class != VIDEO_DECODE_CLASS ||
+                   (engine->instance % 2))
+                       continue;
+
+               wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
+                           IECPUNIT_CLKGATE_DIS);
+       }
+}
+
 static void
 gen12_gt_workarounds_init(struct drm_i915_private *i915,
                          struct i915_wa_list *wal)
 {
        wa_init_mcr(i915, wal);
+
+       /* Wa_14011060649:tgl,rkl,dg1,adls */
+       wa_14011060649(i915, wal);
 }
 
 static void
@@ -1755,11 +1787,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                            GEN7_FF_THREAD_MODE,
                            GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
 
-               /* Wa_22010271021:ehl */
-               if (IS_JSL_EHL(i915))
-                       wa_masked_en(wal,
-                                    GEN9_CS_DEBUG_MODE1,
-                                    FF_DOP_CLOCK_GATE_DISABLE);
+               /* Wa_22010271021 */
+               wa_masked_en(wal,
+                            GEN9_CS_DEBUG_MODE1,
+                            FF_DOP_CLOCK_GATE_DISABLE);
        }
 
        if (IS_GEN_RANGE(i915, 9, 12)) {
@@ -1828,9 +1859,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                              CACHE_MODE_0_GEN7,
                              /* enable HiZ Raw Stall Optimization */
                              HIZ_RAW_STALL_OPT_DISABLE);
-
-               /* WaDisable4x2SubspanOptimization:hsw */
-               wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
        }
 
        if (IS_VALLEYVIEW(i915)) {
index e1ba03b..32589c6 100644 (file)
@@ -55,7 +55,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
                kfree(ring);
                return NULL;
        }
-       i915_active_init(&ring->vma->active, NULL, NULL);
+       i915_active_init(&ring->vma->active, NULL, NULL, 0);
        __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
        __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
        ring->vma->node.size = sz;
index b9bdd1d..26685b9 100644 (file)
@@ -88,7 +88,8 @@ static int __live_context_size(struct intel_engine_cs *engine)
                goto err;
 
        vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj,
-                                                i915_coherent_map_type(engine->i915));
+                                                i915_coherent_map_type(engine->i915,
+                                                                       ce->state->obj, false));
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                intel_context_unpin(ce);
index b2c3693..4896e4c 100644 (file)
@@ -77,7 +77,7 @@ static struct pulse *pulse_create(void)
                return p;
 
        kref_init(&p->kref);
-       i915_active_init(&p->active, pulse_active, pulse_retire);
+       i915_active_init(&p->active, pulse_active, pulse_retire, 0);
 
        return p;
 }
index 1081cd3..1f93591 100644 (file)
@@ -4716,7 +4716,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
                SUBTEST(live_virtual_reset),
        };
 
-       if (!HAS_EXECLISTS(i915))
+       if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
                return 0;
 
        if (intel_gt_is_wedged(&i915->gt))
index 7469859..5b63d4d 100644 (file)
@@ -69,7 +69,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
        h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
 
        vaddr = i915_gem_object_pin_map_unlocked(h->obj,
-                                                i915_coherent_map_type(gt->i915));
+                                                i915_coherent_map_type(gt->i915, h->obj, false));
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                goto err_unpin_hws;
@@ -130,7 +130,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
                return ERR_CAST(obj);
        }
 
-       vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915));
+       vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false));
        if (IS_ERR(vaddr)) {
                i915_gem_object_put(obj);
                i915_vm_put(vm);
index 85e7df6..d8f6623 100644 (file)
@@ -1221,7 +1221,9 @@ static int compare_isolation(struct intel_engine_cs *engine,
        }
 
        lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
-                                     i915_coherent_map_type(engine->i915));
+                                              i915_coherent_map_type(engine->i915,
+                                                                     ce->state->obj,
+                                                                     false));
        if (IS_ERR(lrc)) {
                err = PTR_ERR(lrc);
                goto err_B1;
index f097e42..710f825 100644 (file)
@@ -34,6 +34,7 @@ int live_rc6_manual(void *arg)
        struct intel_rc6 *rc6 = &gt->rc6;
        u64 rc0_power, rc6_power;
        intel_wakeref_t wakeref;
+       bool has_power;
        ktime_t dt;
        u64 res[2];
        int err = 0;
@@ -50,6 +51,7 @@ int live_rc6_manual(void *arg)
        if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
                return 0;
 
+       has_power = librapl_supported(gt->i915);
        wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
        /* Force RC6 off for starters */
@@ -71,11 +73,14 @@ int live_rc6_manual(void *arg)
                goto out_unlock;
        }
 
-       rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt));
-       if (!rc0_power) {
-               pr_err("No power measured while in RC0\n");
-               err = -EINVAL;
-               goto out_unlock;
+       if (has_power) {
+               rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
+                                     ktime_to_ns(dt));
+               if (!rc0_power) {
+                       pr_err("No power measured while in RC0\n");
+                       err = -EINVAL;
+                       goto out_unlock;
+               }
        }
 
        /* Manually enter RC6 */
@@ -97,13 +102,16 @@ int live_rc6_manual(void *arg)
                err = -EINVAL;
        }
 
-       rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, ktime_to_ns(dt));
-       pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
-               rc0_power, rc6_power);
-       if (2 * rc6_power > rc0_power) {
-               pr_err("GPU leaked energy while in RC6!\n");
-               err = -EINVAL;
-               goto out_unlock;
+       if (has_power) {
+               rc6_power = div64_u64(NSEC_PER_SEC * rc6_power,
+                                     ktime_to_ns(dt));
+               pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
+                       rc0_power, rc6_power);
+               if (2 * rc6_power > rc0_power) {
+                       pr_err("GPU leaked energy while in RC6!\n");
+                       err = -EINVAL;
+                       goto out_unlock;
+               }
        }
 
        /* Restore what should have been the original state! */
index 9960927..c12e741 100644 (file)
@@ -291,7 +291,7 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
                SUBTEST(live_ctx_switch_wa),
        };
 
-       if (HAS_EXECLISTS(i915))
+       if (i915->gt.submission_method > INTEL_SUBMISSION_RING)
                return 0;
 
        return intel_gt_live_subtests(tests, &i915->gt);
index 967641f..3ca1bd5 100644 (file)
@@ -606,7 +606,7 @@ int live_rps_frequency_cs(void *arg)
        int err = 0;
 
        /*
-        * The premise is that the GPU does change freqency at our behest.
+        * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */
@@ -747,7 +747,7 @@ int live_rps_frequency_srm(void *arg)
        int err = 0;
 
        /*
-        * The premise is that the GPU does change freqency at our behest.
+        * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */
@@ -1139,7 +1139,7 @@ int live_rps_power(void *arg)
        if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
                return 0;
 
-       if (!librapl_energy_uJ())
+       if (!librapl_supported(gt->i915))
                return 0;
 
        if (igt_spinner_init(&spin, gt))
index f8f02aa..0683b27 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/shmem_fs.h>
 
 #include "gem/i915_gem_object.h"
+#include "gem/i915_gem_lmem.h"
 #include "shmem_utils.h"
 
 struct file *shmem_create_from_data(const char *name, void *data, size_t len)
@@ -39,7 +40,8 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
                return file;
        }
 
-       ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+       ptr = i915_gem_object_pin_map_unlocked(obj, i915_gem_object_is_lmem(obj) ?
+                                               I915_MAP_WC : I915_MAP_WB);
        if (IS_ERR(ptr))
                return ERR_CAST(ptr);
 
index 78305b2..adae04c 100644 (file)
@@ -682,7 +682,9 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
        if (IS_ERR(vma))
                return PTR_ERR(vma);
 
-       vaddr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB);
+       vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
+                                                i915_coherent_map_type(guc_to_gt(guc)->i915,
+                                                                       vma->obj, true));
        if (IS_ERR(vaddr)) {
                i915_vma_unpin_and_release(&vma, 0);
                return PTR_ERR(vaddr);
index 92688a9..335719f 100644 (file)
@@ -11,6 +11,7 @@
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_irq.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_lrc.h"
 #include "gt/intel_mocs.h"
@@ -264,6 +265,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t)
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+       if (iir & GT_RENDER_USER_INTERRUPT) {
+               intel_engine_signal_breadcrumbs(engine);
+               tasklet_hi_schedule(&engine->execlists.tasklet);
+       }
+}
+
 static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -608,35 +617,6 @@ static int guc_resume(struct intel_engine_cs *engine)
 static void guc_set_default_submission(struct intel_engine_cs *engine)
 {
        engine->submit_request = guc_submit_request;
-       engine->schedule = i915_schedule;
-       engine->execlists.tasklet.callback = guc_submission_tasklet;
-
-       engine->reset.prepare = guc_reset_prepare;
-       engine->reset.rewind = guc_reset_rewind;
-       engine->reset.cancel = guc_reset_cancel;
-       engine->reset.finish = guc_reset_finish;
-
-       engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-       engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-
-       /*
-        * TODO: GuC supports timeslicing and semaphores as well, but they're
-        * handled by the firmware so some minor tweaks are required before
-        * enabling.
-        *
-        * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
-        * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-        */
-
-       engine->emit_bb_start = gen8_emit_bb_start;
-
-       /*
-        * For the breadcrumb irq to work we need the interrupts to stay
-        * enabled. However, on all platforms on which we'll have support for
-        * GuC submission we don't allow disabling the interrupts at runtime, so
-        * we're always safe with the current flow.
-        */
-       GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
 }
 
 static void guc_release(struct intel_engine_cs *engine)
@@ -658,6 +638,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
        engine->cops = &guc_context_ops;
        engine->request_alloc = guc_request_alloc;
 
+       engine->schedule = i915_schedule;
+
+       engine->reset.prepare = guc_reset_prepare;
+       engine->reset.rewind = guc_reset_rewind;
+       engine->reset.cancel = guc_reset_cancel;
+       engine->reset.finish = guc_reset_finish;
+
        engine->emit_flush = gen8_emit_flush_xcs;
        engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
        engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
@@ -666,6 +653,19 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
                engine->emit_flush = gen12_emit_flush_xcs;
        }
        engine->set_default_submission = guc_set_default_submission;
+
+       engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+
+       /*
+        * TODO: GuC supports timeslicing and semaphores as well, but they're
+        * handled by the firmware so some minor tweaks are required before
+        * enabling.
+        *
+        * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+        * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+        */
+
+       engine->emit_bb_start = gen8_emit_bb_start;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -689,6 +689,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 static inline void guc_default_irqs(struct intel_engine_cs *engine)
 {
        engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
+       intel_engine_set_irq_handler(engine, cs_irq_handler);
 }
 
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
@@ -753,8 +754,3 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
 {
        guc->submission_selected = __guc_submission_selected(guc);
 }
-
-bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
-{
-       return engine->set_default_submission == guc_set_default_submission;
-}
index 5f7b9e6..3f70050 100644 (file)
@@ -20,7 +20,6 @@ void intel_guc_submission_fini(struct intel_guc *guc);
 int intel_guc_preempt_work_create(struct intel_guc *guc);
 void intel_guc_preempt_work_destroy(struct intel_guc *guc);
 int intel_guc_submission_setup(struct intel_engine_cs *engine);
-bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
 
 static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
 {
index 2126dd8..56d2144 100644 (file)
@@ -82,7 +82,9 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
        if (IS_ERR(vma))
                return PTR_ERR(vma);
 
-       vaddr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB);
+       vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
+                                                i915_coherent_map_type(gt->i915,
+                                                                       vma->obj, true));
        if (IS_ERR(vaddr)) {
                i915_vma_unpin_and_release(&vma, 0);
                return PTR_ERR(vaddr);
index aa573b0..b1aa1c4 100644 (file)
@@ -343,18 +343,15 @@ out:
 void __i915_active_init(struct i915_active *ref,
                        int (*active)(struct i915_active *ref),
                        void (*retire)(struct i915_active *ref),
+                       unsigned long flags,
                        struct lock_class_key *mkey,
                        struct lock_class_key *wkey)
 {
-       unsigned long bits;
-
        debug_active_init(ref);
 
-       ref->flags = 0;
+       ref->flags = flags;
        ref->active = active;
-       ref->retire = ptr_unpack_bits(retire, &bits, 2);
-       if (bits & I915_ACTIVE_MAY_SLEEP)
-               ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+       ref->retire = retire;
 
        spin_lock_init(&ref->tree_lock);
        ref->tree = RB_ROOT;
@@ -1156,8 +1153,7 @@ static int auto_active(struct i915_active *ref)
        return 0;
 }
 
-__i915_active_call static void
-auto_retire(struct i915_active *ref)
+static void auto_retire(struct i915_active *ref)
 {
        i915_active_put(ref);
 }
@@ -1171,7 +1167,7 @@ struct i915_active *i915_active_create(void)
                return NULL;
 
        kref_init(&aa->ref);
-       i915_active_init(&aa->base, auto_active, auto_retire);
+       i915_active_init(&aa->base, auto_active, auto_retire, 0);
 
        return &aa->base;
 }
index fb165d3..d0feda6 100644 (file)
@@ -152,15 +152,16 @@ i915_active_fence_isset(const struct i915_active_fence *active)
 void __i915_active_init(struct i915_active *ref,
                        int (*active)(struct i915_active *ref),
                        void (*retire)(struct i915_active *ref),
+                       unsigned long flags,
                        struct lock_class_key *mkey,
                        struct lock_class_key *wkey);
 
 /* Specialise each class of i915_active to avoid impossible lockdep cycles. */
-#define i915_active_init(ref, active, retire) do {             \
-       static struct lock_class_key __mkey;                            \
-       static struct lock_class_key __wkey;                            \
-                                                                       \
-       __i915_active_init(ref, active, retire, &__mkey, &__wkey);      \
+#define i915_active_init(ref, active, retire, flags) do {                      \
+       static struct lock_class_key __mkey;                                    \
+       static struct lock_class_key __wkey;                                    \
+                                                                               \
+       __i915_active_init(ref, active, retire, flags, &__mkey, &__wkey);       \
 } while (0)
 
 struct dma_fence *
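
For illustration, a minimal sketch of the reworked convention: the may-sleep property of a retire callback is now declared through the new flags argument rather than by tagging the function pointer. The callback names and the container "foo" below are hypothetical, and it is assumed that I915_ACTIVE_RETIRE_SLEEPS remains the flag for retire callbacks that may sleep:

	static int my_active(struct i915_active *ref)
	{
		return 0; /* acquire whatever the tracker guards */
	}

	static void my_retire(struct i915_active *ref)
	{
		/* may sleep: declared via the flags argument below */
	}

	/* I915_ACTIVE_RETIRE_SLEEPS assumed per the pre-existing internal flag */
	i915_active_init(&foo->active, my_active, my_retire,
			 I915_ACTIVE_RETIRE_SLEEPS);
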
index 6360c3e..c149f34 100644 (file)
@@ -24,11 +24,6 @@ struct i915_active_fence {
 
 struct active_node;
 
-#define I915_ACTIVE_MAY_SLEEP BIT(0)
-
-#define __i915_active_call __aligned(4)
-#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
-
 struct i915_active {
        atomic_t count;
        struct mutex mutex;
index e6f1e93..5b4b2bd 100644 (file)
@@ -1369,6 +1369,20 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
        return 0;
 }
 
+/**
+ * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser()
+ * @batch_length: length of the commands in batch_obj
+ * @trampoline: Whether jump trampolines are used.
+ *
+ * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser().
+ * This has to be preallocated because the command parser runs in signaling
+ * context and may not allocate any memory.
+ *
+ * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use
+ * IS_ERR() to check for errors. Must be freed with kfree().
+ *
+ * NULL is a valid value, meaning no allocation was required.
+ */
 unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
                                                            bool trampoline)
 {
@@ -1401,7 +1415,9 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
  * @batch_offset: byte offset in the batch at which execution starts
  * @batch_length: length of the commands in batch_obj
  * @shadow: validated copy of the batch buffer in question
- * @trampoline: whether to emit a conditional trampoline at the end of the batch
+ * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist()
+ * @shadow_map: mapping to @shadow vma
+ * @batch_map: mapping to @batch vma
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
index b654b74..8dd3746 100644 (file)
@@ -622,9 +622,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
                seq_printf(m, "DDC2 = 0x%08x\n",
                           intel_uncore_read(uncore, DCC2));
                seq_printf(m, "C0DRB3 = 0x%04x\n",
-                          intel_uncore_read16(uncore, C0DRB3));
+                          intel_uncore_read16(uncore, C0DRB3_BW));
                seq_printf(m, "C1DRB3 = 0x%04x\n",
-                          intel_uncore_read16(uncore, C1DRB3));
+                          intel_uncore_read16(uncore, C1DRB3_BW));
        } else if (INTEL_GEN(dev_priv) >= 6) {
                seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
                           intel_uncore_read(uncore, MAD_DIMM_C0));
index db513f9..5118dc8 100644 (file)
@@ -1727,6 +1727,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF_DRV(I915_GEM_CREATE_EXT, i915_gem_create_ext_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
index 501a75c..9aee6a0 100644 (file)
@@ -77,6 +77,7 @@
 #include "gem/i915_gem_context_types.h"
 #include "gem/i915_gem_shrinker.h"
 #include "gem/i915_gem_stolen.h"
+#include "gem/i915_gem_lmem.h"
 
 #include "gt/intel_engine.h"
 #include "gt/intel_gt_types.h"
@@ -513,6 +514,13 @@ struct intel_l3_parity {
 };
 
 struct i915_gem_mm {
+       /*
+        * Shortcut for the stolen region. This points to
+        * INTEL_REGION_STOLEN_SMEM on integrated platforms,
+        * INTEL_REGION_STOLEN_LMEM on discrete, or NULL if the device doesn't
+        * support stolen.
+        */
+       struct intel_memory_region *stolen_region;
        /** Memory allocator for GTT stolen memory */
        struct drm_mm stolen;
        /** Protects the usage of the GTT stolen memory allocator. This is
@@ -1720,9 +1728,15 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
 }
 
 static inline bool
-intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
 {
-       return IS_BROXTON(dev_priv) && intel_vtd_active();
+       return IS_BROXTON(i915) && intel_vtd_active();
+}
+
+static inline bool
+intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
+{
+       return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
 }
 
 /* i915_drv.c */
@@ -1802,6 +1816,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
 #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
 #define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
+#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
@@ -1934,9 +1949,15 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
 }
 
 static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915)
+i915_coherent_map_type(struct drm_i915_private *i915,
+                      struct drm_i915_gem_object *obj, bool always_coherent)
 {
-       return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+       if (i915_gem_object_is_lmem(obj))
+               return I915_MAP_WC;
+       if (HAS_LLC(i915) || always_coherent)
+               return I915_MAP_WB;
+       else
+               return I915_MAP_WC;
 }
 
 #endif
index b3cedd2..cffd7f4 100644 (file)
@@ -157,8 +157,18 @@ try_again:
                if (vma) {
                        ret = -EBUSY;
                        if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-                           !i915_vma_is_active(vma))
-                               ret = i915_vma_unbind(vma);
+                           !i915_vma_is_active(vma)) {
+                               if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) {
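+                                       /*
+                                        * Best effort: if the vm mutex is
+                                        * contended, return -EBUSY rather
+                                        * than block.
+                                        */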
+                                       if (mutex_trylock(&vma->vm->mutex)) {
+                                               ret = __i915_vma_unbind(vma);
+                                               mutex_unlock(&vma->vm->mutex);
+                                       } else {
+                                               ret = -EBUSY;
+                                       }
+                               } else {
+                                       ret = i915_vma_unbind(vma);
+                               }
+                       }
 
                        __i915_vma_put(vma);
                }
index d4611c6..9ff5118 100644 (file)
@@ -4024,7 +4024,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
                intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
 
                if (iir & I915_USER_INTERRUPT)
-                       intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+                       intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
                if (iir & I915_MASTER_ERROR_INTERRUPT)
                        i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4132,7 +4132,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
                intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
                if (iir & I915_USER_INTERRUPT)
-                       intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+                       intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
                if (iir & I915_MASTER_ERROR_INTERRUPT)
                        i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4277,10 +4277,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
                intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
                if (iir & I915_USER_INTERRUPT)
-                       intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+                       intel_engine_cs_irq(dev_priv->gt.engine[RCS0],
+                                           iir);
 
                if (iir & I915_BSD_USER_INTERRUPT)
-                       intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
+                       intel_engine_cs_irq(dev_priv->gt.engine[VCS0],
+                                           iir >> 25);
 
                if (iir & I915_MASTER_ERROR_INTERRUPT)
                        i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
index 14cd64c..4a114a5 100644 (file)
@@ -71,18 +71,18 @@ struct drm_printer;
        param(int, fastboot, -1, 0600) \
        param(int, enable_dpcd_backlight, -1, 0600) \
        param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
-       param(unsigned long, fake_lmem_start, 0, 0400) \
-       param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, 0600) \
+       param(unsigned long, fake_lmem_start, 0, IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) ? 0400 : 0) \
+       param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \
        /* leave bools at the end to not create holes */ \
        param(bool, enable_hangcheck, true, 0600) \
        param(bool, load_detect_test, false, 0600) \
        param(bool, force_reset_modeset_test, false, 0600) \
-       param(bool, error_capture, true, 0600) \
+       param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
        param(bool, disable_display, false, 0400) \
        param(bool, verbose_state_checks, true, 0) \
        param(bool, nuclear_pageflip, false, 0400) \
        param(bool, enable_dp_mst, true, 0600) \
-       param(bool, enable_gvt, false, 0400)
+       param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0)
 
 #define MEMBER(T, member, ...) T member;
 struct i915_params {
index 1680062..574881c 100644 (file)
@@ -908,7 +908,7 @@ static const struct intel_device_info rkl_info = {
 };
 
 #define DGFX_FEATURES \
-       .memory_regions = REGION_SMEM | REGION_LMEM, \
+       .memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \
        .has_master_unit_irq = 1, \
        .has_llc = 0, \
        .has_snoop = 1, \
index de8ebc3..cb03e41 100644 (file)
@@ -1257,11 +1257,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
        case 8:
        case 9:
        case 10:
-               if (intel_engine_in_execlists_submission_mode(ce->engine)) {
-                       stream->specific_ctx_id_mask =
-                               (1U << GEN8_CTX_ID_WIDTH) - 1;
-                       stream->specific_ctx_id = stream->specific_ctx_id_mask;
-               } else {
+               if (intel_engine_uses_guc(ce->engine)) {
                        /*
                         * When using GuC, the context descriptor we write in
                         * i915 is read by GuC and rewritten before it's
@@ -1280,6 +1276,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
                         */
                        stream->specific_ctx_id_mask =
                                (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+               } else {
+                       stream->specific_ctx_id_mask =
+                               (1U << GEN8_CTX_ID_WIDTH) - 1;
+                       stream->specific_ctx_id = stream->specific_ctx_id_mask;
                }
                break;
 
index 41651ac..a75cd1d 100644 (file)
@@ -476,6 +476,8 @@ engine_event_status(struct intel_engine_cs *engine,
 static int
 config_status(struct drm_i915_private *i915, u64 config)
 {
+       struct intel_gt *gt = &i915->gt;
+
        switch (config) {
        case I915_PMU_ACTUAL_FREQUENCY:
                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
@@ -489,7 +491,7 @@ config_status(struct drm_i915_private *i915, u64 config)
        case I915_PMU_INTERRUPTS:
                break;
        case I915_PMU_RC6_RESIDENCY:
-               if (!HAS_RC6(i915))
+               if (!gt->rc6.supported)
                        return -ENODEV;
                break;
        case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
index fed337a..e49da36 100644 (file)
@@ -419,11 +419,73 @@ static int query_perf_config(struct drm_i915_private *i915,
        }
 }
 
+static int query_memregion_info(struct drm_i915_private *i915,
+                               struct drm_i915_query_item *query_item)
+{
+       struct drm_i915_query_memory_regions __user *query_ptr =
+               u64_to_user_ptr(query_item->data_ptr);
+       struct drm_i915_memory_region_info __user *info_ptr =
+               &query_ptr->regions[0];
+       struct drm_i915_memory_region_info info = { };
+       struct drm_i915_query_memory_regions query;
+       struct intel_memory_region *mr;
+       u32 total_length;
+       int ret, id, i;
+
+       if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM))
+               return -ENODEV;
+
+       if (query_item->flags != 0)
+               return -EINVAL;
+
+       total_length = sizeof(query);
+       for_each_memory_region(mr, i915, id) {
+               if (mr->private)
+                       continue;
+
+               total_length += sizeof(info);
+       }
+
+       ret = copy_query_item(&query, sizeof(query), total_length, query_item);
+       if (ret != 0)
+               return ret;
+
+       if (query.num_regions)
+               return -EINVAL;
+
+       for (i = 0; i < ARRAY_SIZE(query.rsvd); i++) {
+               if (query.rsvd[i])
+                       return -EINVAL;
+       }
+
+       for_each_memory_region(mr, i915, id) {
+               if (mr->private)
+                       continue;
+
+               info.region.memory_class = mr->type;
+               info.region.memory_instance = mr->instance;
+               info.probed_size = mr->total;
+               info.unallocated_size = mr->avail;
+
+               if (__copy_to_user(info_ptr, &info, sizeof(info)))
+                       return -EFAULT;
+
+               query.num_regions++;
+               info_ptr++;
+       }
+
+       if (__copy_to_user(query_ptr, &query, sizeof(query)))
+               return -EFAULT;
+
+       return total_length;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
                                        struct drm_i915_query_item *query_item) = {
        query_topology_info,
        query_engine_info,
        query_perf_config,
+       query_memregion_info,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
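
For illustration, a userspace sketch of the two-step pattern for the new DRM_I915_QUERY_MEMORY_REGIONS item, using only the structures added in this series (error handling omitted; note the item returns -ENODEV unless CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM is enabled, per the check above):

	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_MEMORY_REGIONS,
	};
	struct drm_i915_query q = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_memory_regions *regions;
	unsigned int i;

	ioctl(fd, DRM_IOCTL_I915_QUERY, &q);	/* step 1: fills item.length */
	regions = calloc(1, item.length);
	item.data_ptr = (uintptr_t)regions;
	ioctl(fd, DRM_IOCTL_I915_QUERY, &q);	/* step 2: writes the blob */

	for (i = 0; i < regions->num_regions; i++) {
		/*
		 * Inspect regions->regions[i].region.memory_class /
		 * .memory_instance, .probed_size and .unallocated_size.
		 */
	}
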
index 07cca3b..c294e3f 100644 (file)
@@ -416,6 +416,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GEN11_VECS_SFC_USAGE(engine)           _MMIO((engine)->mmio_base + 0x2014)
 #define   GEN11_VECS_SFC_USAGE_BIT             (1 << 0)
 
+#define GEN12_HCP_SFC_FORCED_LOCK(engine)      _MMIO((engine)->mmio_base + 0x2910)
+#define   GEN12_HCP_SFC_FORCED_LOCK_BIT                REG_BIT(0)
+#define GEN12_HCP_SFC_LOCK_STATUS(engine)      _MMIO((engine)->mmio_base + 0x2914)
+#define   GEN12_HCP_SFC_LOCK_ACK_BIT           REG_BIT(1)
+#define   GEN12_HCP_SFC_USAGE_BIT                      REG_BIT(0)
+
 #define GEN12_SFC_DONE(n)              _MMIO(0x1cc00 + (n) * 0x100)
 #define GEN12_SFC_DONE_MAX             4
 
@@ -487,6 +493,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GAB_CTL                                _MMIO(0x24000)
 #define   GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8)
 
+#define GU_CNTL                                _MMIO(0x101010)
+#define   LMEM_INIT                    REG_BIT(7)
+
 #define GEN6_STOLEN_RESERVED           _MMIO(0x1082C0)
 #define GEN6_STOLEN_RESERVED_ADDR_MASK (0xFFF << 20)
 #define GEN7_STOLEN_RESERVED_ADDR_MASK (0x3FFF << 18)
@@ -2715,6 +2724,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */
 #define RING_CTX_TIMESTAMP(base)       _MMIO((base) + 0x3a8) /* gen8+ */
 
+#define VDBOX_CGCTL3F10(base)          _MMIO((base) + 0x3f10)
+#define   IECPUNIT_CLKGATE_DIS         REG_BIT(22)
+
 #define ERROR_GEN6     _MMIO(0x40a0)
 #define GEN7_ERR_INT   _MMIO(0x44040)
 #define   ERR_INT_POISON               (1 << 31)
@@ -3781,8 +3793,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define CSHRDDR3CTL_DDR3       (1 << 2)
 
 /* 965 MCH register controlling DRAM channel configuration */
-#define C0DRB3                 _MMIO(MCHBAR_MIRROR_BASE + 0x206)
-#define C1DRB3                 _MMIO(MCHBAR_MIRROR_BASE + 0x606)
+#define C0DRB3_BW              _MMIO(MCHBAR_MIRROR_BASE + 0x206)
+#define C1DRB3_BW              _MMIO(MCHBAR_MIRROR_BASE + 0x606)
 
 /* snb MCH registers for reading the DRAM channel configuration */
 #define MAD_DIMM_C0                    _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5004)
@@ -12208,6 +12220,7 @@ enum skl_power_gate {
 #define GEN12_GLOBAL_MOCS(i)   _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
 
 #define GEN12_GSMBASE                  _MMIO(0x108100)
+#define GEN12_DSMBASE                  _MMIO(0x1080C0)
 
 /* gamt regs */
 #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
index bec9c36..970d8f4 100644 (file)
@@ -929,7 +929,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
        u32 seqno;
        int ret;
 
-       might_sleep_if(gfpflags_allow_blocking(gfp));
+       might_alloc(gfp);
 
        /* Check that the caller provided an already pinned context */
        __intel_context_pin(ce);
index 07490db..a6cd0fa 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "display/intel_frontbuffer.h"
 
+#include "gem/i915_gem_lmem.h"
 #include "gt/intel_engine.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_gt.h"
@@ -93,7 +94,6 @@ static int __i915_vma_active(struct i915_active *ref)
        return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
 }
 
-__i915_active_call
 static void __i915_vma_retire(struct i915_active *ref)
 {
        i915_vma_put(active_to_vma(ref));
@@ -124,7 +124,7 @@ vma_create(struct drm_i915_gem_object *obj,
        vma->size = obj->base.size;
        vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-       i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
+       i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire, 0);
 
        /* Declare ourselves safe for use inside shrinkers */
        if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -448,9 +448,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
        void __iomem *ptr;
        int err;
 
-       if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
-               err = -ENODEV;
-               goto err;
+       if (!i915_gem_object_is_lmem(vma->obj)) {
+               if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
+                       err = -ENODEV;
+                       goto err;
+               }
        }
 
        GEM_BUG_ON(!i915_vma_is_ggtt(vma));
@@ -458,9 +460,19 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
        ptr = READ_ONCE(vma->iomap);
        if (ptr == NULL) {
-               ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
-                                       vma->node.start,
-                                       vma->node.size);
+               /*
+                * TODO: consider just using i915_gem_object_pin_map() for lmem
+                * instead, which already supports mapping non-contiguous chunks
+                * of pages, that way we can also drop the
+                * I915_BO_ALLOC_CONTIGUOUS when allocating the object.
+                */
+               if (i915_gem_object_is_lmem(vma->obj))
+                       ptr = i915_gem_object_lmem_io_map(vma->obj, 0,
+                                                         vma->obj->base.size);
+               else
+                       ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
+                                               vma->node.start,
+                                               vma->node.size);
                if (ptr == NULL) {
                        err = -ENOMEM;
                        goto err;
@@ -905,8 +917,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
                        if (err)
                                goto err_fence;
 
-                       err = i915_vm_pin_pt_stash(vma->vm,
-                                                  &work->stash);
+                       err = i915_vm_map_pt_stash(vma->vm, &work->stash);
                        if (err)
                                goto err_fence;
                }
index bf837b6..d98e8b8 100644 (file)
@@ -22,8 +22,28 @@ static const struct {
                .class = INTEL_MEMORY_STOLEN_SYSTEM,
                .instance = 0,
        },
+       [INTEL_REGION_STOLEN_LMEM] = {
+               .class = INTEL_MEMORY_STOLEN_LOCAL,
+               .instance = 0,
+       },
 };
 
+struct intel_memory_region *
+intel_memory_region_lookup(struct drm_i915_private *i915,
+                          u16 class, u16 instance)
+{
+       struct intel_memory_region *mr;
+       int id;
+
+       /* XXX: consider converting to an rb tree at some point */
+       for_each_memory_region(mr, i915, id) {
+               if (mr->type == class && mr->instance == instance)
+                       return mr;
+       }
+
+       return NULL;
+}
+
 struct intel_memory_region *
 intel_memory_region_by_type(struct drm_i915_private *i915,
                            enum intel_memory_type mem_type)
@@ -278,8 +298,15 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915)
                case INTEL_MEMORY_SYSTEM:
                        mem = i915_gem_shmem_setup(i915);
                        break;
+               case INTEL_MEMORY_STOLEN_LOCAL:
+                       mem = i915_gem_stolen_lmem_setup(i915);
+                       if (!IS_ERR(mem))
+                               i915->mm.stolen_region = mem;
+                       break;
                case INTEL_MEMORY_STOLEN_SYSTEM:
-                       mem = i915_gem_stolen_setup(i915);
+                       mem = i915_gem_stolen_smem_setup(i915);
+                       if (!IS_ERR(mem))
+                               i915->mm.stolen_region = mem;
                        break;
                default:
                        continue;
index edd4906..d24ce5a 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/mutex.h>
 #include <linux/io-mapping.h>
 #include <drm/drm_mm.h>
+#include <drm/i915_drm.h>
 
 #include "i915_buddy.h"
 
@@ -19,25 +20,25 @@ struct drm_i915_gem_object;
 struct intel_memory_region;
 struct sg_table;
 
-/**
- *  Base memory type
- */
 enum intel_memory_type {
-       INTEL_MEMORY_SYSTEM = 0,
-       INTEL_MEMORY_LOCAL,
+       INTEL_MEMORY_SYSTEM = I915_MEMORY_CLASS_SYSTEM,
+       INTEL_MEMORY_LOCAL = I915_MEMORY_CLASS_DEVICE,
        INTEL_MEMORY_STOLEN_SYSTEM,
+       INTEL_MEMORY_STOLEN_LOCAL,
 };
 
 enum intel_region_id {
        INTEL_REGION_SMEM = 0,
        INTEL_REGION_LMEM,
        INTEL_REGION_STOLEN_SMEM,
+       INTEL_REGION_STOLEN_LMEM,
        INTEL_REGION_UNKNOWN, /* Should be last */
 };
 
 #define REGION_SMEM     BIT(INTEL_REGION_SMEM)
 #define REGION_LMEM     BIT(INTEL_REGION_LMEM)
 #define REGION_STOLEN_SMEM   BIT(INTEL_REGION_STOLEN_SMEM)
+#define REGION_STOLEN_LMEM   BIT(INTEL_REGION_STOLEN_LMEM)
 
 #define I915_ALLOC_MIN_PAGE_SIZE  BIT(0)
 #define I915_ALLOC_CONTIGUOUS     BIT(1)
@@ -82,7 +83,8 @@ struct intel_memory_region {
        u16 type;
        u16 instance;
        enum intel_region_id id;
-       char name[8];
+       char name[16];
+       bool private; /* not for userspace */
 
        struct list_head reserved;
 
@@ -95,6 +97,10 @@ struct intel_memory_region {
        } objects;
 };
 
+struct intel_memory_region *
+intel_memory_region_lookup(struct drm_i915_private *i915,
+                          u16 class, u16 instance);
+
 int intel_memory_region_init_buddy(struct intel_memory_region *mem);
 void intel_memory_region_release_buddy(struct intel_memory_region *mem);
 
index ed5abe7..b4aaf8b 100644 (file)
@@ -1917,6 +1917,18 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
        if (ret)
                return ret;
 
+       /*
+        * The boot firmware initializes local memory and assesses its health.
+        * If memory training fails, the punit will have been instructed to
+        * keep the GT powered down; we won't be able to communicate with it
+        * and we should not continue with driver initialization.
+        */
+       if (IS_DGFX(i915) &&
+           !(__raw_uncore_read32(uncore, GU_CNTL) & LMEM_INIT)) {
+               drm_err(&i915->drm, "LMEM not initialized by firmware\n");
+               return -ENODEV;
+       }
+
        if (INTEL_GEN(i915) > 5 && !intel_vgpu_active(i915))
                uncore->flags |= UNCORE_HAS_FORCEWAKE;
 
index 4002c98..61bf456 100644 (file)
@@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
                return NULL;
 
        kref_init(&active->ref);
-       i915_active_init(&active->base, __live_active, __live_retire);
+       i915_active_init(&active->base, __live_active, __live_retire, 0);
 
        return active;
 }
index dc394fb..152d9ab 100644 (file)
@@ -87,14 +87,14 @@ static void simulate_hibernate(struct drm_i915_private *i915)
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
-static int pm_prepare(struct drm_i915_private *i915)
+static int igt_pm_prepare(struct drm_i915_private *i915)
 {
        i915_gem_suspend(i915);
 
        return 0;
 }
 
-static void pm_suspend(struct drm_i915_private *i915)
+static void igt_pm_suspend(struct drm_i915_private *i915)
 {
        intel_wakeref_t wakeref;
 
@@ -104,7 +104,7 @@ static void pm_suspend(struct drm_i915_private *i915)
        }
 }
 
-static void pm_hibernate(struct drm_i915_private *i915)
+static void igt_pm_hibernate(struct drm_i915_private *i915)
 {
        intel_wakeref_t wakeref;
 
@@ -116,7 +116,7 @@ static void pm_hibernate(struct drm_i915_private *i915)
        }
 }
 
-static void pm_resume(struct drm_i915_private *i915)
+static void igt_pm_resume(struct drm_i915_private *i915)
 {
        intel_wakeref_t wakeref;
 
@@ -148,16 +148,16 @@ static int igt_gem_suspend(void *arg)
        if (err)
                goto out;
 
-       err = pm_prepare(i915);
+       err = igt_pm_prepare(i915);
        if (err)
                goto out;
 
-       pm_suspend(i915);
+       igt_pm_suspend(i915);
 
        /* Here be dragons! Note that with S3RST any S3 may become S4! */
        simulate_hibernate(i915);
 
-       pm_resume(i915);
+       igt_pm_resume(i915);
 
        err = switch_to_context(ctx);
 out:
@@ -183,16 +183,16 @@ static int igt_gem_hibernate(void *arg)
        if (err)
                goto out;
 
-       err = pm_prepare(i915);
+       err = igt_pm_prepare(i915);
        if (err)
                goto out;
 
-       pm_hibernate(i915);
+       igt_pm_hibernate(i915);
 
        /* Here be dragons! */
        simulate_hibernate(i915);
 
-       pm_resume(i915);
+       igt_pm_resume(i915);
 
        err = switch_to_context(ctx);
 out:
index 45c6c01..0a1472b 100644 (file)
@@ -186,7 +186,7 @@ retry:
                if (err)
                        goto err_ppgtt_cleanup;
 
-               err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
+               err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
                if (err) {
                        i915_vm_free_pt_stash(&ppgtt->vm, &stash);
                        goto err_ppgtt_cleanup;
@@ -208,7 +208,7 @@ retry:
                if (err)
                        goto err_ppgtt_cleanup;
 
-               err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
+               err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
                if (err) {
                        i915_vm_free_pt_stash(&ppgtt->vm, &stash);
                        goto err_ppgtt_cleanup;
@@ -325,11 +325,10 @@ retry:
                                                           BIT_ULL(size)))
                                        goto alloc_vm_end;
 
-                               err = i915_vm_pin_pt_stash(vm, &stash);
+                               err = i915_vm_map_pt_stash(vm, &stash);
                                if (!err)
                                        vm->allocate_va_range(vm, &stash,
                                                              addr, BIT_ULL(size));
-
                                i915_vm_free_pt_stash(vm, &stash);
 alloc_vm_end:
                                if (err == -EDEADLK) {
@@ -1968,10 +1967,9 @@ retry:
                        if (err)
                                goto end_ww;
 
-                       err = i915_vm_pin_pt_stash(vm, &stash);
+                       err = i915_vm_map_pt_stash(vm, &stash);
                        if (!err)
                                vm->allocate_va_range(vm, &stash, offset, chunk_size);
-
                        i915_vm_free_pt_stash(vm, &stash);
 end_ww:
                        if (err == -EDEADLK) {
index e9d86da..bfb0290 100644 (file)
@@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg)
        }
 
        /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
-       scratch = kmap(__px_page(ce->vm->scratch[0]));
+       scratch = __px_vaddr(ce->vm->scratch[0]);
        memset(scratch, POISON_FREE, PAGE_SIZE);
 
        rq = intel_context_create_request(ce);
@@ -405,7 +405,6 @@ static int live_noa_gpr(void *arg)
 out_rq:
        i915_request_put(rq);
 out_ce:
-       kunmap(__px_page(ce->vm->scratch[0]));
        intel_context_put(ce);
 out:
        stream_destroy(stream);
index 5fe7b80..dd06072 100644 (file)
@@ -967,6 +967,9 @@ static int igt_vma_remapped_gtt(void *arg)
        intel_wakeref_t wakeref;
        int err = 0;
 
+       if (!i915_ggtt_has_aperture(&i915->ggtt))
+               return 0;
+
        obj = i915_gem_object_create_internal(i915, 10 * 10 * PAGE_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
index cfbbe41..5fe397b 100644 (file)
@@ -94,9 +94,9 @@ int igt_spinner_pin(struct igt_spinner *spin,
        }
 
        if (!spin->batch) {
-               unsigned int mode =
-                       i915_coherent_map_type(spin->gt->i915);
+               unsigned int mode;
 
+               mode = i915_coherent_map_type(spin->gt->i915, spin->obj, false);
                vaddr = igt_spinner_pin_obj(ce, ww, spin->obj, mode, &spin->batch_vma);
                if (IS_ERR(vaddr))
                        return PTR_ERR(vaddr);
index a5fc0bf..f85fd8c 100644 (file)
@@ -513,7 +513,7 @@ static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
        if (err)
                return err;
 
-       ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+       ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(ptr))
                return PTR_ERR(ptr);
 
@@ -593,7 +593,9 @@ static int igt_gpu_write(struct i915_gem_context *ctx,
                if (err)
                        break;
 
+               i915_gem_object_lock(obj, NULL);
                err = igt_cpu_check(obj, dword, rng);
+               i915_gem_object_unlock(obj);
                if (err)
                        break;
        } while (!__igt_timeout(end_time, NULL));
@@ -629,6 +631,88 @@ out_put:
        return err;
 }
 
+static int igt_lmem_create_cleared_cpu(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       I915_RND_STATE(prng);
+       IGT_TIMEOUT(end_time);
+       u32 size, i;
+       int err;
+
+       i915_gem_drain_freed_objects(i915);
+
+       size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
+       size = round_up(size, PAGE_SIZE);
+       i = 0;
+
+       do {
+               struct drm_i915_gem_object *obj;
+               unsigned int flags;
+               u32 dword, val;
+               void *vaddr;
+
+               /*
+                * Alternate between cleared and uncleared allocations, while
+                * also dirtying the pages each time, to check that the pages
+                * are always cleared when requested; as the only user we should
+                * see some overlap of the underlying pages, if not all of them.
+                */
+
+               flags = I915_BO_ALLOC_CPU_CLEAR;
+               if (i & 1)
+                       flags = 0;
+
+               obj = i915_gem_object_create_lmem(i915, size, flags);
+               if (IS_ERR(obj))
+                       return PTR_ERR(obj);
+
+               i915_gem_object_lock(obj, NULL);
+               err = i915_gem_object_pin_pages(obj);
+               if (err)
+                       goto out_put;
+
+               dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
+                                                  &prng);
+
+               if (flags & I915_BO_ALLOC_CPU_CLEAR) {
+                       err = igt_cpu_check(obj, dword, 0);
+                       if (err) {
+                               pr_err("%s failed with size=%u, flags=%u\n",
+                                      __func__, size, flags);
+                               goto out_unpin;
+                       }
+               }
+
+               vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+               if (IS_ERR(vaddr)) {
+                       err = PTR_ERR(vaddr);
+                       goto out_unpin;
+               }
+
+               val = prandom_u32_state(&prng);
+
+               memset32(vaddr, val, obj->base.size / sizeof(u32));
+
+               i915_gem_object_flush_map(obj);
+               i915_gem_object_unpin_map(obj);
+out_unpin:
+               i915_gem_object_unpin_pages(obj);
+               __i915_gem_object_put_pages(obj);
+out_put:
+               i915_gem_object_unlock(obj);
+               i915_gem_object_put(obj);
+
+               if (err)
+                       break;
+               ++i;
+       } while (!__igt_timeout(end_time, NULL));
+
+       pr_info("%s completed (%u) iterations\n", __func__, i);
+
+       return err;
+}
+
 static int igt_lmem_write_gpu(void *arg)
 {
        struct drm_i915_private *i915 = arg;
@@ -1043,6 +1127,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_lmem_create),
+               SUBTEST(igt_lmem_create_cleared_cpu),
                SUBTEST(igt_lmem_write_cpu),
                SUBTEST(igt_lmem_write_gpu),
        };
index 58710ac..eb03b5b 100644 (file)
@@ -5,8 +5,18 @@
 
 #include <asm/msr.h>
 
+#include "i915_drv.h"
 #include "librapl.h"
 
+bool librapl_supported(const struct drm_i915_private *i915)
+{
+       /* Discrete cards require hwmon integration */
+       if (IS_DGFX(i915))
+               return false;
+
+       return librapl_energy_uJ();
+}
+
 u64 librapl_energy_uJ(void)
 {
        unsigned long long power;
index 887f3e9..e3b24fa 100644 (file)
@@ -8,6 +8,10 @@
 
 #include <linux/types.h>
 
+struct drm_i915_private;
+
+bool librapl_supported(const struct drm_i915_private *i915);
+
 u64 librapl_energy_uJ(void);
 
 #endif /* SELFTEST_LIBRAPL_H */
index ddc47bb..c2c7759 100644 (file)
@@ -62,8 +62,8 @@ extern "C" {
 #define I915_ERROR_UEVENT              "ERROR"
 #define I915_RESET_UEVENT              "RESET"
 
-/*
- * i915_user_extension: Base class for defining a chain of extensions
+/**
+ * struct i915_user_extension - Base class for defining a chain of extensions
  *
  * Many interfaces need to grow over time. In most cases we can simply
  * extend the struct and have userspace pass in more data. Another option,
@@ -76,12 +76,58 @@ extern "C" {
  * increasing complexity, and for large parts of that interface to be
  * entirely optional. The downside is more pointer chasing; chasing across
  * the __user boundary with pointers encapsulated inside u64.
+ *
+ * Example chaining:
+ *
+ * .. code-block:: C
+ *
+ *     struct i915_user_extension ext3 {
+ *             .next_extension = 0, // end
+ *             .name = ...,
+ *     };
+ *     struct i915_user_extension ext2 {
+ *             .next_extension = (uintptr_t)&ext3,
+ *             .name = ...,
+ *     };
+ *     struct i915_user_extension ext1 {
+ *             .next_extension = (uintptr_t)&ext2,
+ *             .name = ...,
+ *     };
+ *
+ * Typically the struct i915_user_extension would be embedded in some uAPI
+ * struct, and in this case we would feed it the head of the chain (i.e. ext1),
+ * which would then apply all of the above extensions.
+ *
  */
 struct i915_user_extension {
+       /**
+        * @next_extension:
+        *
+        * Pointer to the next struct i915_user_extension, or zero if the end.
+        */
        __u64 next_extension;
+       /**
+        * @name: Name of the extension.
+        *
+        * Note that the name here is just some integer.
+        *
+        * Also note that the name space for this is not global for the whole
+        * driver, but rather its scope/meaning is limited to the specific piece
+        * of uAPI which has embedded the struct i915_user_extension.
+        */
        __u32 name;
-       __u32 flags; /* All undefined bits must be zero. */
-       __u32 rsvd[4]; /* Reserved for future use; must be zero. */
+       /**
+        * @flags: MBZ
+        *
+        * All undefined bits must be zero.
+        */
+       __u32 flags;
+       /**
+        * @rsvd: MBZ
+        *
+        * Reserved for future use; must be zero.
+        */
+       __u32 rsvd[4];
 };
 
 /*
@@ -360,6 +406,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_QUERY                 0x39
 #define DRM_I915_GEM_VM_CREATE         0x3a
 #define DRM_I915_GEM_VM_DESTROY                0x3b
+#define DRM_I915_GEM_CREATE_EXT                0x3c
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT            DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -392,6 +439,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_ENTERVT     DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
 #define DRM_IOCTL_I915_GEM_LEAVEVT     DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
 #define DRM_IOCTL_I915_GEM_CREATE      DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
+#define DRM_IOCTL_I915_GEM_CREATE_EXT  DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext)
 #define DRM_IOCTL_I915_GEM_PREAD       DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
 #define DRM_IOCTL_I915_GEM_PWRITE      DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
 #define DRM_IOCTL_I915_GEM_MMAP                DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
@@ -1054,12 +1102,12 @@ struct drm_i915_gem_exec_fence {
        __u32 flags;
 };
 
-/**
+/*
  * See drm_i915_gem_execbuffer_ext_timeline_fences.
  */
 #define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
 
-/**
+/*
  * This structure describes an array of drm_syncobj and associated points for
  * timeline variants of drm_syncobj. It is invalid to append this structure to
  * the execbuf if I915_EXEC_FENCE_ARRAY is set.
@@ -1700,7 +1748,7 @@ struct drm_i915_gem_context_param {
        __u64 value;
 };
 
-/**
+/*
  * Context SSEU programming
  *
  * It may be necessary for either functional or performance reason to configure
@@ -2067,7 +2115,7 @@ struct drm_i915_perf_open_param {
        __u64 properties_ptr;
 };
 
-/**
+/*
  * Enable data capture for a stream that was either opened in a disabled state
  * via I915_PERF_FLAG_DISABLED or was later disabled via
  * I915_PERF_IOCTL_DISABLE.
@@ -2081,7 +2129,7 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
 
-/**
+/*
  * Disable data capture for a stream.
  *
  * It is an error to try and read a stream that is disabled.
@@ -2090,7 +2138,7 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE        _IO('i', 0x1)
 
-/**
+/*
  * Change metrics_set captured by a stream.
  *
  * If the stream is bound to a specific context, the configuration change
@@ -2103,7 +2151,7 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
 
-/**
+/*
  * Common to all i915 perf records
  */
 struct drm_i915_perf_record_header {
@@ -2151,7 +2199,7 @@ enum drm_i915_perf_record_type {
        DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
-/**
+/*
  * Structure to upload perf dynamic configuration into the kernel.
  */
 struct drm_i915_perf_oa_config {
@@ -2172,53 +2220,95 @@ struct drm_i915_perf_oa_config {
        __u64 flex_regs_ptr;
 };
 
+/**
+ * struct drm_i915_query_item - An individual query for the kernel to process.
+ *
+ * The behaviour is determined by the @query_id. Note that what is written at
+ * @data_ptr also depends on the specific @query_id.
+ */
 struct drm_i915_query_item {
+       /** @query_id: The id for this query */
        __u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO    1
 #define DRM_I915_QUERY_ENGINE_INFO     2
 #define DRM_I915_QUERY_PERF_CONFIG      3
+#define DRM_I915_QUERY_MEMORY_REGIONS   4
 /* Must be kept compact -- no holes and well documented */
 
-       /*
+       /**
+        * @length:
+        *
         * When set to zero by userspace, this is filled with the size of the
-        * data to be written at the data_ptr pointer. The kernel sets this
+        * data to be written at the @data_ptr pointer. The kernel sets this
         * value to a negative value to signal an error on a particular query
         * item.
         */
        __s32 length;
 
-       /*
+       /**
+        * @flags:
+        *
         * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
         *
         * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
-        * following :
-        *         - DRM_I915_QUERY_PERF_CONFIG_LIST
-        *         - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
-        *         - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
+        * following:
+        *
+        *      - DRM_I915_QUERY_PERF_CONFIG_LIST
+        *      - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+        *      - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
         */
        __u32 flags;
 #define DRM_I915_QUERY_PERF_CONFIG_LIST          1
 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID   3
 
-       /*
-        * Data will be written at the location pointed by data_ptr when the
-        * value of length matches the length of the data to be written by the
+       /**
+        * @data_ptr:
+        *
+        * Data will be written at the location pointed by @data_ptr when the
+        * value of @length matches the length of the data to be written by the
         * kernel.
         */
        __u64 data_ptr;
 };
 
+/**
+ * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the
+ * kernel to fill out.
+ *
+ * Note that this is generally a two-step process for each struct
+ * drm_i915_query_item in the array:
+ *
+ * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct
+ *    drm_i915_query_item, with &drm_i915_query_item.length set to zero. The
+ *    kernel will then fill in the size, in bytes, which tells userspace how
+ *    much memory it needs to allocate for the blob (say for an array of
+ *    properties).
+ *
+ * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the
+ *    &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that
+ *    the &drm_i915_query_item.length should still be the same as what the
+ *    kernel previously set. At this point the kernel can fill in the blob.
+ *
+ * Note that for some query items it can make sense for userspace to just pass
+ * in a buffer/blob equal to or larger than the required size. In this case only
+ * a single ioctl call is needed; see the sketch following this struct. For some
+ * smaller query items this can work quite well.
+ */
 struct drm_i915_query {
+       /** @num_items: The number of elements in the @items_ptr array */
        __u32 num_items;
 
-       /*
-        * Unused for now. Must be cleared to zero.
+       /**
+        * @flags: Unused for now. Must be cleared to zero.
         */
        __u32 flags;
 
-       /*
-        * This points to an array of num_items drm_i915_query_item structures.
+       /**
+        * @items_ptr:
+        *
+        * Pointer to an array of struct drm_i915_query_item. The number of
+        * array elements is @num_items.
         */
        __u64 items_ptr;
 };
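
A sketch of the single-call pattern mentioned above, assuming the caller knows (or over-estimates) an upper bound for the blob size; the 4096-byte bound here is an arbitrary assumption:

.. code-block:: C

	char blob[4096]; /* assumed large enough for this query item */
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
		.length = sizeof(blob), /* non-zero: kernel fills it directly */
		.data_ptr = (uintptr_t)blob,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};

	int err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
	/* On success, check item.length: a negative value signals a
	 * per-item error. */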
@@ -2292,21 +2382,21 @@ struct drm_i915_query_topology_info {
 * Describes one engine and its capabilities as known to the driver.
  */
 struct drm_i915_engine_info {
-       /** Engine class and instance. */
+       /** @engine: Engine class and instance. */
        struct i915_engine_class_instance engine;
 
-       /** Reserved field. */
+       /** @rsvd0: Reserved field. */
        __u32 rsvd0;
 
-       /** Engine flags. */
+       /** @flags: Engine flags. */
        __u64 flags;
 
-       /** Capabilities of this engine. */
+       /** @capabilities: Capabilities of this engine. */
        __u64 capabilities;
 #define I915_VIDEO_CLASS_CAPABILITY_HEVC               (1 << 0)
 #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC    (1 << 1)
 
-       /** Reserved fields. */
+       /** @rsvd1: Reserved fields. */
        __u64 rsvd1[4];
 };
 
@@ -2317,13 +2407,13 @@ struct drm_i915_engine_info {
  * an array of struct drm_i915_engine_info structures.
  */
 struct drm_i915_query_engine_info {
-       /** Number of struct drm_i915_engine_info structs following. */
+       /** @num_engines: Number of struct drm_i915_engine_info structs following. */
        __u32 num_engines;
 
-       /** MBZ */
+       /** @rsvd: MBZ */
        __u32 rsvd[3];
 
-       /** Marker for drm_i915_engine_info structures. */
+       /** @engines: Marker for drm_i915_engine_info structures. */
        struct drm_i915_engine_info engines[];
 };
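
Putting the two-step query flow together for engines, as a sketch (error handling elided):

.. code-block:: C

	struct drm_i915_query_engine_info *engines;
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_ENGINE_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	int i;

	/* First call sizes the blob via item.length. */
	ioctl(fd, DRM_IOCTL_I915_QUERY, &query);

	engines = calloc(1, item.length);
	item.data_ptr = (uintptr_t)engines;

	/* Second call fills in num_engines and the engines[] array. */
	ioctl(fd, DRM_IOCTL_I915_QUERY, &query);

	for (i = 0; i < engines->num_engines; i++) {
		struct i915_engine_class_instance ci = engines->engines[i].engine;
		/* ci.engine_class and ci.engine_instance identify the engine. */
	}

	free(engines);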
 
@@ -2377,6 +2467,241 @@ struct drm_i915_query_perf_config {
        __u8 data[];
 };
 
+/**
+ * enum drm_i915_gem_memory_class - Supported memory classes
+ */
+enum drm_i915_gem_memory_class {
+       /** @I915_MEMORY_CLASS_SYSTEM: System memory */
+       I915_MEMORY_CLASS_SYSTEM = 0,
+       /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */
+       I915_MEMORY_CLASS_DEVICE,
+};
+
+/**
+ * struct drm_i915_gem_memory_class_instance - Identify particular memory region
+ */
+struct drm_i915_gem_memory_class_instance {
+       /** @memory_class: See enum drm_i915_gem_memory_class */
+       __u16 memory_class;
+
+       /** @memory_instance: Which instance */
+       __u16 memory_instance;
+};
+
+/**
+ * struct drm_i915_memory_region_info - Describes one region as known to the
+ * driver.
+ *
+ * Note that we reserve some stuff here for potential future work. As an
+ * example we might want to expose the capabilities for a given region, which
+ * could include things like whether the region is CPU mappable/accessible and
+ * what the supported mapping types are.
+ *
+ * Note that to extend struct drm_i915_memory_region_info and struct
+ * drm_i915_query_memory_regions in the future the plan is to do the following:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_i915_memory_region_info {
+ *             struct drm_i915_gem_memory_class_instance region;
+ *             union {
+ *                     __u32 rsvd0;
+ *                     __u32 new_thing1;
+ *             };
+ *             ...
+ *             union {
+ *                     __u64 rsvd1[8];
+ *                     struct {
+ *                             __u64 new_thing2;
+ *                             __u64 new_thing3;
+ *                             ...
+ *                     };
+ *             };
+ *     };
+ *
+ * With this things should remain source compatible between versions for
+ * userspace, even as we add new fields.
+ *
+ * Note this is using both struct drm_i915_query_item and struct drm_i915_query.
+ * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
+ * at &drm_i915_query_item.query_id.
+ */
+struct drm_i915_memory_region_info {
+       /** @region: The class:instance pair encoding */
+       struct drm_i915_gem_memory_class_instance region;
+
+       /** @rsvd0: MBZ */
+       __u32 rsvd0;
+
+       /** @probed_size: Memory probed by the driver (-1 = unknown) */
+       __u64 probed_size;
+
+       /** @unallocated_size: Estimate of memory remaining (-1 = unknown) */
+       __u64 unallocated_size;
+
+       /** @rsvd1: MBZ */
+       __u64 rsvd1[8];
+};
+
+/**
+ * struct drm_i915_query_memory_regions - Region info query response
+ *
+ * The region info query enumerates all regions known to the driver by filling
+ * in an array of struct drm_i915_memory_region_info structures.
+ *
+ * Example for getting the list of supported regions:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_i915_query_memory_regions *info;
+ *     struct drm_i915_query_item item = {
+ *             .query_id = DRM_I915_QUERY_MEMORY_REGIONS,
+ *     };
+ *     struct drm_i915_query query = {
+ *             .num_items = 1,
+ *             .items_ptr = (uintptr_t)&item,
+ *     };
+ *     int err, i;
+ *
+ *     // First query the size of the blob we need, this needs to be large
+ *     // enough to hold our array of regions. The kernel will fill out the
+ *     // item.length for us, which is the number of bytes we need.
+ *     err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ *     if (err) ...
+ *
+ *     info = calloc(1, item.length);
+ *     // Now that we allocated the required number of bytes, we call the ioctl
+ *     // again, this time with the data_ptr pointing to our newly allocated
+ *     // blob, which the kernel can then populate with all the region info.
+ *     item.data_ptr = (uintptr_t)info;
+ *
+ *     err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ *     if (err) ...
+ *
+ *     // We can now access each region in the array
+ *     for (i = 0; i < info->num_regions; i++) {
+ *             struct drm_i915_memory_region_info mr = info->regions[i];
+ *             u16 class = mr.region.memory_class;
+ *             u16 instance = mr.region.memory_instance;
+ *
+ *             ....
+ *     }
+ *
+ *     free(info);
+ */
+struct drm_i915_query_memory_regions {
+       /** @num_regions: Number of supported regions */
+       __u32 num_regions;
+
+       /** @rsvd: MBZ */
+       __u32 rsvd[3];
+
+       /** @regions: Info about each supported region */
+       struct drm_i915_memory_region_info regions[];
+};
+
+/**
+ * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
+ * extension support using struct i915_user_extension.
+ *
+ * Note that in the future we want to have our buffer flags here, at least for
+ * the stuff that is immutable. Previously we would have two ioctls, one to
+ * create the object with gem_create, and another to apply various parameters.
+ * However, this creates some ambiguity for the params which are considered
+ * immutable. Also, in general we're phasing out the various SET/GET ioctls.
+ */
+struct drm_i915_gem_create_ext {
+       /**
+        * @size: Requested size for the object.
+        *
+        * The (page-aligned) allocated size for the object will be returned.
+        *
+        * Note that for some devices we might have further minimum page-size
+        * restrictions (larger than 4K), like for device local-memory. However,
+        * in general the final size here should always reflect any
+        * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
+        * extension to place the object in device local-memory.
+        */
+       __u64 size;
+       /**
+        * @handle: Returned handle for the object.
+        *
+        * Object handles are nonzero.
+        */
+       __u32 handle;
+       /** @flags: MBZ */
+       __u32 flags;
+       /**
+        * @extensions: The chain of extensions to apply to this object.
+        *
+        * This will be useful in the future when we need to support several
+        * different extensions, and we need to apply more than one when
+        * creating the object. See struct i915_user_extension.
+        *
+        * If we don't supply any extensions then we get the same old gem_create
+        * behaviour.
+        *
+        * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
+        * struct drm_i915_gem_create_ext_memory_regions.
+        */
+#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
+       __u64 extensions;
+};
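
As a sketch, the no-extension path mentioned above reduces to plain object creation:

.. code-block:: C

	struct drm_i915_gem_create_ext create_ext = {
		.size = 4096,
		/* .extensions left zero: same old gem_create behaviour */
	};

	int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
	if (err) ...

	/* create_ext.handle now holds the (nonzero) object handle, and
	 * create_ext.size the final page-aligned size. */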
+
+/**
+ * struct drm_i915_gem_create_ext_memory_regions - The
+ * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension.
+ *
+ * Set the object with the desired set of placements/regions in priority
+ * order. Each entry must be unique and supported by the device.
+ *
+ * This is provided as an array of struct drm_i915_gem_memory_class_instance, or
+ * an equivalent layout of class:instance pair encodings. See struct
+ * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to
+ * query the supported regions for a device.
+ *
+ * As an example, on discrete devices, if we wish to set the placement as
+ * device local-memory we can do something like:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_i915_gem_memory_class_instance region_lmem = {
+ *             .memory_class = I915_MEMORY_CLASS_DEVICE,
+ *             .memory_instance = 0,
+ *     };
+ *     struct drm_i915_gem_create_ext_memory_regions regions = {
+ *             .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
+ *             .regions = (uintptr_t)&region_lmem,
+ *             .num_regions = 1,
+ *     };
+ *     struct drm_i915_gem_create_ext create_ext = {
+ *             .size = 16 * PAGE_SIZE,
+ *             .extensions = (uintptr_t)&regions,
+ *     };
+ *
+ *     int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ *     if (err) ...
+ *
+ * At which point we get the object handle in &drm_i915_gem_create_ext.handle,
+ * along with the final object size in &drm_i915_gem_create_ext.size, which
+ * should account for any rounding up, if required.
+ */
+struct drm_i915_gem_create_ext_memory_regions {
+       /** @base: Extension link. See struct i915_user_extension. */
+       struct i915_user_extension base;
+
+       /** @pad: MBZ */
+       __u32 pad;
+       /** @num_regions: Number of elements in the @regions array. */
+       __u32 num_regions;
+       /**
+        * @regions: The regions/placements array.
+        *
+        * An array of struct drm_i915_gem_memory_class_instance.
+        */
+       __u64 regions;
+};
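
Building on the example above, a sketch of a priority-ordered placement list, preferring device local-memory but allowing a fallback to system memory (instance numbers assumed to be 0 here):

.. code-block:: C

	struct drm_i915_gem_memory_class_instance placements[] = {
		{ .memory_class = I915_MEMORY_CLASS_DEVICE, .memory_instance = 0 },
		{ .memory_class = I915_MEMORY_CLASS_SYSTEM, .memory_instance = 0 },
	};
	struct drm_i915_gem_create_ext_memory_regions regions = {
		.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
		.num_regions = 2, /* each entry must be unique */
		.regions = (uintptr_t)placements,
	};
	struct drm_i915_gem_create_ext create_ext = {
		.size = 16 * PAGE_SIZE,
		.extensions = (uintptr_t)&regions,
	};

	int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
	if (err) ...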
+
 #if defined(__cplusplus)
 }
 #endif