xe_gt_pagefault.o \
xe_gt_sysfs.o \
xe_gt_throttle.o \
- xe_gt_tlb_inval.o \
- xe_gt_tlb_inval_job.o \
xe_gt_topology.o \
xe_guc.o \
xe_guc_ads.o \
xe_sync.o \
xe_tile.o \
xe_tile_sysfs.o \
+ xe_tlb_inval.o \
+ xe_tlb_inval_job.o \
xe_trace.o \
xe_trace_bo.o \
xe_trace_guc.o \
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
-#include "xe_gt_tlb_inval.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_tile_sriov_vf.h"
+#include "xe_tlb_inval.h"
#include "xe_wa.h"
#include "xe_wopcm.h"
if (!gt)
return;
- err = xe_gt_tlb_inval_ggtt(gt);
+ err = xe_tlb_inval_ggtt(&gt->tlb_inval);
xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err));
}
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_vf.h"
#include "xe_gt_sysfs.h"
-#include "xe_gt_tlb_inval.h"
#include "xe_gt_topology.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_pc.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
+#include "xe_tlb_inval.h"
#include "xe_tuning.h"
#include "xe_uc.h"
#include "xe_uc_fw.h"
xe_uc_stop(&gt->uc);
- xe_gt_tlb_inval_reset(gt);
+ xe_tlb_inval_reset(&gt->tlb_inval);
err = do_gt_reset(gt);
if (err)
xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode);
xe_uc_declare_wedged(&gt->uc);
- xe_gt_tlb_inval_reset(gt);
+ xe_tlb_inval_reset(&gt->tlb_inval);
}
+++ /dev/null
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#include <drm/drm_managed.h>
-
-#include "abi/guc_actions_abi.h"
-#include "xe_device.h"
-#include "xe_force_wake.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_guc.h"
-#include "xe_guc_ct.h"
-#include "xe_gt_stats.h"
-#include "xe_gt_tlb_inval.h"
-#include "xe_mmio.h"
-#include "xe_pm.h"
-#include "xe_sriov.h"
-#include "xe_trace.h"
-#include "regs/xe_guc_regs.h"
-
-#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
-
-/*
- * TLB inval depends on pending commands in the CT queue and then the real
- * invalidation time. Double up the time to process full CT queue
- * just to be on the safe side.
- */
-static long tlb_timeout_jiffies(struct xe_gt *gt)
-{
- /* this reflects what HW/GuC needs to process TLB inv request */
- const long hw_tlb_timeout = HZ / 4;
-
- /* this estimates actual delay caused by the CTB transport */
- long delay = xe_guc_ct_queue_proc_time_jiffies(&gt->uc.guc.ct);
-
- return hw_tlb_timeout + 2 * delay;
-}
-
-static void xe_gt_tlb_inval_fence_fini(struct xe_gt_tlb_inval_fence *fence)
-{
- if (WARN_ON_ONCE(!fence->gt))
- return;
-
- xe_pm_runtime_put(gt_to_xe(fence->gt));
- fence->gt = NULL; /* fini() should be called once */
-}
-
-static void
-__inval_fence_signal(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence)
-{
- bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
-
- trace_xe_gt_tlb_inval_fence_signal(xe, fence);
- xe_gt_tlb_inval_fence_fini(fence);
- dma_fence_signal(&fence->base);
- if (!stack)
- dma_fence_put(&fence->base);
-}
-
-static void
-inval_fence_signal(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence)
-{
- list_del(&fence->link);
- __inval_fence_signal(xe, fence);
-}
-
-void xe_gt_tlb_inval_fence_signal(struct xe_gt_tlb_inval_fence *fence)
-{
- if (WARN_ON_ONCE(!fence->gt))
- return;
-
- __inval_fence_signal(gt_to_xe(fence->gt), fence);
-}
-
-static void xe_gt_tlb_fence_timeout(struct work_struct *work)
-{
- struct xe_gt *gt = container_of(work, struct xe_gt,
- tlb_inval.fence_tdr.work);
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_gt_tlb_inval_fence *fence, *next;
-
- LNL_FLUSH_WORK(&gt->uc.guc.ct.g2h_worker);
-
- spin_lock_irq(&gt->tlb_inval.pending_lock);
- list_for_each_entry_safe(fence, next,
- &gt->tlb_inval.pending_fences, link) {
- s64 since_inval_ms = ktime_ms_delta(ktime_get(),
- fence->inval_time);
-
- if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt))
- break;
-
- trace_xe_gt_tlb_inval_fence_timeout(xe, fence);
- xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
- fence->seqno, gt->tlb_inval.seqno_recv);
-
- fence->base.error = -ETIME;
- inval_fence_signal(xe, fence);
- }
- if (!list_empty(&gt->tlb_inval.pending_fences))
- queue_delayed_work(system_wq,
- &gt->tlb_inval.fence_tdr,
- tlb_timeout_jiffies(gt));
- spin_unlock_irq(&gt->tlb_inval.pending_lock);
-}
-
-/**
- * xe_gt_tlb_inval_init_early - Initialize GT TLB invalidation state
- * @gt: GT structure
- *
- * Initialize GT TLB invalidation state, purely software initialization, should
- * be called once during driver load.
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- int err;
-
- gt->tlb_inval.seqno = 1;
- INIT_LIST_HEAD(&gt->tlb_inval.pending_fences);
- spin_lock_init(&gt->tlb_inval.pending_lock);
- spin_lock_init(&gt->tlb_inval.lock);
- INIT_DELAYED_WORK(&gt->tlb_inval.fence_tdr,
- xe_gt_tlb_fence_timeout);
-
- err = drmm_mutex_init(&xe->drm, &gt->tlb_inval.seqno_lock);
- if (err)
- return err;
-
- gt->tlb_inval.job_wq =
- drmm_alloc_ordered_workqueue(&gt_to_xe(gt)->drm, "gt-tbl-inval-job-wq",
- WQ_MEM_RECLAIM);
- if (IS_ERR(gt->tlb_inval.job_wq))
- return PTR_ERR(gt->tlb_inval.job_wq);
-
- return 0;
-}
-
-/**
- * xe_gt_tlb_inval_reset - Initialize GT TLB invalidation reset
- * @gt: GT structure
- *
- * Signal any pending invalidation fences, should be called during a GT reset
- */
-void xe_gt_tlb_inval_reset(struct xe_gt *gt)
-{
- struct xe_gt_tlb_inval_fence *fence, *next;
- int pending_seqno;
-
- /*
- * we can get here before the CTs are even initialized if we're wedging
- * very early, in which case there are not going to be any pending
- * fences so we can bail immediately.
- */
- if (!xe_guc_ct_initialized(&gt->uc.guc.ct))
- return;
-
- /*
- * CT channel is already disabled at this point. No new TLB requests can
- * appear.
- */
-
- mutex_lock(&gt->tlb_inval.seqno_lock);
- spin_lock_irq(&gt->tlb_inval.pending_lock);
- cancel_delayed_work(&gt->tlb_inval.fence_tdr);
- /*
- * We might have various kworkers waiting for TLB flushes to complete
- * which are not tracked with an explicit TLB fence, however at this
- * stage that will never happen since the CT is already disabled, so
- * make sure we signal them here under the assumption that we have
- * completed a full GT reset.
- */
- if (gt->tlb_inval.seqno == 1)
- pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
- else
- pending_seqno = gt->tlb_inval.seqno - 1;
- WRITE_ONCE(gt->tlb_inval.seqno_recv, pending_seqno);
-
- list_for_each_entry_safe(fence, next,
- &gt->tlb_inval.pending_fences, link)
- inval_fence_signal(gt_to_xe(gt), fence);
- spin_unlock_irq(&gt->tlb_inval.pending_lock);
- mutex_unlock(&gt->tlb_inval.seqno_lock);
-}
-
-/**
- *
- * xe_gt_tlb_inval_fini - Clean up GT TLB invalidation state
- *
- * Cancel pending fence workers and clean up any additional
- * GT TLB invalidation state.
- */
-void xe_gt_tlb_inval_fini(struct xe_gt *gt)
-{
- xe_gt_tlb_inval_reset(gt);
-}
-
-static bool tlb_inval_seqno_past(struct xe_gt *gt, int seqno)
-{
- int seqno_recv = READ_ONCE(gt->tlb_inval.seqno_recv);
-
- if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
- return false;
-
- if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
- return true;
-
- return seqno_recv >= seqno;
-}
-
-static int send_tlb_inval(struct xe_guc *guc,
- struct xe_gt_tlb_inval_fence *fence,
- u32 *action, int len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = gt_to_xe(gt);
- int seqno;
- int ret;
-
- xe_gt_assert(gt, fence);
-
- /*
- * XXX: The seqno algorithm relies on TLB invalidation being processed
- * in order which they currently are, if that changes the algorithm will
- * need to be updated.
- */
-
- mutex_lock(&gt->tlb_inval.seqno_lock);
- seqno = gt->tlb_inval.seqno;
- fence->seqno = seqno;
- trace_xe_gt_tlb_inval_fence_send(xe, fence);
- action[1] = seqno;
- ret = xe_guc_ct_send(&guc->ct, action, len,
- G2H_LEN_DW_TLB_INVALIDATE, 1);
- if (!ret) {
- spin_lock_irq(&gt->tlb_inval.pending_lock);
- /*
- * We haven't actually published the TLB fence as per
- * pending_fences, but in theory our seqno could have already
- * been written as we acquired the pending_lock. In such a case
- * we can just go ahead and signal the fence here.
- */
- if (tlb_inval_seqno_past(gt, seqno)) {
- __inval_fence_signal(xe, fence);
- } else {
- fence->inval_time = ktime_get();
- list_add_tail(&fence->link,
- &gt->tlb_inval.pending_fences);
-
- if (list_is_singular(&gt->tlb_inval.pending_fences))
- queue_delayed_work(system_wq,
- &gt->tlb_inval.fence_tdr,
- tlb_timeout_jiffies(gt));
- }
- spin_unlock_irq(&gt->tlb_inval.pending_lock);
- } else {
- __inval_fence_signal(xe, fence);
- }
- if (!ret) {
- gt->tlb_inval.seqno = (gt->tlb_inval.seqno + 1) %
- TLB_INVALIDATION_SEQNO_MAX;
- if (!gt->tlb_inval.seqno)
- gt->tlb_inval.seqno = 1;
- }
- mutex_unlock(&gt->tlb_inval.seqno_lock);
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
-
- return ret;
-}
-
-#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
- XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
- XE_GUC_TLB_INVAL_FLUSH_CACHE)
-
-/**
- * xe_gt_tlb_inval_guc - Issue a TLB invalidation on this GT for the GuC
- * @gt: GT structure
- * @fence: invalidation fence which will be signal on TLB invalidation
- * completion
- *
- * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use the invalidation fence to wait for completion.
- *
- * Return: 0 on success, negative error code on error
- */
-static int xe_gt_tlb_inval_guc(struct xe_gt *gt,
- struct xe_gt_tlb_inval_fence *fence)
-{
- u32 action[] = {
- XE_GUC_ACTION_TLB_INVALIDATION,
- 0, /* seqno, replaced in send_tlb_inval */
- MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
- };
- int ret;
-
- ret = send_tlb_inval(&gt->uc.guc, fence, action, ARRAY_SIZE(action));
- /*
- * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
- * should be nuked on a GT reset so this error can be ignored.
- */
- if (ret == -ECANCELED)
- return 0;
-
- return ret;
-}
-
-/**
- * xe_gt_tlb_inval_ggtt - Issue a TLB invalidation on this GT for the GGTT
- * @gt: GT structure
- *
- * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
- * synchronous.
- *
- * Return: 0 on success, negative error code on error
- */
-int xe_gt_tlb_inval_ggtt(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
-
- if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
- gt->uc.guc.submission_state.enabled) {
- struct xe_gt_tlb_inval_fence fence;
- int ret;
-
- xe_gt_tlb_inval_fence_init(gt, &fence, true);
- ret = xe_gt_tlb_inval_guc(gt, &fence);
- if (ret)
- return ret;
-
- xe_gt_tlb_inval_fence_wait(&fence);
- } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
- struct xe_mmio *mmio = &gt->mmio;
-
- if (IS_SRIOV_VF(xe))
- return 0;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
- xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
- PVC_GUC_TLB_INV_DESC1_INVALIDATE);
- xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
- PVC_GUC_TLB_INV_DESC0_VALID);
- } else {
- xe_mmio_write32(mmio, GUC_TLB_INV_CR,
- GUC_TLB_INV_CR_INVALIDATE);
- }
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- }
-
- return 0;
-}
-
-static int send_tlb_inval_all(struct xe_gt *gt,
- struct xe_gt_tlb_inval_fence *fence)
-{
- u32 action[] = {
- XE_GUC_ACTION_TLB_INVALIDATION_ALL,
- 0, /* seqno, replaced in send_tlb_inval */
- MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
- };
-
- return send_tlb_inval(&gt->uc.guc, fence, action, ARRAY_SIZE(action));
-}
-
-/**
- * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs.
- * @gt: the &xe_gt structure
- * @fence: the &xe_gt_tlb_inval_fence to be signaled on completion
- *
- * Send a request to invalidate all TLBs across PF and all VFs.
- *
- * Return: 0 on success, negative error code on error
- */
-int xe_gt_tlb_inval_all(struct xe_gt *gt, struct xe_gt_tlb_inval_fence *fence)
-{
- int err;
-
- xe_gt_assert(gt, gt == fence->gt);
-
- err = send_tlb_inval_all(gt, fence);
- if (err)
- xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err));
-
- return err;
-}
-
-/*
- * Ensure that roundup_pow_of_two(length) doesn't overflow.
- * Note that roundup_pow_of_two() operates on unsigned long,
- * not on u64.
- */
-#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
-
-/**
- * xe_gt_tlb_inval_range - Issue a TLB invalidation on this GT for an address range
- *
- * @gt: GT structure
- * @fence: invalidation fence which will be signal on TLB invalidation
- * completion
- * @start: start address
- * @end: end address
- * @asid: address space id
- *
- * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can use
- * the invalidation fence to wait for completion.
- *
- * Return: Negative error code on error, 0 on success
- */
-int xe_gt_tlb_inval_range(struct xe_gt *gt, struct xe_gt_tlb_inval_fence *fence,
- u64 start, u64 end, u32 asid)
-{
- struct xe_device *xe = gt_to_xe(gt);
-#define MAX_TLB_INVALIDATION_LEN 7
- u32 action[MAX_TLB_INVALIDATION_LEN];
- u64 length = end - start;
- int len = 0;
-
- xe_gt_assert(gt, fence);
-
- /* Execlists not supported */
- if (gt_to_xe(gt)->info.force_execlist) {
- __inval_fence_signal(xe, fence);
- return 0;
- }
-
- action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
- action[len++] = 0; /* seqno, replaced in send_tlb_inval */
- if (!xe->info.has_range_tlb_inval ||
- length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
- action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
- } else {
- u64 orig_start = start;
- u64 align;
-
- if (length < SZ_4K)
- length = SZ_4K;
-
- /*
- * We need to invalidate a higher granularity if start address
- * is not aligned to length. When start is not aligned with
- * length we need to find the length large enough to create an
- * address mask covering the required range.
- */
- align = roundup_pow_of_two(length);
- start = ALIGN_DOWN(start, align);
- end = ALIGN(end, align);
- length = align;
- while (start + length < end) {
- length <<= 1;
- start = ALIGN_DOWN(orig_start, length);
- }
-
- /*
- * Minimum invalidation size for a 2MB page that the hardware
- * expects is 16MB
- */
- if (length >= SZ_2M) {
- length = max_t(u64, SZ_16M, length);
- start = ALIGN_DOWN(orig_start, length);
- }
-
- xe_gt_assert(gt, length >= SZ_4K);
- xe_gt_assert(gt, is_power_of_2(length));
- xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
- ilog2(SZ_2M) + 1)));
- xe_gt_assert(gt, IS_ALIGNED(start, length));
-
- action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
- action[len++] = asid;
- action[len++] = lower_32_bits(start);
- action[len++] = upper_32_bits(start);
- action[len++] = ilog2(length) - ilog2(SZ_4K);
- }
-
- xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
-
- return send_tlb_inval(&gt->uc.guc, fence, action, len);
-}
-
-/**
- * xe_gt_tlb_inval_vm - Issue a TLB invalidation on this GT for a VM
- * @gt: graphics tile
- * @vm: VM to invalidate
- *
- * Invalidate entire VM's address space
- */
-void xe_gt_tlb_inval_vm(struct xe_gt *gt, struct xe_vm *vm)
-{
- struct xe_gt_tlb_inval_fence fence;
- u64 range = 1ull << vm->xe->info.va_bits;
- int ret;
-
- xe_gt_tlb_inval_fence_init(gt, &fence, true);
-
- ret = xe_gt_tlb_inval_range(gt, &fence, 0, range, vm->usm.asid);
- if (ret < 0)
- return;
-
- xe_gt_tlb_inval_fence_wait(&fence);
-}
-
-/**
- * xe_gt_tlb_inval_done_handler - GT TLB invalidation done handler
- * @gt: gt
- * @seqno: seqno of invalidation that is done
- *
- * Update recv seqno, signal any GT TLB invalidation fences, and restart TDR
- */
-static void xe_gt_tlb_inval_done_handler(struct xe_gt *gt, int seqno)
-{
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_gt_tlb_inval_fence *fence, *next;
- unsigned long flags;
-
- /*
- * This can also be run both directly from the IRQ handler and also in
- * process_g2h_msg(). Only one may process any individual CT message,
- * however the order they are processed here could result in skipping a
- * seqno. To handle that we just process all the seqnos from the last
- * seqno_recv up to and including the one in msg[0]. The delta should be
- * very small so there shouldn't be much of pending_fences we actually
- * need to iterate over here.
- *
- * From GuC POV we expect the seqnos to always appear in-order, so if we
- * see something later in the timeline we can be sure that anything
- * appearing earlier has already signalled, just that we have yet to
- * officially process the CT message like if racing against
- * process_g2h_msg().
- */
- spin_lock_irqsave(&gt->tlb_inval.pending_lock, flags);
- if (tlb_inval_seqno_past(gt, seqno)) {
- spin_unlock_irqrestore(&gt->tlb_inval.pending_lock, flags);
- return;
- }
-
- WRITE_ONCE(gt->tlb_inval.seqno_recv, seqno);
-
- list_for_each_entry_safe(fence, next,
- &gt->tlb_inval.pending_fences, link) {
- trace_xe_gt_tlb_inval_fence_recv(xe, fence);
-
- if (!tlb_inval_seqno_past(gt, fence->seqno))
- break;
-
- inval_fence_signal(xe, fence);
- }
-
- if (!list_empty(&gt->tlb_inval.pending_fences))
- mod_delayed_work(system_wq,
- &gt->tlb_inval.fence_tdr,
- tlb_timeout_jiffies(gt));
- else
- cancel_delayed_work(&gt->tlb_inval.fence_tdr);
-
- spin_unlock_irqrestore(&gt->tlb_inval.pending_lock, flags);
-}
-
-/**
- * xe_guc_tlb_inval_done_handler - TLB invalidation done handler
- * @guc: guc
- * @msg: message indicating TLB invalidation done
- * @len: length of message
- *
- * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
- * invalidation fences for seqno. Algorithm for this depends on seqno being
- * received in-order and asserts this assumption.
- *
- * Return: 0 on success, -EPROTO for malformed messages.
- */
-int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
-
- if (unlikely(len != 1))
- return -EPROTO;
-
- xe_gt_tlb_inval_done_handler(gt, msg[0]);
-
- return 0;
-}
-
-static const char *
-inval_fence_get_driver_name(struct dma_fence *dma_fence)
-{
- return "xe";
-}
-
-static const char *
-inval_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
- return "inval_fence";
-}
-
-static const struct dma_fence_ops inval_fence_ops = {
- .get_driver_name = inval_fence_get_driver_name,
- .get_timeline_name = inval_fence_get_timeline_name,
-};
-
-/**
- * xe_gt_tlb_inval_fence_init - Initialize TLB invalidation fence
- * @gt: GT
- * @fence: TLB invalidation fence to initialize
- * @stack: fence is stack variable
- *
- * Initialize TLB invalidation fence for use. xe_gt_tlb_inval_fence_fini
- * will be automatically called when fence is signalled (all fences must signal),
- * even on error.
- */
-void xe_gt_tlb_inval_fence_init(struct xe_gt *gt,
- struct xe_gt_tlb_inval_fence *fence,
- bool stack)
-{
- xe_pm_runtime_get_noresume(gt_to_xe(gt));
-
- spin_lock_irq(&gt->tlb_inval.lock);
- dma_fence_init(&fence->base, &inval_fence_ops,
- &gt->tlb_inval.lock,
- dma_fence_context_alloc(1), 1);
- spin_unlock_irq(&gt->tlb_inval.lock);
- INIT_LIST_HEAD(&fence->link);
- if (stack)
- set_bit(FENCE_STACK_BIT, &fence->base.flags);
- else
- dma_fence_get(&fence->base);
- fence->gt = gt;
-}
+++ /dev/null
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_GT_TLB_INVAL_H_
-#define _XE_GT_TLB_INVAL_H_
-
-#include <linux/types.h>
-
-#include "xe_gt_tlb_inval_types.h"
-
-struct xe_gt;
-struct xe_guc;
-struct xe_vm;
-struct xe_vma;
-
-int xe_gt_tlb_inval_init_early(struct xe_gt *gt);
-void xe_gt_tlb_inval_fini(struct xe_gt *gt);
-
-void xe_gt_tlb_inval_reset(struct xe_gt *gt);
-int xe_gt_tlb_inval_ggtt(struct xe_gt *gt);
-void xe_gt_tlb_inval_vm(struct xe_gt *gt, struct xe_vm *vm);
-int xe_gt_tlb_inval_all(struct xe_gt *gt, struct xe_gt_tlb_inval_fence *fence);
-int xe_gt_tlb_inval_range(struct xe_gt *gt,
- struct xe_gt_tlb_inval_fence *fence,
- u64 start, u64 end, u32 asid);
-int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
-
-void xe_gt_tlb_inval_fence_init(struct xe_gt *gt,
- struct xe_gt_tlb_inval_fence *fence,
- bool stack);
-void xe_gt_tlb_inval_fence_signal(struct xe_gt_tlb_inval_fence *fence);
-
-static inline void
-xe_gt_tlb_inval_fence_wait(struct xe_gt_tlb_inval_fence *fence)
-{
- dma_fence_wait(&fence->base, false);
-}
-
-#endif /* _XE_GT_TLB_INVAL_ */
+++ /dev/null
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2025 Intel Corporation
- */
-
-#include "xe_dep_job_types.h"
-#include "xe_dep_scheduler.h"
-#include "xe_exec_queue.h"
-#include "xe_gt.h"
-#include "xe_gt_tlb_inval.h"
-#include "xe_gt_tlb_inval_job.h"
-#include "xe_migrate.h"
-#include "xe_pm.h"
-
-/** struct xe_gt_tlb_inval_job - GT TLB invalidation job */
-struct xe_gt_tlb_inval_job {
- /** @dep: base generic dependency Xe job */
- struct xe_dep_job dep;
- /** @gt: GT to invalidate */
- struct xe_gt *gt;
- /** @q: exec queue issuing the invalidate */
- struct xe_exec_queue *q;
- /** @refcount: ref count of this job */
- struct kref refcount;
- /**
- * @fence: dma fence to indicate completion. 1 way relationship - job
- * can safely reference fence, fence cannot safely reference job.
- */
- struct dma_fence *fence;
- /** @start: Start address to invalidate */
- u64 start;
- /** @end: End address to invalidate */
- u64 end;
- /** @asid: Address space ID to invalidate */
- u32 asid;
- /** @fence_armed: Fence has been armed */
- bool fence_armed;
-};
-
-static struct dma_fence *xe_gt_tlb_inval_job_run(struct xe_dep_job *dep_job)
-{
- struct xe_gt_tlb_inval_job *job =
- container_of(dep_job, typeof(*job), dep);
- struct xe_gt_tlb_inval_fence *ifence =
- container_of(job->fence, typeof(*ifence), base);
-
- xe_gt_tlb_inval_range(job->gt, ifence, job->start,
- job->end, job->asid);
-
- return job->fence;
-}
-
-static void xe_gt_tlb_inval_job_free(struct xe_dep_job *dep_job)
-{
- struct xe_gt_tlb_inval_job *job =
- container_of(dep_job, typeof(*job), dep);
-
- /* Pairs with get in xe_gt_tlb_inval_job_push */
- xe_gt_tlb_inval_job_put(job);
-}
-
-static const struct xe_dep_job_ops dep_job_ops = {
- .run_job = xe_gt_tlb_inval_job_run,
- .free_job = xe_gt_tlb_inval_job_free,
-};
-
-static int xe_gt_tlb_inval_context(struct xe_gt *gt)
-{
- return xe_gt_is_media_type(gt) ? XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT :
- XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT;
-}
-
-/**
- * xe_gt_tlb_inval_job_create() - GT TLB invalidation job create
- * @gt: GT to invalidate
- * @q: exec queue issuing the invalidate
- * @start: Start address to invalidate
- * @end: End address to invalidate
- * @asid: Address space ID to invalidate
- *
- * Create a GT TLB invalidation job and initialize internal fields. The caller is
- * responsible for releasing the creation reference.
- *
- * Return: GT TLB invalidation job object on success, ERR_PTR failure
- */
-struct xe_gt_tlb_inval_job *xe_gt_tlb_inval_job_create(struct xe_exec_queue *q,
- struct xe_gt *gt,
- u64 start, u64 end,
- u32 asid)
-{
- struct xe_gt_tlb_inval_job *job;
- struct xe_dep_scheduler *dep_scheduler =
- q->tlb_inval[xe_gt_tlb_inval_context(gt)].dep_scheduler;
- struct drm_sched_entity *entity =
- xe_dep_scheduler_entity(dep_scheduler);
- struct xe_gt_tlb_inval_fence *ifence;
- int err;
-
- job = kmalloc(sizeof(*job), GFP_KERNEL);
- if (!job)
- return ERR_PTR(-ENOMEM);
-
- job->q = q;
- job->gt = gt;
- job->start = start;
- job->end = end;
- job->asid = asid;
- job->fence_armed = false;
- job->dep.ops = &dep_job_ops;
- kref_init(&job->refcount);
- xe_exec_queue_get(q); /* Pairs with put in xe_gt_tlb_inval_job_destroy */
-
- ifence = kmalloc(sizeof(*ifence), GFP_KERNEL);
- if (!ifence) {
- err = -ENOMEM;
- goto err_job;
- }
- job->fence = &ifence->base;
-
- err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL,
- q->xef ? q->xef->drm->client_id : 0);
- if (err)
- goto err_fence;
-
- /* Pairs with put in xe_gt_tlb_inval_job_destroy */
- xe_pm_runtime_get_noresume(gt_to_xe(job->gt));
-
- return job;
-
-err_fence:
- kfree(ifence);
-err_job:
- xe_exec_queue_put(q);
- kfree(job);
-
- return ERR_PTR(err);
-}
-
-static void xe_gt_tlb_inval_job_destroy(struct kref *ref)
-{
- struct xe_gt_tlb_inval_job *job = container_of(ref, typeof(*job),
- refcount);
- struct xe_gt_tlb_inval_fence *ifence =
- container_of(job->fence, typeof(*ifence), base);
- struct xe_device *xe = gt_to_xe(job->gt);
- struct xe_exec_queue *q = job->q;
-
- if (!job->fence_armed)
- kfree(ifence);
- else
- /* Ref from xe_gt_tlb_inval_fence_init */
- dma_fence_put(job->fence);
-
- drm_sched_job_cleanup(&job->dep.drm);
- kfree(job);
- xe_exec_queue_put(q); /* Pairs with get from xe_gt_tlb_inval_job_create */
- xe_pm_runtime_put(xe); /* Pairs with get from xe_gt_tlb_inval_job_create */
-}
-
-/**
- * xe_gt_tlb_inval_alloc_dep() - GT TLB invalidation job alloc dependency
- * @job: GT TLB invalidation job to alloc dependency for
- *
- * Allocate storage for a dependency in the GT TLB invalidation fence. This
- * function should be called at most once per job and must be paired with
- * xe_gt_tlb_inval_job_push being called with a real fence.
- *
- * Return: 0 on success, -errno on failure
- */
-int xe_gt_tlb_inval_job_alloc_dep(struct xe_gt_tlb_inval_job *job)
-{
- xe_assert(gt_to_xe(job->gt), !xa_load(&job->dep.drm.dependencies, 0));
- might_alloc(GFP_KERNEL);
-
- return drm_sched_job_add_dependency(&job->dep.drm,
- dma_fence_get_stub());
-}
-
-/**
- * xe_gt_tlb_inval_job_push() - GT TLB invalidation job push
- * @job: GT TLB invalidation job to push
- * @m: The migration object being used
- * @fence: Dependency for GT TLB invalidation job
- *
- * Pushes a GT TLB invalidation job for execution, using @fence as a dependency.
- * Storage for @fence must be preallocated with xe_gt_tlb_inval_job_alloc_dep
- * prior to this call if @fence is not signaled. Takes a reference to the job’s
- * finished fence, which the caller is responsible for releasing, and return it
- * to the caller. This function is safe to be called in the path of reclaim.
- *
- * Return: Job's finished fence on success, cannot fail
- */
-struct dma_fence *xe_gt_tlb_inval_job_push(struct xe_gt_tlb_inval_job *job,
- struct xe_migrate *m,
- struct dma_fence *fence)
-{
- struct xe_gt_tlb_inval_fence *ifence =
- container_of(job->fence, typeof(*ifence), base);
-
- if (!dma_fence_is_signaled(fence)) {
- void *ptr;
-
- /*
- * Can be in path of reclaim, hence the preallocation of fence
- * storage in xe_gt_tlb_inval_job_alloc_dep. Verify caller did
- * this correctly.
- */
- xe_assert(gt_to_xe(job->gt),
- xa_load(&job->dep.drm.dependencies, 0) ==
- dma_fence_get_stub());
-
- dma_fence_get(fence); /* ref released once dependency processed by scheduler */
- ptr = xa_store(&job->dep.drm.dependencies, 0, fence,
- GFP_ATOMIC);
- xe_assert(gt_to_xe(job->gt), !xa_is_err(ptr));
- }
-
- xe_gt_tlb_inval_job_get(job); /* Pairs with put in free_job */
- job->fence_armed = true;
-
- /*
- * We need the migration lock to protect the job's seqno and the spsc
- * queue, only taken on migration queue, user queues protected dma-resv
- * VM lock.
- */
- xe_migrate_job_lock(m, job->q);
-
- /* Creation ref pairs with put in xe_gt_tlb_inval_job_destroy */
- xe_gt_tlb_inval_fence_init(job->gt, ifence, false);
- dma_fence_get(job->fence); /* Pairs with put in DRM scheduler */
-
- drm_sched_job_arm(&job->dep.drm);
- /*
- * caller ref, get must be done before job push as it could immediately
- * signal and free.
- */
- dma_fence_get(&job->dep.drm.s_fence->finished);
- drm_sched_entity_push_job(&job->dep.drm);
-
- xe_migrate_job_unlock(m, job->q);
-
- /*
- * Not using job->fence, as it has its own dma-fence context, which does
- * not allow GT TLB invalidation fences on the same queue, GT tuple to
- * be squashed in dma-resv/DRM scheduler. Instead, we use the DRM scheduler
- * context and job's finished fence, which enables squashing.
- */
- return &job->dep.drm.s_fence->finished;
-}
-
-/**
- * xe_gt_tlb_inval_job_get() - Get a reference to GT TLB invalidation job
- * @job: GT TLB invalidation job object
- *
- * Increment the GT TLB invalidation job's reference count
- */
-void xe_gt_tlb_inval_job_get(struct xe_gt_tlb_inval_job *job)
-{
- kref_get(&job->refcount);
-}
-
-/**
- * xe_gt_tlb_inval_job_put() - Put a reference to GT TLB invalidation job
- * @job: GT TLB invalidation job object
- *
- * Decrement the GT TLB invalidation job's reference count, call
- * xe_gt_tlb_inval_job_destroy when reference count == 0. Skips decrement if
- * input @job is NULL or IS_ERR.
- */
-void xe_gt_tlb_inval_job_put(struct xe_gt_tlb_inval_job *job)
-{
- if (!IS_ERR_OR_NULL(job))
- kref_put(&job->refcount, xe_gt_tlb_inval_job_destroy);
-}
+++ /dev/null
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2025 Intel Corporation
- */
-
-#ifndef _XE_GT_TLB_INVAL_JOB_H_
-#define _XE_GT_TLB_INVAL_JOB_H_
-
-#include <linux/types.h>
-
-struct dma_fence;
-struct drm_sched_job;
-struct kref;
-struct xe_exec_queue;
-struct xe_gt;
-struct xe_gt_tlb_inval_job;
-struct xe_migrate;
-
-struct xe_gt_tlb_inval_job *xe_gt_tlb_inval_job_create(struct xe_exec_queue *q,
- struct xe_gt *gt,
- u64 start, u64 end,
- u32 asid);
-
-int xe_gt_tlb_inval_job_alloc_dep(struct xe_gt_tlb_inval_job *job);
-
-struct dma_fence *xe_gt_tlb_inval_job_push(struct xe_gt_tlb_inval_job *job,
- struct xe_migrate *m,
- struct dma_fence *fence);
-
-void xe_gt_tlb_inval_job_get(struct xe_gt_tlb_inval_job *job);
-
-void xe_gt_tlb_inval_job_put(struct xe_gt_tlb_inval_job *job);
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_GT_TLB_INVAL_TYPES_H_
-#define _XE_GT_TLB_INVAL_TYPES_H_
-
-#include <linux/workqueue.h>
-#include <linux/dma-fence.h>
-
-struct xe_gt;
-
-/** struct xe_tlb_inval - TLB invalidation client */
-struct xe_tlb_inval {
- /** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */
-#define TLB_INVALIDATION_SEQNO_MAX 0x100000
- int seqno;
- /** @tlb_invalidation.seqno_lock: protects @tlb_invalidation.seqno */
- struct mutex seqno_lock;
- /**
- * @tlb_inval.seqno_recv: last received TLB invalidation seqno,
- * protected by CT lock
- */
- int seqno_recv;
- /**
- * @tlb_inval.pending_fences: list of pending fences waiting TLB
- * invaliations, protected by CT lock
- */
- struct list_head pending_fences;
- /**
- * @tlb_inval.pending_lock: protects @tlb_inval.pending_fences
- * and updating @tlb_inval.seqno_recv.
- */
- spinlock_t pending_lock;
- /**
- * @tlb_inval.fence_tdr: schedules a delayed call to
- * xe_gt_tlb_fence_timeout after the timeut interval is over.
- */
- struct delayed_work fence_tdr;
- /** @wtlb_invalidation.wq: schedules GT TLB invalidation jobs */
- struct workqueue_struct *job_wq;
- /** @tlb_inval.lock: protects TLB invalidation fences */
- spinlock_t lock;
-};
-
-/**
- * struct xe_gt_tlb_inval_fence - XE GT TLB invalidation fence
- *
- * Optionally passed to xe_gt_tlb_inval and will be signaled upon TLB
- * invalidation completion.
- */
-struct xe_gt_tlb_inval_fence {
- /** @base: dma fence base */
- struct dma_fence base;
- /** @gt: GT which fence belong to */
- struct xe_gt *gt;
- /** @link: link into list of pending tlb fences */
- struct list_head link;
- /** @seqno: seqno of TLB invalidation to signal fence one */
- int seqno;
- /** @inval_time: time of TLB invalidation */
- ktime_t inval_time;
-};
-
-#endif
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_gt_stats_types.h"
-#include "xe_gt_tlb_inval_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa_types.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
+#include "xe_tlb_inval_types.h"
#include "xe_uc_types.h"
struct xe_exec_queue_ops;
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_printk.h"
-#include "xe_gt_tlb_inval.h"
#include "xe_guc.h"
#include "xe_guc_log.h"
#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
#include "xe_map.h"
#include "xe_pm.h"
+#include "xe_tlb_inval.h"
#include "xe_trace_guc.h"
static void receive_g2h(struct xe_guc_ct *ct);
#include "xe_assert.h"
#include "xe_bo.h"
-#include "xe_gt_tlb_inval.h"
+#include "xe_tlb_inval.h"
#include "xe_lmtt.h"
#include "xe_map.h"
#include "xe_mmio.h"
static int lmtt_invalidate_hw(struct xe_lmtt *lmtt)
{
- struct xe_gt_tlb_inval_fence fences[XE_MAX_GT_PER_TILE];
- struct xe_gt_tlb_inval_fence *fence = fences;
+ struct xe_tlb_inval_fence fences[XE_MAX_GT_PER_TILE];
+ struct xe_tlb_inval_fence *fence = fences;
struct xe_tile *tile = lmtt_to_tile(lmtt);
struct xe_gt *gt;
int result = 0;
u8 id;
for_each_gt_on_tile(gt, tile, id) {
- xe_gt_tlb_inval_fence_init(gt, fence, true);
- err = xe_gt_tlb_inval_all(gt, fence);
+ xe_tlb_inval_fence_init(>->tlb_inval, fence, true);
+ err = xe_tlb_inval_all(>->tlb_inval, fence);
result = result ?: err;
fence++;
}
*/
fence = fences;
for_each_gt_on_tile(gt, tile, id)
- xe_gt_tlb_inval_fence_wait(fence++);
+ xe_tlb_inval_fence_wait(fence++);
return result;
}
struct xe_bo;
struct xe_gt;
-struct xe_gt_tlb_inval_job;
+struct xe_tlb_inval_job;
struct xe_exec_queue;
struct xe_migrate;
struct xe_migrate_pt_update;
/** @job: The job if a GPU page-table update. NULL otherwise */
struct xe_sched_job *job;
/**
- * @ijob: The GT TLB invalidation job for primary tile. NULL otherwise
+ * @ijob: The TLB invalidation job for primary GT. NULL otherwise
*/
- struct xe_gt_tlb_inval_job *ijob;
+ struct xe_tlb_inval_job *ijob;
/**
- * @mjob: The GT TLB invalidation job for media tile. NULL otherwise
+ * @mjob: The TLB invalidation job for media GT. NULL otherwise
*/
- struct xe_gt_tlb_inval_job *mjob;
+ struct xe_tlb_inval_job *mjob;
/** @tile_id: Tile ID of the update */
u8 tile_id;
};
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
-#include "xe_gt_tlb_inval_job.h"
+#include "xe_tlb_inval_job.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_svm.h"
+#include "xe_tlb_inval_job.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
}
static int xe_pt_vm_dependencies(struct xe_sched_job *job,
- struct xe_gt_tlb_inval_job *ijob,
- struct xe_gt_tlb_inval_job *mjob,
+ struct xe_tlb_inval_job *ijob,
+ struct xe_tlb_inval_job *mjob,
struct xe_vm *vm,
struct xe_vma_ops *vops,
struct xe_vm_pgtable_update_ops *pt_update_ops,
if (job) {
if (ijob) {
- err = xe_gt_tlb_inval_job_alloc_dep(ijob);
+ err = xe_tlb_inval_job_alloc_dep(ijob);
if (err)
return err;
}
if (mjob) {
- err = xe_gt_tlb_inval_job_alloc_dep(mjob);
+ err = xe_tlb_inval_job_alloc_dep(mjob);
if (err)
return err;
}
static const struct xe_migrate_pt_update_ops svm_migrate_ops;
#endif
+static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
+ struct xe_gt *gt)
+{
+ if (xe_gt_is_media_type(gt))
+ return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler;
+
+ return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler;
+}
+
/**
* xe_pt_update_ops_run() - Run PT update operations
* @tile: Tile of PT update operations
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[tile->id];
struct dma_fence *fence, *ifence, *mfence;
- struct xe_gt_tlb_inval_job *ijob = NULL, *mjob = NULL;
+ struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
struct xe_range_fence *rfence;
#endif
if (pt_update_ops->needs_invalidation) {
- ijob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
- tile->primary_gt,
- pt_update_ops->start,
- pt_update_ops->last,
- vm->usm.asid);
+ struct xe_exec_queue *q = pt_update_ops->q;
+ struct xe_dep_scheduler *dep_scheduler =
+ to_dep_scheduler(q, tile->primary_gt);
+
+ ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
+ dep_scheduler,
+ pt_update_ops->start,
+ pt_update_ops->last,
+ vm->usm.asid);
if (IS_ERR(ijob)) {
err = PTR_ERR(ijob);
goto kill_vm_tile1;
update.ijob = ijob;
if (tile->media_gt) {
- mjob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
- tile->media_gt,
- pt_update_ops->start,
- pt_update_ops->last,
- vm->usm.asid);
+ dep_scheduler = to_dep_scheduler(q, tile->media_gt);
+
+ mjob = xe_tlb_inval_job_create(q,
+ &tile->media_gt->tlb_inval,
+ dep_scheduler,
+ pt_update_ops->start,
+ pt_update_ops->last,
+ vm->usm.asid);
if (IS_ERR(mjob)) {
err = PTR_ERR(mjob);
goto free_ijob;
if (ijob) {
struct dma_fence *__fence;
- ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);
+ ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence);
__fence = ifence;
if (mjob) {
fences[0] = ifence;
- mfence = xe_gt_tlb_inval_job_push(mjob, tile->migrate,
- fence);
+ mfence = xe_tlb_inval_job_push(mjob, tile->migrate,
+ fence);
fences[1] = mfence;
dma_fence_array_init(cf, 2, fences,
if (pt_update_ops->needs_userptr_lock)
up_read(&vm->userptr.notifier_lock);
- xe_gt_tlb_inval_job_put(mjob);
- xe_gt_tlb_inval_job_put(ijob);
+ xe_tlb_inval_job_put(mjob);
+ xe_tlb_inval_job_put(ijob);
return fence;
free_ijob:
kfree(cf);
kfree(fences);
- xe_gt_tlb_inval_job_put(mjob);
- xe_gt_tlb_inval_job_put(ijob);
+ xe_tlb_inval_job_put(mjob);
+ xe_tlb_inval_job_put(ijob);
kill_vm_tile1:
if (err != -EAGAIN && err != -ENODATA && tile->id)
xe_vm_kill(vops->vm, false);
#include "xe_bo.h"
#include "xe_gt_stats.h"
-#include "xe_gt_tlb_inval.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pm.h"
--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_stats.h"
+#include "xe_tlb_inval.h"
+#include "xe_mmio.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_trace.h"
+#include "regs/xe_guc_regs.h"
+
+#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
+
+/*
+ * TLB inval depends on pending commands in the CT queue and then the real
+ * invalidation time. Double up the time to process full CT queue
+ * just to be on the safe side.
+ */
+static long tlb_timeout_jiffies(struct xe_gt *gt)
+{
+ /* this reflects what HW/GuC needs to process TLB inv request */
+ const long hw_tlb_timeout = HZ / 4;
+
+ /* this estimates actual delay caused by the CTB transport */
+ long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct);
+
+ return hw_tlb_timeout + 2 * delay;
+}
+
+static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
+{
+ struct xe_gt *gt;
+
+ if (WARN_ON_ONCE(!fence->tlb_inval))
+ return;
+
+ gt = fence->tlb_inval->private;
+
+ xe_pm_runtime_put(gt_to_xe(gt));
+ fence->tlb_inval = NULL; /* fini() should be called once */
+}
+
+static void
+__inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence)
+{
+ bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+ trace_xe_tlb_inval_fence_signal(xe, fence);
+ xe_tlb_inval_fence_fini(fence);
+ dma_fence_signal(&fence->base);
+ if (!stack)
+ dma_fence_put(&fence->base);
+}
+
+static void
+inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence)
+{
+ list_del(&fence->link);
+ __inval_fence_signal(xe, fence);
+}
+
+void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
+{
+ struct xe_gt *gt;
+
+ if (WARN_ON_ONCE(!fence->tlb_inval))
+ return;
+
+ gt = fence->tlb_inval->private;
+ __inval_fence_signal(gt_to_xe(gt), fence);
+}
+
+static void xe_gt_tlb_fence_timeout(struct work_struct *work)
+{
+ struct xe_gt *gt = container_of(work, struct xe_gt,
+ tlb_inval.fence_tdr.work);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_tlb_inval_fence *fence, *next;
+
+ LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker);
+
+ spin_lock_irq(>->tlb_inval.pending_lock);
+ list_for_each_entry_safe(fence, next,
+ >->tlb_inval.pending_fences, link) {
+ s64 since_inval_ms = ktime_ms_delta(ktime_get(),
+ fence->inval_time);
+
+ if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt))
+ break;
+
+ trace_xe_tlb_inval_fence_timeout(xe, fence);
+ xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
+ fence->seqno, gt->tlb_inval.seqno_recv);
+
+ fence->base.error = -ETIME;
+ inval_fence_signal(xe, fence);
+ }
+ if (!list_empty(>->tlb_inval.pending_fences))
+ queue_delayed_work(system_wq,
+ >->tlb_inval.fence_tdr,
+ tlb_timeout_jiffies(gt));
+ spin_unlock_irq(>->tlb_inval.pending_lock);
+}
+
+/**
+ * xe_gt_tlb_inval_init_early - Initialize GT TLB invalidation state
+ * @gt: GT structure
+ *
+ * Initialize TLB invalidation state, purely software initialization, should
+ * be called once during driver load.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ gt->tlb_inval.private = gt;
+ gt->tlb_inval.seqno = 1;
+ INIT_LIST_HEAD(&gt->tlb_inval.pending_fences);
+ spin_lock_init(&gt->tlb_inval.pending_lock);
+ spin_lock_init(&gt->tlb_inval.lock);
+ INIT_DELAYED_WORK(&gt->tlb_inval.fence_tdr,
+ xe_gt_tlb_fence_timeout);
+
+ err = drmm_mutex_init(&xe->drm, &gt->tlb_inval.seqno_lock);
+ if (err)
+ return err;
+
+ /* Ordered workqueue so TLB invalidation jobs execute in submission order */
+ gt->tlb_inval.job_wq =
+ drmm_alloc_ordered_workqueue(&gt_to_xe(gt)->drm, "gt-tlb-inval-job-wq",
+ WQ_MEM_RECLAIM);
+ if (IS_ERR(gt->tlb_inval.job_wq))
+ return PTR_ERR(gt->tlb_inval.job_wq);
+
+ return 0;
+}
+
+/**
+ * xe_tlb_inval_reset - TLB invalidation reset
+ * @tlb_inval: TLB invalidation client
+ *
+ * Signal any pending invalidation fences, should be called during a GT reset
+ */
+void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_gt *gt = tlb_inval->private;
+ struct xe_tlb_inval_fence *fence, *next;
+ int pending_seqno;
+
+ /*
+ * we can get here before the CTs are even initialized if we're wedging
+ * very early, in which case there are not going to be any pending
+ * fences so we can bail immediately.
+ */
+ if (!xe_guc_ct_initialized(>->uc.guc.ct))
+ return;
+
+ /*
+ * CT channel is already disabled at this point. No new TLB requests can
+ * appear.
+ */
+
+ mutex_lock(>->tlb_inval.seqno_lock);
+ spin_lock_irq(>->tlb_inval.pending_lock);
+ cancel_delayed_work(>->tlb_inval.fence_tdr);
+ /*
+ * We might have various kworkers waiting for TLB flushes to complete
+ * which are not tracked with an explicit TLB fence, however at this
+ * stage that will never happen since the CT is already disabled, so
+ * make sure we signal them here under the assumption that we have
+ * completed a full GT reset.
+ */
+ if (gt->tlb_inval.seqno == 1)
+ pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
+ else
+ pending_seqno = gt->tlb_inval.seqno - 1;
+ WRITE_ONCE(gt->tlb_inval.seqno_recv, pending_seqno);
+
+ list_for_each_entry_safe(fence, next,
+ >->tlb_inval.pending_fences, link)
+ inval_fence_signal(gt_to_xe(gt), fence);
+ spin_unlock_irq(>->tlb_inval.pending_lock);
+ mutex_unlock(>->tlb_inval.seqno_lock);
+}
+
+/**
+ * xe_gt_tlb_inval_fini - Clean up GT TLB invalidation state
+ * @gt: GT structure
+ *
+ * Cancel pending fence workers and clean up any additional
+ * GT TLB invalidation state.
+ */
+void xe_gt_tlb_inval_fini(struct xe_gt *gt)
+{
+ /* A reset signals all pending fences and cancels the fence TDR */
+ xe_tlb_inval_reset(&gt->tlb_inval);
+}
+
+static bool tlb_inval_seqno_past(struct xe_gt *gt, int seqno)
+{
+ int seqno_recv = READ_ONCE(gt->tlb_inval.seqno_recv);
+
+ if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+ return false;
+
+ if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
+ return true;
+
+ return seqno_recv >= seqno;
+}
+
+static int send_tlb_inval(struct xe_guc *guc,
+ struct xe_tlb_inval_fence *fence,
+ u32 *action, int len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ int seqno;
+ int ret;
+
+ xe_gt_assert(gt, fence);
+
+ /*
+ * XXX: The seqno algorithm relies on TLB invalidation being processed
+ * in order which they currently are, if that changes the algorithm will
+ * need to be updated.
+ */
+
+ mutex_lock(>->tlb_inval.seqno_lock);
+ seqno = gt->tlb_inval.seqno;
+ fence->seqno = seqno;
+ trace_xe_tlb_inval_fence_send(xe, fence);
+ action[1] = seqno;
+ ret = xe_guc_ct_send(&guc->ct, action, len,
+ G2H_LEN_DW_TLB_INVALIDATE, 1);
+ if (!ret) {
+ spin_lock_irq(>->tlb_inval.pending_lock);
+ /*
+ * We haven't actually published the TLB fence as per
+ * pending_fences, but in theory our seqno could have already
+ * been written as we acquired the pending_lock. In such a case
+ * we can just go ahead and signal the fence here.
+ */
+ if (tlb_inval_seqno_past(gt, seqno)) {
+ __inval_fence_signal(xe, fence);
+ } else {
+ fence->inval_time = ktime_get();
+ list_add_tail(&fence->link,
+ >->tlb_inval.pending_fences);
+
+ if (list_is_singular(>->tlb_inval.pending_fences))
+ queue_delayed_work(system_wq,
+ >->tlb_inval.fence_tdr,
+ tlb_timeout_jiffies(gt));
+ }
+ spin_unlock_irq(>->tlb_inval.pending_lock);
+ } else {
+ __inval_fence_signal(xe, fence);
+ }
+ if (!ret) {
+ gt->tlb_inval.seqno = (gt->tlb_inval.seqno + 1) %
+ TLB_INVALIDATION_SEQNO_MAX;
+ if (!gt->tlb_inval.seqno)
+ gt->tlb_inval.seqno = 1;
+ }
+ mutex_unlock(>->tlb_inval.seqno_lock);
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
+
+ return ret;
+}
+
+#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+ XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+ XE_GUC_TLB_INVAL_FLUSH_CACHE)
+
+/**
+ * xe_tlb_inval_guc - Issue a TLB invalidation on this GT for the GuC
+ * @gt: GT structure
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion
+ *
+ * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
+ * caller can use the invalidation fence to wait for completion.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+static int xe_tlb_inval_guc(struct xe_gt *gt,
+ struct xe_tlb_inval_fence *fence)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_TLB_INVALIDATION,
+ 0, /* seqno, replaced in send_tlb_inval */
+ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
+ };
+ int ret;
+
+ ret = send_tlb_inval(>->uc.guc, fence, action, ARRAY_SIZE(action));
+ /*
+ * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
+ * should be nuked on a GT reset so this error can be ignored.
+ */
+ if (ret == -ECANCELED)
+ return 0;
+
+ return ret;
+}
+
+/**
+ * xe_tlb_inval_ggtt - Issue a TLB invalidation on this GT for the GGTT
+ * @tlb_inval: TLB invalidation client
+ *
+ * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
+ * synchronous.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_gt *gt = tlb_inval->private;
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
+
+ if (xe_guc_ct_enabled(>->uc.guc.ct) &&
+ gt->uc.guc.submission_state.enabled) {
+ struct xe_tlb_inval_fence fence;
+ int ret;
+
+ xe_tlb_inval_fence_init(tlb_inval, &fence, true);
+ ret = xe_tlb_inval_guc(gt, &fence);
+ if (ret)
+ return ret;
+
+ xe_tlb_inval_fence_wait(&fence);
+ } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
+ struct xe_mmio *mmio = >->mmio;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
+ PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
+ PVC_GUC_TLB_INV_DESC0_VALID);
+ } else {
+ xe_mmio_write32(mmio, GUC_TLB_INV_CR,
+ GUC_TLB_INV_CR_INVALIDATE);
+ }
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ }
+
+ return 0;
+}
+
+static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_TLB_INVALIDATION_ALL,
+ 0, /* seqno, replaced in send_tlb_inval */
+ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
+ };
+ struct xe_gt *gt = tlb_inval->private;
+
+ xe_gt_assert(gt, fence);
+
+ return send_tlb_inval(>->uc.guc, fence, action, ARRAY_SIZE(action));
+}
+
+/**
+ * xe_tlb_inval_all - Invalidate all TLBs across PF and all VFs.
+ * @tlb_inval: TLB invalidation client
+ * @fence: the &xe_tlb_inval_fence to be signaled on completion
+ *
+ * Send a request to invalidate all TLBs across PF and all VFs.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence)
+{
+ struct xe_gt *gt = tlb_inval->private;
+ int err;
+
+ err = send_tlb_inval_all(tlb_inval, fence);
+ if (err)
+ xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err));
+
+ return err;
+}
+
+/*
+ * Ensure that roundup_pow_of_two(length) doesn't overflow.
+ * Note that roundup_pow_of_two() operates on unsigned long,
+ * not on u64.
+ */
+#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
+
+/**
+ * xe_tlb_inval_range - Issue a TLB invalidation on this GT for an address range
+ * @tlb_inval: TLB invalidation client
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
+ *
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
+ *
+ * Return: Negative error code on error, 0 on success
+ */
+int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence, u64 start, u64 end,
+ u32 asid)
+{
+ struct xe_gt *gt = tlb_inval->private;
+ struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN 7
+ u32 action[MAX_TLB_INVALIDATION_LEN];
+ u64 length = end - start;
+ int len = 0;
+
+ xe_gt_assert(gt, fence);
+
+ /* Execlists not supported */
+ if (gt_to_xe(gt)->info.force_execlist) {
+ __inval_fence_signal(xe, fence);
+ return 0;
+ }
+
+ action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+ action[len++] = 0; /* seqno, replaced in send_tlb_inval */
+ if (!xe->info.has_range_tlb_inval ||
+ length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
+ action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+ } else {
+ u64 orig_start = start;
+ u64 align;
+
+ if (length < SZ_4K)
+ length = SZ_4K;
+
+ /*
+ * We need to invalidate a higher granularity if start address
+ * is not aligned to length. When start is not aligned with
+ * length we need to find the length large enough to create an
+ * address mask covering the required range.
+ */
+ align = roundup_pow_of_two(length);
+ start = ALIGN_DOWN(start, align);
+ end = ALIGN(end, align);
+ length = align;
+ while (start + length < end) {
+ length <<= 1;
+ start = ALIGN_DOWN(orig_start, length);
+ }
+
+ /*
+ * Minimum invalidation size for a 2MB page that the hardware
+ * expects is 16MB
+ */
+ if (length >= SZ_2M) {
+ length = max_t(u64, SZ_16M, length);
+ start = ALIGN_DOWN(orig_start, length);
+ }
+
+ xe_gt_assert(gt, length >= SZ_4K);
+ xe_gt_assert(gt, is_power_of_2(length));
+ xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+ ilog2(SZ_2M) + 1)));
+ xe_gt_assert(gt, IS_ALIGNED(start, length));
+
+ action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+ action[len++] = asid;
+ action[len++] = lower_32_bits(start);
+ action[len++] = upper_32_bits(start);
+ action[len++] = ilog2(length) - ilog2(SZ_4K);
+ }
+
+ xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
+
+ return send_tlb_inval(>->uc.guc, fence, action, len);
+}
+
+/**
+ * xe_tlb_inval_vm - Issue a TLB invalidation on this GT for a VM
+ * @tlb_inval: TLB invalidation client
+ * @vm: VM to invalidate
+ *
+ * Invalidate entire VM's address space
+ */
+void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
+{
+ struct xe_tlb_inval_fence fence;
+ u64 range = 1ull << vm->xe->info.va_bits;
+ int ret;
+
+ xe_tlb_inval_fence_init(tlb_inval, &fence, true);
+
+ ret = xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
+ if (ret < 0)
+ return;
+
+ xe_tlb_inval_fence_wait(&fence);
+}
+
+/**
+ * xe_tlb_inval_done_handler - TLB invalidation done handler
+ * @gt: gt
+ * @seqno: seqno of invalidation that is done
+ *
+ * Update recv seqno, signal any TLB invalidation fences, and restart TDR
+ */
+static void xe_tlb_inval_done_handler(struct xe_gt *gt, int seqno)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_tlb_inval_fence *fence, *next;
+ unsigned long flags;
+
+ /*
+ * This can also be run both directly from the IRQ handler and also in
+ * process_g2h_msg(). Only one may process any individual CT message,
+ * however the order they are processed here could result in skipping a
+ * seqno. To handle that we just process all the seqnos from the last
+ * seqno_recv up to and including the one in msg[0]. The delta should be
+ * very small so there shouldn't be much of pending_fences we actually
+ * need to iterate over here.
+ *
+ * From GuC POV we expect the seqnos to always appear in-order, so if we
+ * see something later in the timeline we can be sure that anything
+ * appearing earlier has already signalled, just that we have yet to
+ * officially process the CT message like if racing against
+ * process_g2h_msg().
+ */
+ spin_lock_irqsave(>->tlb_inval.pending_lock, flags);
+ if (tlb_inval_seqno_past(gt, seqno)) {
+ spin_unlock_irqrestore(>->tlb_inval.pending_lock, flags);
+ return;
+ }
+
+ WRITE_ONCE(gt->tlb_inval.seqno_recv, seqno);
+
+ list_for_each_entry_safe(fence, next,
+ >->tlb_inval.pending_fences, link) {
+ trace_xe_tlb_inval_fence_recv(xe, fence);
+
+ if (!tlb_inval_seqno_past(gt, fence->seqno))
+ break;
+
+ inval_fence_signal(xe, fence);
+ }
+
+ if (!list_empty(>->tlb_inval.pending_fences))
+ mod_delayed_work(system_wq,
+ >->tlb_inval.fence_tdr,
+ tlb_timeout_jiffies(gt));
+ else
+ cancel_delayed_work(>->tlb_inval.fence_tdr);
+
+ spin_unlock_irqrestore(>->tlb_inval.pending_lock, flags);
+}
+
+/**
+ * xe_guc_tlb_inval_done_handler - TLB invalidation done handler
+ * @guc: guc
+ * @msg: message indicating TLB invalidation done
+ * @len: length of message
+ *
+ * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
+ * invalidation fences for seqno. Algorithm for this depends on seqno being
+ * received in-order and asserts this assumption.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
+int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ if (unlikely(len != 1))
+ return -EPROTO;
+
+ xe_tlb_inval_done_handler(gt, msg[0]);
+
+ return 0;
+}
+
+static const char *
+inval_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ return "xe";
+}
+
+static const char *
+inval_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ return "inval_fence";
+}
+
+static const struct dma_fence_ops inval_fence_ops = {
+ .get_driver_name = inval_fence_get_driver_name,
+ .get_timeline_name = inval_fence_get_timeline_name,
+};
+
+/**
+ * xe_tlb_inval_fence_init - Initialize TLB invalidation fence
+ * @tlb_inval: TLB invalidation client
+ * @fence: TLB invalidation fence to initialize
+ * @stack: fence is stack variable
+ *
+ * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini
+ * will be automatically called when fence is signalled (all fences must signal),
+ * even on error.
+ */
+void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence,
+ bool stack)
+{
+ struct xe_gt *gt = tlb_inval->private;
+
+ xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+ spin_lock_irq(>->tlb_inval.lock);
+ dma_fence_init(&fence->base, &inval_fence_ops,
+ >->tlb_inval.lock,
+ dma_fence_context_alloc(1), 1);
+ spin_unlock_irq(>->tlb_inval.lock);
+ INIT_LIST_HEAD(&fence->link);
+ if (stack)
+ set_bit(FENCE_STACK_BIT, &fence->base.flags);
+ else
+ dma_fence_get(&fence->base);
+ fence->tlb_inval = tlb_inval;
+}
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TLB_INVAL_H_
+#define _XE_TLB_INVAL_H_
+
+#include <linux/types.h>
+
+#include "xe_tlb_inval_types.h"
+
+struct xe_gt;
+struct xe_guc;
+struct xe_vm;
+
+int xe_gt_tlb_inval_init_early(struct xe_gt *gt);
+void xe_gt_tlb_inval_fini(struct xe_gt *gt);
+
+void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval);
+int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval);
+void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm);
+int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence);
+int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence,
+ u64 start, u64 end, u32 asid);
+int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence,
+ bool stack);
+void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence);
+
+static inline void
+xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence)
+{
+ dma_fence_wait(&fence->base, false);
+}
+
+#endif /* _XE_TLB_INVAL_H_ */
--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_assert.h"
+#include "xe_dep_job_types.h"
+#include "xe_dep_scheduler.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_types.h"
+#include "xe_tlb_inval.h"
+#include "xe_tlb_inval_job.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+
+/** struct xe_tlb_inval_job - TLB invalidation job */
+struct xe_tlb_inval_job {
+ /** @dep: base generic dependency Xe job */
+ struct xe_dep_job dep;
+ /** @tlb_inval: TLB invalidation client */
+ struct xe_tlb_inval *tlb_inval;
+ /** @q: exec queue issuing the invalidate */
+ struct xe_exec_queue *q;
+ /** @refcount: ref count of this job */
+ struct kref refcount;
+ /**
+ * @fence: dma fence to indicate completion. 1 way relationship - job
+ * can safely reference fence, fence cannot safely reference job.
+ */
+ struct dma_fence *fence;
+ /** @start: Start address to invalidate */
+ u64 start;
+ /** @end: End address to invalidate */
+ u64 end;
+ /** @asid: Address space ID to invalidate */
+ u32 asid;
+ /** @fence_armed: Fence has been armed */
+ bool fence_armed;
+};
+
+static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
+{
+ struct xe_tlb_inval_job *job =
+ container_of(dep_job, typeof(*job), dep);
+ struct xe_tlb_inval_fence *ifence =
+ container_of(job->fence, typeof(*ifence), base);
+
+ xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
+ job->end, job->asid);
+
+ return job->fence;
+}
+
+static void xe_tlb_inval_job_free(struct xe_dep_job *dep_job)
+{
+ struct xe_tlb_inval_job *job =
+ container_of(dep_job, typeof(*job), dep);
+
+ /* Pairs with get in xe_tlb_inval_job_push */
+ xe_tlb_inval_job_put(job);
+}
+
+static const struct xe_dep_job_ops dep_job_ops = {
+ .run_job = xe_tlb_inval_job_run,
+ .free_job = xe_tlb_inval_job_free,
+};
+
+/**
+ * xe_tlb_inval_job_create() - TLB invalidation job create
+ * @q: exec queue issuing the invalidate
+ * @tlb_inval: TLB invalidation client
+ * @dep_scheduler: Dependency scheduler for job
+ * @start: Start address to invalidate
+ * @end: End address to invalidate
+ * @asid: Address space ID to invalidate
+ *
+ * Create a TLB invalidation job and initialize internal fields. The caller is
+ * responsible for releasing the creation reference.
+ *
+ * Return: TLB invalidation job object on success, ERR_PTR failure
+ */
+struct xe_tlb_inval_job *
+xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
+ struct xe_dep_scheduler *dep_scheduler, u64 start,
+ u64 end, u32 asid)
+{
+ struct xe_tlb_inval_job *job;
+ struct drm_sched_entity *entity =
+ xe_dep_scheduler_entity(dep_scheduler);
+ struct xe_tlb_inval_fence *ifence;
+ int err;
+
+ job = kmalloc(sizeof(*job), GFP_KERNEL);
+ if (!job)
+ return ERR_PTR(-ENOMEM);
+
+ job->q = q;
+ job->tlb_inval = tlb_inval;
+ job->start = start;
+ job->end = end;
+ job->asid = asid;
+ job->fence_armed = false;
+ job->dep.ops = &dep_job_ops;
+ kref_init(&job->refcount);
+ xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */
+
+ ifence = kmalloc(sizeof(*ifence), GFP_KERNEL);
+ if (!ifence) {
+ err = -ENOMEM;
+ goto err_job;
+ }
+ job->fence = &ifence->base;
+
+ err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL,
+ q->xef ? q->xef->drm->client_id : 0);
+ if (err)
+ goto err_fence;
+
+ /* Pairs with put in xe_tlb_inval_job_destroy */
+ xe_pm_runtime_get_noresume(gt_to_xe(q->gt));
+
+ return job;
+
+err_fence:
+ kfree(ifence);
+err_job:
+ xe_exec_queue_put(q);
+ kfree(job);
+
+ return ERR_PTR(err);
+}
+
+static void xe_tlb_inval_job_destroy(struct kref *ref)
+{
+ struct xe_tlb_inval_job *job = container_of(ref, typeof(*job),
+ refcount);
+ struct xe_tlb_inval_fence *ifence =
+ container_of(job->fence, typeof(*ifence), base);
+ struct xe_exec_queue *q = job->q;
+ struct xe_device *xe = gt_to_xe(q->gt);
+
+ if (!job->fence_armed)
+ kfree(ifence);
+ else
+ /* Ref from xe_tlb_inval_fence_init */
+ dma_fence_put(job->fence);
+
+ drm_sched_job_cleanup(&job->dep.drm);
+ kfree(job);
+ xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */
+ xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */
+}
+
+/**
+ * xe_tlb_inval_job_alloc_dep() - TLB invalidation job alloc dependency
+ * @job: TLB invalidation job to alloc dependency for
+ *
+ * Allocate storage for a dependency in the TLB invalidation job. This
+ * function should be called at most once per job and must be paired with
+ * xe_tlb_inval_job_push being called with a real fence.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job)
+{
+ xe_assert(gt_to_xe(job->q->gt), !xa_load(&job->dep.drm.dependencies, 0));
+ might_alloc(GFP_KERNEL);
+
+ return drm_sched_job_add_dependency(&job->dep.drm,
+ dma_fence_get_stub());
+}
+
+/**
+ * xe_tlb_inval_job_push() - TLB invalidation job push
+ * @job: TLB invalidation job to push
+ * @m: The migration object being used
+ * @fence: Dependency for TLB invalidation job
+ *
+ * Pushes a TLB invalidation job for execution, using @fence as a dependency.
+ * Storage for @fence must be preallocated with xe_tlb_inval_job_alloc_dep
+ * prior to this call if @fence is not signaled. Takes a reference to the job's
+ * finished fence, which the caller is responsible for releasing, and return it
+ * to the caller. This function is safe to be called in the path of reclaim.
+ *
+ * Return: Job's finished fence on success, cannot fail
+ */
+struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
+ struct xe_migrate *m,
+ struct dma_fence *fence)
+{
+ struct xe_tlb_inval_fence *ifence =
+ container_of(job->fence, typeof(*ifence), base);
+
+ if (!dma_fence_is_signaled(fence)) {
+ void *ptr;
+
+ /*
+ * Can be in path of reclaim, hence the preallocation of fence
+ * storage in xe_tlb_inval_job_alloc_dep. Verify caller did
+ * this correctly.
+ */
+ xe_assert(gt_to_xe(job->q->gt),
+ xa_load(&job->dep.drm.dependencies, 0) ==
+ dma_fence_get_stub());
+
+ dma_fence_get(fence); /* ref released once dependency processed by scheduler */
+ ptr = xa_store(&job->dep.drm.dependencies, 0, fence,
+ GFP_ATOMIC);
+ xe_assert(gt_to_xe(job->q->gt), !xa_is_err(ptr));
+ }
+
+ xe_tlb_inval_job_get(job); /* Pairs with put in free_job */
+ job->fence_armed = true;
+
+ /*
+ * We need the migration lock to protect the job's seqno and the spsc
+ * queue, only taken on migration queue, user queues protected dma-resv
+ * VM lock.
+ */
+ xe_migrate_job_lock(m, job->q);
+
+ /* Creation ref pairs with put in xe_tlb_inval_job_destroy */
+ xe_tlb_inval_fence_init(job->tlb_inval, ifence, false);
+ dma_fence_get(job->fence); /* Pairs with put in DRM scheduler */
+
+ drm_sched_job_arm(&job->dep.drm);
+ /*
+ * caller ref, get must be done before job push as it could immediately
+ * signal and free.
+ */
+ dma_fence_get(&job->dep.drm.s_fence->finished);
+ drm_sched_entity_push_job(&job->dep.drm);
+
+ xe_migrate_job_unlock(m, job->q);
+
+ /*
+ * Not using job->fence, as it has its own dma-fence context, which does
+ * not allow TLB invalidation fences on the same queue, GT tuple to
+ * be squashed in dma-resv/DRM scheduler. Instead, we use the DRM scheduler
+ * context and job's finished fence, which enables squashing.
+ */
+ return &job->dep.drm.s_fence->finished;
+}
+
+/**
+ * xe_tlb_inval_job_get() - Get a reference to TLB invalidation job
+ * @job: TLB invalidation job object
+ *
+ * Increment the TLB invalidation job's reference count
+ */
+void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job)
+{
+ kref_get(&job->refcount);
+}
+
+/**
+ * xe_tlb_inval_job_put() - Put a reference to TLB invalidation job
+ * @job: TLB invalidation job object
+ *
+ * Decrement the TLB invalidation job's reference count, call
+ * xe_tlb_inval_job_destroy when reference count == 0. Skips decrement if
+ * input @job is NULL or IS_ERR.
+ */
+void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job)
+{
+ if (!IS_ERR_OR_NULL(job))
+ kref_put(&job->refcount, xe_tlb_inval_job_destroy);
+}
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TLB_INVAL_JOB_H_
+#define _XE_TLB_INVAL_JOB_H_
+
+#include <linux/types.h>
+
+struct dma_fence;
+struct xe_dep_scheduler;
+struct xe_exec_queue;
+struct xe_tlb_inval;
+struct xe_tlb_inval_job;
+struct xe_migrate;
+
+struct xe_tlb_inval_job *
+xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
+ struct xe_dep_scheduler *dep_scheduler,
+ u64 start, u64 end, u32 asid);
+
+int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
+
+struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
+ struct xe_migrate *m,
+ struct dma_fence *fence);
+
+void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job);
+
+void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job);
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TLB_INVAL_TYPES_H_
+#define _XE_TLB_INVAL_TYPES_H_
+
+#include <linux/workqueue.h>
+#include <linux/dma-fence.h>
+
+/** struct xe_tlb_inval - TLB invalidation client */
+struct xe_tlb_inval {
+ /** @private: Backend private pointer */
+ void *private;
+ /** @seqno: TLB invalidation seqno, protected by CT lock */
+#define TLB_INVALIDATION_SEQNO_MAX 0x100000
+ int seqno;
+ /** @seqno_lock: protects @seqno */
+ struct mutex seqno_lock;
+ /**
+ * @seqno_recv: last received TLB invalidation seqno, protected by
+ * CT lock
+ */
+ int seqno_recv;
+ /**
+ * @pending_fences: list of pending fences waiting TLB invalidations,
+ * protected by CT lock
+ */
+ struct list_head pending_fences;
+ /**
+ * @pending_lock: protects @pending_fences and updating @seqno_recv.
+ */
+ spinlock_t pending_lock;
+ /**
+ * @fence_tdr: schedules a delayed call to xe_tlb_fence_timeout after
+ * the timeout interval is over.
+ */
+ struct delayed_work fence_tdr;
+ /** @job_wq: schedules TLB invalidation jobs */
+ struct workqueue_struct *job_wq;
+ /** @lock: protects TLB invalidation fences */
+ spinlock_t lock;
+};
+
+/**
+ * struct xe_tlb_inval_fence - TLB invalidation fence
+ *
+ * Optionally passed to xe_tlb_inval* functions and will be signaled upon TLB
+ * invalidation completion.
+ */
+struct xe_tlb_inval_fence {
+ /** @base: dma fence base */
+ struct dma_fence base;
+ /** @tlb_inval: TLB invalidation client which fence belongs to */
+ struct xe_tlb_inval *tlb_inval;
+ /** @link: link into list of pending tlb fences */
+ struct list_head link;
+ /** @seqno: seqno of TLB invalidation to signal fence on */
+ int seqno;
+ /** @inval_time: time of TLB invalidation */
+ ktime_t inval_time;
+};
+
+#endif
#include "xe_exec_queue_types.h"
#include "xe_gpu_scheduler_types.h"
-#include "xe_gt_tlb_inval_types.h"
#include "xe_gt_types.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_sched_job.h"
+#include "xe_tlb_inval_types.h"
#include "xe_vm.h"
#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt)))
#define __dev_name_eq(q) __dev_name_gt((q)->gt)
-DECLARE_EVENT_CLASS(xe_gt_tlb_inval_fence,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence),
+DECLARE_EVENT_CLASS(xe_tlb_inval_fence,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence),
TP_STRUCT__entry(
__string(dev, __dev_name_xe(xe))
- __field(struct xe_gt_tlb_inval_fence *, fence)
+ __field(struct xe_tlb_inval_fence *, fence)
__field(int, seqno)
),
__get_str(dev), __entry->fence, __entry->seqno)
);
-DEFINE_EVENT(xe_gt_tlb_inval_fence, xe_gt_tlb_inval_fence_send,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_send,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
-DEFINE_EVENT(xe_gt_tlb_inval_fence, xe_gt_tlb_inval_fence_recv,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_recv,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
-DEFINE_EVENT(xe_gt_tlb_inval_fence, xe_gt_tlb_inval_fence_signal,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_signal,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
-DEFINE_EVENT(xe_gt_tlb_inval_fence, xe_gt_tlb_inval_fence_timeout,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_inval_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_timeout,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
-#include "xe_gt_tlb_inval.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
+#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"
xe_pt_clear(xe, vm->pt_root[id]);
for_each_gt(gt, xe, id)
- xe_gt_tlb_inval_vm(gt, vm);
+ xe_tlb_inval_vm(>->tlb_inval, vm);
}
}
int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
u64 end, u8 tile_mask)
{
- struct xe_gt_tlb_inval_fence
+ struct xe_tlb_inval_fence
fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
struct xe_tile *tile;
u32 fence_id = 0;
if (!(tile_mask & BIT(id)))
continue;
- xe_gt_tlb_inval_fence_init(tile->primary_gt,
- &fence[fence_id], true);
+ xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
+ &fence[fence_id], true);
- err = xe_gt_tlb_inval_range(tile->primary_gt, &fence[fence_id],
- start, end, vm->usm.asid);
+ err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
+ &fence[fence_id], start, end,
+ vm->usm.asid);
if (err)
goto wait;
++fence_id;
if (!tile->media_gt)
continue;
- xe_gt_tlb_inval_fence_init(tile->media_gt,
- &fence[fence_id], true);
+ xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
+ &fence[fence_id], true);
- err = xe_gt_tlb_inval_range(tile->media_gt, &fence[fence_id],
- start, end, vm->usm.asid);
+ err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
+ &fence[fence_id], start, end,
+ vm->usm.asid);
if (err)
goto wait;
++fence_id;
wait:
for (id = 0; id < fence_id; ++id)
- xe_gt_tlb_inval_fence_wait(&fence[id]);
+ xe_tlb_inval_fence_wait(&fence[id]);
return err;
}