drm/xe: Use ring ops TLB invalidation for rebinds

author Thomas Hellström <thomas.hellstrom@linux.intel.com>

Wed, 27 Mar 2024 09:11:33 +0000 (10:11 +0100)

committer Lucas De Marchi <lucas.demarchi@intel.com>

Thu, 4 Apr 2024 13:32:22 +0000 (08:32 -0500)
author Thomas Hellström <thomas.hellstrom@linux.intel.com>
Wed, 27 Mar 2024 09:11:33 +0000 (10:11 +0100)
committer Lucas De Marchi <lucas.demarchi@intel.com>
Thu, 4 Apr 2024 13:32:22 +0000 (08:32 -0500)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h

index 62b3d9d..462b331 100644 (file)
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -148,6 +148,11 @@ struct xe_exec_queue {
         const struct xe_ring_ops *ring_ops;
         /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
         struct drm_sched_entity *entity;
+       /**
+        * @tlb_flush_seqno: The seqno of the last rebind TLB flush performed.
+        * Protected by @vm's resv. Unused if @vm == NULL.
+        */
+       u64 tlb_flush_seqno;
         /** @lrc: logical ring context for this exec queue */
         struct xe_lrc lrc[];
  };
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c

index 7f54bc3..9fd65f5 100644 (file)
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1254,11 +1254,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
          * non-faulting LR, in particular on user-space batch buffer chaining,
          * it needs to be done here.
          */
-       if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
-           (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+       if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
                 ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
                 if (!ifence)
                         return ERR_PTR(-ENOMEM);
+       } else if (rebind && !xe_vm_in_lr_mode(vm)) {
+               /* We also bump the seqno if batch_invalidate_tlb is true */
+               vm->tlb_flush_seqno++;
         }
  
         rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c

index c4edffc..5b2b37b 100644 (file)
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
  {
         u32 dw[MAX_JOB_SIZE_DW], i = 0;
         u32 ppgtt_flag = get_ppgtt_flag(job);
-       struct xe_vm *vm = job->q->vm;
         struct xe_gt *gt = job->q->gt;
  
-       if (vm && vm->batch_invalidate_tlb) {
+       if (job->ring_ops_flush_tlb) {
                 dw[i++] = preparser_disable(true);
                 i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                         seqno, true, dw, i);
@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
         struct xe_gt *gt = job->q->gt;
         struct xe_device *xe = gt_to_xe(gt);
         bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
-       struct xe_vm *vm = job->q->vm;
  
         dw[i++] = preparser_disable(true);
  
@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
                         i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
         }
  
-       if (vm && vm->batch_invalidate_tlb)
+       if (job->ring_ops_flush_tlb)
                 i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                         seqno, true, dw, i);
  
         dw[i++] = preparser_disable(false);
  
-       if (!vm || !vm->batch_invalidate_tlb)
+       if (!job->ring_ops_flush_tlb)
                 i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                         seqno, dw, i);
  
@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
         struct xe_gt *gt = job->q->gt;
         struct xe_device *xe = gt_to_xe(gt);
         bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
-       struct xe_vm *vm = job->q->vm;
         u32 mask_flags = 0;
  
         dw[i++] = preparser_disable(true);
@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
                 mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
  
         /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
-       i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+       i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
  
         /* hsdes: 1809175790 */
         if (has_aux_ccs(xe))
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c

index 8151dda..b0c7fa4 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
  
  void xe_sched_job_arm(struct xe_sched_job *job)
  {
+       struct xe_exec_queue *q = job->q;
+       struct xe_vm *vm = q->vm;
+
+       if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
+           (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
+               xe_vm_assert_held(vm);
+               q->tlb_flush_seqno = vm->tlb_flush_seqno;
+               job->ring_ops_flush_tlb = true;
+       }
+
         drm_sched_job_arm(&job->drm);
  }
  
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h

index b1d83da..5e12724 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -39,6 +39,8 @@ struct xe_sched_job {
         } user_fence;
         /** @migrate_flush_flags: Additional flush flags for migration jobs */
         u32 migrate_flush_flags;
+       /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
+       bool ring_ops_flush_tlb;
         /** @batch_addr: batch buffer address of job */
         u64 batch_addr[];
  };
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h

index ae5fb56..5747f13 100644 (file)
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -264,6 +264,11 @@ struct xe_vm {
                 bool capture_once;
         } error_capture;
  
+       /**
+        * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
+        * Protected by the vm resv.
+        */
+       u64 tlb_flush_seqno;
         /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
         bool batch_invalidate_tlb;
         /** @xef: XE file handle for tracking this VM's drm client */
author	Thomas Hellström <thomas.hellstrom@linux.intel.com>
	Wed, 27 Mar 2024 09:11:33 +0000 (10:11 +0100)
committer	Lucas De Marchi <lucas.demarchi@intel.com>
	Thu, 4 Apr 2024 13:32:22 +0000 (08:32 -0500)
drivers/gpu/drm/xe/xe_exec_queue_types.h		patch \| blob \| history
drivers/gpu/drm/xe/xe_pt.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_ring_ops.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_sched_job.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_sched_job_types.h		patch \| blob \| history
drivers/gpu/drm/xe/xe_vm_types.h		patch \| blob \| history