drm/xe: Add exec queue param to devcoredump
author Matthew Brost <matthew.brost@intel.com>
Thu, 14 Nov 2024 02:25:19 +0000 (18:25 -0800)
committer Matthew Brost <matthew.brost@intel.com>
Thu, 14 Nov 2024 14:38:44 +0000 (06:38 -0800)
At capture time, the target job may be unavailable (e.g., when the exec
queue is in LR mode). However, the associated exec queue will be
available regardless, so add an exec queue parameter to the devcoredump
functions for such cases.
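
For illustration only (not part of this patch), a capture path that has
no job, such as an LR-mode fault handler, could now pass a NULL job
while the existing timeout path keeps passing both. A minimal sketch,
assuming a hypothetical lr_mode_fault() helper:

	/* Hypothetical caller sketch -- not in this patch */
	static void lr_mode_fault(struct xe_exec_queue *q)
	{
		/* No xe_sched_job exists in LR mode; snapshot from the queue alone */
		if (!exec_queue_killed(q))
			xe_devcoredump(q, NULL);
	}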

v2:
 - Reword commit message (Jonathan)

Cc: Zhanjun Dong <zhanjun.dong@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-5-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c
drivers/gpu/drm/xe/xe_devcoredump.h
drivers/gpu/drm/xe/xe_guc_submit.c

drivers/gpu/drm/xe/xe_devcoredump.c
index d3570d3..c32cbb4 100644
@@ -238,10 +238,10 @@ static void xe_devcoredump_free(void *data)
 }
 
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+                                struct xe_exec_queue *q,
                                 struct xe_sched_job *job)
 {
        struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
-       struct xe_exec_queue *q = job->q;
        struct xe_guc *guc = exec_queue_to_guc(q);
        u32 adj_logical_mask = q->logical_mask;
        u32 width_mask = (0x1 << q->width) - 1;
@@ -278,10 +278,12 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
        ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
        ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
        ss->ge = xe_guc_exec_queue_snapshot_capture(q);
-       ss->job = xe_sched_job_snapshot_capture(job);
+       if (job)
+               ss->job = xe_sched_job_snapshot_capture(job);
        ss->vm = xe_vm_snapshot_capture(q->vm);
 
-       xe_engine_snapshot_capture_for_job(job);
+       if (job)
+               xe_engine_snapshot_capture_for_job(job);
 
        queue_work(system_unbound_wq, &ss->work);
 
@@ -291,15 +293,16 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 
 /**
  * xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
  * @job: The faulty xe_sched_job, where the issue was detected.
  *
  * This function should be called at the crash time within the serialized
  * gt_reset. It is skipped if we still have the core dump device available
  * with the information of the 'first' snapshot.
  */
-void xe_devcoredump(struct xe_sched_job *job)
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job)
 {
-       struct xe_device *xe = gt_to_xe(job->q->gt);
+       struct xe_device *xe = gt_to_xe(q->gt);
        struct xe_devcoredump *coredump = &xe->devcoredump;
 
        if (coredump->captured) {
@@ -308,7 +311,7 @@ void xe_devcoredump(struct xe_sched_job *job)
        }
 
        coredump->captured = true;
-       devcoredump_snapshot(coredump, job);
+       devcoredump_snapshot(coredump, q, job);
 
        drm_info(&xe->drm, "Xe device coredump has been created\n");
        drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
drivers/gpu/drm/xe/xe_devcoredump.h
index a4eebc2..c04a534 100644
 
 struct drm_printer;
 struct xe_device;
+struct xe_exec_queue;
 struct xe_sched_job;
 
 #ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_sched_job *job);
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job);
 int xe_devcoredump_init(struct xe_device *xe);
 #else
-static inline void xe_devcoredump(struct xe_sched_job *job)
+static inline void xe_devcoredump(struct xe_exec_queue *q,
+                                 struct xe_sched_job *job)
 {
 }
 
drivers/gpu/drm/xe/xe_guc_submit.c
index 663ad4d..08a6578 100644
@@ -1154,7 +1154,7 @@ trigger_reset:
        trace_xe_sched_job_timedout(job);
 
        if (!exec_queue_killed(q))
-               xe_devcoredump(job);
+               xe_devcoredump(q, job);
 
        /*
         * Kernel jobs should never fail, nor should VM jobs if they do