drm/xe: Add HW Engine snapshot to xe_devcoredump.
author Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 16 May 2023 14:54:15 +0000 (10:54 -0400)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 19 Dec 2023 23:33:53 +0000 (18:33 -0500)
Let's continue to add our existing simple logs to devcoredump one
by one. Any format change should come in follow-up work.

v2: remove unnecessary, and now duplicated, dma_fence annotation. (Matthew)
v3: avoid for_each with faulty_engine since that can already be freed at
    the time of the read/free. Instead, iterate over the full array of
    hw_engines. (Kasan)

Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
drivers/gpu/drm/xe/xe_devcoredump.c
drivers/gpu/drm/xe/xe_devcoredump_types.h

index 7296c01..f53f4b5 100644 (file)
@@ -9,10 +9,13 @@
 #include <linux/devcoredump.h>
 #include <generated/utsrelease.h>
 
+#include "xe_device.h"
 #include "xe_engine.h"
+#include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_submit.h"
+#include "xe_hw_engine.h"
 
 /**
  * DOC: Xe device coredump
@@ -63,6 +66,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
        struct drm_printer p;
        struct drm_print_iterator iter;
        struct timespec64 ts;
+       int i;
 
        /* Our device is gone already... */
        if (!data || !coredump_to_xe(coredump))
@@ -89,12 +93,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
        xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
        xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p);
 
+       drm_printf(&p, "\n**** HW Engines ****\n");
+       for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+               if (coredump->snapshot.hwe[i])
+                       xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
+                                                   &p);
+
        return count - iter.remain;
 }
 
 static void xe_devcoredump_free(void *data)
 {
        struct xe_devcoredump *coredump = data;
+       int i;
 
        /* Our device is gone. Nothing to do... */
        if (!data || !coredump_to_xe(coredump))
@@ -102,6 +113,9 @@ static void xe_devcoredump_free(void *data)
 
        xe_guc_ct_snapshot_free(coredump->snapshot.ct);
        xe_guc_engine_snapshot_free(coredump->snapshot.ge);
+       for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+               if (coredump->snapshot.hwe[i])
+                       xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
 
        coredump->captured = false;
        drm_info(&coredump_to_xe(coredump)->drm,
@@ -113,14 +127,41 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 {
        struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
        struct xe_guc *guc = engine_to_guc(e);
+       struct xe_hw_engine *hwe;
+       enum xe_hw_engine_id id;
+       u32 adj_logical_mask = e->logical_mask;
+       u32 width_mask = (0x1 << e->width) - 1;
+       int i;
        bool cookie;
 
        ss->snapshot_time = ktime_get_real();
        ss->boot_time = ktime_get_boottime();
 
        cookie = dma_fence_begin_signalling();
+       for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+               if (adj_logical_mask & BIT(i)) {
+                       adj_logical_mask |= width_mask << i;
+                       i += e->width;
+               } else {
+                       ++i;
+               }
+       }
+
+       xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
+
        coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
        coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e);
+
+       for_each_hw_engine(hwe, e->gt, id) {
+               if (hwe->class != e->hwe->class ||
+                   !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+                       coredump->snapshot.hwe[id] = NULL;
+                       continue;
+               }
+               coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
+       }
+
+       xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
        dma_fence_end_signalling(cookie);
 }
 
index 7c64532..350b905 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/ktime.h>
 #include <linux/mutex.h>
 
+#include "xe_hw_engine_types.h"
+
 struct xe_device;
 
 /**
@@ -29,6 +31,8 @@ struct xe_devcoredump_snapshot {
        struct xe_guc_ct_snapshot *ct;
        /** @ge: Guc Engine snapshot */
        struct xe_guc_submit_engine_snapshot *ge;
+       /** @hwe: HW Engine snapshot array */
+       struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
 };
 
 /**