drm/xe/vf: React to MIGRATED interrupt

author Tomasz Lis <tomasz.lis@intel.com>

Mon, 4 Nov 2024 21:34:45 +0000 (22:34 +0100)

committer Michal Wajdeczko <michal.wajdeczko@intel.com>

Wed, 6 Nov 2024 13:53:35 +0000 (14:53 +0100)
author Tomasz Lis <tomasz.lis@intel.com>
Mon, 4 Nov 2024 21:34:45 +0000 (22:34 +0100)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Wed, 6 Nov 2024 13:53:35 +0000 (14:53 +0100)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile

index 8f81e1c..95aabbb 100644 (file)
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -125,7 +125,8 @@ xe-y += \
         xe_gt_sriov_vf.o \
         xe_guc_relay.o \
         xe_memirq.o \
-       xe_sriov.o
+       xe_sriov.o \
+       xe_sriov_vf.o
  
  xe-$(CONFIG_PCI_IOV) += \
         xe_gt_sriov_pf.o \
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h

index cb19323..bccca63 100644 (file)
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -374,6 +374,8 @@ struct xe_device {
  
                 /** @sriov.pf: PF specific data */
                 struct xe_device_pf pf;
+               /** @sriov.vf: VF specific data */
+               struct xe_device_vf vf;
  
                 /** @sriov.wq: workqueue used by the virtualization workers */
                 struct workqueue_struct *wq;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c

index d3baba5..0e330eb 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -27,6 +27,7 @@
  #include "xe_guc_relay.h"
  #include "xe_mmio.h"
  #include "xe_sriov.h"
+#include "xe_sriov_vf.h"
  #include "xe_uc_fw.h"
  #include "xe_wopcm.h"
  
@@ -692,6 +693,30 @@ failed:
         return err;
  }
  
+/**
+ * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery,
+ *   or just mark that a GuC is ready for it.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * Sets the bit for @gt (indexed by its GT id) in the device-wide
+ * sriov.vf.migration.gt_flags mask and then calls
+ * xe_sriov_vf_start_migration_recovery(), which queues the recovery worker
+ * only when every GT of the device has its bit set.
+ *
+ * This function shall be called only by VF.
+ */
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
+{
+       struct xe_device *xe = gt_to_xe(gt);
+
+       xe_gt_assert(gt, IS_SRIOV_VF(xe));
+
+       set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
+       /*
+        * We need to be certain that if all flags were set, at least one
+        * thread will notice that and schedule the recovery.
+        *
+        * The barrier orders the set_bit() above before the flag reads
+        * performed inside xe_sriov_vf_start_migration_recovery() below.
+        */
+       smp_mb__after_atomic();
+
+       xe_gt_sriov_info(gt, "ready for recovery after migration\n");
+       xe_sriov_vf_start_migration_recovery(xe);
+}
+
  static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
  {
         xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h

index e541ce5..9959a29 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -17,6 +17,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt);
  int xe_gt_sriov_vf_connect(struct xe_gt *gt);
  int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt);
  int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt);
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt);
  
  u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
  u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c

index 7f70434..7224593 100644 (file)
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -1099,10 +1099,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
         return guc_self_cfg(guc, key, 2, val);
  }
  
+/*
+ * Handle the GuC SW_INT_0 interrupt (dispatched from xe_guc_irq_handler()).
+ * When the device runs as an SR-IOV VF the event is forwarded to
+ * xe_gt_sriov_vf_migrated_event_handler(); otherwise it is ignored.
+ */
+static void xe_guc_sw_0_irq_handler(struct xe_guc *guc)
+{
+       struct xe_gt *gt = guc_to_gt(guc);
+
+       if (IS_SRIOV_VF(gt_to_xe(gt)))
+               xe_gt_sriov_vf_migrated_event_handler(gt);
+}
+
  void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
  {
         if (iir & GUC_INTR_GUC2HOST)
                 xe_guc_ct_irq_handler(&guc->ct);
+
+       /* Checked independently of GUC2HOST: both bits may be set in one @iir. */
+       if (iir & GUC_INTR_SW_INT_0)
+               xe_guc_sw_0_irq_handler(guc);
  }
  
  void xe_guc_sanitize(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c

index f833da8..51dc909 100644 (file)
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -442,6 +442,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
  
         if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
                 xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+
+       if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name))
+               xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0);
  }
  
  /**
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c

index ef10782..04e2f53 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -14,6 +14,7 @@
  #include "xe_mmio.h"
  #include "xe_sriov.h"
  #include "xe_sriov_pf.h"
+#include "xe_sriov_vf.h"
  
  /**
   * xe_sriov_mode_to_string - Convert enum value to string.
@@ -114,6 +115,9 @@ int xe_sriov_init(struct xe_device *xe)
                         return err;
         }
  
+       if (IS_SRIOV_VF(xe))
+               xe_sriov_vf_init_early(xe);
+
         xe_assert(xe, !xe->sriov.wq);
         xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
         if (!xe->sriov.wq)
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h

index c7b7ad4..ca94382 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -9,6 +9,7 @@
  #include <linux/build_bug.h>
  #include <linux/mutex.h>
  #include <linux/types.h>
+#include <linux/workqueue_types.h>
  
  /**
   * VFID - Virtual Function Identifier
@@ -56,4 +57,20 @@ struct xe_device_pf {
         struct mutex master_lock;
  };
  
+/**
+ * struct xe_device_vf - Xe Virtual Function related data
+ *
+ * The data in this structure is valid only if driver is running in the
+ * @XE_SRIOV_MODE_VF mode.
+ */
+struct xe_device_vf {
+       /** @migration: VF Migration state data */
+       struct {
+               /**
+                * @migration.worker: VF migration recovery worker
+                *
+                * Initialized by xe_sriov_vf_init_early() and queued on the
+                * SR-IOV workqueue by xe_sriov_vf_start_migration_recovery().
+                */
+               struct work_struct worker;
+               /**
+                * @migration.gt_flags: Per-GT request flags for VF migration recovery
+                *
+                * Bitmask indexed by GT id. A bit is set by
+                * xe_gt_sriov_vf_migrated_event_handler() for its GT; the
+                * whole mask is cleared by
+                * xe_sriov_vf_start_migration_recovery() once every GT has
+                * its bit set.
+                */
+               unsigned long gt_flags;
+       } migration;
+};
+
  #endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c

new file mode 100644 (file)

index 0000000..2fe4929
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
+
+static void migration_worker_func(struct work_struct *w);
+
+/**
+ * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
+ * @xe: the &xe_device to initialize
+ *
+ * Currently this only initializes the migration recovery work item with
+ * migration_worker_func() as its callback; nothing is queued here.
+ * xe_sriov_init() calls this for VF devices before it allocates the SR-IOV
+ * workqueue.
+ */
+void xe_sriov_vf_init_early(struct xe_device *xe)
+{
+       INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+}
+
+/*
+ * Run the post-migration recovery for @xe. At this point it is a
+ * placeholder: it only logs that recovery started and that it ended.
+ */
+static void vf_post_migration_recovery(struct xe_device *xe)
+{
+       drm_dbg(&xe->drm, "migration recovery in progress\n");
+       /* FIXME: add the recovery steps */
+       drm_notice(&xe->drm, "migration recovery ended\n");
+}
+
+/*
+ * Work item callback for sriov.vf.migration.worker: recovers the owning
+ * xe_device from the embedded work_struct and runs the recovery on it.
+ */
+static void migration_worker_func(struct work_struct *w)
+{
+       struct xe_device *xe = container_of(w, struct xe_device,
+                                           sriov.vf.migration.worker);
+
+       vf_post_migration_recovery(xe);
+}
+
+/*
+ * Return true only if every GT of @xe has its bit set in
+ * sriov.vf.migration.gt_flags. Stops at the first GT whose bit is clear,
+ * emits a verbose debug message for it and returns false.
+ */
+static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+{
+       struct xe_gt *gt;
+       unsigned int id;
+
+       for_each_gt(gt, xe, id) {
+               if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
+                       xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
+                       return false;
+               }
+       }
+       return true;
+}
+
+/**
+ * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
+ * @xe: the &xe_device to start recovery on
+ *
+ * Returns without doing anything unless every GT has its bit set in
+ * sriov.vf.migration.gt_flags. Otherwise clears all the flags and queues
+ * the migration worker on the SR-IOV workqueue. If queue_work() reports
+ * that the work item was not newly queued, "already in progress" is logged
+ * instead of "scheduled".
+ *
+ * NOTE(review): the readiness check and the clearing of the flags are two
+ * separate steps; it looks like a bit set by another thread in between
+ * would be cleared as well - confirm this is acceptable.
+ *
+ * This function shall be called only by VF.
+ */
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
+{
+       bool started;
+
+       xe_assert(xe, IS_SRIOV_VF(xe));
+
+       if (!vf_ready_to_recovery_on_all_gts(xe))
+               return;
+
+       WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
+       /* Ensure other threads see that no flags are set now. */
+       smp_mb();
+
+       started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
+       drm_info(&xe->drm, "VF migration recovery %s\n", started ?
+                "scheduled" : "already in progress");
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h

new file mode 100644 (file)

index 0000000..7b8622c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_H_
+#define _XE_SRIOV_VF_H_
+
+struct xe_device;
+
+void xe_sriov_vf_init_early(struct xe_device *xe);
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+
+#endif
author	Tomasz Lis <tomasz.lis@intel.com>
	Mon, 4 Nov 2024 21:34:45 +0000 (22:34 +0100)
committer	Michal Wajdeczko <michal.wajdeczko@intel.com>
	Wed, 6 Nov 2024 13:53:35 +0000 (14:53 +0100)
drivers/gpu/drm/xe/Makefile		patch \| blob \| history
drivers/gpu/drm/xe/xe_device_types.h		patch \| blob \| history
drivers/gpu/drm/xe/xe_gt_sriov_vf.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_gt_sriov_vf.h		patch \| blob \| history
drivers/gpu/drm/xe/xe_guc.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_memirq.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_sriov.c		patch \| blob \| history
drivers/gpu/drm/xe/xe_sriov_types.h		patch \| blob \| history
drivers/gpu/drm/xe/xe_sriov_vf.c	[new file with mode: 0644]	patch \| blob
drivers/gpu/drm/xe/xe_sriov_vf.h	[new file with mode: 0644]	patch \| blob