Backmerge tag 'v5.17-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds...

[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / gt / uc / intel_guc_submission.c
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 0cb51c5..04b8321 100644 (file)
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1115,6 +1115,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
         if (new_start == lower_32_bits(*prev_start))
                 return;
  
+       /*
+        * When gt is unparked, we update the gt timestamp and start the ping
+        * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+        * is unparked, all switched in contexts will have a start time that is
+        * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+        *
+        * If neither gt_stamp nor new_start has rolled over, then the
+        * gt_stamp_hi does not need to be adjusted, however if one of them has
+        * rolled over, we need to adjust gt_stamp_hi accordingly.
+        *
+        * The below conditions address the cases of new_start rollover and
+        * gt_stamp_last rollover respectively.
+        */
         if (new_start < gt_stamp_last &&
             (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
                 gt_stamp_hi++;
@@ -1126,17 +1139,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
         *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
  }
  
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
+ * determine validity of these values. Instead we read the values multiple times
+ * until they are consistent. In test runs, 3 attempts results in consistent
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
+ * any new occurences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+                                     u32 *last_in, u32 *id, u32 *total)
  {
         struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+       int i = 0;
+
+       do {
+               *last_in = READ_ONCE(rec->last_switch_in_stamp);
+               *id = READ_ONCE(rec->current_context_index);
+               *total = READ_ONCE(rec->total_runtime);
+
+               if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+                   READ_ONCE(rec->current_context_index) == *id &&
+                   READ_ONCE(rec->total_runtime) == *total)
+                       break;
+       } while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
         struct intel_engine_guc_stats *stats = &engine->stats.guc;
         struct intel_guc *guc = &engine->gt->uc.guc;
-       u32 last_switch = rec->last_switch_in_stamp;
-       u32 ctx_id = rec->current_context_index;
-       u32 total = rec->total_runtime;
+       u32 last_switch, ctx_id, total;
  
         lockdep_assert_held(&guc->timestamp.lock);
  
+       __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
         stats->running = ctx_id != ~0U && last_switch;
         if (stats->running)
                 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1151,23 +1192,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
         }
  }
  
-static void guc_update_pm_timestamp(struct intel_guc *guc,
-                                   struct intel_engine_cs *engine,
-                                   ktime_t *now)
+static u32 gpm_timestamp_shift(struct intel_gt *gt)
+{
+       intel_wakeref_t wakeref;
+       u32 reg, shift;
+
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+       shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+               GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
+
+       return 3 - shift;
+}
+
+static u64 gpm_timestamp(struct intel_gt *gt)
+{
+       u32 lo, hi, old_hi, loop = 0;
+
+       hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+       do {
+               lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
+               old_hi = hi;
+               hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+       } while (old_hi != hi && loop++ < 2);
+
+       return ((u64)hi << 32) | lo;
+}
+
+static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
  {
-       u32 gt_stamp_now, gt_stamp_hi;
+       struct intel_gt *gt = guc_to_gt(guc);
+       u32 gt_stamp_lo, gt_stamp_hi;
+       u64 gpm_ts;
  
         lockdep_assert_held(&guc->timestamp.lock);
  
         gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
-       gt_stamp_now = intel_uncore_read(engine->uncore,
-                                        RING_TIMESTAMP(engine->mmio_base));
+       gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+       gt_stamp_lo = lower_32_bits(gpm_ts);
         *now = ktime_get();
  
-       if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+       if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
                 gt_stamp_hi++;
  
-       guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+       guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
  }
  
  /*
@@ -1210,8 +1279,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
         if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
                 stats_saved = *stats;
                 gt_stamp_saved = guc->timestamp.gt_stamp;
+               /*
+                * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+                * start_gt_clk' calculation below for active engines.
+                */
                 guc_update_engine_gt_clks(engine);
-               guc_update_pm_timestamp(guc, engine, now);
+               guc_update_pm_timestamp(guc, now);
                 intel_gt_pm_put_async(gt);
                 if (i915_reset_count(gpu_error) != reset_count) {
                         *stats = stats_saved;
@@ -1243,8 +1316,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
  
         spin_lock_irqsave(&guc->timestamp.lock, flags);
  
+       guc_update_pm_timestamp(guc, &unused);
         for_each_engine(engine, gt, id) {
-               guc_update_pm_timestamp(guc, engine, &unused);
                 guc_update_engine_gt_clks(engine);
                 engine->stats.guc.prev_total = 0;
         }
@@ -1261,10 +1334,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
         ktime_t unused;
  
         spin_lock_irqsave(&guc->timestamp.lock, flags);
-       for_each_engine(engine, gt, id) {
-               guc_update_pm_timestamp(guc, engine, &unused);
+
+       guc_update_pm_timestamp(guc, &unused);
+       for_each_engine(engine, gt, id)
                 guc_update_engine_gt_clks(engine);
-       }
+
         spin_unlock_irqrestore(&guc->timestamp.lock, flags);
  }
  
@@ -1337,10 +1411,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
  void intel_guc_busyness_unpark(struct intel_gt *gt)
  {
         struct intel_guc *guc = &gt->uc.guc;
+       unsigned long flags;
+       ktime_t unused;
  
         if (!guc_submission_initialized(guc))
                 return;
  
+       spin_lock_irqsave(&guc->timestamp.lock, flags);
+       guc_update_pm_timestamp(guc, &unused);
+       spin_unlock_irqrestore(&guc->timestamp.lock, flags);
         mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
                          guc->timestamp.ping_delay);
  }
@@ -1785,6 +1864,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
         spin_lock_init(&guc->timestamp.lock);
         INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
         guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+       guc->timestamp.shift = gpm_timestamp_shift(gt);
  
         return 0;
  }