sched/pelt: Add support to track thermal pressure
author Thara Gopinath <thara.gopinath@linaro.org>
Sat, 22 Feb 2020 00:52:05 +0000 (19:52 -0500)
committer Ingo Molnar <mingo@kernel.org>
Fri, 6 Mar 2020 11:57:17 +0000 (12:57 +0100)
Extrapolating from the existing framework to track rt/dl utilization using
PELT signals, add a similar mechanism to track thermal pressure. The
difference here from rt/dl utilization tracking is that, instead of
tracking time spent by a CPU running a RT/DL task through util_avg, the
average thermal pressure is tracked through load_avg. This is because the
thermal pressure signal is time-weighted "delta" capacity, unlike util_avg
which is binary. "Delta capacity" here means the delta between the actual
capacity of a CPU and the decreased capacity of a CPU due to a thermal event.

In order to track average thermal pressure, a new sched_avg variable,
avg_thermal, is introduced. The function update_thermal_load_avg() can be
called to do the periodic bookkeeping (accumulate, decay and average) of the
thermal pressure.

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20200222005213.3873-2-thara.gopinath@linaro.org
include/trace/events/sched.h
init/Kconfig
kernel/sched/pelt.c
kernel/sched/pelt.h
kernel/sched/sched.h

index 9c3ebb7..ed168b0 100644 (file)
@@ -618,6 +618,10 @@ DECLARE_TRACE(pelt_dl_tp,
        TP_PROTO(struct rq *rq),
        TP_ARGS(rq));
 
+DECLARE_TRACE(pelt_thermal_tp, /* fired via trace_pelt_thermal_tp(rq) */
+       TP_PROTO(struct rq *rq),
+       TP_ARGS(rq));
+
 DECLARE_TRACE(pelt_irq_tp,
        TP_PROTO(struct rq *rq),
        TP_ARGS(rq));
index 20a6ac3..275c848 100644 (file)
@@ -451,6 +451,10 @@ config HAVE_SCHED_AVG_IRQ
        depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
        depends on SMP
 
+config SCHED_THERMAL_PRESSURE
+       bool "Enable periodic averaging of thermal pressure"
+       depends on SMP
+
 config BSD_PROCESS_ACCT
        bool "BSD Process Accounting"
        depends on MULTIUSER
index c40d57a..b647d04 100644 (file)
@@ -368,6 +368,37 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
        return 0;
 }
 
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+/*
+ * thermal:
+ *
+ *   load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked
+ *
+ *   util_avg and runnable_load_avg are not supported and meaningless.
+ *
+ * Unlike rt/dl utilization tracking, which tracks time spent by a cpu
+ * running a rt/dl task through util_avg, the average thermal pressure is
+ * tracked through load_avg. This is because the thermal pressure signal is
+ * time-weighted "delta" capacity, unlike util_avg which is binary.
+ * "delta capacity" =  actual capacity  -
+ *                     capped capacity of a cpu due to a thermal event.
+ */
+/* Returns 1 if ___update_load_sum() reported an update, 0 otherwise. */
+int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
+{
+       if (___update_load_sum(now, &rq->avg_thermal,
+                              capacity,
+                              capacity,
+                              capacity)) {
+               ___update_load_avg(&rq->avg_thermal, 1);
+               trace_pelt_thermal_tp(rq);
+               return 1;
+       }
+
+       return 0;
+}
+#endif
+
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 /*
  * irq:
index afff644..eb034d9 100644 (file)
@@ -7,6 +7,26 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);
 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
 
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity);
+/* Return rq->avg_thermal.load_avg for @rq, loaded with READ_ONCE(). */
+static inline u64 thermal_load_avg(struct rq *rq)
+{
+       return READ_ONCE(rq->avg_thermal.load_avg);
+}
+#else /* !CONFIG_SCHED_THERMAL_PRESSURE */
+static inline int
+update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
+{
+       return 0; /* stub: no update performed */
+}
+/* Stub used when CONFIG_SCHED_THERMAL_PRESSURE is off: always returns 0. */
+static inline u64 thermal_load_avg(struct rq *rq)
+{
+       return 0;
+}
+#endif /* CONFIG_SCHED_THERMAL_PRESSURE */
+
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 int update_irq_load_avg(struct rq *rq, u64 running);
 #else
@@ -158,6 +178,17 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
        return 0;
 }
 
+static inline int
+update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
+{
+       return 0; /* stub: no update performed */
+}
+/* Stub: always 0. NOTE(review): presumably the !CONFIG_SMP branch - confirm. */
+static inline u64 thermal_load_avg(struct rq *rq)
+{
+       return 0;
+}
+
 static inline int
 update_irq_load_avg(struct rq *rq, u64 running)
 {
index 2a0caf3..6c839f8 100644 (file)
@@ -960,6 +960,9 @@ struct rq {
        struct sched_avg        avg_dl;
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
        struct sched_avg        avg_irq;
+#endif
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+       struct sched_avg        avg_thermal;
 #endif
        u64                     idle_stamp;
        u64                     avg_idle;