Merge branches 'pm-cpuidle' and 'pm-cpufreq'
[linux-2.6-microblaze.git] / drivers / cpuidle / governors / teo.c
index 987fc5f..7244f71 100644 (file)
 #include <linux/sched/topology.h>
 #include <linux/tick.h>
 
+#include "gov.h"
+
 /*
  * The number of bits to shift the CPU's capacity by in order to determine
  * the utilized threshold.
  */
 #define UTIL_THRESHOLD_SHIFT 6
 
-
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  * is used for decreasing metrics on a regular basis.
@@ -186,8 +187,8 @@ struct teo_bin {
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @next_recent_idx: Index of the next @recent_idx entry to update.
  * @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @tick_hits: Number of "hits" after TICK_NSEC.
  * @util_threshold: Threshold above which the CPU is considered utilized
- * @utilized: Whether the last sleep on the CPU happened while utilized
  */
 struct teo_cpu {
        s64 time_span_ns;
@@ -196,8 +197,8 @@ struct teo_cpu {
        unsigned int total;
        int next_recent_idx;
        int recent_idx[NR_RECENT];
+       unsigned int tick_hits;
        unsigned long util_threshold;
-       bool utilized;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -228,6 +229,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
        struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
        int i, idx_timer = 0, idx_duration = 0;
+       s64 target_residency_ns;
        u64 measured_ns;
 
        if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -268,7 +270,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * fall into.
         */
        for (i = 0; i < drv->state_count; i++) {
-               s64 target_residency_ns = drv->states[i].target_residency_ns;
                struct teo_bin *bin = &cpu_data->state_bins[i];
 
                bin->hits -= bin->hits >> DECAY_SHIFT;
@@ -276,6 +277,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
                cpu_data->total += bin->hits + bin->intercepts;
 
+               target_residency_ns = drv->states[i].target_residency_ns;
+
                if (target_residency_ns <= cpu_data->sleep_length_ns) {
                        idx_timer = i;
                        if (target_residency_ns <= measured_ns)
@@ -290,6 +293,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        if (cpu_data->recent_idx[i] >= 0)
                cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
 
+       /*
+        * If the deepest state's target residency is below the tick length,
+        * make a record of it to help teo_select() decide whether or not
+        * to stop the tick.  This effectively adds an extra hits-only bin
+        * beyond the last state-related one.
+        */
+       if (target_residency_ns < TICK_NSEC) {
+               cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
+
+               cpu_data->total += cpu_data->tick_hits;
+
+               if (TICK_NSEC <= cpu_data->sleep_length_ns) {
+                       idx_timer = drv->state_count;
+                       if (TICK_NSEC <= measured_ns) {
+                               cpu_data->tick_hits += PULSE;
+                               goto end;
+                       }
+               }
+       }
+
        /*
         * If the measured idle duration falls into the same bin as the sleep
         * length, this is a "hit", so update the "hits" metric for that bin.
@@ -305,18 +328,14 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                cpu_data->recent_idx[i] = idx_duration;
        }
 
+end:
        cpu_data->total += PULSE;
 }
 
-static bool teo_time_ok(u64 interval_ns)
+static bool teo_state_ok(int i, struct cpuidle_driver *drv)
 {
-       return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
-}
-
-static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
-{
-       return (drv->states[idx].target_residency_ns +
-               drv->states[idx+1].target_residency_ns) / 2;
+       return !tick_nohz_tick_stopped() ||
+               drv->states[i].target_residency_ns >= TICK_NSEC;
 }
 
 /**
@@ -356,6 +375,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
        struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
        s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+       ktime_t delta_tick = TICK_NSEC / 2;
+       unsigned int tick_intercept_sum = 0;
        unsigned int idx_intercept_sum = 0;
        unsigned int intercept_sum = 0;
        unsigned int idx_recent_sum = 0;
@@ -365,7 +386,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
        int constraint_idx = 0;
        int idx0 = 0, idx = -1;
        bool alt_intercepts, alt_recent;
-       ktime_t delta_tick;
+       bool cpu_utilized;
        s64 duration_ns;
        int i;
 
@@ -375,44 +396,48 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
        }
 
        cpu_data->time_span_ns = local_clock();
-
-       duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-       cpu_data->sleep_length_ns = duration_ns;
+       /*
+        * Set the expected sleep length to infinity in case of an early
+        * return.
+        */
+       cpu_data->sleep_length_ns = KTIME_MAX;
 
        /* Check if there is any choice in the first place. */
        if (drv->state_count < 2) {
                idx = 0;
-               goto end;
+               goto out_tick;
        }
-       if (!dev->states_usage[0].disable) {
+
+       if (!dev->states_usage[0].disable)
                idx = 0;
-               if (drv->states[1].target_residency_ns > duration_ns)
-                       goto end;
-       }
 
-       cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+       cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
        /*
         * If the CPU is being utilized over the threshold and there are only 2
         * states to choose from, the metrics need not be considered, so choose
         * the shallowest non-polling state and exit.
         */
-       if (drv->state_count < 3 && cpu_data->utilized) {
-               for (i = 0; i < drv->state_count; ++i) {
-                       if (!dev->states_usage[i].disable &&
-                           !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
-                               idx = i;
-                               goto end;
-                       }
+       if (drv->state_count < 3 && cpu_utilized) {
+               /*
+                * If state 0 is enabled and it is not a polling one, select it
+                * right away unless the scheduler tick has been stopped, in
+                * which case care needs to be taken to leave the CPU in a deep
+                * enough state in case it is not woken up any time soon after
+                * all.  If state 1 is disabled, though, state 0 must be used
+                * anyway.
+                */
+               if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+                   teo_state_ok(0, drv)) || dev->states_usage[1].disable) {
+                       idx = 0;
+                       goto out_tick;
                }
+               /* Assume that state 1 is not a polling one and use it. */
+               idx = 1;
+               duration_ns = drv->states[1].target_residency_ns;
+               goto end;
        }
 
-       /*
-        * Find the deepest idle state whose target residency does not exceed
-        * the current sleep length and the deepest idle state not deeper than
-        * the former whose exit latency does not exceed the current latency
-        * constraint.  Compute the sums of metrics for early wakeup pattern
-        * detection.
-        */
+       /* Compute the sums of metrics for early wakeup pattern detection. */
        for (i = 1; i < drv->state_count; i++) {
                struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
                struct cpuidle_state *s = &drv->states[i];
@@ -428,19 +453,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                if (dev->states_usage[i].disable)
                        continue;
 
-               if (idx < 0) {
-                       idx = i; /* first enabled state */
-                       idx0 = i;
-               }
-
-               if (s->target_residency_ns > duration_ns)
-                       break;
+               if (idx < 0)
+                       idx0 = i; /* first enabled state */
 
                idx = i;
 
                if (s->exit_latency_ns <= latency_req)
                        constraint_idx = i;
 
+               /* Save the sums for the current state. */
                idx_intercept_sum = intercept_sum;
                idx_hit_sum = hit_sum;
                idx_recent_sum = recent_sum;
@@ -449,11 +470,21 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
        /* Avoid unnecessary overhead. */
        if (idx < 0) {
                idx = 0; /* No states enabled, must use 0. */
-               goto end;
-       } else if (idx == idx0) {
+               goto out_tick;
+       }
+
+       if (idx == idx0) {
+               /*
+                * Only one idle state is enabled, so use it, but do not
+                * allow the tick to be stopped if it is shallow enough.
+                */
+               duration_ns = drv->states[idx].target_residency_ns;
                goto end;
        }
 
+       tick_intercept_sum = intercept_sum +
+                       cpu_data->state_bins[drv->state_count-1].intercepts;
+
        /*
         * If the sum of the intercepts metric for all of the idle states
         * shallower than the current candidate one (idx) is greater than the
@@ -461,13 +492,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
         * all of the deeper states, or the sum of the numbers of recent
         * intercepts over all of the states shallower than the candidate one
         * is greater than a half of the number of recent events taken into
-        * account, the CPU is likely to wake up early, so find an alternative
-        * idle state to select.
+        * account, a shallower idle state is likely to be a better choice.
         */
        alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
        alt_recent = idx_recent_sum > NR_RECENT / 2;
        if (alt_recent || alt_intercepts) {
-               s64 first_suitable_span_ns = duration_ns;
                int first_suitable_idx = idx;
 
                /*
@@ -476,44 +505,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                 * cases (both with respect to intercepts overall and with
                 * respect to the recent intercepts only) in the past.
                 *
-                * Take the possible latency constraint and duration limitation
-                * present if the tick has been stopped already into account.
+                * Take the possible duration limitation present if the tick
+                * has been stopped already into account.
                 */
                intercept_sum = 0;
                recent_sum = 0;
 
                for (i = idx - 1; i >= 0; i--) {
                        struct teo_bin *bin = &cpu_data->state_bins[i];
-                       s64 span_ns;
 
                        intercept_sum += bin->intercepts;
                        recent_sum += bin->recent;
 
-                       span_ns = teo_middle_of_bin(i, drv);
-
                        if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
                            (!alt_intercepts ||
                             2 * intercept_sum > idx_intercept_sum)) {
-                               if (teo_time_ok(span_ns) &&
-                                   !dev->states_usage[i].disable) {
+                               /*
+                                * Use the current state unless it is too
+                                * shallow or disabled, in which case take the
+                                * first enabled state that is deep enough.
+                                */
+                               if (teo_state_ok(i, drv) &&
+                                   !dev->states_usage[i].disable)
                                        idx = i;
-                                       duration_ns = span_ns;
-                               } else {
-                                       /*
-                                        * The current state is too shallow or
-                                        * disabled, so take the first enabled
-                                        * deeper state with suitable time span.
-                                        */
+                               else
                                        idx = first_suitable_idx;
-                                       duration_ns = first_suitable_span_ns;
-                               }
+
                                break;
                        }
 
                        if (dev->states_usage[i].disable)
                                continue;
 
-                       if (!teo_time_ok(span_ns)) {
+                       if (!teo_state_ok(i, drv)) {
                                /*
                                 * The current state is too shallow, but if an
                                 * alternative candidate state has been found,
@@ -525,7 +549,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                                break;
                        }
 
-                       first_suitable_span_ns = span_ns;
                        first_suitable_idx = i;
                }
        }
@@ -539,31 +562,75 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
        /*
         * If the CPU is being utilized over the threshold, choose a shallower
-        * non-polling state to improve latency
+        * non-polling state to improve latency, unless the scheduler tick has
+        * been stopped already and the shallower state's target residency is
+        * not sufficiently large.
         */
-       if (cpu_data->utilized)
-               idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+       if (cpu_utilized) {
+               i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true);
+               if (teo_state_ok(i, drv))
+                       idx = i;
+       }
 
-end:
        /*
-        * Don't stop the tick if the selected state is a polling one or if the
-        * expected idle duration is shorter than the tick period length.
+        * Skip the timers check if state 0 is the current candidate one,
+        * because an immediate non-timer wakeup is expected in that case.
         */
-       if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-           duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
-               *stop_tick = false;
+       if (!idx)
+               goto out_tick;
 
-               /*
-                * The tick is not going to be stopped, so if the target
-                * residency of the state to be returned is not within the time
-                * till the closest timer including the tick, try to correct
-                * that.
-                */
-               if (idx > idx0 &&
-                   drv->states[idx].target_residency_ns > delta_tick)
-                       idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+       /*
+        * If state 0 is a polling one, check if the target residency of
+        * the current candidate state is low enough and skip the timers
+        * check in that case too.
+        */
+       if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+           drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+               goto out_tick;
+
+       duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+       cpu_data->sleep_length_ns = duration_ns;
+
+       /*
+        * If the closest expected timer is before the target residency of the
+        * candidate state, a shallower one needs to be found.
+        */
+       if (drv->states[idx].target_residency_ns > duration_ns) {
+               i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
+               if (teo_state_ok(i, drv))
+                       idx = i;
        }
 
+       /*
+        * If the selected state's target residency is below the tick length
+        * and intercepts occurring before the tick length are the majority of
+        * total wakeup events, do not stop the tick.
+        */
+       if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+           tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+               duration_ns = TICK_NSEC / 2;
+
+end:
+       /*
+        * Allow the tick to be stopped unless the selected state is a polling
+        * one or the expected idle duration is shorter than the tick period
+        * length.
+        */
+       if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+           duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped())
+               return idx;
+
+       /*
+        * The tick is not going to be stopped, so if the target residency of
+        * the state to be returned is not within the time till the closest
+        * timer including the tick, try to correct that.
+        */
+       if (idx > idx0 &&
+           drv->states[idx].target_residency_ns > delta_tick)
+               idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+
+out_tick:
+       *stop_tick = false;
        return idx;
 }