#include <linux/latencytop.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
+#include <linux/cpuidle.h>
#include <linux/slab.h>
#include <linux/profile.h>
#include <linux/interrupt.h>
if (!p->mm)
return;
- /* Do not worry about placement if exiting */
- if (p->state == TASK_DEAD)
- return;
-
/* Allocate buffer to track faults on a per-node basis */
if (unlikely(!p->numa_faults_memory)) {
int size = sizeof(*p->numa_faults_memory) *
vma = mm->mmap;
}
for (; vma; vma = vma->vm_next) {
- if (!vma_migratable(vma) || !vma_policy_mof(p, vma))
+ if (!vma_migratable(vma) || !vma_policy_mof(vma))
continue;
/*
/*
* As y^PERIOD = 1/2, we can combine
- * y^n = 1/2^(n/PERIOD) * k^(n%PERIOD)
- * With a look-up table which covers k^n (n<PERIOD)
+ * y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
+ * With a look-up table which covers y^n (n<PERIOD)
*
* To achieve constant time decay_load.
*/
find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
{
unsigned long load, min_load = ULONG_MAX;
- int idlest = -1;
+ unsigned int min_exit_latency = UINT_MAX;
+ u64 latest_idle_timestamp = 0;
+ int least_loaded_cpu = this_cpu;
+ int shallowest_idle_cpu = -1;
int i;
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
- load = weighted_cpuload(i);
-
- if (load < min_load || (load == min_load && i == this_cpu)) {
- min_load = load;
- idlest = i;
+ if (idle_cpu(i)) {
+ struct rq *rq = cpu_rq(i);
+ struct cpuidle_state *idle = idle_get_state(rq);
+ if (idle && idle->exit_latency < min_exit_latency) {
+ /*
+ * We give priority to a CPU whose idle state
+ * has the smallest exit latency irrespective
+ * of any idle timestamp.
+ */
+ min_exit_latency = idle->exit_latency;
+ latest_idle_timestamp = rq->idle_stamp;
+ shallowest_idle_cpu = i;
+ } else if ((!idle || idle->exit_latency == min_exit_latency) &&
+ rq->idle_stamp > latest_idle_timestamp) {
+ /*
+ * If equal or no active idle state, then
+ * the most recently idled CPU might have
+ * a warmer cache.
+ */
+ latest_idle_timestamp = rq->idle_stamp;
+ shallowest_idle_cpu = i;
+ }
+ } else {
+ load = weighted_cpuload(i);
+ if (load < min_load || (load == min_load && i == this_cpu)) {
+ min_load = load;
+ least_loaded_cpu = i;
+ }
}
}
- return idlest;
+ return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
}
/*
if (!tsk_cache_hot)
tsk_cache_hot = migrate_degrades_locality(p, env);
- if (migrate_improves_locality(p, env)) {
-#ifdef CONFIG_SCHEDSTATS
+ if (migrate_improves_locality(p, env) || !tsk_cache_hot ||
+ env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
if (tsk_cache_hot) {
schedstat_inc(env->sd, lb_hot_gained[env->idle]);
schedstat_inc(p, se.statistics.nr_forced_migrations);
}
-#endif
- return 1;
- }
-
- if (!tsk_cache_hot ||
- env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-
- if (tsk_cache_hot) {
- schedstat_inc(env->sd, lb_hot_gained[env->idle]);
- schedstat_inc(p, se.statistics.nr_forced_migrations);
- }
-
return 1;
}
goto force_balance;
/*
- * If the local group is more busy than the selected busiest group
+ * If the local group is busier than the selected busiest group
* don't try and pull any tasks.
*/
if (local->avg_load >= busiest->avg_load)
if (env->idle == CPU_IDLE) {
/*
- * This cpu is idle. If the busiest group load doesn't
- * have more tasks than the number of available cpu's and
- * there is no imbalance between this and busiest group
- * wrt to idle cpu's, it is balanced.
+ * This cpu is idle. If the busiest group is not overloaded
+ * and there is no imbalance between this and busiest group
+ * wrt idle cpus, it is balanced. The imbalance becomes
+ * significant if the diff is greater than 1; otherwise we
+ * might end up just moving the imbalance to another group.
*/
- if ((local->idle_cpus < busiest->idle_cpus) &&
- busiest->sum_nr_running <= busiest->group_weight)
+ if ((busiest->group_type != group_overloaded) &&
+ (local->idle_cpus <= (busiest->idle_cpus + 1)))
goto out_balanced;
} else {
/*
local_irq_restore(flags);
- /*
- * some other cpu did the load balance for us.
- */
- if (cur_ld_moved && env.dst_cpu != smp_processor_id())
- resched_cpu(env.dst_cpu);
-
if (env.flags & LBF_NEED_BREAK) {
env.flags &= ~LBF_NEED_BREAK;
goto more_balance;