net: loopback: clear skb->tstamp before netif_rx()

[linux-2.6-microblaze.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index f808ddf..7fc4a37 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1392,6 +1392,17 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
         int last_cpupid, this_cpupid;
  
         this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
+       last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+
+       /*
+        * Allow first faults or private faults to migrate immediately early in
+        * the lifetime of a task. The magic number 4 is based on waiting for
+        * two full passes of the "multi-stage node selection" test that is
+        * executed below.
+        */
+       if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&
+           (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
+               return true;
  
         /*
          * Multi-stage node selection is used in conjunction with a periodic
@@ -1410,7 +1421,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
          * This quadric squishes small probabilities, making it less likely we
          * act on an unlikely task<->page relation.
          */
-       last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
         if (!cpupid_pid_unset(last_cpupid) &&
                                 cpupid_to_nid(last_cpupid) != dst_nid)
                 return false;
@@ -1514,6 +1524,21 @@ struct task_numa_env {
  static void task_numa_assign(struct task_numa_env *env,
                              struct task_struct *p, long imp)
  {
+       struct rq *rq = cpu_rq(env->dst_cpu);
+
+       /* Bail out if run-queue part of active NUMA balance. */
+       if (xchg(&rq->numa_migrate_on, 1))
+               return;
+
+       /*
+        * Clear previous best_cpu/rq numa-migrate flag, since task now
+        * found a better CPU to move/swap.
+        */
+       if (env->best_cpu != -1) {
+               rq = cpu_rq(env->best_cpu);
+               WRITE_ONCE(rq->numa_migrate_on, 0);
+       }
+
         if (env->best_task)
                 put_task_struct(env->best_task);
         if (p)
@@ -1552,6 +1577,13 @@ static bool load_too_imbalanced(long src_load, long dst_load,
         return (imb > old_imb);
  }
  
+/*
+ * Maximum NUMA importance can be 1998 (2*999);
+ * SMALLIMP @ 30 would be close to 1998/64.
+ * Used to deter task migration: task_numa_compare() compares a candidate's
+ * importance against SMALLIMP, and against the current best importance
+ * plus SMALLIMP / 2, and skips the candidate when it falls short.
+ */
+#define SMALLIMP       30
+
  /*
   * This checks if the overall compute and NUMA accesses of the system would
   * be improved if the source tasks was migrated to the target dst_cpu taking
@@ -1569,6 +1601,9 @@ static void task_numa_compare(struct task_numa_env *env,
         long moveimp = imp;
         int dist = env->dist;
  
+       if (READ_ONCE(dst_rq->numa_migrate_on))
+               return;
+
         rcu_read_lock();
         cur = task_rcu_dereference(&dst_rq->curr);
         if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
@@ -1582,7 +1617,7 @@ static void task_numa_compare(struct task_numa_env *env,
                 goto unlock;
  
         if (!cur) {
-               if (maymove || imp > env->best_imp)
+               if (maymove && moveimp >= env->best_imp)
                         goto assign;
                 else
                         goto unlock;
@@ -1625,15 +1660,21 @@ static void task_numa_compare(struct task_numa_env *env,
                                task_weight(cur, env->dst_nid, dist);
         }
  
-       if (imp <= env->best_imp)
-               goto unlock;
-
         if (maymove && moveimp > imp && moveimp > env->best_imp) {
-               imp = moveimp - 1;
+               imp = moveimp;
                 cur = NULL;
                 goto assign;
         }
  
+       /*
+        * If the NUMA importance is less than SMALLIMP,
+        * task migration might only result in ping pong
+        * of tasks and also hurt performance due to cache
+        * misses.
+        */
+       if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2)
+               goto unlock;
+
         /*
          * In the overloaded case, try and keep the load balanced.
          */
@@ -1710,6 +1751,7 @@ static int task_numa_migrate(struct task_struct *p)
                 .best_cpu = -1,
         };
         struct sched_domain *sd;
+       struct rq *best_rq;
         unsigned long taskweight, groupweight;
         int nid, ret, dist;
         long taskimp, groupimp;
@@ -1805,20 +1847,17 @@ static int task_numa_migrate(struct task_struct *p)
         if (env.best_cpu == -1)
                 return -EAGAIN;
  
-       /*
-        * Reset the scan period if the task is being rescheduled on an
-        * alternative node to recheck if the tasks is now properly placed.
-        */
-       p->numa_scan_period = task_scan_start(p);
-
+       best_rq = cpu_rq(env.best_cpu);
         if (env.best_task == NULL) {
                 ret = migrate_task_to(p, env.best_cpu);
+               WRITE_ONCE(best_rq->numa_migrate_on, 0);
                 if (ret != 0)
                         trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
                 return ret;
         }
  
         ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
+       WRITE_ONCE(best_rq->numa_migrate_on, 0);
  
         if (ret != 0)
                 trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
@@ -2596,6 +2635,39 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
         }
  }
  
+/*
+ * update_scan_period - possibly restart a task's NUMA scan period when the
+ * task is being moved to @new_cpu.
+ * @p:       task being migrated; task_cpu(p) is taken as the source CPU
+ * @new_cpu: destination CPU
+ *
+ * Called from migrate_task_rq_fair(). Does nothing when the
+ * sched_numa_balancing static branch is off, when @p has no mm or no
+ * numa_faults data or is exiting, or when the source and destination CPUs
+ * map to the same node. Otherwise p->numa_scan_period is set to
+ * task_scan_start(p), unless p->numa_scan_seq is non-zero and one of the
+ * preferred-node conditions below applies.
+ */
+static void update_scan_period(struct task_struct *p, int new_cpu)
+{
+       int src_nid = cpu_to_node(task_cpu(p));
+       int dst_nid = cpu_to_node(new_cpu);
+
+       if (!static_branch_likely(&sched_numa_balancing))
+               return;
+
+       if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))
+               return;
+
+       if (src_nid == dst_nid)
+               return;
+
+       /*
+        * Allow resets if faults have been trapped before one scan
+        * has completed. This is most likely due to a new task that
+        * is pulled cross-node due to wakeups or load balancing.
+        */
+       if (p->numa_scan_seq) {
+               /*
+                * Avoid scan adjustments if moving to the preferred
+                * node or if the task was not previously running on
+                * the preferred node.
+                */
+               if (dst_nid == p->numa_preferred_nid ||
+                   (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))
+                       return;
+       }
+
+       p->numa_scan_period = task_scan_start(p);
+}
+
  #else
  static void task_tick_numa(struct rq *rq, struct task_struct *curr)
  {
@@ -2609,6 +2681,10 @@ static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
  {
  }
  
+/*
+ * Empty stand-in for update_scan_period() in the #else branch that is
+ * closed by "#endif CONFIG_NUMA_BALANCING" below; it does nothing.
+ */
+static inline void update_scan_period(struct task_struct *p, int new_cpu)
+{
+}
+
  #endif /* CONFIG_NUMA_BALANCING */
  
  static void
@@ -6275,7 +6351,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
   * cfs_rq_of(p) references at time of call are still valid and identify the
   * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
   */
-static void migrate_task_rq_fair(struct task_struct *p)
+static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
  {
         /*
          * As blocked tasks retain absolute vruntime the migration needs to
@@ -6328,6 +6404,8 @@ static void migrate_task_rq_fair(struct task_struct *p)
  
         /* We have migrated, no longer consider this task hot */
         p->se.exec_start = 0;
+
+       update_scan_period(p, new_cpu);
  }
  
  static void task_dead_fair(struct task_struct *p)