Merge tag 'v4.20-rc5' into sched/core, to pick up fixes

author Ingo Molnar <mingo@kernel.org>

Mon, 3 Dec 2018 10:42:17 +0000 (11:42 +0100)

committer Ingo Molnar <mingo@kernel.org>

Mon, 3 Dec 2018 10:42:17 +0000 (11:42 +0100)
author Ingo Molnar <mingo@kernel.org>
Mon, 3 Dec 2018 10:42:17 +0000 (11:42 +0100)
committer Ingo Molnar <mingo@kernel.org>
Mon, 3 Dec 2018 10:42:17 +0000 (11:42 +0100)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 6fedf3a..8050f26 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -697,7 +697,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
         /*
          * SCHED_IDLE tasks get minimal weight:
          */
-       if (idle_policy(p->policy)) {
+       if (task_has_idle_policy(p)) {
                 load->weight = scale_load(WEIGHT_IDLEPRIO);
                 load->inv_weight = WMULT_IDLEPRIO;
                 p->se.runnable_weight = load->weight;
@@ -4191,7 +4191,7 @@ recheck:
                  * Treat SCHED_IDLE as nice 20. Only allow a switch to
                  * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
                  */
-               if (idle_policy(p->policy) && !idle_policy(policy)) {
+               if (task_has_idle_policy(p) && !idle_policy(policy)) {
                         if (!can_nice(p, task_nice(p)))
                                 return -EPERM;
                 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c

index 91e4202..470ba6b 100644 (file)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1695,6 +1695,14 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
  }
  #endif
  
+static inline void set_next_task(struct rq *rq, struct task_struct *p)
+{
+       p->se.exec_start = rq_clock_task(rq);
+
+       /* You can't push away the running task */
+       dequeue_pushable_dl_task(rq, p);
+}
+
  static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
                                                    struct dl_rq *dl_rq)
  {
@@ -1750,10 +1758,8 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
         BUG_ON(!dl_se);
  
         p = dl_task_of(dl_se);
-       p->se.exec_start = rq_clock_task(rq);
  
-       /* Running task will never be pushed. */
-       dequeue_pushable_dl_task(rq, p);
+       set_next_task(rq, p);
  
         if (hrtick_enabled(rq))
                 start_hrtick_dl(rq, p);
@@ -1808,12 +1814,7 @@ static void task_fork_dl(struct task_struct *p)
  
  static void set_curr_task_dl(struct rq *rq)
  {
-       struct task_struct *p = rq->curr;
-
-       p->se.exec_start = rq_clock_task(rq);
-
-       /* You can't push away the running task */
-       dequeue_pushable_dl_task(rq, p);
+       set_next_task(rq, rq->curr);
  }
  
  #ifdef CONFIG_SMP
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c

index 6383aa6..02bd5f9 100644 (file)
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -974,7 +974,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
  #endif
         P(policy);
         P(prio);
-       if (p->policy == SCHED_DEADLINE) {
+       if (task_has_dl_policy(p)) {
                 P(dl.runtime);
                 P(dl.deadline);
         }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index ac855b2..e30dea5 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -38,7 +38,7 @@
   * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
   */
  unsigned int sysctl_sched_latency                      = 6000000ULL;
-unsigned int normalized_sysctl_sched_latency           = 6000000ULL;
+static unsigned int normalized_sysctl_sched_latency    = 6000000ULL;
  
  /*
   * The initial- and re-scaling of tunables is configurable
@@ -58,8 +58,8 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
   *
   * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
   */
-unsigned int sysctl_sched_min_granularity              = 750000ULL;
-unsigned int normalized_sysctl_sched_min_granularity   = 750000ULL;
+unsigned int sysctl_sched_min_granularity                      = 750000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity    = 750000ULL;
  
  /*
   * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
@@ -81,8 +81,8 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
   *
   * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
   */
-unsigned int sysctl_sched_wakeup_granularity           = 1000000UL;
-unsigned int normalized_sysctl_sched_wakeup_granularity        = 1000000UL;
+unsigned int sysctl_sched_wakeup_granularity                   = 1000000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
  
  const_debug unsigned int sysctl_sched_migration_cost   = 500000UL;
  
@@ -116,7 +116,7 @@ unsigned int sysctl_sched_cfs_bandwidth_slice               = 5000UL;
   *
   * (default: ~20%)
   */
-unsigned int capacity_margin                           = 1280;
+static unsigned int capacity_margin                    = 1280;
  
  static inline void update_load_add(struct load_weight *lw, unsigned long inc)
  {
@@ -2734,6 +2734,17 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
         WRITE_ONCE(*ptr, res);                                  \
  } while (0)
  
+/*
+ * Remove and clamp on negative, from a local variable.
+ *
+ * A variant of sub_positive(), which does not use explicit load-store
+ * and is thus optimized for local variable updates.
+ */
+#define lsub_positive(_ptr, _val) do {                         \
+       typeof(_ptr) ptr = (_ptr);                              \
+       *ptr -= min_t(typeof(*ptr), *ptr, _val);                \
+} while (0)
+
  #ifdef CONFIG_SMP
  static inline void
  enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -3604,7 +3615,7 @@ static inline unsigned long _task_util_est(struct task_struct *p)
  {
         struct util_est ue = READ_ONCE(p->se.avg.util_est);
  
-       return max(ue.ewma, ue.enqueued);
+       return (max(ue.ewma, ue.enqueued) | UTIL_AVG_UNCHANGED);
  }
  
  static inline unsigned long task_util_est(struct task_struct *p)
@@ -3622,7 +3633,7 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
  
         /* Update root cfs_rq's estimated utilization */
         enqueued  = cfs_rq->avg.util_est.enqueued;
-       enqueued += (_task_util_est(p) | UTIL_AVG_UNCHANGED);
+       enqueued += _task_util_est(p);
         WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
  }
  
@@ -3650,8 +3661,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
  
         /* Update root cfs_rq's estimated utilization */
         ue.enqueued  = cfs_rq->avg.util_est.enqueued;
-       ue.enqueued -= min_t(unsigned int, ue.enqueued,
-                            (_task_util_est(p) | UTIL_AVG_UNCHANGED));
+       ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
         WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
  
         /*
@@ -4640,7 +4650,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
                 cfs_b->distribute_running = 0;
                 throttled = !list_empty(&cfs_b->throttled_cfs_rq);
  
-               cfs_b->runtime -= min(runtime, cfs_b->runtime);
+               lsub_positive(&cfs_b->runtime, runtime);
         }
  
         /*
@@ -4774,7 +4784,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
  
         raw_spin_lock(&cfs_b->lock);
         if (expires == cfs_b->runtime_expires)
-               cfs_b->runtime -= min(runtime, cfs_b->runtime);
+               lsub_positive(&cfs_b->runtime, runtime);
         cfs_b->distribute_running = 0;
         raw_spin_unlock(&cfs_b->lock);
  }
@@ -6241,7 +6251,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
         util = READ_ONCE(cfs_rq->avg.util_avg);
  
         /* Discount task's util from CPU's util */
-       util -= min_t(unsigned int, util, task_util(p));
+       lsub_positive(&util, task_util(p));
  
         /*
          * Covered cases:
@@ -6290,10 +6300,9 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
                  * properly fix the execl regression and it helps in further
                  * reducing the chances for the above race.
                  */
-               if (unlikely(task_on_rq_queued(p) || current == p)) {
-                       estimated -= min_t(unsigned int, estimated,
-                                          (_task_util_est(p) | UTIL_AVG_UNCHANGED));
-               }
+               if (unlikely(task_on_rq_queued(p) || current == p))
+                       lsub_positive(&estimated, _task_util_est(p));
+
                 util = max(util, estimated);
         }
  
@@ -6520,7 +6529,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
  
  static void set_last_buddy(struct sched_entity *se)
  {
-       if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+       if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
                 return;
  
         for_each_sched_entity(se) {
@@ -6532,7 +6541,7 @@ static void set_last_buddy(struct sched_entity *se)
  
  static void set_next_buddy(struct sched_entity *se)
  {
-       if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+       if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
                 return;
  
         for_each_sched_entity(se) {
@@ -6590,8 +6599,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
                 return;
  
         /* Idle tasks are by definition preempted by non-idle tasks. */
-       if (unlikely(curr->policy == SCHED_IDLE) &&
-           likely(p->policy != SCHED_IDLE))
+       if (unlikely(task_has_idle_policy(curr)) &&
+           likely(!task_has_idle_policy(p)))
                 goto preempt;
  
         /*
@@ -7012,7 +7021,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
         if (p->sched_class != &fair_sched_class)
                 return 0;
  
-       if (unlikely(p->policy == SCHED_IDLE))
+       if (unlikely(task_has_idle_policy(p)))
                 return 0;
  
         /*
@@ -8910,13 +8919,22 @@ out_all_pinned:
         sd->nr_balance_failed = 0;
  
  out_one_pinned:
+       ld_moved = 0;
+
+       /*
+        * idle_balance() disregards balance intervals, so we could repeatedly
+        * reach this code, which would lead to balance_interval skyrocketing
+        * in a short amount of time. Skip the balance_interval increase logic
+        * to avoid that.
+        */
+       if (env.idle == CPU_NEWLY_IDLE)
+               goto out;
+
         /* tune up the balancing interval */
-       if (((env.flags & LBF_ALL_PINNED) &&
-                       sd->balance_interval < MAX_PINNED_INTERVAL) ||
-                       (sd->balance_interval < sd->max_interval))
+       if ((env.flags & LBF_ALL_PINNED &&
+            sd->balance_interval < MAX_PINNED_INTERVAL) ||
+           sd->balance_interval < sd->max_interval)
                 sd->balance_interval *= 2;
-
-       ld_moved = 0;
  out:
         return ld_moved;
  }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c

index a21ea60..9aa3287 100644 (file)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1498,6 +1498,14 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
  #endif
  }
  
+static inline void set_next_task(struct rq *rq, struct task_struct *p)
+{
+       p->se.exec_start = rq_clock_task(rq);
+
+       /* The running task is never eligible for pushing */
+       dequeue_pushable_task(rq, p);
+}
+
  static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
                                                    struct rt_rq *rt_rq)
  {
@@ -1518,7 +1526,6 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
  static struct task_struct *_pick_next_task_rt(struct rq *rq)
  {
         struct sched_rt_entity *rt_se;
-       struct task_struct *p;
         struct rt_rq *rt_rq  = &rq->rt;
  
         do {
@@ -1527,10 +1534,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
                 rt_rq = group_rt_rq(rt_se);
         } while (rt_rq);
  
-       p = rt_task_of(rt_se);
-       p->se.exec_start = rq_clock_task(rq);
-
-       return p;
+       return rt_task_of(rt_se);
  }
  
  static struct task_struct *
@@ -1573,8 +1577,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  
         p = _pick_next_task_rt(rq);
  
-       /* The running task is never eligible for pushing */
-       dequeue_pushable_task(rq, p);
+       set_next_task(rq, p);
  
         rt_queue_push_tasks(rq);
  
@@ -2355,12 +2358,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
  
  static void set_curr_task_rt(struct rq *rq)
  {
-       struct task_struct *p = rq->curr;
-
-       p->se.exec_start = rq_clock_task(rq);
-
-       /* The running task is never eligible for pushing */
-       dequeue_pushable_task(rq, p);
+       set_next_task(rq, rq->curr);
  }
  
  static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 4e524ab..71cd8b7 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -177,6 +177,11 @@ static inline bool valid_policy(int policy)
                 rt_policy(policy) || dl_policy(policy);
  }
  
+static inline int task_has_idle_policy(struct task_struct *p)
+{
+       return idle_policy(p->policy);
+}
+
  static inline int task_has_rt_policy(struct task_struct *p)
  {
         return rt_policy(p->policy);
@@ -1794,12 +1799,12 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
  
         rq->nr_running = prev_nr + count;
  
-       if (prev_nr < 2 && rq->nr_running >= 2) {
  #ifdef CONFIG_SMP
+       if (prev_nr < 2 && rq->nr_running >= 2) {
                 if (!READ_ONCE(rq->rd->overload))
                         WRITE_ONCE(rq->rd->overload, 1);
-#endif
         }
+#endif
  
         sched_update_tick_dependency(rq);
  }
author	Ingo Molnar <mingo@kernel.org>
	Mon, 3 Dec 2018 10:42:17 +0000 (11:42 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Mon, 3 Dec 2018 10:42:17 +0000 (11:42 +0100)
kernel/sched/core.c		patch \| blob \| history
kernel/sched/deadline.c		patch \| blob \| history
kernel/sched/debug.c		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
kernel/sched/rt.c		patch \| blob \| history
kernel/sched/sched.h		patch \| blob \| history