#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
/*
- * Migrate-Disable and why it is (strongly) undesired.
- *
- * The premise of the Real-Time schedulers we have on Linux
- * (SCHED_FIFO/SCHED_DEADLINE) is that M CPUs can/will run M tasks
- * concurrently, provided there are sufficient runnable tasks, also known as
- * work-conserving. For instance SCHED_DEADLINE tries to schedule the M
- * earliest deadline threads, and SCHED_FIFO the M highest priority threads.
- *
- * The correctness of various scheduling models depends on this, but is it
- * broken by migrate_disable() that doesn't imply preempt_disable(). Where
- * preempt_disable() implies an immediate priority ceiling, preemptible
- * migrate_disable() allows nesting.
- *
- * The worst case is that all tasks preempt one another in a migrate_disable()
- * region and stack on a single CPU. This then reduces the available bandwidth
- * to a single CPU. And since Real-Time schedulability theory considers the
- * Worst-Case only, all Real-Time analysis shall revert to single-CPU
- * (instantly solving the SMP analysis problem).
+ * Migrate-Disable and why it is undesired.
+ *
+ * When a preempted task becomes eligible to run under the ideal model (IOW it
+ * becomes one of the M highest priority tasks), it might still have to wait
+ * for the preemptee's migrate_disable() section to complete. Thereby suffering
+ * a reduction in bandwidth in the exact duration of the migrate_disable()
+ * section.
+ *
+ * Per this argument, the change from preempt_disable() to migrate_disable()
+ * gets us:
+ *
+ * - a higher priority task gains reduced wake-up latency; with preempt_disable()
+ * it would have had to wait for the lower priority task.
+ *
+ * - a lower priority task; which under preempt_disable() could've instantly
+ * migrated away when another CPU becomes available, is now constrained
+ * by the ability to push the higher priority task away, which might itself be
+ * in a migrate_disable() section, reducing its available bandwidth.
+ *
+ * IOW it trades latency / moves the interference term, but it stays in the
+ * system, and as long as it remains unbounded, the system is not fully
+ * deterministic.
*
*
* The reason we have it anyway.
cpumask_t cpus_mask;
void *migration_pending;
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
- int migration_disabled;
+ unsigned short migration_disabled;
#endif
+ unsigned short migration_flags;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
}
EXPORT_SYMBOL_GPL(migrate_enable);
-static inline bool is_migration_disabled(struct task_struct *p)
-{
- return p->migration_disabled;
-}
-
static inline bool rq_has_pinned_tasks(struct rq *rq)
{
return rq->nr_pinned;
return 0;
}
+int push_cpu_stop(void *arg)
+{
+ struct rq *lowest_rq = NULL, *rq = this_rq();
+ struct task_struct *p = arg; /* task to push; one reference on it is dropped before returning */
+ /* Stopper callback (queued via stop_one_cpu_nowait()): try to move @p off this CPU's rq; always returns 0. */
+ raw_spin_lock_irq(&p->pi_lock);
+ raw_spin_lock(&rq->lock);
+ /* @p is not on this CPU's runqueue: nothing to push from here. */
+ if (task_rq(p) != rq)
+ goto out_unlock;
+ /* @p cannot be moved right now: record the request in MDF_PUSH (checked on the SCA_MIGRATE_ENABLE path) and give up. */
+ if (is_migration_disabled(p)) {
+ p->migration_flags |= MDF_PUSH;
+ goto out_unlock;
+ }
+ /* @p is movable: clear any previously recorded deferred-push request. */
+ p->migration_flags &= ~MDF_PUSH;
+ /* The find_lock_rq hook is optional per class; presumably it returns the target rq locked, or NULL - TODO confirm. */
+ if (p->sched_class->find_lock_rq)
+ lowest_rq = p->sched_class->find_lock_rq(p, rq);
+ /* No target rq (or no hook for this class): leave @p where it is. */
+ if (!lowest_rq)
+ goto out_unlock;
+ /* Re-check @p's rq before moving it; presumably the hook may have dropped rq->lock - TODO confirm. */
+ // XXX validate p is still the highest prio task
+ if (task_rq(p) == rq) {
+ deactivate_task(rq, p, 0);
+ set_task_cpu(p, lowest_rq->cpu);
+ activate_task(lowest_rq, p, 0);
+ resched_curr(lowest_rq);
+ }
+ /* NOTE(review): presumably releases only lowest_rq's lock; rq->lock is released explicitly below. */
+ double_unlock_balance(rq, lowest_rq);
+ /* Common exit: release the per-rq push slot, then drop both locks. */
+out_unlock:
+ rq->push_busy = false;
+ raw_spin_unlock(&rq->lock);
+ raw_spin_unlock_irq(&p->pi_lock);
+ /* Drop the task reference that was taken when this work was queued. */
+ put_task_struct(p);
+ return 0;
+}
+
/*
* sched_class::set_cpus_allowed must do the below, but is not required to
* actually call this function.
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+ struct task_struct *push_task = NULL;
+
+ if ((flags & SCA_MIGRATE_ENABLE) &&
+ (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
+ rq->push_busy = true;
+ push_task = get_task_struct(p);
+ }
+
pending = p->migration_pending;
if (pending) {
refcount_inc(&pending->refs);
}
task_rq_unlock(rq, p, rf);
+ if (push_task) {
+ stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
+ p, &rq->push_work);
+ }
+
if (complete)
goto do_complete;
if (flags & SCA_MIGRATE_ENABLE) {
refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
+ p->migration_flags &= ~MDF_PUSH;
task_rq_unlock(rq, p, rf);
pending->arg = (struct migration_arg) {
static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
-static inline bool is_migration_disabled(struct task_struct *p)
-{
- return false;
-}
-
static inline bool rq_has_pinned_tasks(struct rq *rq)
{
return false;
return 0;
retry:
+ if (is_migration_disabled(next_task))
+ return 0;
+
if (WARN_ON(next_task == rq->curr))
return 0;
static void pull_dl_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, cpu;
- struct task_struct *p;
+ struct task_struct *p, *push_task;
bool resched = false;
struct rq *src_rq;
u64 dmin = LONG_MAX;
continue;
/* Might drop this_rq->lock */
+ push_task = NULL;
double_lock_balance(this_rq, src_rq);
/*
src_rq->curr->dl.deadline))
goto skip;
- resched = true;
-
- deactivate_task(src_rq, p, 0);
- set_task_cpu(p, this_cpu);
- activate_task(this_rq, p, 0);
- dmin = p->dl.deadline;
+ if (is_migration_disabled(p)) {
+ push_task = get_push_task(src_rq);
+ } else {
+ deactivate_task(src_rq, p, 0);
+ set_task_cpu(p, this_cpu);
+ activate_task(this_rq, p, 0);
+ dmin = p->dl.deadline;
+ resched = true;
+ }
/* Is there any other task even earlier? */
}
skip:
double_unlock_balance(this_rq, src_rq);
+
+ if (push_task) {
+ raw_spin_unlock(&this_rq->lock);
+ stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+ push_task, &src_rq->push_work);
+ raw_spin_lock(&this_rq->lock);
+ }
}
if (resched)
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
.task_woken = task_woken_dl,
+ .find_lock_rq = find_lock_later_rq,
#endif
.task_tick = task_tick_dl,
* running task can migrate over to a CPU that is running a task
* of lesser priority.
*/
-static int push_rt_task(struct rq *rq)
+static int push_rt_task(struct rq *rq, bool pull)
{
struct task_struct *next_task;
struct rq *lowest_rq;
return 0;
retry:
+ if (is_migration_disabled(next_task)) {
+ struct task_struct *push_task = NULL;
+ int cpu;
+
+ if (!pull || rq->push_busy)
+ return 0;
+
+ cpu = find_lowest_rq(rq->curr);
+ if (cpu == -1 || cpu == rq->cpu)
+ return 0;
+
+ /*
+ * Given we found a CPU with lower priority than @next_task,
+ * therefore it should be running. However we cannot migrate it
+ * to this other CPU, instead attempt to push the current
+ * running task on this CPU away.
+ */
+ push_task = get_push_task(rq);
+ if (push_task) {
+ raw_spin_unlock(&rq->lock);
+ stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
+ push_task, &rq->push_work);
+ raw_spin_lock(&rq->lock);
+ }
+
+ return 0;
+ }
+
if (WARN_ON(next_task == rq->curr))
return 0;
deactivate_task(rq, next_task, 0);
set_task_cpu(next_task, lowest_rq->cpu);
activate_task(lowest_rq, next_task, 0);
- ret = 1;
-
resched_curr(lowest_rq);
+ ret = 1;
double_unlock_balance(rq, lowest_rq);
-
out:
put_task_struct(next_task);
static void push_rt_tasks(struct rq *rq)
{
/* push_rt_task will return true if it moved an RT */
- while (push_rt_task(rq))
+ while (push_rt_task(rq, false))
;
}
*/
if (has_pushable_tasks(rq)) {
raw_spin_lock(&rq->lock);
- push_rt_tasks(rq);
+ while (push_rt_task(rq, true))
+ ;
raw_spin_unlock(&rq->lock);
}
{
int this_cpu = this_rq->cpu, cpu;
bool resched = false;
- struct task_struct *p;
+ struct task_struct *p, *push_task;
struct rq *src_rq;
int rt_overload_count = rt_overloaded(this_rq);
* double_lock_balance, and another CPU could
* alter this_rq
*/
+ push_task = NULL;
double_lock_balance(this_rq, src_rq);
/*
if (p->prio < src_rq->curr->prio)
goto skip;
- resched = true;
-
- deactivate_task(src_rq, p, 0);
- set_task_cpu(p, this_cpu);
- activate_task(this_rq, p, 0);
+ if (is_migration_disabled(p)) {
+ push_task = get_push_task(src_rq);
+ } else {
+ deactivate_task(src_rq, p, 0);
+ set_task_cpu(p, this_cpu);
+ activate_task(this_rq, p, 0);
+ resched = true;
+ }
/*
* We continue with the search, just in
* case there's an even higher prio task
}
skip:
double_unlock_balance(this_rq, src_rq);
+
+ if (push_task) {
+ raw_spin_unlock(&this_rq->lock);
+ stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+ push_task, &src_rq->push_work);
+ raw_spin_lock(&this_rq->lock);
+ }
}
if (resched)
.rq_offline = rq_offline_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
+ .find_lock_rq = find_lock_lowest_rq,
#endif
.task_tick = task_tick_rt,
#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
unsigned int nr_pinned;
#endif
+ unsigned int push_busy;
+ struct cpu_stop_work push_work;
};
#ifdef CONFIG_FAIR_GROUP_SCHED
#endif
}
+#define MDF_PUSH 0x01 /* task_struct::migration_flags bit: set by push_cpu_stop() when a push hit a migration-disabled task */
+
+static inline bool is_migration_disabled(struct task_struct *p)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+ return p->migration_disabled; /* any non-zero value converts to true */
+#else
+ return false; /* without SMP && PREEMPT_RT this helper is a constant false */
+#endif
+}
#ifdef CONFIG_SCHED_SMT
extern void __update_idle_core(struct rq *rq);
void (*rq_online)(struct rq *rq);
void (*rq_offline)(struct rq *rq);
+
+ struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
#endif
void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
+static inline struct task_struct *get_push_task(struct rq *rq)
+{
+ struct task_struct *p = rq->curr;
+ /* Claim @rq's push slot for its currently running task; the caller must hold rq->lock. */
+ lockdep_assert_held(&rq->lock);
+ /* A push has already been claimed for this rq and not yet released: do not queue another. */
+ if (rq->push_busy)
+ return NULL;
+ /* nr_cpus_allowed == 1: presumably @p may only run on one CPU, so there is nowhere to push it. */
+ if (p->nr_cpus_allowed == 1)
+ return NULL;
+ /* Mark the slot busy and return @p with a reference taken; push_cpu_stop() clears push_busy and drops the reference. */
+ rq->push_busy = true;
+ return get_task_struct(p);
+}
+
+extern int push_cpu_stop(void *arg);
+
#endif
#ifdef CONFIG_CPU_IDLE