* task expires and the dispatch path is invoked.
*/
SCX_KICK_PREEMPT = 1LLU << 1,
+
+ /*
+ * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
+ * return after the target CPU finishes picking the next task.
+ */
+ SCX_KICK_WAIT = 1LLU << 2,
};
enum scx_ops_enable_state {
#endif /* CONFIG_SMP */
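+/*
+ * Per-CPU arrays of nr_cpu_ids pnt_seq snapshots, indexed by target CPU. The
+ * kicking CPU records them here to implement %SCX_KICK_WAIT.
+ */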
+static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
+
/*
* Direct dispatch marker.
*
return p;
}
+/**
+ * scx_next_task_picked - advance @rq's pick-next-task sequence counter
+ * @rq: runqueue whose scx.pnt_seq is advanced; its lock must be held
+ * @p: not referenced by this function
+ * @active: not referenced by this function
+ *
+ * Does nothing beyond the lock assertion when scx_enabled() is false. The
+ * counter update is compiled in only under CONFIG_SMP.
+ *
+ * NOTE(review): presumably invoked by the core scheduler once the next task
+ * for @rq has been chosen - confirm against the caller.
+ */
+void scx_next_task_picked(struct rq *rq, struct task_struct *p,
+ const struct sched_class *active)
+{
+ lockdep_assert_rq_held(rq);
+
+ if (!scx_enabled())
+ return;
+#ifdef CONFIG_SMP
+ /*
+ * Publish the incremented sequence number with release semantics.
+ * Pairs with the smp_load_acquire() issued by a CPU in
+ * kick_cpus_irq_workfn() that is waiting for this CPU to perform a
+ * resched; the waiter spins until pnt_seq differs from the value it
+ * sampled when it kicked this CPU.
+ */
+ smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+#endif
+}
+
#ifdef CONFIG_SMP
static bool test_and_clear_cpu_idle(int cpu)
seq_buf_init(&ns, buf, avail);
dump_newline(&ns);
- dump_line(&ns, "CPU %-4d: nr_run=%u flags=0x%x ops_qseq=%lu",
+ dump_line(&ns, "CPU %-4d: nr_run=%u flags=0x%x ops_qseq=%lu pnt_seq=%lu",
cpu, rq->scx.nr_running, rq->scx.flags,
- rq->scx.ops_qseq);
+ rq->scx.ops_qseq, rq->scx.pnt_seq);
dump_line(&ns, " curr=%s[%d] class=%ps",
rq->curr->comm, rq->curr->pid,
rq->curr->sched_class);
if (!cpumask_empty(rq->scx.cpus_to_preempt))
dump_line(&ns, " cpus_to_preempt: %*pb",
cpumask_pr_args(rq->scx.cpus_to_preempt));
+ if (!cpumask_empty(rq->scx.cpus_to_wait))
+ dump_line(&ns, " cpus_to_wait : %*pb",
+ cpumask_pr_args(rq->scx.cpus_to_wait));
used = seq_buf_used(&ns);
if (SCX_HAS_OP(dump_cpu)) {
return !is_idle_task(rq->curr) && !(rq->scx.flags & SCX_RQ_BALANCING);
}
-static void kick_one_cpu(s32 cpu, struct rq *this_rq)
+static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *pseqs)
{
struct rq *rq = cpu_rq(cpu);
struct scx_rq *this_scx = &this_rq->scx;
+ bool should_wait = false;
unsigned long flags;
raw_spin_rq_lock_irqsave(rq, flags);
cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
}
+ if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) {
+ pseqs[cpu] = rq->scx.pnt_seq;
+ should_wait = true;
+ }
+
resched_curr(rq);
} else {
cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
+ cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
}
raw_spin_rq_unlock_irqrestore(rq, flags);
+
+ return should_wait;
}
static void kick_one_cpu_if_idle(s32 cpu, struct rq *this_rq)
{
struct rq *this_rq = this_rq();
struct scx_rq *this_scx = &this_rq->scx;
+ unsigned long *pseqs = this_cpu_ptr(scx_kick_cpus_pnt_seqs);
+ bool should_wait = false;
s32 cpu;
for_each_cpu(cpu, this_scx->cpus_to_kick) {
- kick_one_cpu(cpu, this_rq);
+ should_wait |= kick_one_cpu(cpu, this_rq, pseqs);
cpumask_clear_cpu(cpu, this_scx->cpus_to_kick);
cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
}
kick_one_cpu_if_idle(cpu, this_rq);
cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
}
+
+ if (!should_wait)
+ return;
+
+ for_each_cpu(cpu, this_scx->cpus_to_wait) {
+ unsigned long *wait_pnt_seq = &cpu_rq(cpu)->scx.pnt_seq;
+
+ if (cpu != cpu_of(this_rq)) {
+ /*
+ * Pairs with smp_store_release() issued by the target CPU in
+ * scx_next_task_picked() on the resched path.
+ *
+ * We busy-wait here to guarantee that no other task can
+ * be scheduled on our core before the target CPU has
+ * entered the resched path.
+ */
+ while (smp_load_acquire(wait_pnt_seq) == pseqs[cpu])
+ cpu_relax();
+ }
+
+ cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
+ }
}
/**
BUG_ON(!alloc_cpumask_var(&idle_masks.cpu, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&idle_masks.smt, GFP_KERNEL));
#endif
+ scx_kick_cpus_pnt_seqs =
+ __alloc_percpu(sizeof(scx_kick_cpus_pnt_seqs[0]) * nr_cpu_ids,
+ __alignof__(scx_kick_cpus_pnt_seqs[0]));
+ BUG_ON(!scx_kick_cpus_pnt_seqs);
+
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_kick, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_preempt, GFP_KERNEL));
+ BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_wait, GFP_KERNEL));
init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
}
if (flags & SCX_KICK_IDLE) {
struct rq *target_rq = cpu_rq(cpu);
- if (unlikely(flags & SCX_KICK_PREEMPT))
- scx_ops_error("PREEMPT cannot be used with SCX_KICK_IDLE");
+ if (unlikely(flags & (SCX_KICK_PREEMPT | SCX_KICK_WAIT)))
+ scx_ops_error("PREEMPT/WAIT cannot be used with SCX_KICK_IDLE");
if (raw_spin_rq_trylock(target_rq)) {
if (can_skip_idle_kick(target_rq)) {
if (flags & SCX_KICK_PREEMPT)
cpumask_set_cpu(cpu, this_rq->scx.cpus_to_preempt);
+ if (flags & SCX_KICK_WAIT)
+ cpumask_set_cpu(cpu, this_rq->scx.cpus_to_wait);
}
irq_work_queue(&this_rq->scx.kick_cpus_irq_work);