rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
+ if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+ WRITE_ONCE(rdp->last_sched_clock, jiffies);
WRITE_ONCE(rdp->gpwrap, false);
rcu_gpnum_ovf(rnp, rdp);
return ret;
rcu_gp_kthread_wake();
}
+static atomic_t *rcu_gp_slow_suppress;
+
+/*
+ * Register a counter to suppress debugging grace-period delays.
+ *
+ * @rgssp: counter to publish.  While it reads non-zero,
+ * rcu_gp_slow_is_suppressed() returns true and rcu_gp_slow() skips
+ * its sleep.
+ *
+ * Only a single counter is tracked.  Registering while one is already
+ * installed triggers a one-time warning, after which the new pointer
+ * replaces the old one.
+ */
+void rcu_gp_slow_register(atomic_t *rgssp)
+{
+ WARN_ON_ONCE(rcu_gp_slow_suppress);
+
+ WRITE_ONCE(rcu_gp_slow_suppress, rgssp);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
+
+/*
+ * Unregister a counter, with NULL for not caring which.
+ *
+ * @rgssp: counter the caller believes is registered, or NULL.
+ *
+ * A non-NULL @rgssp that differs from the currently registered pointer
+ * triggers a one-time warning.  The registered pointer is cleared in
+ * either case, so debugging delays are no longer suppressed afterwards.
+ */
+void rcu_gp_slow_unregister(atomic_t *rgssp)
+{
+ WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+
+ WRITE_ONCE(rcu_gp_slow_suppress, NULL);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister);
+
+/* Is a registered counter currently asking for debugging delays to be skipped? */
+static bool rcu_gp_slow_is_suppressed(void)
+{
+	atomic_t *counter = READ_ONCE(rcu_gp_slow_suppress);
+
+	/* No counter registered, so nothing can suppress the delays. */
+	if (!counter)
+		return false;
+
+	return atomic_read(counter) != 0;
+}
+
static void rcu_gp_slow(int delay)
{
-	if (delay > 0 &&
-	    !(rcu_seq_ctr(rcu_state.gp_seq) %
-	      (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
+	/*
+	 * Skip the delay when it is suppressed or disabled.  Bailing out
+	 * on a non-positive @delay also keeps it out of the divisor below.
+	 */
+	if (rcu_gp_slow_is_suppressed() || delay <= 0)
+		return;
+
+	/* Sleep only when rcu_seq_ctr() of ->gp_seq is a multiple of the period. */
+	if (rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay) == 0)
		schedule_timeout_idle(delay);
}
/* Advance CBs to reduce false positives below. */
offloaded = rcu_rdp_is_offloaded(rdp);
if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
+
+ // We get here if a grace period was needed ("needgp")
+ // and the above call to rcu_accelerate_cbs() did not set
+ // the RCU_GP_FLAG_INIT bit in ->gp_flags (which records
+ // the need for another grace period). The purpose
+ // of the "offloaded" check is to avoid invoking
+ // rcu_accelerate_cbs() on an offloaded CPU because we do not
+ // hold the ->nocb_lock needed to safely access an offloaded
+ // ->cblist. We do not want to acquire that lock because
+ // it can be heavily contended during callback floods.
+
WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
- trace_rcu_grace_period(rcu_state.name,
- rcu_state.gp_seq,
- TPS("newreq"));
+ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
} else {
- WRITE_ONCE(rcu_state.gp_flags,
- rcu_state.gp_flags & RCU_GP_FLAG_INIT);
+
+ // We get here either if there is no need for an
+ // additional grace period or if rcu_accelerate_cbs() has
+ // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.
+ // So all we need to do is to clear all of the other
+ // ->gp_flags bits.
+
+ WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT);
}
raw_spin_unlock_irq_rcu_node(rnp);
*/
void rcu_sched_clock_irq(int user)
{
+ unsigned long j;
+
+ if (IS_ENABLED(CONFIG_PROVE_RCU)) {
+ j = jiffies;
+ WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+ __this_cpu_write(rcu_data.last_sched_clock, j);
+ }
trace_rcu_utilization(TPS("Start scheduler-tick"));
lockdep_assert_irqs_disabled();
raw_cpu_inc(rcu_data.ticks_this_gp);
rcu_flavor_sched_clock_irq(user);
if (rcu_pending(user))
invoke_rcu_core();
+ if (user)
+ rcu_tasks_classic_qs(current, false);
lockdep_assert_irqs_disabled();
trace_rcu_utilization(TPS("End scheduler-tick"));
{
int ret;
- if (IS_ENABLED(CONFIG_PREEMPTION))
+ // Invoking preempt_model_*() too early gets a splat.
+ if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE ||
+ preempt_model_full() || preempt_model_rt())
return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
*
* Yes, this function does not take counter wrap into account.
* But counter wrap is harmless. If the counter wraps, we have waited for
- * more than 2 billion grace periods (and way more on a 64-bit system!).
+ * more than a billion grace periods (and way more on a 64-bit system!).
* Those needing to keep oldstate values for very long time periods
* (many hours even on 32-bit systems) should check them occasionally
* and either refresh them or set a flag indicating that the grace period
*/
bool poll_state_synchronize_rcu(unsigned long oldstate)
{
- if (rcu_seq_done(&rcu_state.gp_seq, oldstate)) {
+ if (oldstate == RCU_GET_STATE_COMPLETED ||
+ rcu_seq_done_exact(&rcu_state.gp_seq, oldstate)) {
smp_mb(); /* Ensure GP ends before subsequent accesses. */
return true;
}
rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
+ rdp->last_sched_clock = jiffies;
rdp->cpu = cpu;
rcu_boot_init_nocb_percpu_data(rdp);
}
return NOTIFY_OK;
}
+#ifdef CONFIG_RCU_EXP_KTHREAD
+/*
+ * kthread workers set up by rcu_start_exp_gp_kworkers() when
+ * CONFIG_RCU_EXP_KTHREAD is set.  Both are left NULL if creation fails.
+ */
+struct kthread_worker *rcu_exp_gp_kworker;
+struct kthread_worker *rcu_exp_par_gp_kworker;
+
+/*
+ * Create the two expedited grace-period kthread workers and switch
+ * their tasks to SCHED_FIFO at priority kthread_prio.
+ *
+ * On any failure an error is logged, any worker that was already
+ * created is destroyed, and both globals are reset to NULL so that
+ * neither an ERR_PTR() value nor a pointer to a destroyed worker
+ * stays published.
+ */
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+	const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
+	const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
+	struct sched_param param = { .sched_priority = kthread_prio };
+
+	rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
+	if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+		pr_err("Failed to create %s!\n", gp_kworker_name);
+		rcu_exp_gp_kworker = NULL;
+		return;
+	}
+
+	rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
+	if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
+		pr_err("Failed to create %s!\n", par_gp_kworker_name);
+		rcu_exp_par_gp_kworker = NULL;
+		/* Do not keep a worker that cannot be paired; drop the stale pointer too. */
+		kthread_destroy_worker(rcu_exp_gp_kworker);
+		rcu_exp_gp_kworker = NULL;
+		return;
+	}
+
+	sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+	sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, &param);
+}
+
+/* rcu_par_gp_wq is not defined in this configuration: nothing to allocate. */
+static inline void rcu_alloc_par_gp_wq(void)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+/* Workqueue allocated by rcu_alloc_par_gp_wq() when CONFIG_RCU_EXP_KTHREAD is not set. */
+struct workqueue_struct *rcu_par_gp_wq;
+
+/* No kthread workers are created in this configuration. */
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+}
+
+/* Allocate the "rcu_par_gp" workqueue, warning if the allocation fails. */
+static inline void rcu_alloc_par_gp_wq(void)
+{
+	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+	WARN_ON(!rcu_par_gp_wq);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
/*
* Spawn the kthreads that handle RCU's grace periods.
*/
struct rcu_node *rnp;
struct sched_param sp;
struct task_struct *t;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
rcu_scheduler_fully_active = 1;
t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
smp_store_release(&rcu_state.gp_kthread, t); /* ^^^ */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
wake_up_process(t);
- rcu_spawn_nocb_kthreads();
- rcu_spawn_boost_kthreads();
+ /* This is a pre-SMP initcall, we expect a single CPU */
+ WARN_ON(num_online_cpus() > 1);
+ /*
+ * Those kthreads couldn't be created on rcu_init() -> rcutree_prepare_cpu()
+ * due to rcu_scheduler_fully_active.
+ */
+ rcu_spawn_cpu_nocb_kthread(smp_processor_id());
+ rcu_spawn_one_boost_kthread(rdp->mynode);
rcu_spawn_core_kthreads();
+ /* Create kthread worker for expedited GPs */
+ rcu_start_exp_gp_kworkers();
return 0;
}
early_initcall(rcu_spawn_gp_kthread);
}
struct workqueue_struct *rcu_gp_wq;
-struct workqueue_struct *rcu_par_gp_wq;
static void __init kfree_rcu_batch_init(void)
{
void __init rcu_init(void)
{
- int cpu;
+ int cpu = smp_processor_id();
rcu_early_boot_tests();
* or the scheduler are operational.
*/
pm_notifier(rcu_pm_notify, 0);
- for_each_online_cpu(cpu) {
- rcutree_prepare_cpu(cpu);
- rcu_cpu_starting(cpu);
- rcutree_online_cpu(cpu);
- }
+ WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
+ rcutree_prepare_cpu(cpu);
+ rcu_cpu_starting(cpu);
+ rcutree_online_cpu(cpu);
/* Create workqueue for Tree SRCU and for expedited GPs. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
- rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
- WARN_ON(!rcu_par_gp_wq);
+ rcu_alloc_par_gp_wq();
/* Fill in default value for rcutree.qovld boot parameter. */
/* -After- the rcu_node ->lock fields are initialized! */