* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*/
+
+#define pr_fmt(fmt) "rcu: " fmt
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);
+/*
+ * Retrieve the RCU kthreads' priority for rcutorture.  Returns the
+ * current value of kthread_prio; exported GPL-only.
+ */
+int rcu_get_gp_kthreads_prio(void)
+{
+ return kthread_prio;
+}
+EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);
+
/*
* Number of grace periods between delays, normalized by the duration of
* the delay. The longer the delay, the more the grace periods between
*/
#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */
-/*
- * Track the rcutorture test sequence number and the update version
- * number within a given test. The rcutorture_testseq is incremented
- * on every rcutorture module load and unload, so has an odd value
- * when a test is running. The rcutorture_vernum is set to zero
- * when rcutorture starts and is incremented on each rcutorture update.
- * These variables enable correlating rcutorture output with the
- * RCU tracing information.
- */
-unsigned long rcutorture_testseq;
-unsigned long rcutorture_vernum;
-
/*
* Compute the mask of online CPUs for the specified rcu_node structure.
* This will not be stable unless the rcu_node structure's ->lock is
return snap != rcu_dynticks_snap(rdtp);
}
-/*
- * Do a double-increment of the ->dynticks counter to emulate a
- * momentary idle-CPU quiescent state.
- */
-static void rcu_dynticks_momentary_idle(void)
-{
- struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
- int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
- &rdtp->dynticks);
-
- /* It is illegal to call this from idle state. */
- WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
-}
-
/*
* Set the special (bottom) bit of the specified CPU so that it
* will take special action (such as flushing its TLB) on the
*
* We inform the RCU core by emulating a zero-duration dyntick-idle period.
*
- * The caller must have disabled interrupts.
+ * The caller must have disabled interrupts and must not be idle.
*/
static void rcu_momentary_dyntick_idle(void)
{
+ struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+ int special;
+
raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false);
- rcu_dynticks_momentary_idle();
+ special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+ /*
+  * The double increment of ->dynticks above emulates a momentary
+  * idle period.  It is illegal to call this from idle state: the
+  * warning below fires if RCU_DYNTICK_CTRL_CTR is clear in the
+  * value returned by the increment.
+  */
+ WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
}
/*
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
if (!preempt)
- rcu_note_voluntary_context_switch_lite(current);
+ rcu_tasks_qs(current);
out:
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
-module_param(jiffies_till_first_fqs, ulong, 0644);
-module_param(jiffies_till_next_fqs, ulong, 0644);
+/*
+ * Setter used for the jiffies_till_first_fqs module parameter.  Parses
+ * @val as an unsigned long and stores it through kp->arg, capped at HZ.
+ * On a parse failure the parameter is left untouched and kstrtoul()'s
+ * error code is returned; otherwise returns 0.
+ */
+static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
+{
+	ulong *target = (ulong *)kp->arg;
+	ulong parsed;
+	int err;
+
+	err = kstrtoul(val, 0, &parsed);
+	if (err)
+		return err;
+
+	/* Never wait more than one second before the first FQS scan. */
+	if (parsed > HZ)
+		parsed = HZ;
+	WRITE_ONCE(*target, parsed);
+	return 0;
+}
+
+/*
+ * Setter used for the jiffies_till_next_fqs module parameter.  Parses
+ * @val as an unsigned long and stores it through kp->arg, capped at HZ
+ * and with zero replaced by 1.  On a parse failure the parameter is
+ * left untouched and kstrtoul()'s error code is returned; otherwise
+ * returns 0.
+ */
+static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
+{
+	ulong *target = (ulong *)kp->arg;
+	ulong parsed;
+	int err;
+
+	err = kstrtoul(val, 0, &parsed);
+	if (err)
+		return err;
+
+	if (parsed > HZ)
+		parsed = HZ;	/* Upper bound: one second. */
+	else if (parsed == 0)
+		parsed = 1;	/* Lower bound: one jiffy. */
+	WRITE_ONCE(*target, parsed);
+	return 0;
+}
+
+/*
+ * Parameter operations for jiffies_till_first_fqs: custom setter that
+ * caps the value, stock unsigned-long getter.  Never modified after
+ * initialization, hence const.
+ */
+static const struct kernel_param_ops first_fqs_jiffies_ops = {
+	.set = param_set_first_fqs_jiffies,
+	.get = param_get_ulong,
+};
+
+/*
+ * Parameter operations for jiffies_till_next_fqs: custom setter that
+ * clamps the value, stock unsigned-long getter.  Never modified after
+ * initialization, hence const.
+ */
+static const struct kernel_param_ops next_fqs_jiffies_ops = {
+	.set = param_set_next_fqs_jiffies,
+	.get = param_get_ulong,
+};
+
+module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
+module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
module_param(rcu_kick_kthreads, bool, 0644);
/*
*/
void show_rcu_gp_kthreads(void)
{
+ int cpu;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
struct rcu_state *rsp;
for_each_rcu_flavor(rsp) {
pr_info("%s: wait state: %d ->state: %#lx\n",
rsp->name, rsp->gp_state, rsp->gp_kthread->state);
+ rcu_for_each_node_breadth_first(rsp, rnp) { /* Report nodes needing a GP beyond rsp->gp_seq. */
+ if (ULONG_CMP_GE(rsp->gp_seq, rnp->gp_seq_needed))
+ continue; /* ->gp_seq_needed is not ahead of rsp->gp_seq. */
+ pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
+ rnp->grplo, rnp->grphi, rnp->gp_seq,
+ rnp->gp_seq_needed);
+ if (!rcu_is_leaf_node(rnp))
+ continue; /* Per-CPU detail is printed for leaf nodes only. */
+ for_each_leaf_node_possible_cpu(rnp, cpu) {
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ if (rdp->gpwrap ||
+ ULONG_CMP_GE(rsp->gp_seq,
+ rdp->gp_seq_needed))
+ continue; /* ->gpwrap set, or ->gp_seq_needed not ahead of rsp->gp_seq. */
+ pr_info("\tcpu %d ->gp_seq_needed %lu\n",
+ cpu, rdp->gp_seq_needed);
+ }
+ }
/* sched_show_task(rsp->gp_kthread); */
}
}
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
-/*
- * Record the number of times rcutorture tests have been initiated and
- * terminated. This information allows the debugfs tracing stats to be
- * correlated to the rcutorture messages, even when the rcutorture module
- * is being repeatedly loaded and unloaded. In other words, we cannot
- * store this state in rcutorture itself.
- */
-void rcutorture_record_test_transition(void)
-{
- rcutorture_testseq++;
- rcutorture_vernum = 0;
-}
-EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
-
/*
* Send along grace-period-related data for rcutorture diagnostics.
*/
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
-/*
- * Record the number of writer passes through the current rcutorture test.
- * This is also used to correlate debugfs tracing stats with the rcutorture
- * messages.
- */
-void rcutorture_record_progress(unsigned long vernum)
-{
- rcutorture_vernum++;
-}
-EXPORT_SYMBOL_GPL(rcutorture_record_progress);
-
/*
* Return the root node of the specified rcu_state structure.
*/
smp_store_release(ruqp, true);
}
+ /* If waiting too long on an offline CPU, complain. */
+ if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
+ time_after(jiffies, rdp->rsp->gp_start + HZ)) {
+ bool onl;
+ struct rcu_node *rnp1;
+
+ WARN_ON(1); /* Offline CPUs are supposed to report QS! */
+ pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
+ __func__, rnp->grplo, rnp->grphi, rnp->level,
+ (long)rnp->gp_seq, (long)rnp->completedqs);
+ for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
+ pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
+ __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
+ onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
+ pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
+ __func__, rdp->cpu, ".o"[onl],
+ (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
+ (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
+ return 1; /* Break things loose after complaining. */
+ }
+
/*
* A CPU running for an extended time within the kernel can
* delay RCU grace periods. When the CPU is in NO_HZ_FULL mode,
}
}
-static inline void panic_on_rcu_stall(void)
+static void panic_on_rcu_stall(void)
{
if (sysctl_panic_on_rcu_stall)
panic("RCU Stall\n");
* See Documentation/RCU/stallwarn.txt for info on how to debug
* RCU CPU stall warnings.
*/
- pr_err("INFO: %s detected stalls on CPUs/tasks:",
- rsp->name);
+ pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name);
print_cpu_stall_info_begin();
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
!READ_ONCE(rsp->gp_flags) ||
!rsp->gp_kthread)
return;
- swake_up(&rsp->gp_wq);
+ swake_up_one(&rsp->gp_wq);
}
/*
* for subsequent online CPUs, and that quiescent-state forcing
* will handle subsequent offline CPUs.
*/
+ rsp->gp_state = RCU_GP_ONOFF;
rcu_for_each_leaf_node(rsp, rnp) {
spin_lock(&rsp->ofl_lock);
raw_spin_lock_irq_rcu_node(rnp);
* The grace period cannot complete until the initialization
* process finishes, because this kthread handles both.
*/
+ rsp->gp_state = RCU_GP_INIT;
rcu_for_each_node_breadth_first(rsp, rnp) {
rcu_gp_slow(rsp, gp_init_delay);
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp = this_cpu_ptr(rsp->rda);
- rcu_preempt_check_blocked_tasks(rnp);
+ rcu_preempt_check_blocked_tasks(rsp, rnp);
rnp->qsmask = rnp->qsmaskinit;
WRITE_ONCE(rnp->gp_seq, rsp->gp_seq);
if (rnp == rdp->mynode)
rnp->grphi, rnp->qsmask);
/* Quiescent states for tasks on any now-offline CPUs. */
mask = rnp->qsmask & ~rnp->qsmaskinitnext;
+ rnp->rcu_gp_init_mask = mask;
if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
else
}
/*
- * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
* time.
*/
static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
rcu_for_each_node_breadth_first(rsp, rnp) {
raw_spin_lock_irq_rcu_node(rnp);
if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
- dump_blkd_tasks(rnp, 10);
+ dump_blkd_tasks(rsp, rnp, 10);
WARN_ON_ONCE(rnp->qsmask);
WRITE_ONCE(rnp->gp_seq, new_gp_seq);
rdp = this_cpu_ptr(rsp->rda);
READ_ONCE(rsp->gp_seq),
TPS("reqwait"));
rsp->gp_state = RCU_GP_WAIT_GPS;
- swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+ swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
rsp->gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
/* Handle quiescent-state forcing. */
first_gp_fqs = true;
j = jiffies_till_first_fqs;
- if (j > HZ) {
- j = HZ;
- jiffies_till_first_fqs = HZ;
- }
ret = 0;
for (;;) {
if (!ret) {
READ_ONCE(rsp->gp_seq),
TPS("fqswait"));
rsp->gp_state = RCU_GP_WAIT_FQS;
- ret = swait_event_idle_timeout(rsp->gp_wq,
+ ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,
rcu_gp_fqs_check_wake(rsp, &gf), j);
rsp->gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
WRITE_ONCE(rsp->gp_activity, jiffies);
ret = 0; /* Force full wait till next FQS. */
j = jiffies_till_next_fqs;
- if (j > HZ) {
- j = HZ;
- jiffies_till_next_fqs = HZ;
- } else if (j < 1) {
- j = 1;
- jiffies_till_next_fqs = 1;
- }
} else {
/* Deal with stray signal. */
cond_resched_tasks_rcu_qs();
rcu_sched_qs();
rcu_bh_qs();
+ rcu_note_voluntary_context_switch(current);
} else if (!in_softirq()) {
rcu_preempt_check_callbacks();
if (rcu_pending())
invoke_rcu_core();
- if (user)
- rcu_note_voluntary_context_switch(current);
+
trace_rcu_utilization(TPS("End scheduler-tick"));
}
rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
+ const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ;
unsigned long flags;
unsigned long j;
struct rcu_node *rnp_root = rcu_get_root(rsp);
ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
return;
j = jiffies; /* Expensive access, and in common case don't get here. */
- if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
- time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
+ if (time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) ||
+ time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) ||
atomic_read(&warned))
return;
j = jiffies;
if (rcu_gp_in_progress(rsp) ||
ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
- time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
+ time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) ||
+ time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) ||
atomic_read(&warned)) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
j = jiffies;
if (rcu_gp_in_progress(rsp) ||
ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, rsp->gp_req_activity + HZ) ||
- time_before(j, rsp->gp_activity + HZ) ||
+ time_before(j, rsp->gp_req_activity + gpssdelay) ||
+ time_before(j, rsp->gp_activity + gpssdelay) ||
atomic_xchg(&warned, 1)) {
raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
- pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x %s->state:%#lx\n",
+ pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
__func__, (long)READ_ONCE(rsp->gp_seq),
(long)READ_ONCE(rnp_root->gp_seq_needed),
j - rsp->gp_req_activity, j - rsp->gp_activity,
- rsp->gp_flags, rsp->name,
+ rsp->gp_flags, rsp->gp_state, rsp->name,
rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
WARN_ON(1);
if (rnp_root != rnp)
* when there was in fact only one the whole time, as this just adds
* some overhead: RCU still operates correctly.
*/
-static inline int rcu_blocking_is_gp(void)
+static int rcu_blocking_is_gp(void)
{
int ret;
* non-NULL, store an indication of whether all callbacks are lazy.
* (If there are no callbacks, all of them are deemed to be lazy.)
*/
-static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
+static bool rcu_cpu_has_callbacks(bool *all_lazy)
{
bool al = true;
bool hc = false;
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
+ rdp->rcu_ofl_gp_seq = rsp->gp_seq;
+ rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
+ rdp->rcu_onl_gp_seq = rsp->gp_seq;
+ rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
rdp->cpu = cpu;
rdp->rsp = rsp;
rcu_boot_init_nocb_percpu_data(rdp);
/* Allow lockless access for expedited grace periods. */
smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
+ rdp->rcu_onl_gp_seq = READ_ONCE(rsp->gp_seq);
+ rdp->rcu_onl_gp_flags = READ_ONCE(rsp->gp_flags);
if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
/* Report QS -after- changing ->qsmaskinitnext! */
rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
mask = rdp->grpmask;
spin_lock(&rsp->ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
+ rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq);
+ rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags);
if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
/* Report quiescent state -before- changing ->qsmaskinitnext! */
rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
struct task_struct *t;
/* Force priority into range. */
- if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
+ if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
+ && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
+ kthread_prio = 2;
+ else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
kthread_prio = 1;
else if (kthread_prio < 0)
kthread_prio = 0;
else if (kthread_prio > 99)
kthread_prio = 99;
+
if (kthread_prio != kthread_prio_in)
pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
kthread_prio, kthread_prio_in);
if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
nr_cpu_ids == NR_CPUS)
return;
- pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
+ pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
rcu_fanout_leaf, nr_cpu_ids);
/*