rcu: Switch polled grace-period APIs to ->gp_seq_polled
author     Paul E. McKenney <paulmck@kernel.org>
           Thu, 14 Apr 2022 00:46:15 +0000 (17:46 -0700)
committer  Paul E. McKenney <paulmck@kernel.org>
           Fri, 22 Jul 2022 00:40:06 +0000 (17:40 -0700)
This commit switches the existing polled grace-period APIs to use a
new ->gp_seq_polled counter in the rcu_state structure.  An additional
->gp_seq_polled_snap counter in that same structure allows the normal
grace-period kthread to interact properly with the !SMP !PREEMPT
fastpath through synchronize_rcu().  Whichever of the two notes the
end of a given grace period first makes that transition visible to
the polled API.
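
For reference, a caller of the polled API that these counters now back
might look as follows.  This is an illustrative sketch only: the
start_poll_synchronize_rcu() and poll_state_synchronize_rcu() calls are
real RCU interfaces, but the surrounding structure and function names
are made up for the example and are not part of this commit:

        #include <linux/rcupdate.h>
        #include <linux/types.h>

        struct demo_entry {
                unsigned long rcu_cookie;  /* cookie from the polled API */
                /* ... payload ... */
        };

        static void demo_entry_retire(struct demo_entry *e)
        {
                /* Ask for a grace period and record where it will end. */
                e->rcu_cookie = start_poll_synchronize_rcu();
        }

        static bool demo_entry_can_reuse(struct demo_entry *e)
        {
                /* Non-blocking check: has that grace period elapsed? */
                return poll_state_synchronize_rcu(e->rcu_cookie);
        }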

This commit is in preparation for polled expedited grace periods.

[ paulmck: Fix use of rcu_state.gp_seq_polled to start normal grace period. ]

Link: https://lore.kernel.org/all/20220121142454.1994916-1-bfoster@redhat.com/
Link: https://docs.google.com/document/d/1RNKWW9jQyfjxw2E8dsXVTdvZYh0HnYeSHDKog9jhdN8/edit?usp=sharing
Cc: Brian Foster <bfoster@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ian Kent <raven@themaw.net>
Co-developed-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
kernel/rcu/tree.c
kernel/rcu/tree.h
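
For readers unfamiliar with the rcu_seq_*() helpers that the hunks below
lean on (rcu_seq_start(), rcu_seq_end(), rcu_seq_snap(), rcu_seq_state(),
and rcu_seq_done_exact() in kernel/rcu/rcu.h), here is a simplified
stand-alone model of the counter encoding: the low two bits hold the
grace-period state (nonzero means a grace period is in progress) and the
upper bits count grace periods.  This is an illustrative sketch, not the
kernel code, and seq_done() below is a plain ordered comparison rather
than the more careful wrap handling in rcu_seq_done_exact():

        #include <stdbool.h>
        #include <stdio.h>

        #define SEQ_CTR_SHIFT   2
        #define SEQ_STATE_MASK  ((1UL << SEQ_CTR_SHIFT) - 1)

        /* Nonzero low bits mean a grace period is in progress. */
        static unsigned long seq_state(unsigned long s)
        {
                return s & SEQ_STATE_MASK;
        }

        /* Mark the beginning of a grace period. */
        static void seq_start(unsigned long *sp)
        {
                *sp += 1;
        }

        /* Mark the end of a grace period: round up to the next idle value. */
        static void seq_end(unsigned long *sp)
        {
                *sp = (*sp | SEQ_STATE_MASK) + 1;
        }

        /* Cookie that a full grace period starting after this call satisfies. */
        static unsigned long seq_snap(unsigned long s)
        {
                return (s + 2 * SEQ_STATE_MASK + 1) & ~SEQ_STATE_MASK;
        }

        /* Has the grace period identified by the cookie completed? */
        static bool seq_done(unsigned long s, unsigned long cookie)
        {
                return (long)(s - cookie) >= 0;
        }

        int main(void)
        {
                unsigned long gp_seq_polled = 0;
                unsigned long cookie = seq_snap(gp_seq_polled);

                printf("before GP: %d\n", seq_done(gp_seq_polled, cookie)); /* 0 */
                seq_start(&gp_seq_polled);      /* cf. rcu_poll_gp_seq_start() */
                seq_end(&gp_seq_polled);        /* cf. rcu_poll_gp_seq_end() */
                printf("after GP:  %d\n", seq_done(gp_seq_polled, cookie)); /* 1 */
                return 0;
        }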

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 46cfcee..b40a5a1 100644
@@ -1775,6 +1775,78 @@ static void rcu_strict_gp_boundary(void *unused)
        invoke_rcu_core();
 }
 
+// Has rcu_init() been invoked?  This is used (for example) to determine
+// whether spinlocks may be acquired safely.
+static bool rcu_init_invoked(void)
+{
+       return !!rcu_state.n_online_cpus;
+}
+
+// Make the polled API aware of the beginning of a grace period.
+static void rcu_poll_gp_seq_start(unsigned long *snap)
+{
+       struct rcu_node *rnp = rcu_get_root();
+
+       if (rcu_init_invoked())
+               raw_lockdep_assert_held_rcu_node(rnp);
+
+       // If RCU was idle, note beginning of GP.
+       if (!rcu_seq_state(rcu_state.gp_seq_polled))
+               rcu_seq_start(&rcu_state.gp_seq_polled);
+
+       // Either way, record current state.
+       *snap = rcu_state.gp_seq_polled;
+}
+
+// Make the polled API aware of the end of a grace period.
+static void rcu_poll_gp_seq_end(unsigned long *snap)
+{
+       struct rcu_node *rnp = rcu_get_root();
+
+       if (rcu_init_invoked())
+               raw_lockdep_assert_held_rcu_node(rnp);
+
+       // If the previously noted GP is still in effect, record the
+       // end of that GP.  Either way, zero counter to avoid counter-wrap
+       // problems.
+       if (*snap && *snap == rcu_state.gp_seq_polled) {
+               rcu_seq_end(&rcu_state.gp_seq_polled);
+               rcu_state.gp_seq_polled_snap = 0;
+       } else {
+               *snap = 0;
+       }
+}
+
+// Make the polled API aware of the beginning of a grace period, but
+// where caller does not hold the root rcu_node structure's lock.
+static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
+{
+       struct rcu_node *rnp = rcu_get_root();
+
+       if (rcu_init_invoked()) {
+               lockdep_assert_irqs_enabled();
+               raw_spin_lock_irq_rcu_node(rnp);
+       }
+       rcu_poll_gp_seq_start(snap);
+       if (rcu_init_invoked())
+               raw_spin_unlock_irq_rcu_node(rnp);
+}
+
+// Make the polled API aware of the end of a grace period, but where
+// caller does not hold the root rcu_node structure's lock.
+static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
+{
+       struct rcu_node *rnp = rcu_get_root();
+
+       if (rcu_init_invoked()) {
+               lockdep_assert_irqs_enabled();
+               raw_spin_lock_irq_rcu_node(rnp);
+       }
+       rcu_poll_gp_seq_end(snap);
+       if (rcu_init_invoked())
+               raw_spin_unlock_irq_rcu_node(rnp);
+}
+
 /*
  * Initialize a new grace period.  Return false if no grace period required.
  */
@@ -1810,6 +1882,7 @@ static noinline_for_stack bool rcu_gp_init(void)
        rcu_seq_start(&rcu_state.gp_seq);
        ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
        trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
+       rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
        raw_spin_unlock_irq_rcu_node(rnp);
 
        /*
@@ -2069,6 +2142,7 @@ static noinline void rcu_gp_cleanup(void)
         * safe for us to drop the lock in order to mark the grace
         * period as completed in all of the rcu_node structures.
         */
+       rcu_poll_gp_seq_end(&rcu_state.gp_seq_polled_snap);
        raw_spin_unlock_irq_rcu_node(rnp);
 
        /*
@@ -3837,8 +3911,18 @@ void synchronize_rcu(void)
                         lock_is_held(&rcu_lock_map) ||
                         lock_is_held(&rcu_sched_lock_map),
                         "Illegal synchronize_rcu() in RCU read-side critical section");
-       if (rcu_blocking_is_gp())
+       if (rcu_blocking_is_gp()) {
+               // Note well that this code runs with !PREEMPT && !SMP.
+               // In addition, all code that advances grace periods runs
+               // at process level.  Therefore, this GP overlaps with other
+               // GPs only by being fully nested within them, which allows
+               // reuse of ->gp_seq_polled_snap.
+               rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
+               rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
+               if (rcu_init_invoked())
+                       cond_resched_tasks_rcu_qs();
                return;  // Context allows vacuous grace periods.
+       }
        if (rcu_gp_is_expedited())
                synchronize_rcu_expedited();
        else
@@ -3860,7 +3944,7 @@ unsigned long get_state_synchronize_rcu(void)
         * before the load from ->gp_seq.
         */
        smp_mb();  /* ^^^ */
-       return rcu_seq_snap(&rcu_state.gp_seq);
+       return rcu_seq_snap(&rcu_state.gp_seq_polled);
 }
 EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
 
@@ -3889,7 +3973,13 @@ unsigned long start_poll_synchronize_rcu(void)
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
        raw_spin_lock_rcu_node(rnp); // irqs already disabled.
-       needwake = rcu_start_this_gp(rnp, rdp, gp_seq);
+       // Note it is possible for a grace period to have elapsed between
+       // the above call to get_state_synchronize_rcu() and the below call
+       // to rcu_seq_snap.  This is OK, the worst that happens is that we
+       // get a grace period that no one needed.  These accesses are ordered
+       // by smp_mb(), and we are accessing them in the opposite order
+       // from which they are updated at grace-period start, as required.
+       needwake = rcu_start_this_gp(rnp, rdp, rcu_seq_snap(&rcu_state.gp_seq));
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        if (needwake)
                rcu_gp_kthread_wake();
@@ -3925,7 +4015,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
 bool poll_state_synchronize_rcu(unsigned long oldstate)
 {
        if (oldstate == RCU_GET_STATE_COMPLETED ||
-           rcu_seq_done_exact(&rcu_state.gp_seq, oldstate)) {
+           rcu_seq_done_exact(&rcu_state.gp_seq_polled, oldstate)) {
                smp_mb(); /* Ensure GP ends before subsequent accesses. */
                return true;
        }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2ccf584..9c85303 100644
@@ -323,6 +323,8 @@ struct rcu_state {
        short gp_state;                         /* GP kthread sleep state. */
        unsigned long gp_wake_time;             /* Last GP kthread wake. */
        unsigned long gp_wake_seq;              /* ->gp_seq at ^^^. */
+       unsigned long gp_seq_polled;            /* GP seq for polled API. */
+       unsigned long gp_seq_polled_snap;       /* ->gp_seq_polled at normal GP start. */
 
        /* End of fields guarded by root rcu_node's lock. */