rcu: Move from ->need_future_gp[] to ->gp_seq_needed
Author:     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
AuthorDate: Tue, 1 May 2018 17:26:57 +0000 (10:26 -0700)
Commit:     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
CommitDate: Thu, 12 Jul 2018 22:37:48 +0000 (15:37 -0700)
One problem with the ->need_future_gp[] array is that the grace-period
assignment of each element changes as the grace periods complete.
This means that it is necessary to hold a lock when checking this
array to learn if a given grace period has already been requested.
This increases lock contention, which is the opposite of helpful.
This commit therefore replaces the ->need_future_gp[] array with a
single ->gp_seq_needed value, which is added to both the rcu_node and
rcu_data structures.
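
A single unsigned long suffices here because the kernel's wrap-safe
comparison macros order sequence numbers correctly even across counter
wrap.  The following is a minimal userspace sketch (not kernel code) of
the ULONG_CMP_GE()/ULONG_CMP_LT() macros from kernel/rcu/rcu.h and the
property this patch relies on:

	#include <stdio.h>
	#include <limits.h>

	/* As defined in kernel/rcu/rcu.h. */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
	#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

	int main(void)
	{
		unsigned long gp_seq = ULONG_MAX - 4;	   /* About to wrap. */
		unsigned long gp_seq_needed = gp_seq + 8;  /* Wraps past zero. */

		/* The request still orders after the current gp_seq. */
		printf("needmore = %d\n", ULONG_CMP_LT(gp_seq, gp_seq_needed)); /* 1 */
		printf("covered  = %d\n", ULONG_CMP_GE(gp_seq, gp_seq_needed)); /* 0 */
		return 0;
	}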

This will enable reliable lockless checking of whether or not a given
grace period has already been requested.
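
For illustration, a lockless fast path enabled by this change might
look as follows.  This is a sketch using a hypothetical helper name,
not code from this patch:

	/* Hypothetical helper: has grace period "c" already been requested? */
	static bool gp_already_requested(struct rcu_node *rnp, unsigned long c)
	{
		/*
		 * ->gp_seq_needed only moves forward, so a stale read can
		 * at worst send the caller to the locked slow path for a
		 * redundant (but safe) request, never a missed one.
		 */
		return ULONG_CMP_GE(READ_ONCE(rnp->gp_seq_needed), c);
	}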

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
kernel/rcu/tree.c
kernel/rcu/tree.h

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4a528a0..1ede516 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1560,7 +1560,7 @@ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 /*
  * Start the specified grace period, as needed to handle newly arrived
  * callbacks.  The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp[] field.  Returns true if there
+ * rcu_node structure's ->gp_seq_needed field.  Returns true if there
  * is reason to awaken the grace-period kthread.
  *
  * The caller must hold the specified rcu_node structure's ->lock, which
@@ -1589,14 +1589,14 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
        for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
                if (rnp_root != rnp)
                        raw_spin_lock_rcu_node(rnp_root);
-               if (need_future_gp_element(rnp_root, c) ||
+               if (ULONG_CMP_GE(rnp_root->gp_seq_needed, c) ||
                    rcu_seq_done(&rnp_root->gp_seq, c) ||
                    (rnp != rnp_root &&
                     rcu_seq_state(rcu_seq_current(&rnp_root->gp_seq)))) {
                        trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
                        goto unlock_out;
                }
-               need_future_gp_element(rnp_root, c) = true;
+               rnp_root->gp_seq_needed = c;
                if (rnp_root != rnp && rnp_root->parent != NULL)
                        raw_spin_unlock_rcu_node(rnp_root);
                if (!rnp_root->parent)
@@ -1633,8 +1633,9 @@ static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
        bool needmore;
        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
-       need_future_gp_element(rnp, c) = false;
-       needmore = need_any_future_gp(rnp);
+       needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
+       if (!needmore)
+               rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */
        trace_rcu_this_gp(rnp, rdp, c,
                          needmore ? TPS("CleanupMore") : TPS("Cleanup"));
        return needmore;
@@ -2046,7 +2047,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        rsp->gp_state = RCU_GP_IDLE;
        /* Check for GP requests since above loop. */
        rdp = this_cpu_ptr(rsp->rda);
-       if (need_any_future_gp(rnp)) {
+       if (ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
                trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
                                  TPS("CleanupMore"));
                needgp = true;
@@ -2700,8 +2701,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
        struct rcu_node *rnp_root = rcu_get_root(rsp);
        static atomic_t warned = ATOMIC_INIT(0);
 
-       if (!IS_ENABLED(CONFIG_PROVE_RCU) ||
-           rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)))
+       if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) ||
+           ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
                return;
        j = jiffies; /* Expensive access, and in common case don't get here. */
        if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
@@ -2711,7 +2712,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
 
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        j = jiffies;
-       if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+       if (rcu_gp_in_progress(rsp) ||
+           ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
            time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
            time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
            atomic_read(&warned)) {
@@ -2723,7 +2725,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
        if (rnp_root != rnp)
                raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
        j = jiffies;
-       if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+       if (rcu_gp_in_progress(rsp) ||
+           ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
            time_before(j, rsp->gp_req_activity + HZ) ||
            time_before(j, rsp->gp_activity + HZ) ||
            atomic_xchg(&warned, 1)) {
@@ -2731,12 +2734,9 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
        }
-       pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n",
-                __func__, READ_ONCE(rsp->gpnum),
-                need_future_gp_element(rcu_get_root(rsp), 0),
-                need_future_gp_element(rcu_get_root(rsp), 1),
-                need_future_gp_element(rcu_get_root(rsp), 2),
-                need_future_gp_element(rcu_get_root(rsp), 3),
+       pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x %s->state:%#lx\n",
+                __func__, (long)READ_ONCE(rsp->gp_seq),
+                (long)READ_ONCE(rnp_root->gp_seq_needed),
                 j - rsp->gp_req_activity, j - rsp->gp_activity,
                 rsp->gp_flags, rsp->name,
                 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
@@ -3527,6 +3527,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
        rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
        rdp->completed = rnp->completed;
        rdp->gp_seq = rnp->gp_seq;
+       rdp->gp_seq_needed = rnp->gp_seq;
        rdp->cpu_no_qs.b.norm = true;
        rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
        rdp->core_needs_qs = false;
@@ -3907,6 +3908,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
                        rnp->gpnum = rsp->gpnum;
                        rnp->completed = rsp->completed;
                        rnp->gp_seq = rsp->gp_seq;
+                       rnp->gp_seq_needed = rsp->gp_seq;
                        rnp->completedqs = rsp->gp_seq;
                        rnp->qsmask = 0;
                        rnp->qsmaskinit = 0;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a21d403..9329c1f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -88,6 +88,7 @@ struct rcu_node {
                                /*  This will either be equal to or one */
                                /*  behind the root rcu_node's gpnum. */
        unsigned long gp_seq;   /* Track rsp->rcu_gp_seq. */
+       unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */
        unsigned long completedqs; /* All QSes done for this node. */
        unsigned long qsmask;   /* CPUs or groups that need to switch in */
                                /*  order for current grace period to proceed.*/
@@ -160,7 +161,6 @@ struct rcu_node {
        struct swait_queue_head nocb_gp_wq[2];
                                /* Place for rcu_nocb_kthread() to wait GP. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-       u8 need_future_gp[4];   /* Counts of upcoming GP requests. */
        raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 
        spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
@@ -170,22 +170,6 @@ struct rcu_node {
        bool exp_need_flush;    /* Need to flush workitem? */
 } ____cacheline_internodealigned_in_smp;
 
-/* Accessors for ->need_future_gp[] array. */
-#define need_future_gp_mask() \
-       (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
-#define need_future_gp_element(rnp, c) \
-       ((rnp)->need_future_gp[(c >> RCU_SEQ_CTR_SHIFT) & need_future_gp_mask()])
-#define need_any_future_gp(rnp)                                                \
-({                                                                     \
-       int __i;                                                        \
-       bool __nonzero = false;                                         \
-                                                                       \
-       for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++)   \
-               __nonzero = __nonzero ||                                \
-                           READ_ONCE((rnp)->need_future_gp[__i]);      \
-       __nonzero;                                                      \
-})
-
 /*
  * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
  * are indexed relative to this interval rather than the global CPU ID space.
@@ -213,6 +197,7 @@ struct rcu_data {
        unsigned long   gpnum;          /* Highest gp number that this CPU */
                                        /*  is aware of having started. */
        unsigned long   gp_seq;         /* Track rsp->rcu_gp_seq counter. */
+       unsigned long   gp_seq_needed;  /* Track rsp->rcu_gp_seq_needed ctr. */
        unsigned long   rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
                                        /*  for rcu_all_qs() invocations. */
        union rcu_noqs  cpu_no_qs;      /* No QSes yet for this CPU. */