Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt

index 2dcaf9a..9591092 100644 (file)
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -131,13 +131,6 @@ error message, and the boot CPU will be removed from the mask.  Note that
  this means that your system must have at least two CPUs in order for
  CONFIG_NO_HZ_FULL=y to do anything for you.
  
-Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies
-that all CPUs other than the boot CPU are adaptive-ticks CPUs.  This
-Kconfig parameter will be overridden by the "nohz_full=" boot parameter,
-so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and
-the "nohz_full=1" boot parameter is specified, the boot parameter will
-prevail so that only CPU 1 will be an adaptive-ticks CPU.
-
  Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
  This is covered in the "RCU IMPLICATIONS" section below.
  
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h

index 043d047..36360d0 100644 (file)
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -214,10 +214,12 @@ do { \
  #endif
  
  /*
- * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
- * initialization and destruction of rcu_head on the stack. rcu_head structures
- * allocated dynamically in the heap or defined statically don't need any
- * initialization.
+ * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls
+ * are needed for dynamic initialization and destruction of rcu_head
+ * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for
+ * dynamic initialization and destruction of statically allocated rcu_head
+ * structures.  However, rcu_head structures allocated dynamically in the
+ * heap don't need any initialization.
   */
  #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
  void init_rcu_head(struct rcu_head *head);
diff --git a/include/linux/types.h b/include/linux/types.h

index c94d59e..ec13d02 100644 (file)
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -217,7 +217,7 @@ struct ustat {
   *
   * This guarantee is important for a few reasons:
   *  - future call_rcu_lazy() will make use of lower bits in the pointer;
- *  - the structure shares storage spacer in struct page with @compound_head,
+ *  - the structure shares storage space in struct page with @compound_head,
   *    which encodes PageTail() in bit 0. The guarantee is needed to avoid
   *    false-positive PageTail().
   */
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h

index 0b50fda..d8c3329 100644 (file)
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -179,6 +179,10 @@ TRACE_EVENT(rcu_grace_period_init,
   *
   *     "snap": Captured snapshot of expedited grace period sequence number.
   *     "start": Started a real expedited grace period.
+ *     "reset": Started resetting the tree.
+ *     "select": Started selecting the CPUs to wait on.
+ *     "selectofl": Selected CPU partially offline.
+ *     "startwait": Started waiting on selected CPUs.
   *     "end": Ended a real expedited grace period.
   *     "endwake": Woke piggybackers up.
   *     "done": Someone else did the expedited grace period for us.
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h

index 6334f2c..7a693e3 100644 (file)
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -77,12 +77,18 @@ static inline void rcu_seq_start(unsigned long *sp)
         WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
  }
  
+/* Compute the end-of-grace-period value for the specified sequence number. */
+static inline unsigned long rcu_seq_endval(unsigned long *sp)
+{
+       return (*sp | RCU_SEQ_STATE_MASK) + 1;
+}
+
  /* Adjust sequence number for end of update-side operation. */
  static inline void rcu_seq_end(unsigned long *sp)
  {
         smp_mb(); /* Ensure update-side operation before counter increment. */
         WARN_ON_ONCE(!rcu_seq_state(*sp));
-       WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
+       WRITE_ONCE(*sp, rcu_seq_endval(sp));
  }
  
  /* Take a snapshot of the update side's sequence number. */
@@ -295,9 +301,19 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
   * Iterate over all possible CPUs in a leaf RCU node.
   */
  #define for_each_leaf_node_possible_cpu(rnp, cpu) \
-       for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
-            cpu <= rnp->grphi; \
-            cpu = cpumask_next((cpu), cpu_possible_mask))
+       for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
+            (cpu) <= rnp->grphi; \
+            (cpu) = cpumask_next((cpu), cpu_possible_mask))
+
+/*
+ * Iterate over all CPUs in a leaf RCU node's specified mask.
+ */
+#define rcu_find_next_bit(rnp, cpu, mask) \
+       ((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
+#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
+       for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
+            (cpu) <= rnp->grphi; \
+            (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
  
  /*
   * Wrappers for the rcu_node::lock acquire and release.
@@ -337,7 +353,7 @@ do {                                                                        \
  } while (0)
  
  #define raw_spin_unlock_irqrestore_rcu_node(p, flags)                  \
-       raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)     \
+       raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
  
  #define raw_spin_trylock_rcu_node(p)                                   \
  ({                                                                     \
@@ -348,6 +364,9 @@ do {                                                                        \
         ___locked;                                                      \
  })
  
+#define raw_lockdep_assert_held_rcu_node(p)                            \
+       lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
+
  #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
  
  #ifdef CONFIG_TINY_RCU
@@ -356,24 +375,20 @@ static inline bool rcu_gp_is_normal(void) { return true; }
  static inline bool rcu_gp_is_expedited(void) { return false; }
  static inline void rcu_expedite_gp(void) { }
  static inline void rcu_unexpedite_gp(void) { }
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
  #else /* #ifdef CONFIG_TINY_RCU */
  bool rcu_gp_is_normal(void);     /* Internal RCU use. */
  bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
  void rcu_expedite_gp(void);
  void rcu_unexpedite_gp(void);
  void rcupdate_announce_bootup_oddness(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);
  #endif /* #else #ifdef CONFIG_TINY_RCU */
  
  #define RCU_SCHEDULER_INACTIVE 0
  #define RCU_SCHEDULER_INIT     1
  #define RCU_SCHEDULER_RUNNING  2
  
-#ifdef CONFIG_TINY_RCU
-static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
-#else /* #ifdef CONFIG_TINY_RCU */
-void rcu_request_urgent_qs_task(struct task_struct *t);
-#endif /* #else #ifdef CONFIG_TINY_RCU */
-
  enum rcutorture_type {
         RCU_FLAVOR,
         RCU_BH_FLAVOR,
@@ -470,6 +485,7 @@ void show_rcu_gp_kthreads(void);
  void rcu_force_quiescent_state(void);
  void rcu_bh_force_quiescent_state(void);
  void rcu_sched_force_quiescent_state(void);
+extern struct workqueue_struct *rcu_gp_wq;
  #endif /* #else #ifdef CONFIG_TINY_RCU */
  
  #ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c

index d1ebdf9..777e7a6 100644 (file)
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -61,11 +61,30 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
  #define VERBOSE_PERFOUT_ERRSTRING(s) \
         do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
  
+/*
+ * The intended use cases for the nreaders and nwriters module parameters
+ * are as follows:
+ *
+ * 1.  Specify only the nr_cpus kernel boot parameter.  This will
+ *     set both nreaders and nwriters to the value specified by
+ *     nr_cpus for a mixed reader/writer test.
+ *
+ * 2.  Specify the nr_cpus kernel boot parameter, but set
+ *     rcuperf.nreaders to zero.  This will set nwriters to the
+ *     value specified by nr_cpus for an update-only test.
+ *
+ * 3.  Specify the nr_cpus kernel boot parameter, but set
+ *     rcuperf.nwriters to zero.  This will set nreaders to the
+ *     value specified by nr_cpus for a read-only test.
+ *
+ * Various other use cases may of course be specified.
+ */
+
  torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
  torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
  torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
  torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
-torture_param(int, nreaders, 0, "Number of RCU reader threads");
+torture_param(int, nreaders, -1, "Number of RCU reader threads");
  torture_param(int, nwriters, -1, "Number of RCU updater threads");
  torture_param(bool, shutdown, !IS_ENABLED(MODULE),
               "Shutdown at end of performance tests.");
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c

index 308e6fd..680c96d 100644 (file)
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -909,34 +909,38 @@ rcu_torture_writer(void *arg)
         int nsynctypes = 0;
  
         VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
-       if (!can_expedite) {
+       if (!can_expedite)
                 pr_alert("%s" TORTURE_FLAG
-                        " GP expediting controlled from boot/sysfs for %s,\n",
+                        " GP expediting controlled from boot/sysfs for %s.\n",
                          torture_type, cur_ops->name);
-               pr_alert("%s" TORTURE_FLAG
-                        " Disabled dynamic grace-period expediting.\n",
-                        torture_type);
-       }
  
         /* Initialize synctype[] array.  If none set, take default. */
         if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
                 gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
-       if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
+       if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) {
                 synctype[nsynctypes++] = RTWS_COND_GET;
-       else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
-               pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
-       if (gp_exp1 && cur_ops->exp_sync)
+               pr_info("%s: Testing conditional GPs.\n", __func__);
+       } else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) {
+               pr_alert("%s: gp_cond without primitives.\n", __func__);
+       }
+       if (gp_exp1 && cur_ops->exp_sync) {
                 synctype[nsynctypes++] = RTWS_EXP_SYNC;
-       else if (gp_exp && !cur_ops->exp_sync)
-               pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
-       if (gp_normal1 && cur_ops->deferred_free)
+               pr_info("%s: Testing expedited GPs.\n", __func__);
+       } else if (gp_exp && !cur_ops->exp_sync) {
+               pr_alert("%s: gp_exp without primitives.\n", __func__);
+       }
+       if (gp_normal1 && cur_ops->deferred_free) {
                 synctype[nsynctypes++] = RTWS_DEF_FREE;
-       else if (gp_normal && !cur_ops->deferred_free)
-               pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
-       if (gp_sync1 && cur_ops->sync)
+               pr_info("%s: Testing asynchronous GPs.\n", __func__);
+       } else if (gp_normal && !cur_ops->deferred_free) {
+               pr_alert("%s: gp_normal without primitives.\n", __func__);
+       }
+       if (gp_sync1 && cur_ops->sync) {
                 synctype[nsynctypes++] = RTWS_SYNC;
-       else if (gp_sync && !cur_ops->sync)
-               pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
+               pr_info("%s: Testing normal GPs.\n", __func__);
+       } else if (gp_sync && !cur_ops->sync) {
+               pr_alert("%s: gp_sync without primitives.\n", __func__);
+       }
         if (WARN_ONCE(nsynctypes == 0,
                       "rcu_torture_writer: No update-side primitives.\n")) {
                 /*
@@ -1011,6 +1015,9 @@ rcu_torture_writer(void *arg)
                                 rcu_unexpedite_gp();
                         if (++expediting > 3)
                                 expediting = -expediting;
+               } else if (!can_expedite) { /* Disabled during boot, recheck. */
+                       can_expedite = !rcu_gp_is_expedited() &&
+                                      !rcu_gp_is_normal();
                 }
                 rcu_torture_writer_state = RTWS_STUTTER;
                 stutter_wait("rcu_torture_writer");
@@ -1021,6 +1028,10 @@ rcu_torture_writer(void *arg)
         while (can_expedite && expediting++ < 0)
                 rcu_unexpedite_gp();
         WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
+       if (!can_expedite)
+               pr_alert("%s" TORTURE_FLAG
+                        " Dynamic grace-period expediting was disabled.\n",
+                        torture_type);
         rcu_torture_writer_state = RTWS_STOPPING;
         torture_kthread_stopping("rcu_torture_writer");
         return 0;
@@ -1045,13 +1056,13 @@ rcu_torture_fakewriter(void *arg)
                     torture_random(&rand) % (nfakewriters * 8) == 0) {
                         cur_ops->cb_barrier();
                 } else if (gp_normal == gp_exp) {
-                       if (torture_random(&rand) & 0x80)
+                       if (cur_ops->sync && torture_random(&rand) & 0x80)
                                 cur_ops->sync();
-                       else
+                       else if (cur_ops->exp_sync)
                                 cur_ops->exp_sync();
-               } else if (gp_normal) {
+               } else if (gp_normal && cur_ops->sync) {
                         cur_ops->sync();
-               } else {
+               } else if (cur_ops->exp_sync) {
                         cur_ops->exp_sync();
                 }
                 stutter_wait("rcu_torture_fakewriter");
@@ -1557,11 +1568,10 @@ static int rcu_torture_barrier_init(void)
         atomic_set(&barrier_cbs_count, 0);
         atomic_set(&barrier_cbs_invoked, 0);
         barrier_cbs_tasks =
-               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
+               kcalloc(n_barrier_cbs, sizeof(barrier_cbs_tasks[0]),
                         GFP_KERNEL);
         barrier_cbs_wq =
-               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
-                       GFP_KERNEL);
+               kcalloc(n_barrier_cbs, sizeof(barrier_cbs_wq[0]), GFP_KERNEL);
         if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
                 return -ENOMEM;
         for (i = 0; i < n_barrier_cbs; i++) {
@@ -1674,7 +1684,7 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
          * next grace period.  Unlikely, but can happen.  If it
          * does happen, the debug-objects subsystem won't have splatted.
          */
-       pr_alert("rcutorture: duplicated callback was invoked.\n");
+       pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
  }
  #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
  
@@ -1691,7 +1701,7 @@ static void rcu_test_debug_objects(void)
  
         init_rcu_head_on_stack(&rh1);
         init_rcu_head_on_stack(&rh2);
-       pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n");
+       pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME);
  
         /* Try to queue the rh2 pair of callbacks for the same grace period. */
         preempt_disable(); /* Prevent preemption from interrupting test. */
@@ -1706,11 +1716,11 @@ static void rcu_test_debug_objects(void)
  
         /* Wait for them all to get done so we can safely return. */
         rcu_barrier();
-       pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n");
+       pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
         destroy_rcu_head_on_stack(&rh1);
         destroy_rcu_head_on_stack(&rh2);
  #else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-       pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n");
+       pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
  #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
  }
  
@@ -1799,7 +1809,7 @@ rcu_torture_init(void)
         if (firsterr)
                 goto unwind;
         if (nfakewriters > 0) {
-               fakewriter_tasks = kzalloc(nfakewriters *
+               fakewriter_tasks = kcalloc(nfakewriters,
                                            sizeof(fakewriter_tasks[0]),
                                            GFP_KERNEL);
                 if (fakewriter_tasks == NULL) {
@@ -1814,7 +1824,7 @@ rcu_torture_init(void)
                 if (firsterr)
                         goto unwind;
         }
-       reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
+       reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
                                GFP_KERNEL);
         if (reader_tasks == NULL) {
                 VERBOSE_TOROUT_ERRSTRING("out of memory");
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c

index d5cea81..fb560fc 100644 (file)
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -386,7 +386,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
                 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
         if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
             WARN_ON(srcu_readers_active(sp))) {
-               pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
+               pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
                 return; /* Caller forgot to stop doing call_srcu()? */
         }
         free_percpu(sp->sda);
@@ -439,7 +439,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
         struct srcu_data *sdp = this_cpu_ptr(sp->sda);
         int state;
  
-       lockdep_assert_held(&sp->lock);
+       lockdep_assert_held(&ACCESS_PRIVATE(sp, lock));
         WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
         rcu_segcblist_advance(&sdp->srcu_cblist,
                               rcu_seq_current(&sp->srcu_gp_seq));
@@ -492,8 +492,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
   */
  static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
  {
-       srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
-                                  &sdp->work, delay);
+       srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
  }
  
  /*
@@ -527,11 +526,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
  {
         unsigned long cbdelay;
         bool cbs;
+       bool last_lvl;
         int cpu;
         unsigned long flags;
         unsigned long gpseq;
         int idx;
-       int idxnext;
         unsigned long mask;
         struct srcu_data *sdp;
         struct srcu_node *snp;
@@ -555,11 +554,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
  
         /* Initiate callback invocation as needed. */
         idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
-       idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
         rcu_for_each_node_breadth_first(sp, snp) {
                 spin_lock_irq_rcu_node(snp);
                 cbs = false;
-               if (snp >= sp->level[rcu_num_lvls - 1])
+               last_lvl = snp >= sp->level[rcu_num_lvls - 1];
+               if (last_lvl)
                         cbs = snp->srcu_have_cbs[idx] == gpseq;
                 snp->srcu_have_cbs[idx] = gpseq;
                 rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
@@ -572,13 +571,16 @@ static void srcu_gp_end(struct srcu_struct *sp)
                         srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
  
                 /* Occasionally prevent srcu_data counter wrap. */
-               if (!(gpseq & counter_wrap_check))
+               if (!(gpseq & counter_wrap_check) && last_lvl)
                         for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
                                 sdp = per_cpu_ptr(sp->sda, cpu);
                                 spin_lock_irqsave_rcu_node(sdp, flags);
                                 if (ULONG_CMP_GE(gpseq,
                                                  sdp->srcu_gp_seq_needed + 100))
                                         sdp->srcu_gp_seq_needed = gpseq;
+                               if (ULONG_CMP_GE(gpseq,
+                                                sdp->srcu_gp_seq_needed_exp + 100))
+                                       sdp->srcu_gp_seq_needed_exp = gpseq;
                                 spin_unlock_irqrestore_rcu_node(sdp, flags);
                         }
         }
@@ -593,9 +595,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
             ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
                 srcu_gp_start(sp);
                 spin_unlock_irq_rcu_node(sp);
-               /* Throttle expedited grace periods: Should be rare! */
-               srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
-                                   ? 0 : SRCU_INTERVAL);
+               srcu_reschedule(sp, 0);
         } else {
                 spin_unlock_irq_rcu_node(sp);
         }
@@ -626,7 +626,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
                 spin_unlock_irqrestore_rcu_node(snp, flags);
         }
         spin_lock_irqsave_rcu_node(sp, flags);
-       if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+       if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
                 sp->srcu_gp_seq_needed_exp = s;
         spin_unlock_irqrestore_rcu_node(sp, flags);
  }
@@ -691,8 +691,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
             rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
                 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
                 srcu_gp_start(sp);
-               queue_delayed_work(system_power_efficient_wq, &sp->work,
-                                  srcu_get_delay(sp));
+               queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
         }
         spin_unlock_irqrestore_rcu_node(sp, flags);
  }
@@ -1225,7 +1224,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
         spin_unlock_irq_rcu_node(sp);
  
         if (pushgp)
-               queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
+               queue_delayed_work(rcu_gp_wq, &sp->work, delay);
  }
  
  /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c

index 491bdf3..2a73469 100644 (file)
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1161,7 +1161,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
   */
  static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
  {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
                 WRITE_ONCE(rdp->gpwrap, true);
         if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
@@ -1350,6 +1350,7 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
                        rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
                        rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
                 if (rsp->gp_kthread) {
+                       pr_err("RCU grace-period kthread stack dump:\n");
                         sched_show_task(rsp->gp_kthread);
                         wake_up_process(rsp->gp_kthread);
                 }
@@ -1628,7 +1629,7 @@ void rcu_cpu_stall_reset(void)
  static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
                                        struct rcu_node *rnp)
  {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
  
         /*
          * If RCU is idle, we just wait for the next grace period.
@@ -1675,7 +1676,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
         bool ret = false;
         struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
  
         /*
          * Pick up grace-period number for new callbacks.  If this
@@ -1803,7 +1804,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
  {
         bool ret = false;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
  
         /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
         if (!rcu_segcblist_pend_cbs(&rdp->cblist))
@@ -1843,7 +1844,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
  static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
                             struct rcu_data *rdp)
  {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
  
         /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
         if (!rcu_segcblist_pend_cbs(&rdp->cblist))
@@ -1871,7 +1872,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
         bool ret;
         bool need_gp;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
  
         /* Handle the ends of any preceding grace periods first. */
         if (rdp->completed == rnp->completed &&
@@ -2296,7 +2297,7 @@ static bool
  rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                       struct rcu_data *rdp)
  {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
                 /*
                  * Either we have not yet spawned the grace-period
@@ -2358,7 +2359,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
  static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
         __releases(rcu_get_root(rsp)->lock)
  {
-       lockdep_assert_held(&rcu_get_root(rsp)->lock);
+       raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp));
         WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
         WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
         raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
@@ -2383,7 +2384,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
         unsigned long oldmask = 0;
         struct rcu_node *rnp_c;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
  
         /* Walk up the rcu_node hierarchy. */
         for (;;) {
@@ -2447,7 +2448,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
         unsigned long mask;
         struct rcu_node *rnp_p;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
             rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2592,7 +2593,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
         long mask;
         struct rcu_node *rnp = rnp_leaf;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
             rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
                 return;
@@ -2691,7 +2692,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
         /* Update counts and requeue any remaining callbacks. */
         rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
         smp_mb(); /* List handling before counting for rcu_barrier(). */
-       rdp->n_cbs_invoked += count;
         rcu_segcblist_insert_count(&rdp->cblist, &rcl);
  
         /* Reinstate batch limit if we have worked down the excess. */
@@ -2845,10 +2845,8 @@ static void force_quiescent_state(struct rcu_state *rsp)
                       !raw_spin_trylock(&rnp->fqslock);
                 if (rnp_old != NULL)
                         raw_spin_unlock(&rnp_old->fqslock);
-               if (ret) {
-                       rsp->n_force_qs_lh++;
+               if (ret)
                         return;
-               }
                 rnp_old = rnp;
         }
         /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
@@ -2857,7 +2855,6 @@ static void force_quiescent_state(struct rcu_state *rsp)
         raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
         raw_spin_unlock(&rnp_old->fqslock);
         if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
-               rsp->n_force_qs_lh++;
                 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
                 return;  /* Someone beat us to it. */
         }
@@ -3355,8 +3352,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
  {
         struct rcu_node *rnp = rdp->mynode;
  
-       rdp->n_rcu_pending++;
-
         /* Check for CPU stalls, if enabled. */
         check_cpu_stall(rsp, rdp);
  
@@ -3365,48 +3360,31 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
                 return 0;
  
         /* Is the RCU core waiting for a quiescent state from this CPU? */
-       if (rcu_scheduler_fully_active &&
-           rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
-           rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
-               rdp->n_rp_core_needs_qs++;
-       } else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
-               rdp->n_rp_report_qs++;
+       if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
                 return 1;
-       }
  
         /* Does this CPU have callbacks ready to invoke? */
-       if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
-               rdp->n_rp_cb_ready++;
+       if (rcu_segcblist_ready_cbs(&rdp->cblist))
                 return 1;
-       }
  
         /* Has RCU gone idle with this CPU needing another grace period? */
-       if (cpu_needs_another_gp(rsp, rdp)) {
-               rdp->n_rp_cpu_needs_gp++;
+       if (cpu_needs_another_gp(rsp, rdp))
                 return 1;
-       }
  
         /* Has another RCU grace period completed?  */
-       if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
-               rdp->n_rp_gp_completed++;
+       if (READ_ONCE(rnp->completed) != rdp->completed) /* outside lock */
                 return 1;
-       }
  
         /* Has a new RCU grace period started? */
         if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
-           unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */
-               rdp->n_rp_gp_started++;
+           unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
                 return 1;
-       }
  
         /* Does this CPU need a deferred NOCB wakeup? */
-       if (rcu_nocb_need_deferred_wakeup(rdp)) {
-               rdp->n_rp_nocb_defer_wakeup++;
+       if (rcu_nocb_need_deferred_wakeup(rdp))
                 return 1;
-       }
  
         /* nothing to do */
-       rdp->n_rp_need_nothing++;
         return 0;
  }
  
@@ -3618,7 +3596,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
         long mask;
         struct rcu_node *rnp = rnp_leaf;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         for (;;) {
                 mask = rnp->grpmask;
                 rnp = rnp->parent;
@@ -3636,12 +3614,9 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
  static void __init
  rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
  {
-       unsigned long flags;
         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp = rcu_get_root(rsp);
  
         /* Set up local state, ensuring consistent view of global state. */
-       raw_spin_lock_irqsave_rcu_node(rnp, flags);
         rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
         rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
         WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
@@ -3649,7 +3624,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
         rdp->cpu = cpu;
         rdp->rsp = rsp;
         rcu_boot_init_nocb_percpu_data(rdp);
-       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  }
  
  /*
@@ -4193,6 +4167,8 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
         pr_cont("\n");
  }
  
+struct workqueue_struct *rcu_gp_wq;
+
  void __init rcu_init(void)
  {
         int cpu;
@@ -4219,6 +4195,10 @@ void __init rcu_init(void)
                 rcu_cpu_starting(cpu);
                 rcutree_online_cpu(cpu);
         }
+
+       /* Create workqueue for expedited GPs and for Tree SRCU. */
+       rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
+       WARN_ON(!rcu_gp_wq);
  }
  
  #include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h

index 6488a3b..f491ab4 100644 (file)
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -146,12 +146,6 @@ struct rcu_node {
                                 /*  boosting for this rcu_node structure. */
         unsigned int boost_kthread_status;
                                 /* State of boost_kthread_task for tracing. */
-       unsigned long n_tasks_boosted;
-                               /* Total number of tasks boosted. */
-       unsigned long n_exp_boosts;
-                               /* Number of tasks boosted for expedited GP. */
-       unsigned long n_normal_boosts;
-                               /* Number of tasks boosted for normal GP. */
  #ifdef CONFIG_RCU_NOCB_CPU
         struct swait_queue_head nocb_gp_wq[2];
                                 /* Place for rcu_nocb_kthread() to wait GP. */
@@ -184,13 +178,6 @@ union rcu_noqs {
         u16 s; /* Set of bits, aggregate OR here. */
  };
  
-/* Index values for nxttail array in struct rcu_data. */
-#define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
-#define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
-#define RCU_NEXT_READY_TAIL    2       /* Also RCU_NEXT head. */
-#define RCU_NEXT_TAIL          3
-#define RCU_NEXT_SIZE          4
-
  /* Per-CPU data for read-copy update. */
  struct rcu_data {
         /* 1) quiescent-state and grace-period handling : */
@@ -217,8 +204,6 @@ struct rcu_data {
                                         /* different grace periods. */
         long            qlen_last_fqs_check;
                                         /* qlen at last check for QS forcing */
-       unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
-       unsigned long   n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
         unsigned long   n_force_qs_snap;
                                         /* did other CPU force QS recently? */
         long            blimit;         /* Upper limit on a processed batch */
@@ -234,18 +219,7 @@ struct rcu_data {
                                         /* Grace period that needs help */
                                         /*  from cond_resched(). */
  
-       /* 5) __rcu_pending() statistics. */
-       unsigned long n_rcu_pending;    /* rcu_pending() calls since boot. */
-       unsigned long n_rp_core_needs_qs;
-       unsigned long n_rp_report_qs;
-       unsigned long n_rp_cb_ready;
-       unsigned long n_rp_cpu_needs_gp;
-       unsigned long n_rp_gp_completed;
-       unsigned long n_rp_gp_started;
-       unsigned long n_rp_nocb_defer_wakeup;
-       unsigned long n_rp_need_nothing;
-
-       /* 6) _rcu_barrier(), OOM callbacks, and expediting. */
+       /* 5) _rcu_barrier(), OOM callbacks, and expediting. */
         struct rcu_head barrier_head;
  #ifdef CONFIG_RCU_FAST_NO_HZ
         struct rcu_head oom_head;
@@ -256,7 +230,7 @@ struct rcu_data {
         atomic_long_t exp_workdone3;    /* # done by others #3. */
         int exp_dynticks_snap;          /* Double-check need for IPI. */
  
-       /* 7) Callback offloading. */
+       /* 6) Callback offloading. */
  #ifdef CONFIG_RCU_NOCB_CPU
         struct rcu_head *nocb_head;     /* CBs waiting for kthread. */
         struct rcu_head **nocb_tail;
@@ -283,7 +257,7 @@ struct rcu_data {
                                         /* Leader CPU takes GP-end wakeups. */
  #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  
-       /* 8) RCU CPU stall data. */
+       /* 7) RCU CPU stall data. */
         unsigned int softirq_snap;      /* Snapshot of softirq activity. */
         /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
         struct irq_work rcu_iw;         /* Check for non-irq activity. */
@@ -374,10 +348,6 @@ struct rcu_state {
                                                 /*  kthreads, if configured. */
         unsigned long n_force_qs;               /* Number of calls to */
                                                 /*  force_quiescent_state(). */
-       unsigned long n_force_qs_lh;            /* ~Number of calls leaving */
-                                               /*  due to lock unavailable. */
-       unsigned long n_force_qs_ngp;           /* Number of calls leaving */
-                                               /*  due to no GP active. */
         unsigned long gp_start;                 /* Time at which GP started, */
                                                 /*  but in jiffies. */
         unsigned long gp_activity;              /* Time of last GP kthread */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h

index 46d61b5..f72eefa 100644 (file)
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -28,6 +28,15 @@ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
         rcu_seq_start(&rsp->expedited_sequence);
  }
  
+/*
+ * Return the value that the expedited-grace-period counter will have
+ * at the end of the current grace period.
+ */
+static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
+{
+       return rcu_seq_endval(&rsp->expedited_sequence);
+}
+
  /*
   * Record the end of an expedited grace period.
   */
@@ -366,21 +375,30 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
         int ret;
         struct rcu_node *rnp;
  
+       trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
         sync_exp_reset_tree(rsp);
+       trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
         rcu_for_each_leaf_node(rsp, rnp) {
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
  
                 /* Each pass checks a CPU for identity, offline, and idle. */
                 mask_ofl_test = 0;
-               for_each_leaf_node_possible_cpu(rnp, cpu) {
+               for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+                       unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+                       struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
+                       int snap;
  
-                       rdp->exp_dynticks_snap =
-                               rcu_dynticks_snap(rdp->dynticks);
                         if (raw_smp_processor_id() == cpu ||
-                           rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
-                           !(rnp->qsmaskinitnext & rdp->grpmask))
-                               mask_ofl_test |= rdp->grpmask;
+                           !(rnp->qsmaskinitnext & mask)) {
+                               mask_ofl_test |= mask;
+                       } else {
+                               snap = rcu_dynticks_snap(rdtp);
+                               if (rcu_dynticks_in_eqs(snap))
+                                       mask_ofl_test |= mask;
+                               else
+                                       rdp->exp_dynticks_snap = snap;
+                       }
                 }
                 mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
  
@@ -394,7 +412,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  
                 /* IPI the remaining CPUs for expedited quiescent state. */
-               for_each_leaf_node_possible_cpu(rnp, cpu) {
+               for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
                         unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
  
@@ -417,6 +435,7 @@ retry_ipi:
                             (rnp->expmask & mask)) {
                                 /* Online, so delay for a bit and try again. */
                                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+                               trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
                                 schedule_timeout_uninterruptible(1);
                                 goto retry_ipi;
                         }
@@ -443,6 +462,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
         struct rcu_node *rnp_root = rcu_get_root(rsp);
         int ret;
  
+       trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
         jiffies_stall = rcu_jiffies_till_stall_check();
         jiffies_start = jiffies;
  
@@ -606,7 +626,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
                 rew.rew_rsp = rsp;
                 rew.rew_s = s;
                 INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
-               schedule_work(&rew.rew_work);
+               queue_work(rcu_gp_wq, &rew.rew_work);
         }
  
         /* Wait for expedited grace period to complete. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h

index fb88a02..84fbee4 100644 (file)
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -180,7 +180,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                          (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
         struct task_struct *t = current;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         WARN_ON_ONCE(rdp->mynode != rnp);
         WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
  
@@ -560,8 +560,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
         }
         t = list_entry(rnp->gp_tasks->prev,
                        struct task_struct, rcu_node_entry);
-       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+               /*
+                * We could be printing a lot while holding a spinlock.
+                * Avoid triggering hard lockup.
+                */
+               touch_nmi_watchdog();
                 sched_show_task(t);
+       }
         raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  }
  
@@ -957,14 +963,10 @@ static int rcu_boost(struct rcu_node *rnp)
          * expedited grace period must boost all blocked tasks, including
          * those blocking the pre-existing normal grace period.
          */
-       if (rnp->exp_tasks != NULL) {
+       if (rnp->exp_tasks != NULL)
                 tb = rnp->exp_tasks;
-               rnp->n_exp_boosts++;
-       } else {
+       else
                 tb = rnp->boost_tasks;
-               rnp->n_normal_boosts++;
-       }
-       rnp->n_tasks_boosted++;
  
         /*
          * We boost task t by manufacturing an rt_mutex that appears to
@@ -1042,7 +1044,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  {
         struct task_struct *t;
  
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
         if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                 return;
@@ -1677,6 +1679,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
         char *ticks_title;
         unsigned long ticks_value;
  
+       /*
+        * We could be printing a lot while holding a spinlock.  Avoid
+        * triggering hard lockup.
+        */
+       touch_nmi_watchdog();
+
         if (rsp->gpnum == rdp->gpnum) {
                 ticks_title = "ticks this GP";
                 ticks_value = rdp->ticks_this_gp;
@@ -2235,7 +2243,6 @@ static int rcu_nocb_kthread(void *arg)
                 smp_mb__before_atomic();  /* _add after CB invocation. */
                 atomic_long_add(-c, &rdp->nocb_q_count);
                 atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
-               rdp->n_nocbs_invoked += c;
         }
         return 0;
  }
@@ -2312,8 +2319,11 @@ void __init rcu_init_nohz(void)
                 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
                             rcu_nocb_mask);
         }
-       pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
-               cpumask_pr_args(rcu_nocb_mask));
+       if (cpumask_empty(rcu_nocb_mask))
+               pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
+       else
+               pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
+                       cpumask_pr_args(rcu_nocb_mask));
         if (rcu_nocb_poll)
                 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
  
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig

index f6b5f19..78eabc4 100644 (file)
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -113,16 +113,6 @@ config NO_HZ_FULL
  
  endchoice
  
-config NO_HZ_FULL_ALL
-       bool "Full dynticks system on all CPUs by default (except CPU 0)"
-       depends on NO_HZ_FULL
-       help
-         If the user doesn't pass the nohz_full boot option to
-        define the range of full dynticks CPUs, consider that all
-        CPUs in the system are full dynticks by default.
-        Note the boot CPU will still be kept outside the range to
-        handle the timekeeping duty.
-
  config NO_HZ
         bool "Old Idle dynticks config"
         depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c

index 29a5733..ccd3782 100644 (file)
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -405,30 +405,12 @@ static int tick_nohz_cpu_down(unsigned int cpu)
         return 0;
  }
  
-static int tick_nohz_init_all(void)
-{
-       int err = -1;
-
-#ifdef CONFIG_NO_HZ_FULL_ALL
-       if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
-               WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n");
-               return err;
-       }
-       err = 0;
-       cpumask_setall(tick_nohz_full_mask);
-       tick_nohz_full_running = true;
-#endif
-       return err;
-}
-
  void __init tick_nohz_init(void)
  {
         int cpu, ret;
  
-       if (!tick_nohz_full_running) {
-               if (tick_nohz_init_all() < 0)
-                       return;
-       }
+       if (!tick_nohz_full_running)
+               return;
  
         /*
          * Full dynticks uses irq work to drive the tick rescheduling on safe
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh

index 07a1377..65f6655 100644 (file)
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -136,6 +136,9 @@ identify_boot_image () {
                 qemu-system-x86_64|qemu-system-i386)
                         echo arch/x86/boot/bzImage
                         ;;
+               qemu-system-aarch64)
+                       echo arch/arm64/boot/Image
+                       ;;
                 *)
                         echo vmlinux
                         ;;
@@ -158,6 +161,9 @@ identify_qemu () {
         elif echo $u | grep -q "Intel 80386"
         then
                 echo qemu-system-i386
+       elif echo $u | grep -q aarch64
+       then
+               echo qemu-system-aarch64
         elif uname -a | grep -q ppc64
         then
                 echo qemu-system-ppc64
@@ -176,16 +182,20 @@ identify_qemu () {
  # Output arguments for the qemu "-append" string based on CPU type
  # and the TORTURE_QEMU_INTERACTIVE environment variable.
  identify_qemu_append () {
+       local console=ttyS0
         case "$1" in
         qemu-system-x86_64|qemu-system-i386)
                 echo noapic selinux=0 initcall_debug debug
                 ;;
+       qemu-system-aarch64)
+               console=ttyAMA0
+               ;;
         esac
         if test -n "$TORTURE_QEMU_INTERACTIVE"
         then
                 echo root=/dev/sda
         else
-               echo console=ttyS0
+               echo console=$console
         fi
  }
  
@@ -197,6 +207,9 @@ identify_qemu_args () {
         case "$1" in
         qemu-system-x86_64|qemu-system-i386)
                 ;;
+       qemu-system-aarch64)
+               echo -machine virt,gic-version=host -cpu host
+               ;;
         qemu-system-ppc64)
                 echo -enable-kvm -M pseries -nodefaults
                 echo -device spapr-vscsi
@@ -254,7 +267,7 @@ specify_qemu_cpus () {
                 echo $2
         else
                 case "$1" in
-               qemu-system-x86_64|qemu-system-i386)
+               qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
                         echo $2 -smp $3
                         ;;
                 qemu-system-ppc64)
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh

index 963f712..8948f79 100755 (executable)
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -39,30 +39,31 @@ sed -e 's/us : / : /' |
  tr -d '\015' |
  awk '
  $8 == "start" {
-       if (starttask != "")
+       if (startseq != "")
                 nlost++;
         starttask = $1;
         starttime = $3;
         startseq = $7;
+       seqtask[startseq] = starttask;
  }
  
  $8 == "end" {
-       if (starttask == $1 && startseq == $7) {
+       if (startseq == $7) {
                 curgpdur = $3 - starttime;
                 gptimes[++n] = curgpdur;
                 gptaskcnt[starttask]++;
                 sum += curgpdur;
                 if (curgpdur > 1000)
                         print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
-               starttask = "";
+               startseq = "";
         } else {
                 # Lost a message or some such, reset.
-               starttask = "";
+               startseq = "";
                 nlost++;
         }
  }
  
-$8 == "done" {
+$8 == "done" && seqtask[$7] != $1 {
         piggybackcnt[$1]++;
  }
  
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh

index 1b78a12..5f8fbb0 100755 (executable)
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -177,8 +177,8 @@ then
         exit 0
  fi
  echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
  commandcompleted=0
  sleep 10 # Give qemu's pid a chance to reach the file
  if test -s "$resdir/qemu_pid"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh

index 7d1f607..56610db 100755 (executable)
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -1,10 +1,8 @@
  #!/bin/bash
  #
-# Run a series of 14 tests under KVM.  These are not particularly
-# well-selected or well-tuned, but are the current set.
-#
-# Edit the definitions below to set the locations of the various directories,
-# as well as the test duration.
+# Run a series of tests under KVM.  By default, this series is specified
+# by the relevant CFLIST file, but can be overridden by the --configs
+# command-line argument.
  #
  # Usage: kvm.sh [ options ]
  #
@@ -44,6 +42,7 @@ TORTURE_BOOT_IMAGE=""
  TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
  TORTURE_KCONFIG_ARG=""
  TORTURE_KMAKE_ARG=""
+TORTURE_QEMU_MEM=512
  TORTURE_SHUTDOWN_GRACE=180
  TORTURE_SUITE=rcu
  resdir=""
@@ -70,6 +69,7 @@ usage () {
         echo "       --kconfig Kconfig-options"
         echo "       --kmake-arg kernel-make-arguments"
         echo "       --mac nn:nn:nn:nn:nn:nn"
+       echo "       --memory megabytes | nnnG"
         echo "       --no-initrd"
         echo "       --qemu-args qemu-arguments"
         echo "       --qemu-cmd qemu-system-..."
@@ -147,6 +147,11 @@ do
                 TORTURE_QEMU_MAC=$2
                 shift
                 ;;
+       --memory)
+               checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
+               TORTURE_QEMU_MEM=$2
+               shift
+               ;;
         --no-initrd)
                 TORTURE_INITRD=""; export TORTURE_INITRD
                 ;;
@@ -174,6 +179,12 @@ do
                 checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
                 TORTURE_SUITE=$2
                 shift
+               if test "$TORTURE_SUITE" = rcuperf
+               then
+                       # If you really want jitter for rcuperf, specify
+                       # it after specifying rcuperf.  (But why?)
+                       jitter=0
+               fi
                 ;;
         *)
                 echo Unknown argument $1
@@ -288,6 +299,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
  TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
  TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
  TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
  TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
  TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
  if ! test -e $resdir
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03

index c70c51d..28568b7 100644 (file)
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -9,5 +9,4 @@ CONFIG_PREEMPT=y
  CONFIG_HZ_PERIODIC=n
  CONFIG_NO_HZ_IDLE=n
  CONFIG_NO_HZ_FULL=y
-CONFIG_NO_HZ_FULL_ALL=y
  #CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot

index cd2a188..838297c 100644 (file)
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
@@ -1 +1 @@
-rcutorture.torture_type=tasks
+rcutorture.torture_type=tasks nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04

index 27d2269..24c9f60 100644 (file)
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
  CONFIG_HZ_PERIODIC=n
  CONFIG_NO_HZ_IDLE=n
  CONFIG_NO_HZ_FULL=y
-CONFIG_NO_HZ_FULL_ALL=y
  CONFIG_RCU_FAST_NO_HZ=y
  CONFIG_RCU_TRACE=y
  CONFIG_HOTPLUG_CPU=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot

index e34c334..e6071bb 100644 (file)
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07

index 0f4759f..d7afb27 100644 (file)
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
  CONFIG_HZ_PERIODIC=n
  CONFIG_NO_HZ_IDLE=n
  CONFIG_NO_HZ_FULL=y
-CONFIG_NO_HZ_FULL_ALL=n
  CONFIG_RCU_FAST_NO_HZ=n
  CONFIG_RCU_TRACE=y
  CONFIG_HOTPLUG_CPU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh

index b960311..d36b8fd 100644 (file)
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -20,32 +20,10 @@
  #
  # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  
-# rcuperf_param_nreaders bootparam-string
-#
-# Adds nreaders rcuperf module parameter if not already specified.
-rcuperf_param_nreaders () {
-       if ! echo "$1" | grep -q "rcuperf.nreaders"
-       then
-               echo rcuperf.nreaders=-1
-       fi
-}
-
-# rcuperf_param_nwriters bootparam-string
-#
-# Adds nwriters rcuperf module parameter if not already specified.
-rcuperf_param_nwriters () {
-       if ! echo "$1" | grep -q "rcuperf.nwriters"
-       then
-               echo rcuperf.nwriters=-1
-       fi
-}
-
  # per_version_boot_params bootparam-string config-file seconds
  #
  # Adds per-version torture-module parameters to kernels supporting them.
  per_version_boot_params () {
-       echo $1 `rcuperf_param_nreaders "$1"` \
-               `rcuperf_param_nwriters "$1"` \
-               rcuperf.shutdown=1 \
+       echo $1 rcuperf.shutdown=1 \
                 rcuperf.verbose=1
  }
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt

index 66efb59..449cf57 100644 (file)
--- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -1,4 +1,4 @@
-This document describes one way to created the rcu-test-image file
+This document describes one way to create the rcu-test-image file
  that contains the filesystem used by the guest-OS kernel.  There are
  probably much better ways of doing this, and this filesystem could no
  doubt be smaller.  It is probably also possible to simply download
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
Documentation/timers/NO_HZ.txt		patch \| blob \| history
include/linux/rcupdate.h		patch \| blob \| history
include/linux/types.h		patch \| blob \| history
include/trace/events/rcu.h		patch \| blob \| history
kernel/rcu/rcu.h		patch \| blob \| history
kernel/rcu/rcuperf.c		patch \| blob \| history
kernel/rcu/rcutorture.c		patch \| blob \| history
kernel/rcu/srcutree.c		patch \| blob \| history
kernel/rcu/tree.c		patch \| blob \| history
kernel/rcu/tree.h		patch \| blob \| history
kernel/rcu/tree_exp.h		patch \| blob \| history
kernel/rcu/tree_plugin.h		patch \| blob \| history
kernel/time/Kconfig		patch \| blob \| history
kernel/time/tick-sched.c		patch \| blob \| history
tools/testing/selftests/rcutorture/bin/functions.sh		patch \| blob \| history
tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh		patch \| blob \| history
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh		patch \| blob \| history
tools/testing/selftests/rcutorture/bin/kvm.sh		patch \| blob \| history
tools/testing/selftests/rcutorture/configs/rcu/TASKS03		patch \| blob \| history
tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot		patch \| blob \| history
tools/testing/selftests/rcutorture/configs/rcu/TREE04		patch \| blob \| history
tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot		patch \| blob \| history
tools/testing/selftests/rcutorture/configs/rcu/TREE07		patch \| blob \| history
tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh		patch \| blob \| history
tools/testing/selftests/rcutorture/doc/rcu-test-image.txt		patch \| blob \| history