Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Apr 2018 16:59:09 +0000 (09:59 -0700)
Pull RCU updates from Ingo Molnar:
 "The main RCU subsystem changes in this cycle were:

  - Miscellaneous fixes, perhaps most notably removing obsolete code
    whose only purpose in life was to gather information for the
    now-removed RCU debugfs facility. Other notable changes include
    removing NO_HZ_FULL_ALL in favor of the nohz_full kernel boot
    parameter, minor optimizations for expedited grace periods, some
    added tracing, creating an RCU-specific workqueue using Tejun's new
    WQ_MEM_RECLAIM flag, and several cleanups to code and comments.

  - SRCU cleanups and optimizations.

  - Torture-test updates, perhaps most notably the adding of ARMv8
    support, but also including numerous cleanups and usability fixes"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  rcu: Create RCU-specific workqueues with rescuers
  torture: Provide more sensible nreader/nwriter defaults for rcuperf
  torture: Grace periods do not piggyback off of themselves
  torture: Adjust rcuperf trace processing to allow for workqueues
  torture: Default jitter off when running rcuperf
  torture: Specify qemu memory size with --memory argument
  rcutorture: Add basic ARM64 support to run scripts
  rcutorture: Update kvm.sh header comment
  rcutorture: Record which grace-period primitives are tested
  rcutorture: Re-enable testing of dynamic expediting
  rcutorture: Avoid fake-writer use of undefined primitives
  rcutorture: Abstract function and module names
  rcutorture: Replace multi-instance kzalloc() with kcalloc()
  rcu: Remove SRCU throttling
  srcu: Remove dead code in srcu_gp_end()
  srcu: Reduce scans of srcu_data in counter wrap check
  srcu: Prevent sdp->srcu_gp_seq_needed_exp counter wrap
  srcu: Abstract function name
  rcu: Make expedited RCU CPU selection avoid unnecessary stores
  rcu: Trace expedited GP delays due to transitioning CPUs
  ...

25 files changed:
Documentation/timers/NO_HZ.txt
include/linux/rcupdate.h
include/linux/types.h
include/trace/events/rcu.h
kernel/rcu/rcu.h
kernel/rcu/rcuperf.c
kernel/rcu/rcutorture.c
kernel/rcu/srcutree.c
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_exp.h
kernel/rcu/tree_plugin.h
kernel/time/Kconfig
kernel/time/tick-sched.c
tools/testing/selftests/rcutorture/bin/functions.sh
tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
tools/testing/selftests/rcutorture/configs/rcu/TREE04
tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
tools/testing/selftests/rcutorture/configs/rcu/TREE07
tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
tools/testing/selftests/rcutorture/doc/rcu-test-image.txt

index 2dcaf9a..9591092 100644 (file)
@@ -131,13 +131,6 @@ error message, and the boot CPU will be removed from the mask.  Note that
 this means that your system must have at least two CPUs in order for
 CONFIG_NO_HZ_FULL=y to do anything for you.
 
-Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies
-that all CPUs other than the boot CPU are adaptive-ticks CPUs.  This
-Kconfig parameter will be overridden by the "nohz_full=" boot parameter,
-so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and
-the "nohz_full=1" boot parameter is specified, the boot parameter will
-prevail so that only CPU 1 will be an adaptive-ticks CPU.
-
 Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
 This is covered in the "RCU IMPLICATIONS" section below.
 
index 043d047..36360d0 100644 (file)
@@ -214,10 +214,12 @@ do { \
 #endif
 
 /*
- * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
- * initialization and destruction of rcu_head on the stack. rcu_head structures
- * allocated dynamically in the heap or defined statically don't need any
- * initialization.
+ * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls
+ * are needed for dynamic initialization and destruction of rcu_head
+ * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for
+ * dynamic initialization and destruction of statically allocated rcu_head
+ * structures.  However, rcu_head structures allocated dynamically in the
+ * heap don't need any initialization.
  */
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 void init_rcu_head(struct rcu_head *head);
index c94d59e..ec13d02 100644 (file)
@@ -217,7 +217,7 @@ struct ustat {
  *
  * This guarantee is important for few reasons:
  *  - future call_rcu_lazy() will make use of lower bits in the pointer;
- *  - the structure shares storage spacer in struct page with @compound_head,
+ *  - the structure shares storage space in struct page with @compound_head,
  *    which encode PageTail() in bit 0. The guarantee is needed to avoid
  *    false-positive PageTail().
  */
index 0b50fda..d8c3329 100644 (file)
@@ -179,6 +179,10 @@ TRACE_EVENT(rcu_grace_period_init,
  *
  *     "snap": Captured snapshot of expedited grace period sequence number.
  *     "start": Started a real expedited grace period.
+ *     "reset": Started resetting the tree
+ *     "select": Started selecting the CPUs to wait on.
+ *     "selectofl": Selected CPU partially offline.
+ *     "startwait": Started waiting on selected CPUs.
  *     "end": Ended a real expedited grace period.
  *     "endwake": Woke piggybackers up.
  *     "done": Someone else did the expedited grace period for us.
index 6334f2c..7a693e3 100644 (file)
@@ -77,12 +77,18 @@ static inline void rcu_seq_start(unsigned long *sp)
        WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
 }
 
+/* Compute the end-of-grace-period value for the specified sequence number. */
+static inline unsigned long rcu_seq_endval(unsigned long *sp)
+{
+       return (*sp | RCU_SEQ_STATE_MASK) + 1;
+}
+
 /* Adjust sequence number for end of update-side operation. */
 static inline void rcu_seq_end(unsigned long *sp)
 {
        smp_mb(); /* Ensure update-side operation before counter increment. */
        WARN_ON_ONCE(!rcu_seq_state(*sp));
-       WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
+       WRITE_ONCE(*sp, rcu_seq_endval(sp));
 }
 
 /* Take a snapshot of the update side's sequence number. */
@@ -295,9 +301,19 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
  * Iterate over all possible CPUs in a leaf RCU node.
  */
 #define for_each_leaf_node_possible_cpu(rnp, cpu) \
-       for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
-            cpu <= rnp->grphi; \
-            cpu = cpumask_next((cpu), cpu_possible_mask))
+       for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
+            (cpu) <= rnp->grphi; \
+            (cpu) = cpumask_next((cpu), cpu_possible_mask))
+
+/*
+ * Iterate over all CPUs in a leaf RCU node's specified mask.
+ */
+#define rcu_find_next_bit(rnp, cpu, mask) \
+       ((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
+#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
+       for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
+            (cpu) <= rnp->grphi; \
+            (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
 
 /*
  * Wrappers for the rcu_node::lock acquire and release.
@@ -337,7 +353,7 @@ do {                                                                        \
 } while (0)
 
 #define raw_spin_unlock_irqrestore_rcu_node(p, flags)                  \
-       raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)     \
+       raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
 
 #define raw_spin_trylock_rcu_node(p)                                   \
 ({                                                                     \
@@ -348,6 +364,9 @@ do {                                                                        \
        ___locked;                                                      \
 })
 
+#define raw_lockdep_assert_held_rcu_node(p)                            \
+       lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
+
 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
 
 #ifdef CONFIG_TINY_RCU
@@ -356,24 +375,20 @@ static inline bool rcu_gp_is_normal(void) { return true; }
 static inline bool rcu_gp_is_expedited(void) { return false; }
 static inline void rcu_expedite_gp(void) { }
 static inline void rcu_unexpedite_gp(void) { }
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 bool rcu_gp_is_normal(void);     /* Internal RCU use. */
 bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
 void rcu_expedite_gp(void);
 void rcu_unexpedite_gp(void);
 void rcupdate_announce_bootup_oddness(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #define RCU_SCHEDULER_INACTIVE 0
 #define RCU_SCHEDULER_INIT     1
 #define RCU_SCHEDULER_RUNNING  2
 
-#ifdef CONFIG_TINY_RCU
-static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
-#else /* #ifdef CONFIG_TINY_RCU */
-void rcu_request_urgent_qs_task(struct task_struct *t);
-#endif /* #else #ifdef CONFIG_TINY_RCU */
-
 enum rcutorture_type {
        RCU_FLAVOR,
        RCU_BH_FLAVOR,
@@ -470,6 +485,7 @@ void show_rcu_gp_kthreads(void);
 void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
+extern struct workqueue_struct *rcu_gp_wq;
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
index d1ebdf9..777e7a6 100644 (file)
@@ -61,11 +61,30 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 #define VERBOSE_PERFOUT_ERRSTRING(s) \
        do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
 
+/*
+ * The intended use cases for the nreaders and nwriters module parameters
+ * are as follows:
+ *
+ * 1.  Specify only the nr_cpus kernel boot parameter.  This will
+ *     set both nreaders and nwriters to the value specified by
+ *     nr_cpus for a mixed reader/writer test.
+ *
+ * 2.  Specify the nr_cpus kernel boot parameter, but set
+ *     rcuperf.nreaders to zero.  This will set nwriters to the
+ *     value specified by nr_cpus for an update-only test.
+ *
+ * 3.  Specify the nr_cpus kernel boot parameter, but set
+ *     rcuperf.nwriters to zero.  This will set nreaders to the
+ *     value specified by nr_cpus for a read-only test.
+ *
+ * Various other use cases may of course be specified.
+ */
+
 torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
 torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
-torture_param(int, nreaders, 0, "Number of RCU reader threads");
+torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
 torture_param(bool, shutdown, !IS_ENABLED(MODULE),
              "Shutdown at end of performance tests.");
index 308e6fd..680c96d 100644 (file)
@@ -909,34 +909,38 @@ rcu_torture_writer(void *arg)
        int nsynctypes = 0;
 
        VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
-       if (!can_expedite) {
+       if (!can_expedite)
                pr_alert("%s" TORTURE_FLAG
-                        " GP expediting controlled from boot/sysfs for %s,\n",
+                        " GP expediting controlled from boot/sysfs for %s.\n",
                         torture_type, cur_ops->name);
-               pr_alert("%s" TORTURE_FLAG
-                        " Disabled dynamic grace-period expediting.\n",
-                        torture_type);
-       }
 
        /* Initialize synctype[] array.  If none set, take default. */
        if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
                gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
-       if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
+       if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) {
                synctype[nsynctypes++] = RTWS_COND_GET;
-       else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
-               pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
-       if (gp_exp1 && cur_ops->exp_sync)
+               pr_info("%s: Testing conditional GPs.\n", __func__);
+       } else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) {
+               pr_alert("%s: gp_cond without primitives.\n", __func__);
+       }
+       if (gp_exp1 && cur_ops->exp_sync) {
                synctype[nsynctypes++] = RTWS_EXP_SYNC;
-       else if (gp_exp && !cur_ops->exp_sync)
-               pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
-       if (gp_normal1 && cur_ops->deferred_free)
+               pr_info("%s: Testing expedited GPs.\n", __func__);
+       } else if (gp_exp && !cur_ops->exp_sync) {
+               pr_alert("%s: gp_exp without primitives.\n", __func__);
+       }
+       if (gp_normal1 && cur_ops->deferred_free) {
                synctype[nsynctypes++] = RTWS_DEF_FREE;
-       else if (gp_normal && !cur_ops->deferred_free)
-               pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
-       if (gp_sync1 && cur_ops->sync)
+               pr_info("%s: Testing asynchronous GPs.\n", __func__);
+       } else if (gp_normal && !cur_ops->deferred_free) {
+               pr_alert("%s: gp_normal without primitives.\n", __func__);
+       }
+       if (gp_sync1 && cur_ops->sync) {
                synctype[nsynctypes++] = RTWS_SYNC;
-       else if (gp_sync && !cur_ops->sync)
-               pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
+               pr_info("%s: Testing normal GPs.\n", __func__);
+       } else if (gp_sync && !cur_ops->sync) {
+               pr_alert("%s: gp_sync without primitives.\n", __func__);
+       }
        if (WARN_ONCE(nsynctypes == 0,
                      "rcu_torture_writer: No update-side primitives.\n")) {
                /*
@@ -1011,6 +1015,9 @@ rcu_torture_writer(void *arg)
                                rcu_unexpedite_gp();
                        if (++expediting > 3)
                                expediting = -expediting;
+               } else if (!can_expedite) { /* Disabled during boot, recheck. */
+                       can_expedite = !rcu_gp_is_expedited() &&
+                                      !rcu_gp_is_normal();
                }
                rcu_torture_writer_state = RTWS_STUTTER;
                stutter_wait("rcu_torture_writer");
@@ -1021,6 +1028,10 @@ rcu_torture_writer(void *arg)
        while (can_expedite && expediting++ < 0)
                rcu_unexpedite_gp();
        WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
+       if (!can_expedite)
+               pr_alert("%s" TORTURE_FLAG
+                        " Dynamic grace-period expediting was disabled.\n",
+                        torture_type);
        rcu_torture_writer_state = RTWS_STOPPING;
        torture_kthread_stopping("rcu_torture_writer");
        return 0;
@@ -1045,13 +1056,13 @@ rcu_torture_fakewriter(void *arg)
                    torture_random(&rand) % (nfakewriters * 8) == 0) {
                        cur_ops->cb_barrier();
                } else if (gp_normal == gp_exp) {
-                       if (torture_random(&rand) & 0x80)
+                       if (cur_ops->sync && torture_random(&rand) & 0x80)
                                cur_ops->sync();
-                       else
+                       else if (cur_ops->exp_sync)
                                cur_ops->exp_sync();
-               } else if (gp_normal) {
+               } else if (gp_normal && cur_ops->sync) {
                        cur_ops->sync();
-               } else {
+               } else if (cur_ops->exp_sync) {
                        cur_ops->exp_sync();
                }
                stutter_wait("rcu_torture_fakewriter");
@@ -1557,11 +1568,10 @@ static int rcu_torture_barrier_init(void)
        atomic_set(&barrier_cbs_count, 0);
        atomic_set(&barrier_cbs_invoked, 0);
        barrier_cbs_tasks =
-               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
+               kcalloc(n_barrier_cbs, sizeof(barrier_cbs_tasks[0]),
                        GFP_KERNEL);
        barrier_cbs_wq =
-               kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
-                       GFP_KERNEL);
+               kcalloc(n_barrier_cbs, sizeof(barrier_cbs_wq[0]), GFP_KERNEL);
        if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
                return -ENOMEM;
        for (i = 0; i < n_barrier_cbs; i++) {
@@ -1674,7 +1684,7 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
         * next grace period.  Unlikely, but can happen.  If it
         * does happen, the debug-objects subsystem won't have splatted.
         */
-       pr_alert("rcutorture: duplicated callback was invoked.\n");
+       pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
 }
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
@@ -1691,7 +1701,7 @@ static void rcu_test_debug_objects(void)
 
        init_rcu_head_on_stack(&rh1);
        init_rcu_head_on_stack(&rh2);
-       pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n");
+       pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME);
 
        /* Try to queue the rh2 pair of callbacks for the same grace period. */
        preempt_disable(); /* Prevent preemption from interrupting test. */
@@ -1706,11 +1716,11 @@ static void rcu_test_debug_objects(void)
 
        /* Wait for them all to get done so we can safely return. */
        rcu_barrier();
-       pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n");
+       pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
        destroy_rcu_head_on_stack(&rh1);
        destroy_rcu_head_on_stack(&rh2);
 #else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-       pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n");
+       pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
 #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 }
 
@@ -1799,7 +1809,7 @@ rcu_torture_init(void)
        if (firsterr)
                goto unwind;
        if (nfakewriters > 0) {
-               fakewriter_tasks = kzalloc(nfakewriters *
+               fakewriter_tasks = kcalloc(nfakewriters,
                                           sizeof(fakewriter_tasks[0]),
                                           GFP_KERNEL);
                if (fakewriter_tasks == NULL) {
@@ -1814,7 +1824,7 @@ rcu_torture_init(void)
                if (firsterr)
                        goto unwind;
        }
-       reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
+       reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
                               GFP_KERNEL);
        if (reader_tasks == NULL) {
                VERBOSE_TOROUT_ERRSTRING("out of memory");
index d5cea81..fb560fc 100644 (file)
@@ -386,7 +386,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
                flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
        if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
            WARN_ON(srcu_readers_active(sp))) {
-               pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
+               pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
                return; /* Caller forgot to stop doing call_srcu()? */
        }
        free_percpu(sp->sda);
@@ -439,7 +439,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
        struct srcu_data *sdp = this_cpu_ptr(sp->sda);
        int state;
 
-       lockdep_assert_held(&sp->lock);
+       lockdep_assert_held(&ACCESS_PRIVATE(sp, lock));
        WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
        rcu_segcblist_advance(&sdp->srcu_cblist,
                              rcu_seq_current(&sp->srcu_gp_seq));
@@ -492,8 +492,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
  */
 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
 {
-       srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
-                                  &sdp->work, delay);
+       srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
 }
 
 /*
@@ -527,11 +526,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
 {
        unsigned long cbdelay;
        bool cbs;
+       bool last_lvl;
        int cpu;
        unsigned long flags;
        unsigned long gpseq;
        int idx;
-       int idxnext;
        unsigned long mask;
        struct srcu_data *sdp;
        struct srcu_node *snp;
@@ -555,11 +554,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
 
        /* Initiate callback invocation as needed. */
        idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
-       idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
        rcu_for_each_node_breadth_first(sp, snp) {
                spin_lock_irq_rcu_node(snp);
                cbs = false;
-               if (snp >= sp->level[rcu_num_lvls - 1])
+               last_lvl = snp >= sp->level[rcu_num_lvls - 1];
+               if (last_lvl)
                        cbs = snp->srcu_have_cbs[idx] == gpseq;
                snp->srcu_have_cbs[idx] = gpseq;
                rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
@@ -572,13 +571,16 @@ static void srcu_gp_end(struct srcu_struct *sp)
                        srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
 
                /* Occasionally prevent srcu_data counter wrap. */
-               if (!(gpseq & counter_wrap_check))
+               if (!(gpseq & counter_wrap_check) && last_lvl)
                        for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
                                sdp = per_cpu_ptr(sp->sda, cpu);
                                spin_lock_irqsave_rcu_node(sdp, flags);
                                if (ULONG_CMP_GE(gpseq,
                                                 sdp->srcu_gp_seq_needed + 100))
                                        sdp->srcu_gp_seq_needed = gpseq;
+                               if (ULONG_CMP_GE(gpseq,
+                                                sdp->srcu_gp_seq_needed_exp + 100))
+                                       sdp->srcu_gp_seq_needed_exp = gpseq;
                                spin_unlock_irqrestore_rcu_node(sdp, flags);
                        }
        }
@@ -593,9 +595,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
            ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
                srcu_gp_start(sp);
                spin_unlock_irq_rcu_node(sp);
-               /* Throttle expedited grace periods: Should be rare! */
-               srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
-                                   ? 0 : SRCU_INTERVAL);
+               srcu_reschedule(sp, 0);
        } else {
                spin_unlock_irq_rcu_node(sp);
        }
@@ -626,7 +626,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
                spin_unlock_irqrestore_rcu_node(snp, flags);
        }
        spin_lock_irqsave_rcu_node(sp, flags);
-       if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+       if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
                sp->srcu_gp_seq_needed_exp = s;
        spin_unlock_irqrestore_rcu_node(sp, flags);
 }
@@ -691,8 +691,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
            rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
                WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
                srcu_gp_start(sp);
-               queue_delayed_work(system_power_efficient_wq, &sp->work,
-                                  srcu_get_delay(sp));
+               queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
        }
        spin_unlock_irqrestore_rcu_node(sp, flags);
 }
@@ -1225,7 +1224,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
        spin_unlock_irq_rcu_node(sp);
 
        if (pushgp)
-               queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
+               queue_delayed_work(rcu_gp_wq, &sp->work, delay);
 }
 
 /*
index 491bdf3..2a73469 100644 (file)
@@ -1161,7 +1161,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  */
 static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
 {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
                WRITE_ONCE(rdp->gpwrap, true);
        if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
@@ -1350,6 +1350,7 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
                       rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
                       rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
                if (rsp->gp_kthread) {
+                       pr_err("RCU grace-period kthread stack dump:\n");
                        sched_show_task(rsp->gp_kthread);
                        wake_up_process(rsp->gp_kthread);
                }
@@ -1628,7 +1629,7 @@ void rcu_cpu_stall_reset(void)
 static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
                                       struct rcu_node *rnp)
 {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
 
        /*
         * If RCU is idle, we just wait for the next grace period.
@@ -1675,7 +1676,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
        bool ret = false;
        struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
 
        /*
         * Pick up grace-period number for new callbacks.  If this
@@ -1803,7 +1804,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 {
        bool ret = false;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
 
        /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
        if (!rcu_segcblist_pend_cbs(&rdp->cblist))
@@ -1843,7 +1844,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
                            struct rcu_data *rdp)
 {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
 
        /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
        if (!rcu_segcblist_pend_cbs(&rdp->cblist))
@@ -1871,7 +1872,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
        bool ret;
        bool need_gp;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
 
        /* Handle the ends of any preceding grace periods first. */
        if (rdp->completed == rnp->completed &&
@@ -2296,7 +2297,7 @@ static bool
 rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                      struct rcu_data *rdp)
 {
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
                /*
                 * Either we have not yet spawned the grace-period
@@ -2358,7 +2359,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
        __releases(rcu_get_root(rsp)->lock)
 {
-       lockdep_assert_held(&rcu_get_root(rsp)->lock);
+       raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp));
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
        WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
        raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
@@ -2383,7 +2384,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
        unsigned long oldmask = 0;
        struct rcu_node *rnp_c;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
 
        /* Walk up the rcu_node hierarchy. */
        for (;;) {
@@ -2447,7 +2448,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
        unsigned long mask;
        struct rcu_node *rnp_p;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
            rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2592,7 +2593,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
        long mask;
        struct rcu_node *rnp = rnp_leaf;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
            rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
                return;
@@ -2691,7 +2692,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
        /* Update counts and requeue any remaining callbacks. */
        rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
        smp_mb(); /* List handling before counting for rcu_barrier(). */
-       rdp->n_cbs_invoked += count;
        rcu_segcblist_insert_count(&rdp->cblist, &rcl);
 
        /* Reinstate batch limit if we have worked down the excess. */
@@ -2845,10 +2845,8 @@ static void force_quiescent_state(struct rcu_state *rsp)
                      !raw_spin_trylock(&rnp->fqslock);
                if (rnp_old != NULL)
                        raw_spin_unlock(&rnp_old->fqslock);
-               if (ret) {
-                       rsp->n_force_qs_lh++;
+               if (ret)
                        return;
-               }
                rnp_old = rnp;
        }
        /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
@@ -2857,7 +2855,6 @@ static void force_quiescent_state(struct rcu_state *rsp)
        raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
        raw_spin_unlock(&rnp_old->fqslock);
        if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
-               rsp->n_force_qs_lh++;
                raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
                return;  /* Someone beat us to it. */
        }
@@ -3355,8 +3352,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 {
        struct rcu_node *rnp = rdp->mynode;
 
-       rdp->n_rcu_pending++;
-
        /* Check for CPU stalls, if enabled. */
        check_cpu_stall(rsp, rdp);
 
@@ -3365,48 +3360,31 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
                return 0;
 
        /* Is the RCU core waiting for a quiescent state from this CPU? */
-       if (rcu_scheduler_fully_active &&
-           rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
-           rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
-               rdp->n_rp_core_needs_qs++;
-       } else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
-               rdp->n_rp_report_qs++;
+       if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
                return 1;
-       }
 
        /* Does this CPU have callbacks ready to invoke? */
-       if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
-               rdp->n_rp_cb_ready++;
+       if (rcu_segcblist_ready_cbs(&rdp->cblist))
                return 1;
-       }
 
        /* Has RCU gone idle with this CPU needing another grace period? */
-       if (cpu_needs_another_gp(rsp, rdp)) {
-               rdp->n_rp_cpu_needs_gp++;
+       if (cpu_needs_another_gp(rsp, rdp))
                return 1;
-       }
 
        /* Has another RCU grace period completed?  */
-       if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
-               rdp->n_rp_gp_completed++;
+       if (READ_ONCE(rnp->completed) != rdp->completed) /* outside lock */
                return 1;
-       }
 
        /* Has a new RCU grace period started? */
        if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
-           unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */
-               rdp->n_rp_gp_started++;
+           unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
                return 1;
-       }
 
        /* Does this CPU need a deferred NOCB wakeup? */
-       if (rcu_nocb_need_deferred_wakeup(rdp)) {
-               rdp->n_rp_nocb_defer_wakeup++;
+       if (rcu_nocb_need_deferred_wakeup(rdp))
                return 1;
-       }
 
        /* nothing to do */
-       rdp->n_rp_need_nothing++;
        return 0;
 }
 
@@ -3618,7 +3596,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
        long mask;
        struct rcu_node *rnp = rnp_leaf;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        for (;;) {
                mask = rnp->grpmask;
                rnp = rnp->parent;
@@ -3636,12 +3614,9 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
 static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
-       unsigned long flags;
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp = rcu_get_root(rsp);
 
        /* Set up local state, ensuring consistent view of global state. */
-       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
        rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
        WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
@@ -3649,7 +3624,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
        rdp->cpu = cpu;
        rdp->rsp = rsp;
        rcu_boot_init_nocb_percpu_data(rdp);
-       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
@@ -4193,6 +4167,8 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
        pr_cont("\n");
 }
 
+struct workqueue_struct *rcu_gp_wq;
+
 void __init rcu_init(void)
 {
        int cpu;
@@ -4219,6 +4195,10 @@ void __init rcu_init(void)
                rcu_cpu_starting(cpu);
                rcutree_online_cpu(cpu);
        }
+
+       /* Create workqueue for expedited GPs and for Tree SRCU. */
+       rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
+       WARN_ON(!rcu_gp_wq);
 }
 
 #include "tree_exp.h"
index 6488a3b..f491ab4 100644 (file)
@@ -146,12 +146,6 @@ struct rcu_node {
                                /*  boosting for this rcu_node structure. */
        unsigned int boost_kthread_status;
                                /* State of boost_kthread_task for tracing. */
-       unsigned long n_tasks_boosted;
-                               /* Total number of tasks boosted. */
-       unsigned long n_exp_boosts;
-                               /* Number of tasks boosted for expedited GP. */
-       unsigned long n_normal_boosts;
-                               /* Number of tasks boosted for normal GP. */
 #ifdef CONFIG_RCU_NOCB_CPU
        struct swait_queue_head nocb_gp_wq[2];
                                /* Place for rcu_nocb_kthread() to wait GP. */
@@ -184,13 +178,6 @@ union rcu_noqs {
        u16 s; /* Set of bits, aggregate OR here. */
 };
 
-/* Index values for nxttail array in struct rcu_data. */
-#define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
-#define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
-#define RCU_NEXT_READY_TAIL    2       /* Also RCU_NEXT head. */
-#define RCU_NEXT_TAIL          3
-#define RCU_NEXT_SIZE          4
-
 /* Per-CPU data for read-copy update. */
 struct rcu_data {
        /* 1) quiescent-state and grace-period handling : */
@@ -217,8 +204,6 @@ struct rcu_data {
                                        /* different grace periods. */
        long            qlen_last_fqs_check;
                                        /* qlen at last check for QS forcing */
-       unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
-       unsigned long   n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
        unsigned long   n_force_qs_snap;
                                        /* did other CPU force QS recently? */
        long            blimit;         /* Upper limit on a processed batch */
@@ -234,18 +219,7 @@ struct rcu_data {
                                        /* Grace period that needs help */
                                        /*  from cond_resched(). */
 
-       /* 5) __rcu_pending() statistics. */
-       unsigned long n_rcu_pending;    /* rcu_pending() calls since boot. */
-       unsigned long n_rp_core_needs_qs;
-       unsigned long n_rp_report_qs;
-       unsigned long n_rp_cb_ready;
-       unsigned long n_rp_cpu_needs_gp;
-       unsigned long n_rp_gp_completed;
-       unsigned long n_rp_gp_started;
-       unsigned long n_rp_nocb_defer_wakeup;
-       unsigned long n_rp_need_nothing;
-
-       /* 6) _rcu_barrier(), OOM callbacks, and expediting. */
+       /* 5) _rcu_barrier(), OOM callbacks, and expediting. */
        struct rcu_head barrier_head;
 #ifdef CONFIG_RCU_FAST_NO_HZ
        struct rcu_head oom_head;
@@ -256,7 +230,7 @@ struct rcu_data {
        atomic_long_t exp_workdone3;    /* # done by others #3. */
        int exp_dynticks_snap;          /* Double-check need for IPI. */
 
-       /* 7) Callback offloading. */
+       /* 6) Callback offloading. */
 #ifdef CONFIG_RCU_NOCB_CPU
        struct rcu_head *nocb_head;     /* CBs waiting for kthread. */
        struct rcu_head **nocb_tail;
@@ -283,7 +257,7 @@ struct rcu_data {
                                        /* Leader CPU takes GP-end wakeups. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
-       /* 8) RCU CPU stall data. */
+       /* 7) RCU CPU stall data. */
        unsigned int softirq_snap;      /* Snapshot of softirq activity. */
        /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
        struct irq_work rcu_iw;         /* Check for non-irq activity. */
@@ -374,10 +348,6 @@ struct rcu_state {
                                                /*  kthreads, if configured. */
        unsigned long n_force_qs;               /* Number of calls to */
                                                /*  force_quiescent_state(). */
-       unsigned long n_force_qs_lh;            /* ~Number of calls leaving */
-                                               /*  due to lock unavailable. */
-       unsigned long n_force_qs_ngp;           /* Number of calls leaving */
-                                               /*  due to no GP active. */
        unsigned long gp_start;                 /* Time at which GP started, */
                                                /*  but in jiffies. */
        unsigned long gp_activity;              /* Time of last GP kthread */
index 46d61b5..f72eefa 100644 (file)
@@ -28,6 +28,15 @@ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
        rcu_seq_start(&rsp->expedited_sequence);
 }
 
+/*
+ * Return the value that the expedited-grace-period counter will have
+ * at the end of the current grace period.
+ */
+static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
+{
+       return rcu_seq_endval(&rsp->expedited_sequence);
+}
+
 /*
  * Record the end of an expedited grace period.
  */
@@ -366,21 +375,30 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
        int ret;
        struct rcu_node *rnp;
 
+       trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
        sync_exp_reset_tree(rsp);
+       trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
 
                /* Each pass checks a CPU for identity, offline, and idle. */
                mask_ofl_test = 0;
-               for_each_leaf_node_possible_cpu(rnp, cpu) {
+               for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+                       unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+                       struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
+                       int snap;
 
-                       rdp->exp_dynticks_snap =
-                               rcu_dynticks_snap(rdp->dynticks);
                        if (raw_smp_processor_id() == cpu ||
-                           rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
-                           !(rnp->qsmaskinitnext & rdp->grpmask))
-                               mask_ofl_test |= rdp->grpmask;
+                           !(rnp->qsmaskinitnext & mask)) {
+                               mask_ofl_test |= mask;
+                       } else {
+                               snap = rcu_dynticks_snap(rdtp);
+                               if (rcu_dynticks_in_eqs(snap))
+                                       mask_ofl_test |= mask;
+                               else
+                                       rdp->exp_dynticks_snap = snap;
+                       }
                }
                mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
 
@@ -394,7 +412,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
                /* IPI the remaining CPUs for expedited quiescent state. */
-               for_each_leaf_node_possible_cpu(rnp, cpu) {
+               for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
                        unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 
@@ -417,6 +435,7 @@ retry_ipi:
                            (rnp->expmask & mask)) {
                                /* Online, so delay for a bit and try again. */
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+                               trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
                                schedule_timeout_uninterruptible(1);
                                goto retry_ipi;
                        }
@@ -443,6 +462,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
        struct rcu_node *rnp_root = rcu_get_root(rsp);
        int ret;
 
+       trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
        jiffies_stall = rcu_jiffies_till_stall_check();
        jiffies_start = jiffies;
 
@@ -606,7 +626,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
                rew.rew_rsp = rsp;
                rew.rew_s = s;
                INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
-               schedule_work(&rew.rew_work);
+               queue_work(rcu_gp_wq, &rew.rew_work);
        }
 
        /* Wait for expedited grace period to complete. */
index fb88a02..84fbee4 100644 (file)
@@ -180,7 +180,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                         (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
        struct task_struct *t = current;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        WARN_ON_ONCE(rdp->mynode != rnp);
        WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
 
@@ -560,8 +560,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
        }
        t = list_entry(rnp->gp_tasks->prev,
                       struct task_struct, rcu_node_entry);
-       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+               /*
+                * We could be printing a lot while holding a spinlock.
+                * Avoid triggering hard lockup.
+                */
+               touch_nmi_watchdog();
                sched_show_task(t);
+       }
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
@@ -957,14 +963,10 @@ static int rcu_boost(struct rcu_node *rnp)
         * expedited grace period must boost all blocked tasks, including
         * those blocking the pre-existing normal grace period.
         */
-       if (rnp->exp_tasks != NULL) {
+       if (rnp->exp_tasks != NULL)
                tb = rnp->exp_tasks;
-               rnp->n_exp_boosts++;
-       } else {
+       else
                tb = rnp->boost_tasks;
-               rnp->n_normal_boosts++;
-       }
-       rnp->n_tasks_boosted++;
 
        /*
         * We boost task t by manufacturing an rt_mutex that appears to
@@ -1042,7 +1044,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 {
        struct task_struct *t;
 
-       lockdep_assert_held(&rnp->lock);
+       raw_lockdep_assert_held_rcu_node(rnp);
        if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
@@ -1677,6 +1679,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
        char *ticks_title;
        unsigned long ticks_value;
 
+       /*
+        * We could be printing a lot while holding a spinlock.  Avoid
+        * triggering hard lockup.
+        */
+       touch_nmi_watchdog();
+
        if (rsp->gpnum == rdp->gpnum) {
                ticks_title = "ticks this GP";
                ticks_value = rdp->ticks_this_gp;
@@ -2235,7 +2243,6 @@ static int rcu_nocb_kthread(void *arg)
                smp_mb__before_atomic();  /* _add after CB invocation. */
                atomic_long_add(-c, &rdp->nocb_q_count);
                atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
-               rdp->n_nocbs_invoked += c;
        }
        return 0;
 }
@@ -2312,8 +2319,11 @@ void __init rcu_init_nohz(void)
                cpumask_and(rcu_nocb_mask, cpu_possible_mask,
                            rcu_nocb_mask);
        }
-       pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
-               cpumask_pr_args(rcu_nocb_mask));
+       if (cpumask_empty(rcu_nocb_mask))
+               pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
+       else
+               pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
+                       cpumask_pr_args(rcu_nocb_mask));
        if (rcu_nocb_poll)
                pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
 
index f6b5f19..78eabc4 100644 (file)
@@ -113,16 +113,6 @@ config NO_HZ_FULL
 
 endchoice
 
-config NO_HZ_FULL_ALL
-       bool "Full dynticks system on all CPUs by default (except CPU 0)"
-       depends on NO_HZ_FULL
-       help
-         If the user doesn't pass the nohz_full boot option to
-        define the range of full dynticks CPUs, consider that all
-        CPUs in the system are full dynticks by default.
-        Note the boot CPU will still be kept outside the range to
-        handle the timekeeping duty.
-
 config NO_HZ
        bool "Old Idle dynticks config"
        depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
index 29a5733..ccd3782 100644 (file)
@@ -405,30 +405,12 @@ static int tick_nohz_cpu_down(unsigned int cpu)
        return 0;
 }
 
-static int tick_nohz_init_all(void)
-{
-       int err = -1;
-
-#ifdef CONFIG_NO_HZ_FULL_ALL
-       if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
-               WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n");
-               return err;
-       }
-       err = 0;
-       cpumask_setall(tick_nohz_full_mask);
-       tick_nohz_full_running = true;
-#endif
-       return err;
-}
-
 void __init tick_nohz_init(void)
 {
        int cpu, ret;
 
-       if (!tick_nohz_full_running) {
-               if (tick_nohz_init_all() < 0)
-                       return;
-       }
+       if (!tick_nohz_full_running)
+               return;
 
        /*
         * Full dynticks uses irq work to drive the tick rescheduling on safe
index 07a1377..65f6655 100644 (file)
@@ -136,6 +136,9 @@ identify_boot_image () {
                qemu-system-x86_64|qemu-system-i386)
                        echo arch/x86/boot/bzImage
                        ;;
+               qemu-system-aarch64)
+                       echo arch/arm64/boot/Image
+                       ;;
                *)
                        echo vmlinux
                        ;;
@@ -158,6 +161,9 @@ identify_qemu () {
        elif echo $u | grep -q "Intel 80386"
        then
                echo qemu-system-i386
+       elif echo $u | grep -q aarch64
+       then
+               echo qemu-system-aarch64
        elif uname -a | grep -q ppc64
        then
                echo qemu-system-ppc64
@@ -176,16 +182,20 @@ identify_qemu () {
 # Output arguments for the qemu "-append" string based on CPU type
 # and the TORTURE_QEMU_INTERACTIVE environment variable.
 identify_qemu_append () {
+       local console=ttyS0
        case "$1" in
        qemu-system-x86_64|qemu-system-i386)
                echo noapic selinux=0 initcall_debug debug
                ;;
+       qemu-system-aarch64)
+               console=ttyAMA0
+               ;;
        esac
        if test -n "$TORTURE_QEMU_INTERACTIVE"
        then
                echo root=/dev/sda
        else
-               echo console=ttyS0
+               echo console=$console
        fi
 }
 
@@ -197,6 +207,9 @@ identify_qemu_args () {
        case "$1" in
        qemu-system-x86_64|qemu-system-i386)
                ;;
+       qemu-system-aarch64)
+               echo -machine virt,gic-version=host -cpu host
+               ;;
        qemu-system-ppc64)
                echo -enable-kvm -M pseries -nodefaults
                echo -device spapr-vscsi
@@ -254,7 +267,7 @@ specify_qemu_cpus () {
                echo $2
        else
                case "$1" in
-               qemu-system-x86_64|qemu-system-i386)
+               qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
                        echo $2 -smp $3
                        ;;
                qemu-system-ppc64)
index 963f712..8948f79 100755 (executable)
@@ -39,30 +39,31 @@ sed -e 's/us : / : /' |
 tr -d '\015' |
 awk '
 $8 == "start" {
-       if (starttask != "")
+       if (startseq != "")
                nlost++;
        starttask = $1;
        starttime = $3;
        startseq = $7;
+       seqtask[startseq] = starttask;
 }
 
 $8 == "end" {
-       if (starttask == $1 && startseq == $7) {
+       if (startseq == $7) {
                curgpdur = $3 - starttime;
                gptimes[++n] = curgpdur;
                gptaskcnt[starttask]++;
                sum += curgpdur;
                if (curgpdur > 1000)
                        print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
-               starttask = "";
+               startseq = "";
        } else {
                # Lost a message or some such, reset.
-               starttask = "";
+               startseq = "";
                nlost++;
        }
 }
 
-$8 == "done" {
+$8 == "done" && seqtask[$7] != $1 {
        piggybackcnt[$1]++;
 }
 
index 1b78a12..5f8fbb0 100755 (executable)
@@ -177,8 +177,8 @@ then
        exit 0
 fi
 echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
 commandcompleted=0
 sleep 10 # Give qemu's pid a chance to reach the file
 if test -s "$resdir/qemu_pid"
index 7d1f607..56610db 100755 (executable)
@@ -1,10 +1,8 @@
 #!/bin/bash
 #
-# Run a series of 14 tests under KVM.  These are not particularly
-# well-selected or well-tuned, but are the current set.
-#
-# Edit the definitions below to set the locations of the various directories,
-# as well as the test duration.
+# Run a series of tests under KVM.  By default, this series is specified
+# by the relevant CFLIST file, but can be overridden by the --configs
+# command-line argument.
 #
 # Usage: kvm.sh [ options ]
 #
@@ -44,6 +42,7 @@ TORTURE_BOOT_IMAGE=""
 TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
 TORTURE_KMAKE_ARG=""
+TORTURE_QEMU_MEM=512
 TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
 resdir=""
@@ -70,6 +69,7 @@ usage () {
        echo "       --kconfig Kconfig-options"
        echo "       --kmake-arg kernel-make-arguments"
        echo "       --mac nn:nn:nn:nn:nn:nn"
+       echo "       --memory megabytes | nnnG"
        echo "       --no-initrd"
        echo "       --qemu-args qemu-arguments"
        echo "       --qemu-cmd qemu-system-..."
@@ -147,6 +147,11 @@ do
                TORTURE_QEMU_MAC=$2
                shift
                ;;
+       --memory)
+               checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
+               TORTURE_QEMU_MEM=$2
+               shift
+               ;;
        --no-initrd)
                TORTURE_INITRD=""; export TORTURE_INITRD
                ;;
@@ -174,6 +179,12 @@ do
                checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
                TORTURE_SUITE=$2
                shift
+               if test "$TORTURE_SUITE" = rcuperf
+               then
+                       # If you really want jitter for rcuperf, specify
+                       # it after specifying rcuperf.  (But why?)
+                       jitter=0
+               fi
                ;;
        *)
                echo Unknown argument $1
@@ -288,6 +299,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
 TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
 TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
 TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
 if ! test -e $resdir
index c70c51d..28568b7 100644 (file)
@@ -9,5 +9,4 @@ CONFIG_PREEMPT=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
-CONFIG_NO_HZ_FULL_ALL=y
 #CHECK#CONFIG_RCU_EXPERT=n
index 27d2269..24c9f60 100644 (file)
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
-CONFIG_NO_HZ_FULL_ALL=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=n
index e34c334..e6071bb 100644 (file)
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
index 0f4759f..d7afb27 100644 (file)
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
-CONFIG_NO_HZ_FULL_ALL=n
 CONFIG_RCU_FAST_NO_HZ=n
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
index b960311..d36b8fd 100644 (file)
 #
 # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 
-# rcuperf_param_nreaders bootparam-string
-#
-# Adds nreaders rcuperf module parameter if not already specified.
-rcuperf_param_nreaders () {
-       if ! echo "$1" | grep -q "rcuperf.nreaders"
-       then
-               echo rcuperf.nreaders=-1
-       fi
-}
-
-# rcuperf_param_nwriters bootparam-string
-#
-# Adds nwriters rcuperf module parameter if not already specified.
-rcuperf_param_nwriters () {
-       if ! echo "$1" | grep -q "rcuperf.nwriters"
-       then
-               echo rcuperf.nwriters=-1
-       fi
-}
-
 # per_version_boot_params bootparam-string config-file seconds
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-       echo $1 `rcuperf_param_nreaders "$1"` \
-               `rcuperf_param_nwriters "$1"` \
-               rcuperf.shutdown=1 \
+       echo $1 rcuperf.shutdown=1 \
                rcuperf.verbose=1
 }
index 66efb59..449cf57 100644 (file)
@@ -1,4 +1,4 @@
-This document describes one way to created the rcu-test-image file
+This document describes one way to create the rcu-test-image file
 that contains the filesystem used by the guest-OS kernel.  There are
 probably much better ways of doing this, and this filesystem could no
 doubt be smaller.  It is probably also possible to simply download