Merge tag 'sched-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git...
kernel/rcu/tree.c
index ed4941f..40e5e3d 100644
@@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444);
  * per-CPU. Object size is equal to one page. This value
  * can be changed at boot time.
  */
-static int rcu_min_cached_objs = 2;
+static int rcu_min_cached_objs = 5;
 module_param(rcu_min_cached_objs, int, 0444);
 
 /* Retrieve RCU kthreads priority for rcutorture */
@@ -341,6 +341,14 @@ static bool rcu_dynticks_in_eqs(int snap)
        return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
+/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
+bool rcu_is_idle_cpu(int cpu)
+{
+       struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+       return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+}
+
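
Note (illustrative, not part of the diff): rcu_is_idle_cpu() simply reports whether RCU currently sees the given CPU as being in an extended quiescent state (dynticks idle). The sketch below shows a purely hypothetical caller, only to illustrate the calling convention; count_rcu_idle_cpus_example() is an invented name and is not part of this patch:

    /* Hypothetical example: count the CPUs that RCU currently sees as idle. */
    static void count_rcu_idle_cpus_example(void)
    {
            int cpu, n = 0;

            for_each_online_cpu(cpu)
                    if (rcu_is_idle_cpu(cpu))
                            n++;
            pr_info("%d of %u online CPUs are RCU-idle\n", n, num_online_cpus());
    }
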
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since
@@ -546,12 +554,12 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param
        return ret;
 }
 
-static struct kernel_param_ops first_fqs_jiffies_ops = {
+static const struct kernel_param_ops first_fqs_jiffies_ops = {
        .set = param_set_first_fqs_jiffies,
        .get = param_get_ulong,
 };
 
-static struct kernel_param_ops next_fqs_jiffies_ops = {
+static const struct kernel_param_ops next_fqs_jiffies_ops = {
        .set = param_set_next_fqs_jiffies,
        .get = param_get_ulong,
 };
@@ -928,8 +936,8 @@ void __rcu_irq_enter_check_tick(void)
 {
        struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
-        // Enabling the tick is unsafe in NMI handlers.
-       if (WARN_ON_ONCE(in_nmi()))
+       // If we're here from NMI there's nothing to do.
+       if (in_nmi())
                return;
 
        RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
@@ -1093,8 +1101,11 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
  * CPU can safely enter RCU read-side critical sections.  In other words,
  * if the current CPU is not in its idle loop or is in an interrupt or
  * NMI handler, return true.
+ *
+ * Make notrace because it can be called by the internal functions of
+ * ftrace, and making this notrace removes unnecessary recursion calls.
  */
-bool rcu_is_watching(void)
+notrace bool rcu_is_watching(void)
 {
        bool ret;
 
@@ -1149,7 +1160,7 @@ bool rcu_lockdep_current_cpu_online(void)
        preempt_disable_notrace();
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
-       if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+       if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
                ret = true;
        preempt_enable_notrace();
        return ret;
@@ -1601,8 +1612,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 {
        bool ret = false;
        bool need_qs;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
 
        raw_lockdep_assert_held_rcu_node(rnp);
 
@@ -1713,6 +1723,7 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static bool rcu_gp_init(void)
 {
+       unsigned long firstseq;
        unsigned long flags;
        unsigned long oldmask;
        unsigned long mask;
@@ -1756,6 +1767,12 @@ static bool rcu_gp_init(void)
         */
        rcu_state.gp_state = RCU_GP_ONOFF;
        rcu_for_each_leaf_node(rnp) {
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+               firstseq = READ_ONCE(rnp->ofl_seq);
+               if (firstseq & 0x1)
+                       while (firstseq == READ_ONCE(rnp->ofl_seq))
+                               schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
                raw_spin_lock(&rcu_state.ofl_lock);
                raw_spin_lock_irq_rcu_node(rnp);
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
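
Note (illustrative, not part of the diff): the new ->ofl_seq field behaves like a sequence counter around CPU-hotplug operations. Further down in this patch, rcu_cpu_starting() and rcu_report_dead() bump it to an odd value before touching the leaf node's masks and back to an even value afterwards, and the loop added to rcu_gp_init() above waits until any in-flight hotplug operation has finished. A minimal userspace model of that odd/even handshake, using illustrative names and plain C11 atomics in place of WRITE_ONCE()/smp_mb():

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_ulong ofl_seq;    /* even: no hotplug in flight; odd: in flight */

    static void hotplug_begin(void)         /* rcu_cpu_starting()/rcu_report_dead() entry */
    {
            atomic_fetch_add_explicit(&ofl_seq, 1, memory_order_relaxed); /* -> odd */
            atomic_thread_fence(memory_order_seq_cst);      /* like smp_mb() */
    }

    static void hotplug_end(void)           /* ...and exit */
    {
            atomic_thread_fence(memory_order_seq_cst);      /* like smp_mb() */
            atomic_fetch_add_explicit(&ofl_seq, 1, memory_order_relaxed); /* -> even */
    }

    static void gp_wait_for_hotplug(void)   /* the loop added to rcu_gp_init() */
    {
            unsigned long seq;

            atomic_thread_fence(memory_order_seq_cst);
            seq = atomic_load_explicit(&ofl_seq, memory_order_relaxed);
            if (seq & 0x1)                  /* odd: a hotplug operation is in flight */
                    while (seq == atomic_load_explicit(&ofl_seq, memory_order_relaxed))
                            ;               /* the kernel sleeps here instead of spinning */
            atomic_thread_fence(memory_order_seq_cst);
    }

    int main(void)
    {
            hotplug_begin();
            printf("during hotplug: %lu (odd)\n", atomic_load(&ofl_seq));
            hotplug_end();
            gp_wait_for_hotplug();          /* returns at once: counter is even */
            printf("after hotplug:  %lu (even)\n", atomic_load(&ofl_seq));
            return 0;
    }
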
@@ -2046,8 +2063,7 @@ static void rcu_gp_cleanup(void)
                needgp = true;
        }
        /* Advance CBs to reduce false positives below. */
-       offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                   rcu_segcblist_is_offloaded(&rdp->cblist);
+       offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
                WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
                WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
@@ -2246,8 +2262,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
        unsigned long flags;
        unsigned long mask;
        bool needwake = false;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        struct rcu_node *rnp;
 
        WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2397,6 +2412,7 @@ int rcutree_dead_cpu(unsigned int cpu)
        if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
                return 0;
 
+       WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
        /* Do any needed no-CB deferred wakeups from this CPU. */
@@ -2415,8 +2431,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 {
        int div;
        unsigned long flags;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        struct rcu_head *rhp;
        struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
        long bl, count;
@@ -2673,8 +2688,7 @@ static __latent_entropy void rcu_core(void)
        unsigned long flags;
        struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
 
        if (cpu_is_offline(smp_processor_id()))
                return;
@@ -2976,8 +2990,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                                   rcu_segcblist_n_cbs(&rdp->cblist));
 
        /* Go handle any RCU core processing required. */
-       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-           unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
+       if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
                __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
        } else {
                __call_rcu_core(rdp, head, flags);
@@ -3082,6 +3095,9 @@ struct kfree_rcu_cpu_work {
  *     In order to save some per-cpu space the list is singular.
  *     Even though it is lockless an access has to be protected by the
  *     per-cpu lock.
+ * @page_cache_work: A work to refill the cache when it is empty
+ * @work_in_progress: Indicates that page_cache_work is running
+ * @hrtimer: A hrtimer for scheduling a page_cache_work
  * @nr_bkv_objs: number of allocated objects at @bkvcache.
  *
  * This is a per-CPU structure.  The reason that it is not included in
@@ -3098,6 +3114,11 @@ struct kfree_rcu_cpu {
        bool monitor_todo;
        bool initialized;
        int count;
+
+       struct work_struct page_cache_work;
+       atomic_t work_in_progress;
+       struct hrtimer hrtimer;
+
        struct llist_head bkvcache;
        int nr_bkv_objs;
 };
@@ -3215,10 +3236,10 @@ static void kfree_rcu_work(struct work_struct *work)
                        }
                        rcu_lock_release(&rcu_callback_map);
 
-                       krcp = krc_this_cpu_lock(&flags);
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
                        if (put_cached_bnode(krcp, bkvhead[i]))
                                bkvhead[i] = NULL;
-                       krc_this_cpu_unlock(krcp, flags);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
                        if (bkvhead[i])
                                free_page((unsigned long) bkvhead[i]);
@@ -3345,6 +3366,57 @@ static void kfree_rcu_monitor(struct work_struct *work)
                raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
+static enum hrtimer_restart
+schedule_page_work_fn(struct hrtimer *t)
+{
+       struct kfree_rcu_cpu *krcp =
+               container_of(t, struct kfree_rcu_cpu, hrtimer);
+
+       queue_work(system_highpri_wq, &krcp->page_cache_work);
+       return HRTIMER_NORESTART;
+}
+
+static void fill_page_cache_func(struct work_struct *work)
+{
+       struct kvfree_rcu_bulk_data *bnode;
+       struct kfree_rcu_cpu *krcp =
+               container_of(work, struct kfree_rcu_cpu,
+                       page_cache_work);
+       unsigned long flags;
+       bool pushed;
+       int i;
+
+       for (i = 0; i < rcu_min_cached_objs; i++) {
+               bnode = (struct kvfree_rcu_bulk_data *)
+                       __get_free_page(GFP_KERNEL | __GFP_NOWARN);
+
+               if (bnode) {
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
+                       pushed = put_cached_bnode(krcp, bnode);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+                       if (!pushed) {
+                               free_page((unsigned long) bnode);
+                               break;
+                       }
+               }
+       }
+
+       atomic_set(&krcp->work_in_progress, 0);
+}
+
+static void
+run_page_cache_worker(struct kfree_rcu_cpu *krcp)
+{
+       if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+                       !atomic_xchg(&krcp->work_in_progress, 1)) {
+               hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
+                       HRTIMER_MODE_REL);
+               krcp->hrtimer.function = schedule_page_work_fn;
+               hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+       }
+}
+
 static inline bool
 kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 {
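
Note (illustrative, not part of the diff): the page-cache refill is intentionally deferred. kvfree_call_rcu() runs with the raw krcp->lock held and may be called from atomic contexts, so it cannot allocate pages with a sleeping allocator there; instead, run_page_cache_worker() claims a flag with atomic_xchg() and arms a zero-delay hrtimer, whose handler queues page_cache_work, and the GFP_KERNEL allocations in fill_page_cache_func() then happen in workqueue (process) context. The hrtimer bounce appears to exist so that queue_work() itself is not called directly from that constrained context. A userspace model of just the "at most one refill in flight" gate, with illustrative names:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int work_in_progress;     /* 0: idle, 1: a refill is pending/running */

    /* Fast-path side, like run_page_cache_worker(): claim the gate with an
     * exchange so that only one caller ends up scheduling the refill. */
    static bool try_schedule_refill(void)
    {
            return atomic_exchange(&work_in_progress, 1) == 0;
    }

    /* Deferred side, like fill_page_cache_func(): allocate in a clean context,
     * then drop the gate so a later cache miss can schedule another refill. */
    static void refill_cache(void)
    {
            /* ... sleeping page allocations would go here ... */
            atomic_store(&work_in_progress, 0);
    }

    int main(void)
    {
            printf("first caller schedules refill: %d\n", try_schedule_refill()); /* 1 */
            printf("second caller backs off:       %d\n", try_schedule_refill()); /* 0 */
            refill_cache();
            printf("gate is re-armed:              %d\n", try_schedule_refill()); /* 1 */
            return 0;
    }
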
@@ -3361,32 +3433,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
        if (!krcp->bkvhead[idx] ||
                        krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
                bnode = get_cached_bnode(krcp);
-               if (!bnode) {
-                       /*
-                        * To keep this path working on raw non-preemptible
-                        * sections, prevent the optional entry into the
-                        * allocator as it uses sleeping locks. In fact, even
-                        * if the caller of kfree_rcu() is preemptible, this
-                        * path still is not, as krcp->lock is a raw spinlock.
-                        * With additional page pre-allocation in the works,
-                        * hitting this return is going to be much less likely.
-                        */
-                       if (IS_ENABLED(CONFIG_PREEMPT_RT))
-                               return false;
-
-                       /*
-                        * NOTE: For one argument of kvfree_rcu() we can
-                        * drop the lock and get the page in sleepable
-                        * context. That would allow to maintain an array
-                        * for the CONFIG_PREEMPT_RT as well if no cached
-                        * pages are available.
-                        */
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-               }
-
                /* Switch to emergency path. */
-               if (unlikely(!bnode))
+               if (!bnode)
                        return false;
 
                /* Initialize the new block. */
@@ -3450,12 +3498,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
                goto unlock_return;
        }
 
-       /*
-        * Under high memory pressure GFP_NOWAIT can fail,
-        * in that case the emergency path is maintained.
-        */
        success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
        if (!success) {
+               run_page_cache_worker(krcp);
+
                if (head == NULL)
                        // Inline if kvfree_rcu(one_arg) call.
                        goto unlock_return;
@@ -3565,7 +3611,7 @@ void __init kfree_rcu_scheduler_running(void)
  * During early boot, any blocking grace-period wait automatically
  * implies a grace period.  Later on, this is never the case for PREEMPTION.
  *
- * Howevr, because a context switch is a grace period for !PREEMPTION, any
+ * However, because a context switch is a grace period for !PREEMPTION, any
  * blocking grace-period wait automatically implies a grace period if
  * there is only one CPU online at any point time during execution of
  * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
@@ -3581,7 +3627,20 @@ static int rcu_blocking_is_gp(void)
                return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
        might_sleep();  /* Check for RCU read-side critical section. */
        preempt_disable();
-       ret = num_online_cpus() <= 1;
+       /*
+        * If the rcu_state.n_online_cpus counter is equal to one,
+        * there is only one CPU, and that CPU sees all prior accesses
+        * made by any CPU that was online at the time of its access.
+        * Furthermore, if this counter is equal to one, its value cannot
+        * change until after the preempt_enable() below.
+        *
+        * Furthermore, if rcu_state.n_online_cpus is equal to one here,
+        * all later CPUs (both this one and any that come online later
+        * on) are guaranteed to see all accesses prior to this point
+        * in the code, without the need for additional memory barriers.
+        * Those memory barriers are provided by CPU-hotplug code.
+        */
+       ret = READ_ONCE(rcu_state.n_online_cpus) <= 1;
        preempt_enable();
        return ret;
 }
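
Note (illustrative, not part of the diff): rcu_blocking_is_gp() now reads rcu_state.n_online_cpus, which this patch adjusts only from the CPU-hotplug callbacks (rcutree_prepare_cpu() and rcutree_dead_cpu() above), rather than calling num_online_cpus(). A minimal userspace model of that pattern, with WRITE_ONCE()/READ_ONCE() stood in by C11 relaxed atomics; as the comment in rcu_blocking_is_gp() explains, any ordering against other CPUs' accesses comes from the CPU-hotplug code, not from these accesses themselves:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int n_online_cpus = 1;    /* the boot CPU */

    /* Hotplug side: in the kernel these are plain WRITE_ONCE() updates, which
     * is enough because CPU hotplug serializes the writers. */
    static void cpu_comes_online(void)
    {
            atomic_fetch_add_explicit(&n_online_cpus, 1, memory_order_relaxed);
    }

    static void cpu_goes_offline(void)
    {
            atomic_fetch_sub_explicit(&n_online_cpus, 1, memory_order_relaxed);
    }

    /* Reader side, like rcu_blocking_is_gp(): a single READ_ONCE()-style load. */
    static bool blocking_is_gp(void)
    {
            return atomic_load_explicit(&n_online_cpus, memory_order_relaxed) <= 1;
    }

    int main(void)
    {
            printf("1 CPU:  grace period is trivial:  %d\n", blocking_is_gp()); /* 1 */
            cpu_comes_online();
            printf("2 CPUs: need a real grace period: %d\n", blocking_is_gp()); /* 0 */
            cpu_goes_offline();
            printf("back to 1 CPU:                    %d\n", blocking_is_gp()); /* 1 */
            return 0;
    }
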
@@ -3626,7 +3685,7 @@ void synchronize_rcu(void)
                         lock_is_held(&rcu_sched_lock_map),
                         "Illegal synchronize_rcu() in RCU read-side critical section");
        if (rcu_blocking_is_gp())
-               return;
+               return;  // Context allows vacuous grace periods.
        if (rcu_gp_is_expedited())
                synchronize_rcu_expedited();
        else
@@ -3705,13 +3764,13 @@ static int rcu_pending(int user)
                return 1;
 
        /* Does this CPU have callbacks ready to invoke? */
-       if (rcu_segcblist_ready_cbs(&rdp->cblist))
+       if (!rcu_segcblist_is_offloaded(&rdp->cblist) &&
+           rcu_segcblist_ready_cbs(&rdp->cblist))
                return 1;
 
        /* Has RCU gone idle with this CPU needing another grace period? */
        if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
-           (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) ||
-            !rcu_segcblist_is_offloaded(&rdp->cblist)) &&
+           !rcu_segcblist_is_offloaded(&rdp->cblist) &&
            !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
                return 1;
 
@@ -3968,6 +4027,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        rcu_prepare_kthreads(cpu);
        rcu_spawn_cpu_nocb_kthread(cpu);
+       WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
 
        return 0;
 }
@@ -4056,6 +4116,9 @@ void rcu_cpu_starting(unsigned int cpu)
 
        rnp = rdp->mynode;
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
        newcpu = !(rnp->expmaskinitnext & mask);
@@ -4066,13 +4129,18 @@ void rcu_cpu_starting(unsigned int cpu)
        rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
        rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
        rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
-       if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
+
+       /* An incoming CPU should never be blocking a grace period. */
+       if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
                rcu_disable_urgency_upon_qs(rdp);
                /* Report QS -after- changing ->qsmaskinitnext! */
                rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
        } else {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
        smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4099,6 +4167,9 @@ void rcu_report_dead(unsigned int cpu)
 
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock(&rcu_state.ofl_lock);
        raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
        rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4111,6 +4182,9 @@ void rcu_report_dead(unsigned int cpu)
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        raw_spin_unlock(&rcu_state.ofl_lock);
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
 
        rdp->cpu_started = false;
 }
@@ -4448,24 +4522,14 @@ static void __init kfree_rcu_batch_init(void)
 
        for_each_possible_cpu(cpu) {
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
-               struct kvfree_rcu_bulk_data *bnode;
 
                for (i = 0; i < KFREE_N_BATCHES; i++) {
                        INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
                        krcp->krw_arr[i].krcp = krcp;
                }
 
-               for (i = 0; i < rcu_min_cached_objs; i++) {
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-
-                       if (bnode)
-                               put_cached_bnode(krcp, bnode);
-                       else
-                               pr_err("Failed to preallocate for %d CPU!\n", cpu);
-               }
-
                INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+               INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
                krcp->initialized = true;
        }
        if (register_shrinker(&kfree_rcu_shrinker))