Merge tag 'sched-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git...
kernel/rcu/tree.c
index ed4941f..40e5e3d 100644
@@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444);
  * per-CPU. Object size is equal to one page. This value
  * can be changed at boot time.
  */
-static int rcu_min_cached_objs = 2;
+static int rcu_min_cached_objs = 5;
 module_param(rcu_min_cached_objs, int, 0444);
 
 /* Retrieve RCU kthreads priority for rcutorture */
@@ -341,6 +341,14 @@ static bool rcu_dynticks_in_eqs(int snap)
        return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
+/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
+bool rcu_is_idle_cpu(int cpu)
+{
+       struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+       return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+}
+
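
Note (illustrative, not part of the diff): rcu_is_idle_cpu() simply reports whether RCU currently sees the given CPU as being in an extended quiescent state (dynticks idle). The sketch below shows a purely hypothetical caller, only to illustrate the calling convention; count_rcu_idle_cpus_example() is an invented name and is not part of this patch:

    /* Hypothetical example: count the CPUs that RCU currently sees as idle. */
    static void count_rcu_idle_cpus_example(void)
    {
            int cpu, n = 0;

            for_each_online_cpu(cpu)
                    if (rcu_is_idle_cpu(cpu))
                            n++;
            pr_info("%d of %u online CPUs are RCU-idle\n", n, num_online_cpus());
    }
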
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since
@@ -546,12 +554,12 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param
        return ret;
 }
 
-static struct kernel_param_ops first_fqs_jiffies_ops = {
+static const struct kernel_param_ops first_fqs_jiffies_ops = {
        .set = param_set_first_fqs_jiffies,
        .get = param_get_ulong,
 };
 
-static struct kernel_param_ops next_fqs_jiffies_ops = {
+static const struct kernel_param_ops next_fqs_jiffies_ops = {
        .set = param_set_next_fqs_jiffies,
        .get = param_get_ulong,
 };
@@ -928,8 +936,8 @@ void __rcu_irq_enter_check_tick(void)
 {
        struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
-        // Enabling the tick is unsafe in NMI handlers.
-       if (WARN_ON_ONCE(in_nmi()))
+       // If we're here from NMI there's nothing to do.
+       if (in_nmi())
                return;
 
        RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
@@ -1093,8 +1101,11 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
  * CPU can safely enter RCU read-side critical sections.  In other words,
  * if the current CPU is not in its idle loop or is in an interrupt or
  * NMI handler, return true.
+ *
+ * Make notrace because it can be called by the internal functions of
+ * ftrace, and making this notrace removes unnecessary recursion calls.
  */
-bool rcu_is_watching(void)
+notrace bool rcu_is_watching(void)
 {
        bool ret;
 
@@ -1149,7 +1160,7 @@ bool rcu_lockdep_current_cpu_online(void)
        preempt_disable_notrace();
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
-       if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+       if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
                ret = true;
        preempt_enable_notrace();
        return ret;
@@ -1601,8 +1612,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 {
        bool ret = false;
        bool need_qs;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
 
        raw_lockdep_assert_held_rcu_node(rnp);
 
@@ -1713,6 +1723,7 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static bool rcu_gp_init(void)
 {
+       unsigned long firstseq;
        unsigned long flags;
        unsigned long oldmask;
        unsigned long mask;
@@ -1756,6 +1767,12 @@ static bool rcu_gp_init(void)
         */
        rcu_state.gp_state = RCU_GP_ONOFF;
        rcu_for_each_leaf_node(rnp) {
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+               firstseq = READ_ONCE(rnp->ofl_seq);
+               if (firstseq & 0x1)
+                       while (firstseq == READ_ONCE(rnp->ofl_seq))
+                               schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
                raw_spin_lock(&rcu_state.ofl_lock);
                raw_spin_lock_irq_rcu_node(rnp);
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
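
Note (illustrative, not part of the diff): the new ->ofl_seq field behaves like a sequence counter around CPU-hotplug operations. Further down in this patch, rcu_cpu_starting() and rcu_report_dead() bump it to an odd value before touching the leaf node's masks and back to an even value afterwards, and the loop added to rcu_gp_init() above waits until any in-flight hotplug operation has finished. A minimal userspace model of that odd/even handshake, using illustrative names and plain C11 atomics in place of WRITE_ONCE()/smp_mb():

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_ulong ofl_seq;    /* even: no hotplug in flight; odd: in flight */

    static void hotplug_begin(void)         /* rcu_cpu_starting()/rcu_report_dead() entry */
    {
            atomic_fetch_add_explicit(&ofl_seq, 1, memory_order_relaxed); /* -> odd */
            atomic_thread_fence(memory_order_seq_cst);      /* like smp_mb() */
    }

    static void hotplug_end(void)           /* ...and exit */
    {
            atomic_thread_fence(memory_order_seq_cst);      /* like smp_mb() */
            atomic_fetch_add_explicit(&ofl_seq, 1, memory_order_relaxed); /* -> even */
    }

    static void gp_wait_for_hotplug(void)   /* the loop added to rcu_gp_init() */
    {
            unsigned long seq;

            atomic_thread_fence(memory_order_seq_cst);
            seq = atomic_load_explicit(&ofl_seq, memory_order_relaxed);
            if (seq & 0x1)                  /* odd: a hotplug operation is in flight */
                    while (seq == atomic_load_explicit(&ofl_seq, memory_order_relaxed))
                            ;               /* the kernel sleeps here instead of spinning */
            atomic_thread_fence(memory_order_seq_cst);
    }

    int main(void)
    {
            hotplug_begin();
            printf("during hotplug: %lu (odd)\n", atomic_load(&ofl_seq));
            hotplug_end();
            gp_wait_for_hotplug();          /* returns at once: counter is even */
            printf("after hotplug:  %lu (even)\n", atomic_load(&ofl_seq));
            return 0;
    }
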
@@ -2046,8 +2063,7 @@ static void rcu_gp_cleanup(void)
                needgp = true;
        }
        /* Advance CBs to reduce false positives below. */
-       offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                   rcu_segcblist_is_offloaded(&rdp->cblist);
+       offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
                WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
                WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
@@ -2246,8 +2262,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
        unsigned long flags;
        unsigned long mask;
        bool needwake = false;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        struct rcu_node *rnp;
 
        WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2397,6 +2412,7 @@ int rcutree_dead_cpu(unsigned int cpu)
        if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
                return 0;
 
+       WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
        /* Do any needed no-CB deferred wakeups from this CPU. */
@@ -2415,8 +2431,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 {
        int div;
        unsigned long flags;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        struct rcu_head *rhp;
        struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
        long bl, count;
@@ -2673,8 +2688,7 @@ static __latent_entropy void rcu_core(void)
        unsigned long flags;
        struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
-       const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                              rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
 
        if (cpu_is_offline(smp_processor_id()))
                return;
@@ -2976,8 +2990,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                                   rcu_segcblist_n_cbs(&rdp->cblist));
 
        /* Go handle any RCU core processing required. */
-       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-           unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
+       if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
                __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
        } else {
                __call_rcu_core(rdp, head, flags);
@@ -3082,6 +3095,9 @@ struct kfree_rcu_cpu_work {
  *     In order to save some per-cpu space the list is singular.
  *     Even though it is lockless an access has to be protected by the
  *     per-cpu lock.
+ * @page_cache_work: A work to refill the cache when it is empty
+ * @work_in_progress: Indicates that page_cache_work is running
+ * @hrtimer: A hrtimer for scheduling a page_cache_work
  * @nr_bkv_objs: number of allocated objects at @bkvcache.
  *
  * This is a per-CPU structure.  The reason that it is not included in
@@ -3098,6 +3114,11 @@ struct kfree_rcu_cpu {
        bool monitor_todo;
        bool initialized;
        int count;
+
+       struct work_struct page_cache_work;
+       atomic_t work_in_progress;
+       struct hrtimer hrtimer;
+
        struct llist_head bkvcache;
        int nr_bkv_objs;
 };
@@ -3215,10 +3236,10 @@ static void kfree_rcu_work(struct work_struct *work)
                        }
                        rcu_lock_release(&rcu_callback_map);
 
-                       krcp = krc_this_cpu_lock(&flags);
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
                        if (put_cached_bnode(krcp, bkvhead[i]))
                                bkvhead[i] = NULL;
-                       krc_this_cpu_unlock(krcp, flags);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
                        if (bkvhead[i])
                                free_page((unsigned long) bkvhead[i]);
@@ -3345,6 +3366,57 @@ static void kfree_rcu_monitor(struct work_struct *work)
                raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
+static enum hrtimer_restart
+schedule_page_work_fn(struct hrtimer *t)
+{
+       struct kfree_rcu_cpu *krcp =
+               container_of(t, struct kfree_rcu_cpu, hrtimer);
+
+       queue_work(system_highpri_wq, &krcp->page_cache_work);
+       return HRTIMER_NORESTART;
+}
+
+static void fill_page_cache_func(struct work_struct *work)
+{
+       struct kvfree_rcu_bulk_data *bnode;
+       struct kfree_rcu_cpu *krcp =
+               container_of(work, struct kfree_rcu_cpu,
+                       page_cache_work);
+       unsigned long flags;
+       bool pushed;
+       int i;
+
+       for (i = 0; i < rcu_min_cached_objs; i++) {
+               bnode = (struct kvfree_rcu_bulk_data *)
+                       __get_free_page(GFP_KERNEL | __GFP_NOWARN);
+
+               if (bnode) {
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
+                       pushed = put_cached_bnode(krcp, bnode);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+                       if (!pushed) {
+                               free_page((unsigned long) bnode);
+                               break;
+                       }
+               }
+       }
+
+       atomic_set(&krcp->work_in_progress, 0);
+}
+
+static void
+run_page_cache_worker(struct kfree_rcu_cpu *krcp)
+{
+       if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+                       !atomic_xchg(&krcp->work_in_progress, 1)) {
+               hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
+                       HRTIMER_MODE_REL);
+               krcp->hrtimer.function = schedule_page_work_fn;
+               hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+       }
+}
+
 static inline bool
 kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 {
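
Note (illustrative, not part of the diff): the page-cache refill is intentionally deferred. kvfree_call_rcu() runs with the raw krcp->lock held and may be called from atomic contexts, so it cannot allocate pages with a sleeping allocator there; instead, run_page_cache_worker() claims a flag with atomic_xchg() and arms a zero-delay hrtimer, whose handler queues page_cache_work, and the GFP_KERNEL allocations in fill_page_cache_func() then happen in workqueue (process) context. The hrtimer bounce appears to exist so that queue_work() itself is not called directly from that constrained context. A userspace model of just the "at most one refill in flight" gate, with illustrative names:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int work_in_progress;     /* 0: idle, 1: a refill is pending/running */

    /* Fast-path side, like run_page_cache_worker(): claim the gate with an
     * exchange so that only one caller ends up scheduling the refill. */
    static bool try_schedule_refill(void)
    {
            return atomic_exchange(&work_in_progress, 1) == 0;
    }

    /* Deferred side, like fill_page_cache_func(): allocate in a clean context,
     * then drop the gate so a later cache miss can schedule another refill. */
    static void refill_cache(void)
    {
            /* ... sleeping page allocations would go here ... */
            atomic_store(&work_in_progress, 0);
    }

    int main(void)
    {
            printf("first caller schedules refill: %d\n", try_schedule_refill()); /* 1 */
            printf("second caller backs off:       %d\n", try_schedule_refill()); /* 0 */
            refill_cache();
            printf("gate is re-armed:              %d\n", try_schedule_refill()); /* 1 */
            return 0;
    }
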
@@ -3361,32 +3433,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
        if (!krcp->bkvhead[idx] ||
                        krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
                bnode = get_cached_bnode(krcp);
-               if (!bnode) {
-                       /*
-                        * To keep this path working on raw non-preemptible
-                        * sections, prevent the optional entry into the
-                        * allocator as it uses sleeping locks. In fact, even
-                        * if the caller of kfree_rcu() is preemptible, this
-                        * path still is not, as krcp->lock is a raw spinlock.
-                        * With additional page pre-allocation in the works,
-                        * hitting this return is going to be much less likely.
-                        */
-                       if (IS_ENABLED(CONFIG_PREEMPT_RT))
-                               return false;
-
-                       /*
-                        * NOTE: For one argument of kvfree_rcu() we can
-                        * drop the lock and get the page in sleepable
-                        * context. That would allow to maintain an array
-                        * for the CONFIG_PREEMPT_RT as well if no cached
-                        * pages are available.
-                        */
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-               }
-
                /* Switch to emergency path. */
-               if (unlikely(!bnode))
+               if (!bnode)
                        return false;
 
                /* Initialize the new block. */
@@ -3450,12 +3498,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
                goto unlock_return;
        }
 
-       /*
-        * Under high memory pressure GFP_NOWAIT can fail,
-        * in that case the emergency path is maintained.
-        */
        success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
        if (!success) {
+               run_page_cache_worker(krcp);
+
                if (head == NULL)
                        // Inline if kvfree_rcu(one_arg) call.
                        goto unlock_return;
@@ -3565,7 +3611,7 @@ void __init kfree_rcu_scheduler_running(void)
  * During early boot, any blocking grace-period wait automatically
  * implies a grace period.  Later on, this is never the case for PREEMPTION.
  *
- * Howevr, because a context switch is a grace period for !PREEMPTION, any
+ * However, because a context switch is a grace period for !PREEMPTION, any
  * blocking grace-period wait automatically implies a grace period if
  * there is only one CPU online at any point time during execution of
  * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
@@ -3581,7 +3627,20 @@ static int rcu_blocking_is_gp(void)
                return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
        might_sleep();  /* Check for RCU read-side critical section. */
        preempt_disable();
-       ret = num_online_cpus() <= 1;
+       /*
+        * If the rcu_state.n_online_cpus counter is equal to one,
+        * there is only one CPU, and that CPU sees all prior accesses
+        * made by any CPU that was online at the time of its access.
+        * Furthermore, if this counter is equal to one, its value cannot
+        * change until after the preempt_enable() below.
+        *
+        * Furthermore, if rcu_state.n_online_cpus is equal to one here,
+        * all later CPUs (both this one and any that come online later
+        * on) are guaranteed to see all accesses prior to this point
+        * in the code, without the need for additional memory barriers.
+        * Those memory barriers are provided by CPU-hotplug code.
+        */
+       ret = READ_ONCE(rcu_state.n_online_cpus) <= 1;
        preempt_enable();
        return ret;
 }
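
Note (illustrative, not part of the diff): rcu_blocking_is_gp() now reads rcu_state.n_online_cpus, which this patch adjusts only from the CPU-hotplug callbacks (rcutree_prepare_cpu() and rcutree_dead_cpu() above), rather than calling num_online_cpus(). A minimal userspace model of that pattern, with WRITE_ONCE()/READ_ONCE() stood in by C11 relaxed atomics; as the comment in rcu_blocking_is_gp() explains, any ordering against other CPUs' accesses comes from the CPU-hotplug code, not from these accesses themselves:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int n_online_cpus = 1;    /* the boot CPU */

    /* Hotplug side: in the kernel these are plain WRITE_ONCE() updates, which
     * is enough because CPU hotplug serializes the writers. */
    static void cpu_comes_online(void)
    {
            atomic_fetch_add_explicit(&n_online_cpus, 1, memory_order_relaxed);
    }

    static void cpu_goes_offline(void)
    {
            atomic_fetch_sub_explicit(&n_online_cpus, 1, memory_order_relaxed);
    }

    /* Reader side, like rcu_blocking_is_gp(): a single READ_ONCE()-style load. */
    static bool blocking_is_gp(void)
    {
            return atomic_load_explicit(&n_online_cpus, memory_order_relaxed) <= 1;
    }

    int main(void)
    {
            printf("1 CPU:  grace period is trivial:  %d\n", blocking_is_gp()); /* 1 */
            cpu_comes_online();
            printf("2 CPUs: need a real grace period: %d\n", blocking_is_gp()); /* 0 */
            cpu_goes_offline();
            printf("back to 1 CPU:                    %d\n", blocking_is_gp()); /* 1 */
            return 0;
    }
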
@@ -3626,7 +3685,7 @@ void synchronize_rcu(void)
                         lock_is_held(&rcu_sched_lock_map),
                         "Illegal synchronize_rcu() in RCU read-side critical section");
        if (rcu_blocking_is_gp())
-               return;
+               return;  // Context allows vacuous grace periods.
        if (rcu_gp_is_expedited())
                synchronize_rcu_expedited();
        else
@@ -3705,13 +3764,13 @@ static int rcu_pending(int user)
                return 1;
 
        /* Does this CPU have callbacks ready to invoke? */
-       if (rcu_segcblist_ready_cbs(&rdp->cblist))
+       if (!rcu_segcblist_is_offloaded(&rdp->cblist) &&
+           rcu_segcblist_ready_cbs(&rdp->cblist))
                return 1;
 
        /* Has RCU gone idle with this CPU needing another grace period? */
        if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
-           (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) ||
-            !rcu_segcblist_is_offloaded(&rdp->cblist)) &&
+           !rcu_segcblist_is_offloaded(&rdp->cblist) &&
            !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
                return 1;
 
@@ -3968,6 +4027,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        rcu_prepare_kthreads(cpu);
        rcu_spawn_cpu_nocb_kthread(cpu);
+       WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
 
        return 0;
 }
@@ -4056,6 +4116,9 @@ void rcu_cpu_starting(unsigned int cpu)
 
        rnp = rdp->mynode;
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
        newcpu = !(rnp->expmaskinitnext & mask);
@@ -4066,13 +4129,18 @@ void rcu_cpu_starting(unsigned int cpu)
        rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
        rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
        rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
-       if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
+
+       /* An incoming CPU should never be blocking a grace period. */
+       if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
                rcu_disable_urgency_upon_qs(rdp);
                /* Report QS -after- changing ->qsmaskinitnext! */
                rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
        } else {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
        smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4099,6 +4167,9 @@ void rcu_report_dead(unsigned int cpu)
 
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock(&rcu_state.ofl_lock);
        raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
        rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4111,6 +4182,9 @@ void rcu_report_dead(unsigned int cpu)
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        raw_spin_unlock(&rcu_state.ofl_lock);
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
 
        rdp->cpu_started = false;
 }
@@ -4448,24 +4522,14 @@ static void __init kfree_rcu_batch_init(void)
 
        for_each_possible_cpu(cpu) {
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
-               struct kvfree_rcu_bulk_data *bnode;
 
                for (i = 0; i < KFREE_N_BATCHES; i++) {
                        INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
                        krcp->krw_arr[i].krcp = krcp;
                }
 
-               for (i = 0; i < rcu_min_cached_objs; i++) {
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-
-                       if (bnode)
-                               put_cached_bnode(krcp, bnode);
-                       else
-                               pr_err("Failed to preallocate for %d CPU!\n", cpu);
-               }
-
                INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+               INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
                krcp->initialized = true;
        }
        if (register_shrinker(&kfree_rcu_shrinker))