Merge branches 'cpuinfo.2020.11.06a', 'doc.2020.11.06a', 'fixes.2020.11.19b', 'lockde...
[linux-2.6-microblaze.git] / kernel/rcu/tree.c
index 50d90ee..516c689 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444);
  * per-CPU. Object size is equal to one page. This value
  * can be changed at boot time.
  */
-static int rcu_min_cached_objs = 2;
+static int rcu_min_cached_objs = 5;
 module_param(rcu_min_cached_objs, int, 0444);
 
 /* Retrieve RCU kthreads priority for rcutorture */
@@ -341,6 +341,14 @@ static bool rcu_dynticks_in_eqs(int snap)
        return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
+/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
+bool rcu_is_idle_cpu(int cpu)
+{
+       struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+       return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+}
+
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since
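Usage sketch (hypothetical caller, not part of this patch): the new rcu_is_idle_cpu() reports whether RCU's latest dynticks snapshot for the given CPU shows an extended quiescent state, so a caller could, for example, avoid IPIs to CPUs that RCU already regards as idle:

/* Illustration only; the function and the policy below are hypothetical. */
static void example_poke_non_idle_cpus(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		if (rcu_is_idle_cpu(cpu))
			continue;	/* In an RCU extended quiescent state. */
		smp_send_reschedule(cpu);
	}
}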
@@ -1152,7 +1160,7 @@ bool rcu_lockdep_current_cpu_online(void)
        preempt_disable_notrace();
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
-       if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+       if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
                ret = true;
        preempt_enable_notrace();
        return ret;
@@ -1717,6 +1725,7 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static bool rcu_gp_init(void)
 {
+       unsigned long firstseq;
        unsigned long flags;
        unsigned long oldmask;
        unsigned long mask;
@@ -1760,6 +1769,12 @@ static bool rcu_gp_init(void)
         */
        rcu_state.gp_state = RCU_GP_ONOFF;
        rcu_for_each_leaf_node(rnp) {
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+               firstseq = READ_ONCE(rnp->ofl_seq);
+               if (firstseq & 0x1)
+                       while (firstseq == READ_ONCE(rnp->ofl_seq))
+                               schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
                raw_spin_lock(&rcu_state.ofl_lock);
                raw_spin_lock_irq_rcu_node(rnp);
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -3082,6 +3097,9 @@ struct kfree_rcu_cpu_work {
  *     In order to save some per-cpu space the list is singular.
  *     Even though it is lockless an access has to be protected by the
  *     per-cpu lock.
+ * @page_cache_work: A work to refill the cache when it is empty
+ * @work_in_progress: Indicates that page_cache_work is running
+ * @hrtimer: A hrtimer for scheduling a page_cache_work
  * @nr_bkv_objs: number of allocated objects at @bkvcache.
  *
  * This is a per-CPU structure.  The reason that it is not included in
@@ -3098,6 +3116,11 @@ struct kfree_rcu_cpu {
        bool monitor_todo;
        bool initialized;
        int count;
+
+       struct work_struct page_cache_work;
+       atomic_t work_in_progress;
+       struct hrtimer hrtimer;
+
        struct llist_head bkvcache;
        int nr_bkv_objs;
 };
@@ -3215,10 +3238,10 @@ static void kfree_rcu_work(struct work_struct *work)
                        }
                        rcu_lock_release(&rcu_callback_map);
 
-                       krcp = krc_this_cpu_lock(&flags);
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
                        if (put_cached_bnode(krcp, bkvhead[i]))
                                bkvhead[i] = NULL;
-                       krc_this_cpu_unlock(krcp, flags);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
                        if (bkvhead[i])
                                free_page((unsigned long) bkvhead[i]);
@@ -3345,6 +3368,57 @@ static void kfree_rcu_monitor(struct work_struct *work)
                raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
+static enum hrtimer_restart
+schedule_page_work_fn(struct hrtimer *t)
+{
+       struct kfree_rcu_cpu *krcp =
+               container_of(t, struct kfree_rcu_cpu, hrtimer);
+
+       queue_work(system_highpri_wq, &krcp->page_cache_work);
+       return HRTIMER_NORESTART;
+}
+
+static void fill_page_cache_func(struct work_struct *work)
+{
+       struct kvfree_rcu_bulk_data *bnode;
+       struct kfree_rcu_cpu *krcp =
+               container_of(work, struct kfree_rcu_cpu,
+                       page_cache_work);
+       unsigned long flags;
+       bool pushed;
+       int i;
+
+       for (i = 0; i < rcu_min_cached_objs; i++) {
+               bnode = (struct kvfree_rcu_bulk_data *)
+                       __get_free_page(GFP_KERNEL | __GFP_NOWARN);
+
+               if (bnode) {
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
+                       pushed = put_cached_bnode(krcp, bnode);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+                       if (!pushed) {
+                               free_page((unsigned long) bnode);
+                               break;
+                       }
+               }
+       }
+
+       atomic_set(&krcp->work_in_progress, 0);
+}
+
+static void
+run_page_cache_worker(struct kfree_rcu_cpu *krcp)
+{
+       if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+                       !atomic_xchg(&krcp->work_in_progress, 1)) {
+               hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
+                       HRTIMER_MODE_REL);
+               krcp->hrtimer.function = schedule_page_work_fn;
+               hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+       }
+}
+
 static inline bool
 kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 {
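The three functions added above, together with the run_page_cache_worker() call added to kvfree_call_rcu() further down, form a deferred refill path for the per-CPU page cache. A descriptive summary of the flow (paraphrased from the hunks, not text from the patch):

/*
 * kvfree_call_rcu()                   caller context, raw krcp->lock held
 *   -> run_page_cache_worker()        arms a zero-delay hrtimer; guarded by
 *                                     atomic_xchg(&krcp->work_in_progress, 1)
 *     -> schedule_page_work_fn()      hrtimer handler: queue_work() on
 *                                     system_highpri_wq
 *       -> fill_page_cache_func()     process context: allocates up to
 *                                     rcu_min_cached_objs pages with
 *                                     GFP_KERNEL | __GFP_NOWARN and pushes
 *                                     them with put_cached_bnode()
 */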
@@ -3361,32 +3435,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
        if (!krcp->bkvhead[idx] ||
                        krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
                bnode = get_cached_bnode(krcp);
-               if (!bnode) {
-                       /*
-                        * To keep this path working on raw non-preemptible
-                        * sections, prevent the optional entry into the
-                        * allocator as it uses sleeping locks. In fact, even
-                        * if the caller of kfree_rcu() is preemptible, this
-                        * path still is not, as krcp->lock is a raw spinlock.
-                        * With additional page pre-allocation in the works,
-                        * hitting this return is going to be much less likely.
-                        */
-                       if (IS_ENABLED(CONFIG_PREEMPT_RT))
-                               return false;
-
-                       /*
-                        * NOTE: For one argument of kvfree_rcu() we can
-                        * drop the lock and get the page in sleepable
-                        * context. That would allow to maintain an array
-                        * for the CONFIG_PREEMPT_RT as well if no cached
-                        * pages are available.
-                        */
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-               }
-
                /* Switch to emergency path. */
-               if (unlikely(!bnode))
+               if (!bnode)
                        return false;
 
                /* Initialize the new block. */
@@ -3450,12 +3500,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
                goto unlock_return;
        }
 
-       /*
-        * Under high memory pressure GFP_NOWAIT can fail,
-        * in that case the emergency path is maintained.
-        */
        success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
        if (!success) {
+               run_page_cache_worker(krcp);
+
                if (head == NULL)
                        // Inline if kvfree_rcu(one_arg) call.
                        goto unlock_return;
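For reference, a usage sketch of the two kvfree_rcu() forms that reach this code; the structure and names below are illustrative only and not part of the patch:

struct example_obj {
	int payload;
	struct rcu_head rcu;
};

static void example_release(struct example_obj *obj, void *buf)
{
	/* Two-argument form: uses the embedded rcu_head, never sleeps. */
	kvfree_rcu(obj, rcu);

	/*
	 * One-argument form ("kvfree_rcu(one_arg)" above): no rcu_head, so
	 * under memory pressure it may fall back to synchronize_rcu() and
	 * therefore must be called from sleepable context.
	 */
	kvfree_rcu(buf);
}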
@@ -3565,7 +3613,7 @@ void __init kfree_rcu_scheduler_running(void)
  * During early boot, any blocking grace-period wait automatically
  * implies a grace period.  Later on, this is never the case for PREEMPTION.
  *
- * Howevr, because a context switch is a grace period for !PREEMPTION, any
+ * However, because a context switch is a grace period for !PREEMPTION, any
  * blocking grace-period wait automatically implies a grace period if
  * there is only one CPU online at any point time during execution of
  * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
@@ -4069,6 +4117,9 @@ void rcu_cpu_starting(unsigned int cpu)
 
        rnp = rdp->mynode;
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
        newcpu = !(rnp->expmaskinitnext & mask);
@@ -4088,6 +4139,9 @@ void rcu_cpu_starting(unsigned int cpu)
        } else {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
        smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4115,6 +4169,9 @@ void rcu_report_dead(unsigned int cpu)
 
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock(&rcu_state.ofl_lock);
        raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
        rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4127,6 +4184,9 @@ void rcu_report_dead(unsigned int cpu)
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        raw_spin_unlock(&rcu_state.ofl_lock);
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
 
        rdp->cpu_started = false;
 }
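Taken together, the ->ofl_seq updates in rcu_cpu_starting() and rcu_report_dead() and the wait loop added to rcu_gp_init() implement a seqcount-style handshake around CPU-hotplug updates (the ->ofl_seq field itself is assumed to be added to struct rcu_node in a companion rcu/tree.h change not shown here). A descriptive sketch of the protocol:

/*
 * Writer side (CPU-hotplug path), as in rcu_cpu_starting()/rcu_report_dead():
 *
 *	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);	// now odd: update in progress
 *	smp_mb();
 *	... update ->qsmaskinitnext and friends ...
 *	smp_mb();
 *	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);	// back to even: update done
 *
 * Reader side (grace-period kthread), as in rcu_gp_init():
 *
 *	smp_mb();
 *	seq = READ_ONCE(rnp->ofl_seq);
 *	if (seq & 0x1)					// odd: hotplug update in flight
 *		while (seq == READ_ONCE(rnp->ofl_seq))
 *			schedule_timeout_idle(1);
 *	smp_mb();
 *
 * This keeps the grace-period kthread from sampling ->qsmaskinitnext in the
 * middle of an online/offline transition, and rcu_lockdep_current_cpu_online()
 * treats an odd ->ofl_seq as "CPU is online enough for RCU".
 */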
@@ -4463,24 +4523,14 @@ static void __init kfree_rcu_batch_init(void)
 
        for_each_possible_cpu(cpu) {
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
-               struct kvfree_rcu_bulk_data *bnode;
 
                for (i = 0; i < KFREE_N_BATCHES; i++) {
                        INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
                        krcp->krw_arr[i].krcp = krcp;
                }
 
-               for (i = 0; i < rcu_min_cached_objs; i++) {
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-
-                       if (bnode)
-                               put_cached_bnode(krcp, bnode);
-                       else
-                               pr_err("Failed to preallocate for %d CPU!\n", cpu);
-               }
-
                INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+               INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
                krcp->initialized = true;
        }
        if (register_shrinker(&kfree_rcu_shrinker))