Merge branches 'cpuinfo.2020.11.06a', 'doc.2020.11.06a', 'fixes.2020.11.19b', 'lockde...
[linux-2.6-microblaze.git] / kernel/rcu/tree.c
index 50d90ee..516c689 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444);
  * per-CPU. Object size is equal to one page. This value
  * can be changed at boot time.
  */
-static int rcu_min_cached_objs = 2;
+static int rcu_min_cached_objs = 5;
 module_param(rcu_min_cached_objs, int, 0444);
 
 /* Retrieve RCU kthreads priority for rcutorture */
@@ -341,6 +341,14 @@ static bool rcu_dynticks_in_eqs(int snap)
        return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
+/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
+bool rcu_is_idle_cpu(int cpu)
+{
+       struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+       return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+}
+
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since
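Usage sketch (hypothetical caller, not part of this patch): the new rcu_is_idle_cpu() reports whether RCU's latest dynticks snapshot for the given CPU shows an extended quiescent state, so a caller could, for example, avoid IPIs to CPUs that RCU already regards as idle:

/* Illustration only; the function and the policy below are hypothetical. */
static void example_poke_non_idle_cpus(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		if (rcu_is_idle_cpu(cpu))
			continue;	/* In an RCU extended quiescent state. */
		smp_send_reschedule(cpu);
	}
}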
@@ -1152,7 +1160,7 @@ bool rcu_lockdep_current_cpu_online(void)
        preempt_disable_notrace();
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
-       if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+       if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
                ret = true;
        preempt_enable_notrace();
        return ret;
@@ -1717,6 +1725,7 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static bool rcu_gp_init(void)
 {
+       unsigned long firstseq;
        unsigned long flags;
        unsigned long oldmask;
        unsigned long mask;
@@ -1760,6 +1769,12 @@ static bool rcu_gp_init(void)
         */
        rcu_state.gp_state = RCU_GP_ONOFF;
        rcu_for_each_leaf_node(rnp) {
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+               firstseq = READ_ONCE(rnp->ofl_seq);
+               if (firstseq & 0x1)
+                       while (firstseq == READ_ONCE(rnp->ofl_seq))
+                               schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
+               smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
                raw_spin_lock(&rcu_state.ofl_lock);
                raw_spin_lock_irq_rcu_node(rnp);
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -3082,6 +3097,9 @@ struct kfree_rcu_cpu_work {
  *     In order to save some per-cpu space the list is singular.
  *     Even though it is lockless an access has to be protected by the
  *     per-cpu lock.
+ * @page_cache_work: A work to refill the cache when it is empty
+ * @work_in_progress: Indicates that page_cache_work is running
+ * @hrtimer: A hrtimer for scheduling a page_cache_work
  * @nr_bkv_objs: number of allocated objects at @bkvcache.
  *
  * This is a per-CPU structure.  The reason that it is not included in
@@ -3098,6 +3116,11 @@ struct kfree_rcu_cpu {
        bool monitor_todo;
        bool initialized;
        int count;
+
+       struct work_struct page_cache_work;
+       atomic_t work_in_progress;
+       struct hrtimer hrtimer;
+
        struct llist_head bkvcache;
        int nr_bkv_objs;
 };
@@ -3215,10 +3238,10 @@ static void kfree_rcu_work(struct work_struct *work)
                        }
                        rcu_lock_release(&rcu_callback_map);
 
-                       krcp = krc_this_cpu_lock(&flags);
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
                        if (put_cached_bnode(krcp, bkvhead[i]))
                                bkvhead[i] = NULL;
-                       krc_this_cpu_unlock(krcp, flags);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
                        if (bkvhead[i])
                                free_page((unsigned long) bkvhead[i]);
@@ -3345,6 +3368,57 @@ static void kfree_rcu_monitor(struct work_struct *work)
                raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
+static enum hrtimer_restart
+schedule_page_work_fn(struct hrtimer *t)
+{
+       struct kfree_rcu_cpu *krcp =
+               container_of(t, struct kfree_rcu_cpu, hrtimer);
+
+       queue_work(system_highpri_wq, &krcp->page_cache_work);
+       return HRTIMER_NORESTART;
+}
+
+static void fill_page_cache_func(struct work_struct *work)
+{
+       struct kvfree_rcu_bulk_data *bnode;
+       struct kfree_rcu_cpu *krcp =
+               container_of(work, struct kfree_rcu_cpu,
+                       page_cache_work);
+       unsigned long flags;
+       bool pushed;
+       int i;
+
+       for (i = 0; i < rcu_min_cached_objs; i++) {
+               bnode = (struct kvfree_rcu_bulk_data *)
+                       __get_free_page(GFP_KERNEL | __GFP_NOWARN);
+
+               if (bnode) {
+                       raw_spin_lock_irqsave(&krcp->lock, flags);
+                       pushed = put_cached_bnode(krcp, bnode);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+                       if (!pushed) {
+                               free_page((unsigned long) bnode);
+                               break;
+                       }
+               }
+       }
+
+       atomic_set(&krcp->work_in_progress, 0);
+}
+
+static void
+run_page_cache_worker(struct kfree_rcu_cpu *krcp)
+{
+       if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+                       !atomic_xchg(&krcp->work_in_progress, 1)) {
+               hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
+                       HRTIMER_MODE_REL);
+               krcp->hrtimer.function = schedule_page_work_fn;
+               hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+       }
+}
+
 static inline bool
 kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 {
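The three functions added above, together with the run_page_cache_worker() call added to kvfree_call_rcu() further down, form a deferred refill path for the per-CPU page cache. A descriptive summary of the flow (paraphrased from the hunks, not text from the patch):

/*
 * kvfree_call_rcu()                   caller context, raw krcp->lock held
 *   -> run_page_cache_worker()        arms a zero-delay hrtimer; guarded by
 *                                     atomic_xchg(&krcp->work_in_progress, 1)
 *     -> schedule_page_work_fn()      hrtimer handler: queue_work() on
 *                                     system_highpri_wq
 *       -> fill_page_cache_func()     process context: allocates up to
 *                                     rcu_min_cached_objs pages with
 *                                     GFP_KERNEL | __GFP_NOWARN and pushes
 *                                     them with put_cached_bnode()
 */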
@@ -3361,32 +3435,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
        if (!krcp->bkvhead[idx] ||
                        krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
                bnode = get_cached_bnode(krcp);
-               if (!bnode) {
-                       /*
-                        * To keep this path working on raw non-preemptible
-                        * sections, prevent the optional entry into the
-                        * allocator as it uses sleeping locks. In fact, even
-                        * if the caller of kfree_rcu() is preemptible, this
-                        * path still is not, as krcp->lock is a raw spinlock.
-                        * With additional page pre-allocation in the works,
-                        * hitting this return is going to be much less likely.
-                        */
-                       if (IS_ENABLED(CONFIG_PREEMPT_RT))
-                               return false;
-
-                       /*
-                        * NOTE: For one argument of kvfree_rcu() we can
-                        * drop the lock and get the page in sleepable
-                        * context. That would allow to maintain an array
-                        * for the CONFIG_PREEMPT_RT as well if no cached
-                        * pages are available.
-                        */
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-               }
-
                /* Switch to emergency path. */
-               if (unlikely(!bnode))
+               if (!bnode)
                        return false;
 
                /* Initialize the new block. */
@@ -3450,12 +3500,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
                goto unlock_return;
        }
 
-       /*
-        * Under high memory pressure GFP_NOWAIT can fail,
-        * in that case the emergency path is maintained.
-        */
        success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
        if (!success) {
+               run_page_cache_worker(krcp);
+
                if (head == NULL)
                        // Inline if kvfree_rcu(one_arg) call.
                        goto unlock_return;
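For reference, a usage sketch of the two kvfree_rcu() forms that reach this code; the structure and names below are illustrative only and not part of the patch:

struct example_obj {
	int payload;
	struct rcu_head rcu;
};

static void example_release(struct example_obj *obj, void *buf)
{
	/* Two-argument form: uses the embedded rcu_head, never sleeps. */
	kvfree_rcu(obj, rcu);

	/*
	 * One-argument form ("kvfree_rcu(one_arg)" above): no rcu_head, so
	 * under memory pressure it may fall back to synchronize_rcu() and
	 * therefore must be called from sleepable context.
	 */
	kvfree_rcu(buf);
}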
@@ -3565,7 +3613,7 @@ void __init kfree_rcu_scheduler_running(void)
  * During early boot, any blocking grace-period wait automatically
  * implies a grace period.  Later on, this is never the case for PREEMPTION.
  *
- * Howevr, because a context switch is a grace period for !PREEMPTION, any
+ * However, because a context switch is a grace period for !PREEMPTION, any
  * blocking grace-period wait automatically implies a grace period if
  * there is only one CPU online at any point time during execution of
  * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
@@ -4069,6 +4117,9 @@ void rcu_cpu_starting(unsigned int cpu)
 
        rnp = rdp->mynode;
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
        newcpu = !(rnp->expmaskinitnext & mask);
@@ -4088,6 +4139,9 @@ void rcu_cpu_starting(unsigned int cpu)
        } else {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
        smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4115,6 +4169,9 @@ void rcu_report_dead(unsigned int cpu)
 
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
        raw_spin_lock(&rcu_state.ofl_lock);
        raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
        rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4127,6 +4184,9 @@ void rcu_report_dead(unsigned int cpu)
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        raw_spin_unlock(&rcu_state.ofl_lock);
+       smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+       WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+       WARN_ON_ONCE(rnp->ofl_seq & 0x1);
 
        rdp->cpu_started = false;
 }
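Taken together, the ->ofl_seq updates in rcu_cpu_starting() and rcu_report_dead() and the wait loop added to rcu_gp_init() implement a seqcount-style handshake around CPU-hotplug updates (the ->ofl_seq field itself is assumed to be added to struct rcu_node in a companion rcu/tree.h change not shown here). A descriptive sketch of the protocol:

/*
 * Writer side (CPU-hotplug path), as in rcu_cpu_starting()/rcu_report_dead():
 *
 *	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);	// now odd: update in progress
 *	smp_mb();
 *	... update ->qsmaskinitnext and friends ...
 *	smp_mb();
 *	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);	// back to even: update done
 *
 * Reader side (grace-period kthread), as in rcu_gp_init():
 *
 *	smp_mb();
 *	seq = READ_ONCE(rnp->ofl_seq);
 *	if (seq & 0x1)					// odd: hotplug update in flight
 *		while (seq == READ_ONCE(rnp->ofl_seq))
 *			schedule_timeout_idle(1);
 *	smp_mb();
 *
 * This keeps the grace-period kthread from sampling ->qsmaskinitnext in the
 * middle of an online/offline transition, and rcu_lockdep_current_cpu_online()
 * treats an odd ->ofl_seq as "CPU is online enough for RCU".
 */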
@@ -4463,24 +4523,14 @@ static void __init kfree_rcu_batch_init(void)
 
        for_each_possible_cpu(cpu) {
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
-               struct kvfree_rcu_bulk_data *bnode;
 
                for (i = 0; i < KFREE_N_BATCHES; i++) {
                        INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
                        krcp->krw_arr[i].krcp = krcp;
                }
 
-               for (i = 0; i < rcu_min_cached_objs; i++) {
-                       bnode = (struct kvfree_rcu_bulk_data *)
-                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-
-                       if (bnode)
-                               put_cached_bnode(krcp, bnode);
-                       else
-                               pr_err("Failed to preallocate for %d CPU!\n", cpu);
-               }
-
                INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+               INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
                krcp->initialized = true;
        }
        if (register_shrinker(&kfree_rcu_shrinker))