rcu/exp: Make parallel exp gp kworker per rcu node
author    Frederic Weisbecker <frederic@kernel.org>
Fri, 12 Jan 2024 15:46:19 +0000 (16:46 +0100)
committer Boqun Feng <boqun.feng@gmail.com>
Wed, 14 Feb 2024 15:51:36 +0000 (07:51 -0800)
When CONFIG_RCU_EXP_KTHREAD=n, the expedited grace period per-node
initialization is performed in parallel via workqueues (one work item
per node).

However, when CONFIG_RCU_EXP_KTHREAD=y, this per-node initialization is
performed by a single kworker that serializes the node initializations
(one work item for all nodes).

The second approach is certainly less scalable and less efficient
beyond a single leaf node.

To improve this, expand the single kworker into per-node kworkers. This
new layout is eventually intended to allow removal of the
workqueue-based implementation, since that now essentially becomes
duplicate code.
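
For illustration only (not part of the patch), the layout change can be
sketched as a plain userspace C program using pthreads; the node type,
worker count, and names below are invented for the example:

    #include <pthread.h>
    #include <stdio.h>

    #define NR_NODES 4      /* stand-in for the number of leaf rcu_node structures */

    struct node {
            int id;
            pthread_t exp_kworker;  /* analogue of rnp->exp_kworker */
    };

    static void *node_gp_init(void *arg)
    {
            struct node *n = arg;

            /* Per-node expedited GP initialization would happen here. */
            printf("init node %d\n", n->id);
            return NULL;
    }

    int main(void)
    {
            struct node nodes[NR_NODES];
            int i;

            /*
             * Old CONFIG_RCU_EXP_KTHREAD=y layout: one worker runs the
             * node initializations back to back. New layout, as below:
             * one worker per node, so the initializations can run in
             * parallel.
             */
            for (i = 0; i < NR_NODES; i++) {
                    nodes[i].id = i;
                    pthread_create(&nodes[i].exp_kworker, NULL,
                                   node_gp_init, &nodes[i]);
            }
            for (i = 0; i < NR_NODES; i++)
                    pthread_join(nodes[i].exp_kworker, NULL);
            return 0;
    }

In the patch itself the per-node workers are kthread_workers created
once (at boot or CPU-online time) and reused, with work items queued to
them, rather than threads created per grace period.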

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
kernel/rcu/rcu.h
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_exp.h
kernel/rcu/tree_plugin.h

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f94f658..6beaf70 100644
@@ -625,7 +625,6 @@ void rcu_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
 #ifdef CONFIG_RCU_EXP_KTHREAD
 extern struct kthread_worker *rcu_exp_gp_kworker;
-extern struct kthread_worker *rcu_exp_par_gp_kworker;
 #else /* !CONFIG_RCU_EXP_KTHREAD */
 extern struct workqueue_struct *rcu_par_gp_wq;
 #endif /* CONFIG_RCU_EXP_KTHREAD */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 657ac12..398c099 100644
@@ -4396,33 +4396,39 @@ rcu_boot_init_percpu_data(int cpu)
 
 #ifdef CONFIG_RCU_EXP_KTHREAD
 struct kthread_worker *rcu_exp_gp_kworker;
-struct kthread_worker *rcu_exp_par_gp_kworker;
 
-static void __init rcu_start_exp_gp_kworkers(void)
+static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
 {
-       const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
-       const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
+       struct kthread_worker *kworker;
+       const char *name = "rcu_exp_par_gp_kthread_worker/%d";
        struct sched_param param = { .sched_priority = kthread_prio };
+       int rnp_index = rnp - rcu_get_root();
 
-       rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
-       if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
-               pr_err("Failed to create %s!\n", gp_kworker_name);
-               rcu_exp_gp_kworker = NULL;
+       if (rnp->exp_kworker)
+               return;
+
+       kworker = kthread_create_worker(0, name, rnp_index);
+       if (IS_ERR_OR_NULL(kworker)) {
+               pr_err("Failed to create par gp kworker on %d/%d\n",
+                      rnp->grplo, rnp->grphi);
                return;
        }
+       WRITE_ONCE(rnp->exp_kworker, kworker);
+       sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
+}
 
-       rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
-       if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
-               pr_err("Failed to create %s!\n", par_gp_kworker_name);
-               rcu_exp_par_gp_kworker = NULL;
-               kthread_destroy_worker(rcu_exp_gp_kworker);
+static void __init rcu_start_exp_gp_kworker(void)
+{
+       const char *name = "rcu_exp_gp_kthread_worker";
+       struct sched_param param = { .sched_priority = kthread_prio };
+
+       rcu_exp_gp_kworker = kthread_create_worker(0, name);
+       if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+               pr_err("Failed to create %s!\n", name);
                rcu_exp_gp_kworker = NULL;
                return;
        }
-
        sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
-       sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
-                                  &param);
 }
 
 static inline void rcu_alloc_par_gp_wq(void)
@@ -4431,7 +4437,11 @@ static inline void rcu_alloc_par_gp_wq(void)
 #else /* !CONFIG_RCU_EXP_KTHREAD */
 struct workqueue_struct *rcu_par_gp_wq;
 
-static void __init rcu_start_exp_gp_kworkers(void)
+static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
+{
+}
+
+static void __init rcu_start_exp_gp_kworker(void)
 {
 }
 
@@ -4442,6 +4452,17 @@ static inline void rcu_alloc_par_gp_wq(void)
 }
 #endif /* CONFIG_RCU_EXP_KTHREAD */
 
+static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp)
+{
+       if ((IS_ENABLED(CONFIG_RCU_EXP_KTHREAD) ||
+            IS_ENABLED(CONFIG_RCU_BOOST)) && rcu_scheduler_fully_active) {
+               mutex_lock(&rnp->kthread_mutex);
+               rcu_spawn_one_boost_kthread(rnp);
+               rcu_spawn_exp_par_gp_kworker(rnp);
+               mutex_unlock(&rnp->kthread_mutex);
+       }
+}
+
 /*
  * Invoked early in the CPU-online process, when pretty much all services
  * are available.  The incoming CPU is not present.
@@ -4490,7 +4511,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
        rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
        trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-       rcu_spawn_one_boost_kthread(rnp);
+       rcu_spawn_rnp_kthreads(rnp);
        rcu_spawn_cpu_nocb_kthread(cpu);
        WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
 
@@ -4812,10 +4833,10 @@ static int __init rcu_spawn_gp_kthread(void)
         * due to rcu_scheduler_fully_active.
         */
        rcu_spawn_cpu_nocb_kthread(smp_processor_id());
-       rcu_spawn_one_boost_kthread(rdp->mynode);
+       rcu_spawn_rnp_kthreads(rdp->mynode);
        rcu_spawn_core_kthreads();
        /* Create kthread worker for expedited GPs */
-       rcu_start_exp_gp_kworkers();
+       rcu_start_exp_gp_kworker();
        return 0;
 }
 early_initcall(rcu_spawn_gp_kthread);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 13e7b0d..e173808 100644
@@ -72,6 +72,9 @@ struct rcu_node {
                                /* Online CPUs for next expedited GP. */
                                /*  Any CPU that has ever been online will */
                                /*  have its bit set. */
+       struct kthread_worker *exp_kworker;
+                               /* Workers performing per node expedited GP */
+                               /* initialization. */
        unsigned long cbovldmask;
                                /* CPUs experiencing callback overload. */
        unsigned long ffmask;   /* Fully functional CPUs. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6123a60..0318a8a 100644
@@ -432,9 +432,9 @@ static inline bool rcu_exp_worker_started(void)
        return !!READ_ONCE(rcu_exp_gp_kworker);
 }
 
-static inline bool rcu_exp_par_worker_started(void)
+static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
 {
-       return !!READ_ONCE(rcu_exp_par_gp_kworker);
+       return !!READ_ONCE(rnp->exp_kworker);
 }
 
 static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
@@ -445,7 +445,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
         * another work item on the same kthread worker can result in
         * deadlock.
         */
-       kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
+       kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work);
 }
 
 static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
@@ -487,7 +487,7 @@ static inline bool rcu_exp_worker_started(void)
        return !!READ_ONCE(rcu_gp_wq);
 }
 
-static inline bool rcu_exp_par_worker_started(void)
+static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
 {
        return !!READ_ONCE(rcu_par_gp_wq);
 }
@@ -550,7 +550,7 @@ static void sync_rcu_exp_select_cpus(void)
                rnp->exp_need_flush = false;
                if (!READ_ONCE(rnp->expmask))
                        continue; /* Avoid early boot non-existent wq. */
-               if (!rcu_exp_par_worker_started() ||
+               if (!rcu_exp_par_worker_started(rnp) ||
                    rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
                    rcu_is_last_leaf_node(rnp)) {
                        /* No worker started yet or last leaf, do direct call. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d30767..09bdd36 100644
@@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
        struct sched_param sp;
        struct task_struct *t;
 
-       mutex_lock(&rnp->kthread_mutex);
-       if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
-               goto out;
+       if (rnp->boost_kthread_task)
+               return;
 
        t = kthread_create(rcu_boost_kthread, (void *)rnp,
                           "rcub/%d", rnp_index);
        if (WARN_ON_ONCE(IS_ERR(t)))
-               goto out;
+               return;
 
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        rnp->boost_kthread_task = t;
@@ -1210,9 +1209,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
        sp.sched_priority = kthread_prio;
        sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
        wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
-
- out:
-       mutex_unlock(&rnp->kthread_mutex);
 }
 
 /*
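
A note on the locking change in this last hunk, for readers comparing it
with the rcu_spawn_rnp_kthreads() hunk above: rcu_spawn_one_boost_kthread()
no longer takes rnp->kthread_mutex itself because its new caller holds
that mutex around both the boost kthread and the exp kworker spawns. A
hedged userspace sketch of this spawn-once-under-one-mutex pattern
(pthread-based, names invented for the example):

    #include <pthread.h>
    #include <stdio.h>

    struct node {
            pthread_mutex_t kthread_mutex;
            int boost_spawned;      /* stand-in for rnp->boost_kthread_task */
            int exp_spawned;        /* stand-in for rnp->exp_kworker */
    };

    static void spawn_boost(struct node *n)
    {
            if (n->boost_spawned)   /* idempotent: already spawned */
                    return;
            n->boost_spawned = 1;
            printf("boost kthread spawned\n");
    }

    static void spawn_exp(struct node *n)
    {
            if (n->exp_spawned)
                    return;
            n->exp_spawned = 1;
            printf("exp kworker spawned\n");
    }

    /*
     * Caller-side serialization, mirroring rcu_spawn_rnp_kthreads():
     * the helpers themselves take no lock; the single per-node mutex
     * serializes racing callers (CPU hotplug vs. the early initcall
     * in the patch).
     */
    static void spawn_node_kthreads(struct node *n)
    {
            pthread_mutex_lock(&n->kthread_mutex);
            spawn_boost(n);
            spawn_exp(n);
            pthread_mutex_unlock(&n->kthread_mutex);
    }

    int main(void)
    {
            struct node n = { .kthread_mutex = PTHREAD_MUTEX_INITIALIZER };

            spawn_node_kthreads(&n);
            spawn_node_kthreads(&n);        /* second call is a no-op */
            return 0;
    }

Because each helper stays idempotent (the early return on an
already-set field), both call sites in the patch remain safe and are
serialized by the one per-node mutex.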