smp: Optimize send_call_function_single_ipi()
author Peter Zijlstra <peterz@infradead.org>
Tue, 26 May 2020 16:11:01 +0000 (18:11 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 28 May 2020 08:54:15 +0000 (10:54 +0200)
Just like the ttwu_queue_remote() IPI, make use of _TIF_POLLING_NRFLAG
to avoid sending IPIs to idle CPUs.
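
For reference, the polling handshake uses the pre-existing set_nr_if_polling()
helper in kernel/sched/core.c, which this patch does not modify. A simplified
sketch of its logic (not the verbatim source):

    static bool set_nr_if_polling(struct task_struct *p)
    {
            struct thread_info *ti = task_thread_info(p);
            typeof(ti->flags) old, val = READ_ONCE(ti->flags);

            for (;;) {
                    /* Not polling: the caller must send a real IPI. */
                    if (!(val & _TIF_POLLING_NRFLAG))
                            return false;
                    /* Reschedule already pending: the CPU wakes anyway. */
                    if (val & _TIF_NEED_RESCHED)
                            return true;
                    /* Set TIF_NEED_RESCHED, retrying if flags changed. */
                    old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
                    if (old == val)
                            break;
                    val = old;
            }
            return true;
    }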

[ mingo: Fix UP build bug. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161907.953304789@infradead.org
kernel/sched/core.c
kernel/sched/idle.c
kernel/sched/sched.h
kernel/smp.c

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2cacc1e..fa0d499 100644
@@ -2296,6 +2296,16 @@ static void wake_csd_func(void *info)
        sched_ttwu_pending();
 }
 
+void send_call_function_single_ipi(int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+       if (!set_nr_if_polling(rq->idle))
+               arch_send_call_function_single_ipi(cpu);
+       else
+               trace_sched_wake_idle_without_ipi(cpu);
+}
+
 /*
  * Queue a task on the target CPUs wake_list and wake the CPU via IPI if
  * necessary. The wakee CPU on receipt of the IPI will queue the task
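
The wakee side of this handshake is the polling idle loop: while it spins,
the CPU advertises _TIF_POLLING_NRFLAG, so setting TIF_NEED_RESCHED is enough
to get its attention. A minimal sketch of that pattern, using the existing
polling helpers (not code from this patch):

    __current_set_polling();        /* advertise _TIF_POLLING_NRFLAG */
    while (!need_resched())
            cpu_relax();            /* set_nr_if_polling() breaks this spin */
    __current_clr_polling();        /* from here on, a real IPI is needed */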
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index b743bf3..387fd75 100644
@@ -289,6 +289,11 @@ static void do_idle(void)
         */
        smp_mb__after_atomic();
 
+       /*
+        * RCU relies on this call to be done outside of an RCU read-side
+        * critical section.
+        */
+       flush_smp_call_function_from_idle();
        sched_ttwu_pending();
        schedule_idle();
 
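Because the IPI may now be elided, the queued functions must run when the
idle task wakes up on its own, which is exactly what the added call does.
The resulting tail of do_idle() looks roughly like this (abridged):

    __current_clr_polling();
    smp_mb__after_atomic();                 /* polling-clear visible first */
    flush_smp_call_function_from_idle();    /* run work queued while polling */
    sched_ttwu_pending();
    schedule_idle();
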
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3c163cb..75b0629 100644
@@ -1506,11 +1506,12 @@ static inline void unregister_sched_domain_sysctl(void)
 }
 #endif
 
-#else
+extern void flush_smp_call_function_from_idle(void);
 
+#else /* !CONFIG_SMP: */
+static inline void flush_smp_call_function_from_idle(void) { }
 static inline void sched_ttwu_pending(void) { }
-
-#endif /* CONFIG_SMP */
+#endif /* !CONFIG_SMP */
 
 #include "stats.h"
 #include "autogroup.h"
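
This is also where the "UP build bug" fix noted above lives: do_idle() now
calls flush_smp_call_function_from_idle() unconditionally, so !CONFIG_SMP
builds need the empty inline stub. Condensed, the resulting layout is:

    #ifdef CONFIG_SMP
    extern void flush_smp_call_function_from_idle(void);
    #else
    static inline void flush_smp_call_function_from_idle(void) { }
    #endif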
diff --git a/kernel/smp.c b/kernel/smp.c
index f720e38..9f11813 100644
@@ -135,6 +135,8 @@ static __always_inline void csd_unlock(call_single_data_t *csd)
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
+extern void send_call_function_single_ipi(int cpu);
+
 /*
  * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
@@ -178,7 +180,7 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
         * equipped to do the right thing...
         */
        if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-               arch_send_call_function_single_ipi(cpu);
+               send_call_function_single_ipi(cpu);
 
        return 0;
 }
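
Callers are unchanged and every single-CPU cross call benefits. For example,
a hypothetical caller using the long-standing smp_call_function_single() API:

    static void remote_work(void *info)
    {
            /* Runs on the target CPU, from IPI or idle-flush context. */
    }

    /* wait=1: return only once remote_work() has completed on @cpu. */
    smp_call_function_single(cpu, remote_work, NULL, 1);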
@@ -278,6 +280,18 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
        }
 }
 
+void flush_smp_call_function_from_idle(void)
+{
+       unsigned long flags;
+
+       if (llist_empty(this_cpu_ptr(&call_single_queue)))
+               return;
+
+       local_irq_save(flags);
+       flush_smp_call_function_queue(true);
+       local_irq_restore(flags);
+}
+
 /*
  * smp_call_function_single - Run a function on a specific CPU
  * @func: The function to run. This must be fast and non-blocking.
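
Two details worth noting: the llist_empty() check keeps the common idle exit
cheap when nothing is queued, and flush_smp_call_function_queue() expects to
run with interrupts disabled, hence the local_irq_save()/local_irq_restore()
pair. Putting both halves of the patch together (condensed from the hunks
above):

    /* Sender, in generic_exec_single(): IPI only if the queue was empty,
     * and even then it is skipped when the target is polling idle. */
    if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
            send_call_function_single_ipi(cpu);

    /* Wakee, on leaving the polling idle loop in do_idle(): */
    flush_smp_call_function_from_idle();    /* executes the queued csd's */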