softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT
author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Thu, 4 Sep 2025 14:25:25 +0000 (16:25 +0200)
committer  Thomas Gleixner <tglx@linutronix.de>
Wed, 17 Sep 2025 14:25:41 +0000 (16:25 +0200)
Softirqs are preemptible on PREEMPT_RT, but the individual sections which
disable bottom halves are synchronised against each other. This in turn
means that a forced-threaded interrupt cannot preempt another
forced-threaded interrupt. Instead it PI-boosts the other handler and waits
for its completion.

This is required because code within a softirq section is assumed to be
non-preemptible and may expect exclusive access to per-CPU resources
such as variables or pinned timers.
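
As an illustration, a forced-threaded handler that relies on this
serialisation could look like the sketch below. The handler and its per-CPU
variable are hypothetical; only local_bh_disable()/local_bh_enable() are the
real interface:

  #include <linux/interrupt.h>
  #include <linux/percpu.h>

  /* Hypothetical per-CPU state, only touched with bottom halves disabled. */
  static DEFINE_PER_CPU(unsigned int, foo_events);

  static irqreturn_t foo_irq_thread(int irq, void *dev_id)
  {
          /*
           * On PREEMPT_RT this used to acquire the per-CPU softirq_ctrl
           * lock: another forced-threaded handler entering a BH disabled
           * section on this CPU would block here and PI-boost this task
           * instead of preempting the section.
           */
          local_bh_disable();
          __this_cpu_inc(foo_events);
          local_bh_enable();
          return IRQ_HANDLED;
  }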

Code with such expectations has been identified and updated to use
local_lock_nested_bh() to protect the per-CPU resource in question. This
means the softirq lock can be removed.
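
A minimal sketch of such a conversion, with a made-up per-CPU structure
standing in for the real users (local_lock_nested_bh() serialises only
against other bottom-half users of the same per-CPU data and must be called
in a section which already has bottom halves disabled):

  #include <linux/local_lock.h>
  #include <linux/percpu.h>

  /* Hypothetical per-CPU resource formerly covered by the softirq lock. */
  struct foo_pcpu {
          local_lock_t    bh_lock;
          unsigned long   bytes;
  };

  static DEFINE_PER_CPU(struct foo_pcpu, foo_pcpu) = {
          .bh_lock = INIT_LOCAL_LOCK(bh_lock),
  };

  /* Runs in softirq context or with bottom halves otherwise disabled. */
  static void foo_account(unsigned long bytes)
  {
          local_lock_nested_bh(&foo_pcpu.bh_lock);
          __this_cpu_add(foo_pcpu.bytes, bytes);
          local_unlock_nested_bh(&foo_pcpu.bh_lock);
  }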

Disable the softirq synchronisation, but add a new config switch
CONFIG_PREEMPT_RT_NEEDS_BH_LOCK which allows re-enabling the synchronised
behaviour in case issues show up which have not been detected yet.

The softirq_ctrl.cnt accounting remains in place to let the NOHZ code know
whether softirqs are currently being handled.
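
That counter is what, for instance, local_bh_blocked() in kernel/softirq.c
reads; the NOHZ/idle code uses it to check whether a preempted task
currently has bottom halves disabled:

  bool local_bh_blocked(void)
  {
          return __this_cpu_read(softirq_ctrl.cnt) != 0;
  }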

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/Kconfig.preempt
kernel/softirq.c

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 54ea59f..da32680 100644
@@ -103,6 +103,19 @@ config PREEMPT_RT
          Select this if you are building a kernel for systems which
          require real-time guarantees.
 
+config PREEMPT_RT_NEEDS_BH_LOCK
+       bool "Enforce softirq synchronisation on PREEMPT_RT"
+       depends on PREEMPT_RT
+       help
+         Enforce synchronisation across the softirq context. On PREEMPT_RT
+         softirqs are preemptible. This option enforces the same per-CPU
+         BKL semantics that non-PREEMPT_RT builds have. It should not be
+         needed because per-CPU locks were added to avoid the per-CPU BKL.
+
+         This switch provides the old behaviour for testing purposes. Select
+         it if you suspect a problem with preemptible softirqs and want to
+         test the old synchronised behaviour.
+
 config PREEMPT_COUNT
        bool
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 4e2c980..7719891 100644
@@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
        /* First entry of a task into a BH disabled section? */
        if (!current->softirq_disable_cnt) {
                if (preemptible()) {
-                       local_lock(&softirq_ctrl.lock);
+                       if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+                               local_lock(&softirq_ctrl.lock);
+                       else
+                               migrate_disable();
+
                        /* Required to meet the RCU bottomhalf requirements. */
                        rcu_read_lock();
                } else {
@@ -177,17 +181,34 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
         * Track the per CPU softirq disabled state. On RT this is per CPU
         * state to allow preemption of bottom half disabled sections.
         */
-       newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
-       /*
-        * Reflect the result in the task state to prevent recursion on the
-        * local lock and to make softirq_count() & al work.
-        */
-       current->softirq_disable_cnt = newcnt;
+       if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+               newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);
+               /*
+                * Reflect the result in the task state to prevent recursion on the
+                * local lock and to make softirq_count() & al work.
+                */
+               current->softirq_disable_cnt = newcnt;
 
-       if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
-               raw_local_irq_save(flags);
-               lockdep_softirqs_off(ip);
-               raw_local_irq_restore(flags);
+               if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
+                       raw_local_irq_save(flags);
+                       lockdep_softirqs_off(ip);
+                       raw_local_irq_restore(flags);
+               }
+       } else {
+               bool sirq_dis = false;
+
+               if (!current->softirq_disable_cnt)
+                       sirq_dis = true;
+
+               this_cpu_add(softirq_ctrl.cnt, cnt);
+               current->softirq_disable_cnt += cnt;
+               WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+
+               if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) {
+                       raw_local_irq_save(flags);
+                       lockdep_softirqs_off(ip);
+                       raw_local_irq_restore(flags);
+               }
        }
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);
@@ -195,23 +216,42 @@ EXPORT_SYMBOL(__local_bh_disable_ip);
 static void __local_bh_enable(unsigned int cnt, bool unlock)
 {
        unsigned long flags;
+       bool sirq_en = false;
        int newcnt;
 
-       DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
-                           this_cpu_read(softirq_ctrl.cnt));
+       if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+               DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
+                                   this_cpu_read(softirq_ctrl.cnt));
+               if (softirq_count() == cnt)
+                       sirq_en = true;
+       } else {
+               if (current->softirq_disable_cnt == cnt)
+                       sirq_en = true;
+       }
 
-       if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
+       if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) {
                raw_local_irq_save(flags);
                lockdep_softirqs_on(_RET_IP_);
                raw_local_irq_restore(flags);
        }
 
-       newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
-       current->softirq_disable_cnt = newcnt;
+       if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+               newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt);
+               current->softirq_disable_cnt = newcnt;
 
-       if (!newcnt && unlock) {
-               rcu_read_unlock();
-               local_unlock(&softirq_ctrl.lock);
+               if (!newcnt && unlock) {
+                       rcu_read_unlock();
+                       local_unlock(&softirq_ctrl.lock);
+               }
+       } else {
+               current->softirq_disable_cnt -= cnt;
+               this_cpu_sub(softirq_ctrl.cnt, cnt);
+               if (unlock && !current->softirq_disable_cnt) {
+                       migrate_enable();
+                       rcu_read_unlock();
+               } else {
+                       WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+               }
        }
 }
 
@@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
        lock_map_release(&bh_lock_map);
 
        local_irq_save(flags);
-       curcnt = __this_cpu_read(softirq_ctrl.cnt);
+       if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+               curcnt = this_cpu_read(softirq_ctrl.cnt);
+       else
+               curcnt = current->softirq_disable_cnt;
 
        /*
         * If this is not reenabling soft interrupts, no point in trying to