genirq: Add support for warning on long-running interrupt handlers
author Wladislav Wiebe <wladislav.wiebe@nokia.com>
Mon, 4 Aug 2025 09:35:25 +0000 (11:35 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Wed, 3 Sep 2025 14:10:40 +0000 (16:10 +0200)
Introduce a mechanism to detect and warn about prolonged interrupt handlers.
With a new command-line parameter (irqhandler.duration_warn_us=), users can
configure a duration threshold in microseconds. When a handler runs longer
than that threshold, a warning in the following format is emitted:

"[CPU14] long duration of IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"

The implementation uses local_clock() to measure the execution duration of the
generic IRQ per-CPU event handler.

Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/all/20250804093525.851-1-wladislav.wiebe@nokia.com
Documentation/admin-guide/kernel-parameters.txt
kernel/irq/handle.c

index 747a55a..bdbc44f 100644 (file)
                        for it. Intended to get systems with badly broken
                        firmware running.
 
+       irqhandler.duration_warn_us= [KNL]
+                       Warn if an IRQ handler exceeds the specified duration
+                       threshold in microseconds. Useful for identifying
+                       long-running IRQs in the system.
+
        irqpoll         [HW]
                        When an interrupt is not handled search all handlers
                        for it. Also check all handlers each timer
index 9489f93..e103451 100644 (file)
@@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
        wake_up_process(action->thread);
 }
 
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); /* enabled by the boot parameter */
+static u64 irqhandler_duration_threshold_ns __ro_after_init; /* warning threshold in nanoseconds */
+
+/*
+ * Parse the "irqhandler.duration_warn_us=" boot parameter.
+ *
+ * @arg: threshold in microseconds, as a string handed to kstrtoul() with
+ *       base 0.
+ *
+ * On success the threshold is stored in nanoseconds and the static key that
+ * guards the duration check is enabled.
+ *
+ * Returns 1 if the value was accepted, 0 if it could not be parsed or was
+ * zero (an error is logged in both cases).
+ */
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+       unsigned long val;
+       int ret;
+
+       ret = kstrtoul(arg, 0, &val);
+       if (ret) {
+               pr_err("Unable to parse irqhandler.duration_warn_us setting: ret=%d\n", ret);
+               return 0;
+       }
+
+       if (!val) {
+               pr_err("Invalid irqhandler.duration_warn_us setting, must be > 0\n");
+               return 0;
+       }
+
+       /*
+        * Widen before scaling: unsigned long is only 32 bits wide on 32-bit
+        * architectures, where the multiplication would otherwise wrap for
+        * thresholds above roughly 4.29 seconds.
+        */
+       irqhandler_duration_threshold_ns = (u64)val * NSEC_PER_USEC;
+       static_branch_enable(&irqhandler_duration_check_enabled);
+
+       return 1;
+}
+__setup("irqhandler.duration_warn_us=", irqhandler_duration_check_setup);
+
+/*
+ * Emit a rate-limited warning if the handler of @action took too long.
+ *
+ * @ts_start: local_clock() value sampled by the caller before the handler ran
+ * @irq:      interrupt number, printed in the warning
+ * @action:   action whose ->handler is printed in the warning
+ *
+ * The elapsed time is compared against irqhandler_duration_threshold_ns and
+ * reported in microseconds.
+ */
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+                                            const struct irqaction *action)
+{
+       u64 elapsed_ns = local_clock() - ts_start;
+
+       /* Common case: the handler stayed within the threshold. */
+       if (likely(elapsed_ns <= irqhandler_duration_threshold_ns))
+               return;
+
+       pr_warn_ratelimited("[CPU%u] long duration of IRQ[%u:%ps], took: %llu us\n",
+                           smp_processor_id(), irq, action->handler,
+                           div_u64(elapsed_ns, NSEC_PER_USEC));
+}
+
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 {
        irqreturn_t retval = IRQ_NONE;
@@ -155,7 +193,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
                        lockdep_hardirq_threaded();
 
                trace_irq_handler_entry(irq, action);
-               res = action->handler(irq, action->dev_id);
+
+               if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
+                       u64 ts_start = local_clock();
+
+                       res = action->handler(irq, action->dev_id);
+                       irqhandler_duration_check(ts_start, irq, action);
+               } else {
+                       res = action->handler(irq, action->dev_id);
+               }
+
                trace_irq_handler_exit(irq, action, res);
 
                if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",