net: fix possible race in skb_attempt_defer_free()
author    Eric Dumazet <edumazet@google.com>
          Mon, 16 May 2022 04:24:53 +0000 (21:24 -0700)
committer David S. Miller <davem@davemloft.net>
          Mon, 16 May 2022 10:33:59 +0000 (11:33 +0100)
A CPU can observe sd->defer_count reaching 128
and call smp_call_function_single_async().

The problem is that the remote CPU can clear sd->defer_count
before the IPI has been run/acknowledged.

Other CPUs can then queue more packets and also decide
to call smp_call_function_single_async() while the pending
IPI has not yet been delivered.

This is a common issue with smp_call_function_single_async().
Callers must ensure correct synchronization and serialization.
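
The discipline adopted by this patch can be illustrated outside the
kernel. The sketch below is only a userspace analogue (C11 atomics and
pthreads; the names and the fake "IPI" are assumptions, not kernel
code): whichever caller wins a compare-and-swap on a "scheduled" flag
issues the single kick, and the flag is cleared only by the handler,
so a second kick cannot be issued while one is still in flight.

/* Userspace analogue of the "one kick in flight" discipline.
 * Illustrative only; build with: cc -std=c11 -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int ipi_scheduled;	/* 0 = no kick pending, 1 = kick pending */
static atomic_int kicks_issued;

static void fake_ipi_handler(void)
{
	/* ... the real handler raises NET_RX_SOFTIRQ here ... */
	atomic_store_explicit(&ipi_scheduled, 0, memory_order_release);
}

static void *producer(void *arg)
{
	int expected = 0;

	/* Only the thread that flips 0 -> 1 may issue the kick. */
	if (atomic_compare_exchange_strong(&ipi_scheduled, &expected, 1))
		atomic_fetch_add(&kicks_issued, 1);
	return NULL;
}

int main(void)
{
	pthread_t t[8];

	for (int i = 0; i < 8; i++)
		pthread_create(&t[i], NULL, producer, NULL);
	for (int i = 0; i < 8; i++)
		pthread_join(t[i], NULL);

	printf("kicks issued: %d\n", atomic_load(&kicks_issued)); /* always 1 */
	fake_ipi_handler();	/* clearing the flag allows the next kick */
	return 0;
}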

I triggered this issue while experimenting with a smaller threshold.
Performing the call to smp_call_function_single_async()
under sd->defer_lock protection did not solve the problem.

Commit 5a18ceca6350 ("smp: Allow smp_call_function_single_async()
to insert locked csd") replaced an informative WARN_ON_ONCE()
with a return of -EBUSY, which is often ignored.
Testing for CSD_FLAG_LOCK presence is racy anyway.
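
Why a bare "is it already scheduled?" check is not enough: between the
load and the call, another CPU can pass the same check. The toy program
below is an illustration only (userspace C, made-up names; the barrier
merely widens the check-then-act window so the outcome is
deterministic): two threads both pass a plain check and both submit,
while a compare-and-swap admits exactly one.

/* Toy contrast between a racy check-then-set and a cmpxchg guard.
 * Illustrative userspace code only; build: cc -std=c11 -pthread toctou.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_barrier_t barrier;

static atomic_int plain_flag;		/* checked, then set: racy */
static atomic_int plain_submits;

static atomic_int cas_flag;		/* claimed with compare-and-swap */
static atomic_int cas_submits;

static void *worker(void *arg)
{
	int expected = 0;

	if (!atomic_load_explicit(&plain_flag, memory_order_relaxed)) {
		/* both threads can reach this point having seen 0 ... */
		pthread_barrier_wait(&barrier);
		atomic_store_explicit(&plain_flag, 1, memory_order_relaxed);
		atomic_fetch_add(&plain_submits, 1);	/* ... and both submit */
	}

	/* only one thread can flip 0 -> 1 */
	if (atomic_compare_exchange_strong(&cas_flag, &expected, 1))
		atomic_fetch_add(&cas_submits, 1);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_barrier_init(&barrier, NULL, 2);
	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	printf("check-then-set submissions: %d\n", atomic_load(&plain_submits));
	printf("compare-and-swap submissions: %d\n", atomic_load(&cas_submits));
	return 0;
}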

Fixes: 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/netdevice.h
net/core/dev.c
net/core/skbuff.c

index d57ce24..cbaf312 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3136,6 +3136,7 @@ struct softnet_data {
        /* Another possibly contended cache line */
        spinlock_t              defer_lock ____cacheline_aligned_in_smp;
        int                     defer_count;
+       int                     defer_ipi_scheduled;
        struct sk_buff          *defer_list;
        call_single_data_t      defer_csd;
 };
index d93456c..a5e663e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4582,9 +4582,12 @@ static void rps_trigger_softirq(void *data)
 #endif /* CONFIG_RPS */
 
 /* Called from hardirq (IPI) context */
-static void trigger_rx_softirq(void *data __always_unused)
+static void trigger_rx_softirq(void *data)
 {
+       struct softnet_data *sd = data;
+
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+       smp_store_release(&sd->defer_ipi_scheduled, 0);
 }
 
 /*
@@ -11382,7 +11385,7 @@ static int __init net_dev_init(void)
                INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
                sd->cpu = i;
 #endif
-               INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL);
+               INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
                spin_lock_init(&sd->defer_lock);
 
                init_gro_hash(&sd->backlog);
index 2fea964..b40c8cd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6516,8 +6516,7 @@ void skb_attempt_defer_free(struct sk_buff *skb)
        sd->defer_count++;
 
        /* kick every time queue length reaches 128.
-        * This should avoid blocking in smp_call_function_single_async().
-        * This condition should hardly be bit under normal conditions,
+        * This condition should hardly be hit under normal conditions,
         * unless cpu suddenly stopped to receive NIC interrupts.
         */
        kick = sd->defer_count == 128;
@@ -6527,6 +6526,6 @@ void skb_attempt_defer_free(struct sk_buff *skb)
        /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
         * if we are unlucky enough (this seems very unlikely).
         */
-       if (unlikely(kick))
+       if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
                smp_call_function_single_async(cpu, &sd->defer_csd);
 }
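
Put together, the fixed kick path behaves like the userspace simulation
below. It is only a sketch under stated assumptions (pthreads stand in
for CPUs, a spinning thread stands in for the IPI handler, the names
are made up): producers may race to the threshold, but at most one kick
is ever in flight until the handler clears defer_ipi_scheduled again.

/* Userspace simulation of the guarded kick path; illustrative only.
 * Build: cc -std=c11 -pthread defer_sim.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define THRESHOLD	128
#define PACKETS		100000
#define PRODUCERS	4

static atomic_int defer_count;
static atomic_int ipi_scheduled;	/* plays sd->defer_ipi_scheduled */
static atomic_int ipis_in_flight;
static atomic_int max_in_flight;
static atomic_bool done;

static void *ipi_handler(void *arg)	/* plays trigger_rx_softirq() */
{
	while (!atomic_load(&done)) {
		if (!atomic_load(&ipi_scheduled))
			continue;
		atomic_store(&defer_count, 0);	/* "drain" the deferred list */
		atomic_fetch_sub(&ipis_in_flight, 1);
		/* only now may a producer schedule the next kick */
		atomic_store_explicit(&ipi_scheduled, 0, memory_order_release);
	}
	return NULL;
}

static void *producer(void *arg)	/* plays skb_attempt_defer_free() */
{
	for (int i = 0; i < PACKETS; i++) {
		/* ">=" instead of "==" is a simulation simplification */
		bool kick = atomic_fetch_add(&defer_count, 1) + 1 >= THRESHOLD;
		int expected = 0;

		if (kick && atomic_compare_exchange_strong(&ipi_scheduled,
							   &expected, 1)) {
			int n = atomic_fetch_add(&ipis_in_flight, 1) + 1;
			int m = atomic_load(&max_in_flight);

			while (n > m &&
			       !atomic_compare_exchange_weak(&max_in_flight,
							     &m, n))
				;
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t handler, prod[PRODUCERS];

	pthread_create(&handler, NULL, ipi_handler, NULL);
	for (int i = 0; i < PRODUCERS; i++)
		pthread_create(&prod[i], NULL, producer, NULL);
	for (int i = 0; i < PRODUCERS; i++)
		pthread_join(prod[i], NULL);
	atomic_store(&done, true);
	pthread_join(handler, NULL);

	/* With the cmpxchg guard in place this always prints 1. */
	printf("max IPIs in flight: %d\n", atomic_load(&max_in_flight));
	return 0;
}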