net: minor optimization in __alloc_skb()
authorEric Dumazet <edumazet@google.com>
Thu, 7 Jul 2022 19:18:46 +0000 (19:18 +0000)
committerDavid S. Miller <davem@davemloft.net>
Fri, 8 Jul 2022 13:21:08 +0000 (14:21 +0100)
TCP allocates 'fast clones' skbs for packets in tx queues.

Currently, __alloc_skb() initializes the companion fclone
field to SKB_FCLONE_CLONE, and leaves other fields untouched.

It makes sense to defer this initialization until skb_clone(),
because at that point all fclone fields have just been copied
and are hot in the cpu caches.

This removes one cache line miss in __alloc_skb(), a cost seen
on a host with 256 cpus all competing on memory accesses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/core/skbuff.c

index c62e42d..c4a7517 100644 (file)
@@ -454,8 +454,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
                skb->fclone = SKB_FCLONE_ORIG;
                refcount_set(&fclones->fclone_ref, 1);
-
-               fclones->skb2.fclone = SKB_FCLONE_CLONE;
        }
 
        return skb;
@@ -1513,6 +1511,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
            refcount_read(&fclones->fclone_ref) == 1) {
                n = &fclones->skb2;
                refcount_set(&fclones->fclone_ref, 2);
+               n->fclone = SKB_FCLONE_CLONE;
        } else {
                if (skb_pfmemalloc(skb))
                        gfp_mask |= __GFP_MEMALLOC;