net/mlx5e: RX, Test the XDP program existence out of the handler
author Tariq Toukan <tariqt@nvidia.com>
Wed, 19 Jan 2022 19:28:36 +0000 (21:28 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Thu, 17 Mar 2022 18:51:55 +0000 (11:51 -0700)
Instead of an early return inside mlx5e_xdp_handle(), let the caller check
whether an XDP program is loaded.  This saves a few unnecessary function
calls and calculations on the !prog path.

Performance test: single core, drop packets in iptables
Before: 3,872,504 pps
After:  3,975,628 pps (+2.66%)
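
As a minimal illustration of the pattern in plain userspace C (hypothetical
struct prog / handle() / rx_poll() names standing in for struct bpf_prog,
mlx5e_xdp_handle() and its RX callers; a sketch, not the driver code): once
the caller owns the !prog test, the no-program path skips both the call and
the buffer setup around it.

    /* Sketch: hoisting a NULL-check out of a hot-path helper.
     * Hypothetical names, not the mlx5e code itself.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct prog { int drop; };      /* stand-in for struct bpf_prog */

    /* After the change the helper assumes prog != NULL, like
     * mlx5e_xdp_handle() taking prog as an explicit argument.
     */
    static bool handle(const struct prog *prog, unsigned int *len)
    {
            *len -= 4;              /* stand-in for XDP processing */
            return prog->drop != 0; /* true: packet consumed */
    }

    static void rx_poll(const struct prog *prog, unsigned int len)
    {
            /* Hoisted check: with no program attached, neither the call
             * nor the per-packet setup that surrounds it in the real
             * driver is executed.
             */
            if (prog && handle(prog, &len)) {
                    puts("consumed by XDP");
                    return;
            }
            printf("build skb, len = %u\n", len);
    }

    int main(void)
    {
            struct prog drop_prog = { .drop = 1 };

            rx_poll(NULL, 1500);        /* !prog: handle() never called */
            rx_poll(&drop_prog, 1500);  /* program attached: consumed */
            return 0;
    }

In the actual patch below the same move also pushes the rcu_dereference()
of rq->xdp_prog into the callers, so xdp_buff setup and prefetch happen
only when a program is attached.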

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index a7f0203..fcb8497 100644
@@ -120,15 +120,12 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 
 /* returns true if packet was consumed by xdp */
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+                     struct bpf_prog *prog,
                      u32 *len, struct xdp_buff *xdp)
 {
-       struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
        u32 act;
        int err;
 
-       if (!prog)
-               return false;
-
        act = bpf_prog_run_xdp(prog, xdp);
        switch (act) {
        case XDP_PASS:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index c62f11d..850540e 100644
@@ -48,6 +48,7 @@
 struct mlx5e_xsk_param;
 int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+                     struct bpf_prog *prog,
                      u32 *len, struct xdp_buff *xdp);
 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 8e7b877..1625135 100644
@@ -4,6 +4,7 @@
 #include "rx.h"
 #include "en/xdp.h"
 #include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
 
 /* RX data path */
 
@@ -31,6 +32,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 {
        struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
        u32 cqe_bcnt32 = cqe_bcnt;
+       struct bpf_prog *prog;
 
        /* Check packet size. Note LRO doesn't use linear SKB */
        if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -65,7 +67,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
         * allocated first from the Reuse Ring, so it has enough space.
         */
 
-       if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+       prog = rcu_dereference(rq->xdp_prog);
+       if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt32, xdp))) {
                if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
                        __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
                return NULL; /* page/packet was consumed by XDP */
@@ -83,6 +86,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
                                              u32 cqe_bcnt)
 {
        struct xdp_buff *xdp = wi->di->xsk;
+       struct bpf_prog *prog;
 
        /* wi->offset is not used in this function, because xdp->data and the
         * DMA address point directly to the necessary place. Furthermore, the
@@ -101,7 +105,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
                return NULL;
        }
 
-       if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+       prog = rcu_dereference(rq->xdp_prog);
+       if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt, xdp)))
                return NULL; /* page/packet was consumed by XDP */
 
        /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b06aac0..60c640f 100644
@@ -34,6 +34,7 @@
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/bitmap.h>
+#include <linux/filter.h>
 #include <net/ip6_checksum.h>
 #include <net/page_pool.h>
 #include <net/inet_ecn.h>
@@ -1523,11 +1524,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 {
        struct mlx5e_dma_info *di = wi->di;
        u16 rx_headroom = rq->buff.headroom;
-       struct xdp_buff xdp;
+       struct bpf_prog *prog;
        struct sk_buff *skb;
+       u32 metasize = 0;
        void *va, *data;
        u32 frag_size;
-       u32 metasize;
 
        va             = page_address(di->page) + wi->offset;
        data           = va + rx_headroom;
@@ -1535,16 +1536,21 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 
        dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
                                      frag_size, DMA_FROM_DEVICE);
-       net_prefetchw(va); /* xdp_frame data area */
        net_prefetch(data);
 
-       mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
-       if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
-               return NULL; /* page/packet was consumed by XDP */
+       prog = rcu_dereference(rq->xdp_prog);
+       if (prog) {
+               struct xdp_buff xdp;
 
-       rx_headroom = xdp.data - xdp.data_hard_start;
+               net_prefetchw(va); /* xdp_frame data area */
+               mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+               if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt, &xdp))
+                       return NULL; /* page/packet was consumed by XDP */
+
+               rx_headroom = xdp.data - xdp.data_hard_start;
+               metasize = xdp.data - xdp.data_meta;
+       }
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
-       metasize = xdp.data - xdp.data_meta;
        skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
        if (unlikely(!skb))
                return NULL;
@@ -1842,11 +1848,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
        struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
        u16 rx_headroom = rq->buff.headroom;
        u32 cqe_bcnt32 = cqe_bcnt;
-       struct xdp_buff xdp;
+       struct bpf_prog *prog;
        struct sk_buff *skb;
+       u32 metasize = 0;
        void *va, *data;
        u32 frag_size;
-       u32 metasize;
 
        /* Check packet size. Note LRO doesn't use linear SKB */
        if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1860,19 +1866,24 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 
        dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
                                      frag_size, DMA_FROM_DEVICE);
-       net_prefetchw(va); /* xdp_frame data area */
        net_prefetch(data);
 
-       mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
-       if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
-               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-                       __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
-               return NULL; /* page/packet was consumed by XDP */
-       }
+       prog = rcu_dereference(rq->xdp_prog);
+       if (prog) {
+               struct xdp_buff xdp;
 
-       rx_headroom = xdp.data - xdp.data_hard_start;
+               net_prefetchw(va); /* xdp_frame data area */
+               mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
+               if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt32, &xdp)) {
+                       if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+                               __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+                       return NULL; /* page/packet was consumed by XDP */
+               }
+
+               rx_headroom = xdp.data - xdp.data_hard_start;
+               metasize = xdp.data - xdp.data_meta;
+       }
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
-       metasize = xdp.data - xdp.data_meta;
        skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize);
        if (unlikely(!skb))
                return NULL;