net/mlx5e: Support multiple SKBs in a TX WQE
author: Maxim Mikityanskiy <maximmi@mellanox.com>
Thu, 16 Apr 2020 08:30:33 +0000 (11:30 +0300)
committer: Saeed Mahameed <saeedm@nvidia.com>
Tue, 22 Sep 2020 02:41:15 +0000 (19:41 -0700)
TX MPWQE support for SKBs is coming in one of the following patches, and
a single MPWQE can send multiple SKBs. This commit prepares the TX path
code to handle such cases:

1. An additional FIFO for SKBs is added, just like the FIFO for DMA
chunks.

2. struct mlx5e_tx_wqe_info will contain num_fifo_pkts. If a given WQE
contains only one packet, num_fifo_pkts will be zero, and the SKB will
be stored in mlx5e_tx_wqe_info, as usual. If num_fifo_pkts > 0, the SKB
pointer will be NULL, and the SKBs will be stored in the FIFO.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

When compiled with a recent GCC, this change shows no visible
performance impact on UDP pktgen (burst 32) single stream test either:
  Packet rate: 16.95 Mpps (±0.15 Mpps) -> 16.96 Mpps (±0.12 Mpps)
  Instructions per packet: 429 -> 421
  Cycles per packet: 160 -> 156
  Instructions per cycle: 2.69 -> 2.70

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

index 95aab8b..04c6ff2 100644 (file)
@@ -318,11 +318,13 @@ struct mlx5e_txqsq {
 
        /* dirtied @completion */
        u16                        cc;
+       u16                        skb_fifo_cc;
        u32                        dma_fifo_cc;
        struct dim                 dim; /* Adaptive Moderation */
 
        /* dirtied @xmit */
        u16                        pc ____cacheline_aligned_in_smp;
+       u16                        skb_fifo_pc;
        u32                        dma_fifo_pc;
 
        struct mlx5e_cq            cq;
@@ -330,9 +332,11 @@ struct mlx5e_txqsq {
        /* read only */
        struct mlx5_wq_cyc         wq;
        u32                        dma_fifo_mask;
+       u16                        skb_fifo_mask;
        struct mlx5e_sq_stats     *stats;
        struct {
                struct mlx5e_sq_dma       *dma_fifo;
+               struct sk_buff           **skb_fifo;
                struct mlx5e_tx_wqe_info  *wqe_info;
        } db;
        void __iomem              *uar_map;
index 277725c..03fe923 100644 (file)
@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info {
        u32 num_bytes;
        u8 num_wqebbs;
        u8 num_dma;
+       u8 num_fifo_pkts;
 #ifdef CONFIG_MLX5_EN_TLS
        struct page *resync_dump_frag_page;
 #endif
@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
        dma->type = map_type;
 }
 
+static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
+{
+       return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
+}
+
+static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+       struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
+
+       *skb_item = skb;
+}
+
+static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
+{
+       return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
+}
+
 static inline void
 mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
 {
index fcfb156..7521c9b 100644 (file)
@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                           struct mlx5e_tx_wqe_info *wi,
                                           u32 *dma_fifo_cc);
-static inline void
+static inline bool
 mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                          struct mlx5e_tx_wqe_info *wi,
                                          u32 *dma_fifo_cc)
 {
-       if (unlikely(wi->resync_dump_frag_page))
+       if (unlikely(wi->resync_dump_frag_page)) {
                mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+               return true;
+       }
+       return false;
 }
 #else
-static inline void
+static inline bool
 mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                          struct mlx5e_tx_wqe_info *wi,
                                          u32 *dma_fifo_cc)
 {
+       return false;
 }
 
 #endif /* CONFIG_MLX5_EN_TLS */
index b057a6c..c331aa9 100644 (file)
@@ -1047,6 +1047,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
 static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
 {
        kvfree(sq->db.wqe_info);
+       kvfree(sq->db.skb_fifo);
        kvfree(sq->db.dma_fifo);
 }
 
@@ -1058,15 +1059,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
        sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
                                                   sizeof(*sq->db.dma_fifo)),
                                        GFP_KERNEL, numa);
+       sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
+                                                  sizeof(*sq->db.skb_fifo)),
+                                       GFP_KERNEL, numa);
        sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
                                                   sizeof(*sq->db.wqe_info)),
                                        GFP_KERNEL, numa);
-       if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+       if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
                mlx5e_free_txqsq_db(sq);
                return -ENOMEM;
        }
 
        sq->dma_fifo_mask = df_sz - 1;
+       sq->skb_fifo_mask = df_sz - 1;
 
        return 0;
 }
index aea3039..857d1c0 100644 (file)
@@ -342,6 +342,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                .num_bytes = attr->num_bytes,
                .num_dma = num_dma,
                .num_wqebbs = wqe_attr->num_wqebbs,
+               .num_fifo_pkts = 0,
        };
 
        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
@@ -489,6 +490,18 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        napi_consume_skb(skb, napi_budget);
 }
 
+static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+                                         struct mlx5_cqe64 *cqe, int napi_budget)
+{
+       int i;
+
+       for (i = 0; i < wi->num_fifo_pkts; i++) {
+               struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
+
+               mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+       }
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_sq_stats *stats;
@@ -534,26 +547,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
                do {
-                       struct sk_buff *skb;
-
                        last_wqe = (sqcc == wqe_counter);
 
                        ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
                        wi = &sq->db.wqe_info[ci];
-                       skb = wi->skb;
 
                        sqcc += wi->num_wqebbs;
 
-                       if (unlikely(!skb)) {
-                               mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+                       if (likely(wi->skb)) {
+                               mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                               mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+
+                               npkts++;
+                               nbytes += wi->num_bytes;
                                continue;
                        }
 
-                       mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
-                       mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+                       if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+                                                                              &dma_fifo_cc)))
+                               continue;
 
-                       npkts++;
-                       nbytes += wi->num_bytes;
+                       if (wi->num_fifo_pkts) {
+                               mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                               mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
+
+                               npkts += wi->num_fifo_pkts;
+                               nbytes += wi->num_bytes;
+                       }
                } while (!last_wqe);
 
                if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
@@ -592,12 +612,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
 
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+       int i;
+
+       for (i = 0; i < wi->num_fifo_pkts; i++)
+               dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
+}
+
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
 {
        struct mlx5e_tx_wqe_info *wi;
        u32 dma_fifo_cc, nbytes = 0;
        u16 ci, sqcc, npkts = 0;
-       struct sk_buff *skb;
 
        sqcc = sq->cc;
        dma_fifo_cc = sq->dma_fifo_cc;
@@ -605,20 +632,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
        while (sqcc != sq->pc) {
                ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
                wi = &sq->db.wqe_info[ci];
-               skb = wi->skb;
 
                sqcc += wi->num_wqebbs;
 
-               if (!skb) {
-                       mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+               if (likely(wi->skb)) {
+                       mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                       dev_kfree_skb_any(wi->skb);
+
+                       npkts++;
+                       nbytes += wi->num_bytes;
                        continue;
                }
 
-               mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
-               dev_kfree_skb_any(skb);
+               if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+                       continue;
 
-               npkts++;
-               nbytes += wi->num_bytes;
+               if (wi->num_fifo_pkts) {
+                       mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                       mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
+
+                       npkts += wi->num_fifo_pkts;
+                       nbytes += wi->num_bytes;
+               }
        }
 
        sq->dma_fifo_cc = dma_fifo_cc;