net/mlx5e: TX, Improve performance under GSO workload
author Erez Alfasi <ereza@mellanox.com>
Tue, 14 May 2019 10:55:22 +0000 (13:55 +0300)
committer Saeed Mahameed <saeedm@mellanox.com>
Fri, 31 May 2019 20:04:26 +0000 (13:04 -0700)
__netdev_tx_sent_queue() was introduced by:
commit 3e59020abf0f ("net: bql: add __netdev_tx_sent_queue()")

BQL counters should be updated without flipping/caring about
BQL status, if the current skb has xmit_more set.

Using __netdev_tx_sent_queue() avoids messing with the BQL stop
flag and improves performance on GSO workloads by keeping
doorbells to the minimum required and by sparing atomic
operations.

Signed-off-by: Erez Alfasi <ereza@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

index 195a7d9..6fd6d53 100644 (file)
@@ -301,6 +301,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                     bool xmit_more)
 {
        struct mlx5_wq_cyc *wq = &sq->wq;
+       bool send_doorbell;
 
        wi->num_bytes = num_bytes;
        wi->num_dma = num_dma;
@@ -310,8 +311,6 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
        cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
-       netdev_tx_sent_queue(sq->txq, num_bytes);
-
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
@@ -321,7 +320,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                sq->stats->stopped++;
        }
 
-       if (!xmit_more || netif_xmit_stopped(sq->txq))
+       send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
+                                              xmit_more);
+       if (send_doorbell)
                mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
 }