net: ti: icssg_prueth: Add SW TX / RX Coalescing based on hrtimers
authorMD Danish Anwar <danishanwar@ti.com>
Tue, 30 Apr 2024 12:06:34 +0000 (17:36 +0530)
committerJakub Kicinski <kuba@kernel.org>
Thu, 2 May 2024 01:38:47 +0000 (18:38 -0700)
Add SW IRQ coalescing based on hrtimers for RX and TX data path for ICSSG
driver, which can be enabled by ethtool commands:

- RX coalescing
  ethtool -C eth1 rx-usecs 50

- TX coalescing can be enabled per TX queue

  - by default enables coalescing for TX0
  ethtool -C eth1 tx-usecs 50
  - configure TX0
  ethtool -Q eth0 queue_mask 1 --coalesce tx-usecs 100
  - configure TX1
  ethtool -Q eth0 queue_mask 2 --coalesce tx-usecs 100
  - configure TX0 and TX1
  ethtool -Q eth0 queue_mask 3 --coalesce tx-usecs 100 --coalesce
tx-usecs 100

Minimum value for both rx-usecs and tx-usecs is 20us.

Compared to gro_flush_timeout and napi_defer_hard_irqs this patch allows
to enable IRQ coalescing for RX path separately.

Benchmarking numbers:
 ===============================================================
| Method                  | Tput_TX | CPU_TX | Tput_RX | CPU_RX |
| ==============================================================
| Default Driver           943 Mbps    31%      517 Mbps  38%   |
| IRQ Coalescing (Patch)   943 Mbps    28%      518 Mbps  25%   |
 ===============================================================

Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240430120634.1558998-1-danishanwar@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/ti/icssg/icssg_common.c
drivers/net/ethernet/ti/icssg/icssg_ethtool.c
drivers/net/ethernet/ti/icssg/icssg_prueth.c
drivers/net/ethernet/ti/icssg/icssg_prueth.h

index 284f978..088ab80 100644 (file)
@@ -122,7 +122,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
 }
 
 int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
-                            int budget)
+                            int budget, bool *tdown)
 {
        struct net_device *ndev = emac->ndev;
        struct cppi5_host_desc_t *desc_tx;
@@ -145,6 +145,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
                if (cppi5_desc_is_tdcm(desc_dma)) {
                        if (atomic_dec_and_test(&emac->tdown_cnt))
                                complete(&emac->tdown_complete);
+                       *tdown = true;
                        break;
                }
 
@@ -190,19 +191,37 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
        return num_tx;
 }
 
+static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer)
+{
+       struct prueth_tx_chn *tx_chns =
+                       container_of(timer, struct prueth_tx_chn, tx_hrtimer);
+
+       enable_irq(tx_chns->irq);
+       return HRTIMER_NORESTART;
+}
+
 static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget)
 {
        struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx);
        struct prueth_emac *emac = tx_chn->emac;
+       bool tdown = false;
        int num_tx_packets;
 
-       num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget);
+       num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget,
+                                                 &tdown);
 
        if (num_tx_packets >= budget)
                return budget;
 
-       if (napi_complete_done(napi_tx, num_tx_packets))
-               enable_irq(tx_chn->irq);
+       if (napi_complete_done(napi_tx, num_tx_packets)) {
+               if (unlikely(tx_chn->tx_pace_timeout_ns && !tdown)) {
+                       hrtimer_start(&tx_chn->tx_hrtimer,
+                                     ns_to_ktime(tx_chn->tx_pace_timeout_ns),
+                                     HRTIMER_MODE_REL_PINNED);
+               } else {
+                       enable_irq(tx_chn->irq);
+               }
+       }
 
        return num_tx_packets;
 }
@@ -226,6 +245,9 @@ int prueth_ndev_add_tx_napi(struct prueth_emac *emac)
                struct prueth_tx_chn *tx_chn = &emac->tx_chns[i];
 
                netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll);
+               hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC,
+                            HRTIMER_MODE_REL_PINNED);
+               tx_chn->tx_hrtimer.function = &emac_tx_timer_callback;
                ret = request_irq(tx_chn->irq, prueth_tx_irq,
                                  IRQF_TRIGGER_HIGH, tx_chn->name,
                                  tx_chn);
@@ -871,8 +893,15 @@ int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget)
                        break;
        }
 
-       if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
-               enable_irq(emac->rx_chns.irq[rx_flow]);
+       if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+               if (unlikely(emac->rx_pace_timeout_ns)) {
+                       hrtimer_start(&emac->rx_hrtimer,
+                                     ns_to_ktime(emac->rx_pace_timeout_ns),
+                                     HRTIMER_MODE_REL_PINNED);
+               } else {
+                       enable_irq(emac->rx_chns.irq[rx_flow]);
+               }
+       }
 
        return num_rx;
 }
index ca20325..c8d0f45 100644 (file)
@@ -201,6 +201,93 @@ static void emac_get_rmon_stats(struct net_device *ndev,
        rmon_stats->hist_tx[4] = emac_get_stat_by_name(emac, "tx_bucket5_frames");
 }
 
+static int emac_get_coalesce(struct net_device *ndev,
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
+{
+       struct prueth_emac *emac = netdev_priv(ndev);
+       struct prueth_tx_chn *tx_chn;
+
+       tx_chn = &emac->tx_chns[0];
+
+       coal->rx_coalesce_usecs = emac->rx_pace_timeout_ns / 1000;
+       coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000;
+
+       return 0;
+}
+
+static int emac_get_per_queue_coalesce(struct net_device *ndev, u32 queue,
+                                      struct ethtool_coalesce *coal)
+{
+       struct prueth_emac *emac = netdev_priv(ndev);
+       struct prueth_tx_chn *tx_chn;
+
+       if (queue >= PRUETH_MAX_TX_QUEUES)
+               return -EINVAL;
+
+       tx_chn = &emac->tx_chns[queue];
+
+       coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000;
+
+       return 0;
+}
+
+static int emac_set_coalesce(struct net_device *ndev,
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
+{
+       struct prueth_emac *emac = netdev_priv(ndev);
+       struct prueth *prueth = emac->prueth;
+       struct prueth_tx_chn *tx_chn;
+
+       tx_chn = &emac->tx_chns[0];
+
+       if (coal->rx_coalesce_usecs &&
+           coal->rx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
+               dev_info(prueth->dev, "defaulting to min value of %dus for rx-usecs\n",
+                        ICSSG_MIN_COALESCE_USECS);
+               coal->rx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
+       }
+
+       if (coal->tx_coalesce_usecs &&
+           coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
+               dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs\n",
+                        ICSSG_MIN_COALESCE_USECS);
+               coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
+       }
+
+       emac->rx_pace_timeout_ns = coal->rx_coalesce_usecs * 1000;
+       tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000;
+
+       return 0;
+}
+
+static int emac_set_per_queue_coalesce(struct net_device *ndev, u32 queue,
+                                      struct ethtool_coalesce *coal)
+{
+       struct prueth_emac *emac = netdev_priv(ndev);
+       struct prueth *prueth = emac->prueth;
+       struct prueth_tx_chn *tx_chn;
+
+       if (queue >= PRUETH_MAX_TX_QUEUES)
+               return -EINVAL;
+
+       tx_chn = &emac->tx_chns[queue];
+
+       if (coal->tx_coalesce_usecs &&
+           coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
+               dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs for tx-%u\n",
+                        ICSSG_MIN_COALESCE_USECS, queue);
+               coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
+       }
+
+       tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000;
+
+       return 0;
+}
+
 const struct ethtool_ops icssg_ethtool_ops = {
        .get_drvinfo = emac_get_drvinfo,
        .get_msglevel = emac_get_msglevel,
@@ -209,6 +296,12 @@ const struct ethtool_ops icssg_ethtool_ops = {
        .get_ethtool_stats = emac_get_ethtool_stats,
        .get_strings = emac_get_strings,
        .get_ts_info = emac_get_ts_info,
+       .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
+                                    ETHTOOL_COALESCE_TX_USECS,
+       .get_coalesce = emac_get_coalesce,
+       .set_coalesce = emac_set_coalesce,
+       .get_per_queue_coalesce = emac_get_per_queue_coalesce,
+       .set_per_queue_coalesce = emac_set_per_queue_coalesce,
        .get_channels = emac_get_channels,
        .set_channels = emac_set_channels,
        .get_link_ksettings = emac_get_link_ksettings,
index 186b036..7c9e951 100644 (file)
@@ -243,6 +243,16 @@ static void emac_adjust_link(struct net_device *ndev)
        }
 }
 
+static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer)
+{
+       struct prueth_emac *emac =
+                       container_of(timer, struct prueth_emac, rx_hrtimer);
+       int rx_flow = PRUETH_RX_FLOW_DATA;
+
+       enable_irq(emac->rx_chns.irq[rx_flow]);
+       return HRTIMER_NORESTART;
+}
+
 static int emac_phy_connect(struct prueth_emac *emac)
 {
        struct prueth *prueth = emac->prueth;
@@ -582,8 +592,10 @@ static int emac_ndo_stop(struct net_device *ndev)
                netdev_err(ndev, "tx teardown timeout\n");
 
        prueth_reset_tx_chan(emac, emac->tx_ch_num, true);
-       for (i = 0; i < emac->tx_ch_num; i++)
+       for (i = 0; i < emac->tx_ch_num; i++) {
                napi_disable(&emac->tx_chns[i].napi_tx);
+               hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer);
+       }
 
        max_rx_flows = PRUETH_MAX_RX_FLOWS;
        k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
@@ -591,6 +603,7 @@ static int emac_ndo_stop(struct net_device *ndev)
        prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
 
        napi_disable(&emac->napi_rx);
+       hrtimer_cancel(&emac->rx_hrtimer);
 
        cancel_work_sync(&emac->rx_mode_work);
 
@@ -801,6 +814,9 @@ static int prueth_netdev_init(struct prueth *prueth,
        ndev->features = ndev->hw_features;
 
        netif_napi_add(ndev, &emac->napi_rx, emac_napi_rx_poll);
+       hrtimer_init(&emac->rx_hrtimer, CLOCK_MONOTONIC,
+                    HRTIMER_MODE_REL_PINNED);
+       emac->rx_hrtimer.function = &emac_rx_timer_callback;
        prueth->emac[mac] = emac;
 
        return 0;
index 82e38ef..a78c5eb 100644 (file)
@@ -108,6 +108,8 @@ struct prueth_tx_chn {
        u32 descs_num;
        unsigned int irq;
        char name[32];
+       struct hrtimer tx_hrtimer;
+       unsigned long tx_pace_timeout_ns;
 };
 
 struct prueth_rx_chn {
@@ -127,6 +129,9 @@ struct prueth_rx_chn {
 
 #define PRUETH_MAX_TX_TS_REQUESTS      50 /* Max simultaneous TX_TS requests */
 
+/* Minimum coalesce time in usecs for both Tx and Rx */
+#define ICSSG_MIN_COALESCE_USECS 20
+
 /* data for each emac port */
 struct prueth_emac {
        bool is_sr1;
@@ -183,6 +188,10 @@ struct prueth_emac {
 
        struct delayed_work stats_work;
        u64 stats[ICSSG_NUM_STATS];
+
+       /* RX IRQ Coalescing Related */
+       struct hrtimer rx_hrtimer;
+       unsigned long rx_pace_timeout_ns;
 };
 
 /**
@@ -320,7 +329,7 @@ void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num);
 void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
                      struct cppi5_host_desc_t *desc);
 int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
-                            int budget);
+                            int budget, bool *tdown);
 int prueth_ndev_add_tx_napi(struct prueth_emac *emac);
 int prueth_init_tx_chns(struct prueth_emac *emac);
 int prueth_init_rx_chns(struct prueth_emac *emac,