gve: DQO: Add core netdev features
authorBailey Forrest <bcf@google.com>
Thu, 24 Jun 2021 18:06:28 +0000 (11:06 -0700)
committerDavid S. Miller <davem@davemloft.net>
Thu, 24 Jun 2021 19:47:38 +0000 (12:47 -0700)
Add napi netdev device registration, interrupt handling and initial tx
and rx polling stubs. The stubs will be filled in follow-on patches.

Also:
- LRO feature advertisement and handling
- Update ethtool logic

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/google/gve/Makefile
drivers/net/ethernet/google/gve/gve.h
drivers/net/ethernet/google/gve/gve_adminq.c
drivers/net/ethernet/google/gve/gve_dqo.h [new file with mode: 0644]
drivers/net/ethernet/google/gve/gve_ethtool.c
drivers/net/ethernet/google/gve/gve_main.c
drivers/net/ethernet/google/gve/gve_rx_dqo.c [new file with mode: 0644]
drivers/net/ethernet/google/gve/gve_tx_dqo.c [new file with mode: 0644]

index 0143f44..b9a6be7 100644 (file)
@@ -1,4 +1,4 @@
 # Makefile for the Google virtual Ethernet (gve) driver
 
 obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o gve_utils.o
+gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o
index 8a2a8d1..d6bf046 100644 (file)
@@ -45,6 +45,8 @@
 /* PTYPEs are always 10 bits. */
 #define GVE_NUM_PTYPES 1024
 
+#define GVE_RX_BUFFER_SIZE_DQO 2048
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
        struct gve_rx_desc *desc_ring; /* the descriptor ring */
index cf017a4..5bb56b4 100644 (file)
@@ -714,6 +714,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
        if (gve_is_gqi(priv)) {
                err = gve_set_desc_cnt(priv, descriptor);
        } else {
+               /* DQO supports LRO. */
+               priv->dev->hw_features |= NETIF_F_LRO;
                err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
        }
        if (err)
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
new file mode 100644 (file)
index 0000000..cff4e6e
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#ifndef _GVE_DQO_H_
+#define _GVE_DQO_H_
+
+#include "gve_adminq.h"
+
+#define GVE_ITR_ENABLE_BIT_DQO BIT(0)
+#define GVE_ITR_CLEAR_PBA_BIT_DQO BIT(1)
+#define GVE_ITR_NO_UPDATE_DQO (3 << 3)
+
+#define GVE_TX_IRQ_RATELIMIT_US_DQO 50
+#define GVE_RX_IRQ_RATELIMIT_US_DQO 20
+
+netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
+bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
+int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
+
+static inline void
+gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
+                          const struct gve_notify_block *block, u32 val)
+{
+       u32 index = be32_to_cpu(block->irq_db_index);
+
+       iowrite32(val, &priv->db_bar2[index]);
+}
+
+#endif /* _GVE_DQO_H_ */
index ccaf685..716e624 100644 (file)
@@ -311,8 +311,16 @@ gve_get_ethtool_stats(struct net_device *netdev,
                for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
                        struct gve_tx_ring *tx = &priv->tx[ring];
 
-                       data[i++] = tx->req;
-                       data[i++] = tx->done;
+                       if (gve_is_gqi(priv)) {
+                               data[i++] = tx->req;
+                               data[i++] = tx->done;
+                       } else {
+                               /* DQO doesn't currently support
+                                * posted/completed descriptor counts.
+                                */
+                               data[i++] = 0;
+                               data[i++] = 0;
+                       }
                        do {
                                start =
                                  u64_stats_fetch_begin(&priv->tx[ring].statss);
index 8cc0ac0..579f867 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include "gve.h"
+#include "gve_dqo.h"
 #include "gve_adminq.h"
 #include "gve_register.h"
 
 const char gve_version_str[] = GVE_VERSION;
 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
 
+static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct gve_priv *priv = netdev_priv(dev);
+
+       if (gve_is_gqi(priv))
+               return gve_tx(skb, dev);
+       else
+               return gve_tx_dqo(skb, dev);
+}
+
 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
 {
        struct gve_priv *priv = netdev_priv(dev);
@@ -155,6 +166,15 @@ static irqreturn_t gve_intr(int irq, void *arg)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t gve_intr_dqo(int irq, void *arg)
+{
+       struct gve_notify_block *block = arg;
+
+       /* Interrupts are automatically masked */
+       napi_schedule_irqoff(&block->napi);
+       return IRQ_HANDLED;
+}
+
 static int gve_napi_poll(struct napi_struct *napi, int budget)
 {
        struct gve_notify_block *block;
@@ -191,6 +211,54 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
        return 0;
 }
 
+static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
+{
+       struct gve_notify_block *block =
+               container_of(napi, struct gve_notify_block, napi);
+       struct gve_priv *priv = block->priv;
+       bool reschedule = false;
+       int work_done = 0;
+
+       /* Clear PCI MSI-X Pending Bit Array (PBA)
+        *
+        * This bit is set if an interrupt event occurs while the vector is
+        * masked. If this bit is set and we reenable the interrupt, it will
+        * fire again. Since we're just about to poll the queue state, we don't
+        * need it to fire again.
+        *
+        * Under high softirq load, it's possible that the interrupt condition
+        * is triggered twice before we got the chance to process it.
+        */
+       gve_write_irq_doorbell_dqo(priv, block,
+                                  GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
+
+       if (block->tx)
+               reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+
+       if (block->rx) {
+               work_done = gve_rx_poll_dqo(block, budget);
+               reschedule |= work_done == budget;
+       }
+
+       if (reschedule)
+               return budget;
+
+       if (likely(napi_complete_done(napi, work_done))) {
+               /* Enable interrupts again.
+                *
+                * We don't need to repoll afterwards because HW supports the
+                * PCI MSI-X PBA feature.
+                *
+                * Another interrupt would be triggered if a new event came in
+                * since the last one.
+                */
+               gve_write_irq_doorbell_dqo(priv, block,
+                                          GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
+       }
+
+       return work_done;
+}
+
 static int gve_alloc_notify_blocks(struct gve_priv *priv)
 {
        int num_vecs_requested = priv->num_ntfy_blks + 1;
@@ -264,7 +332,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
                         name, i);
                block->priv = priv;
                err = request_irq(priv->msix_vectors[msix_idx].vector,
-                                 gve_intr, 0, block->name, block);
+                                 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
+                                 0, block->name, block);
                if (err) {
                        dev_err(&priv->pdev->dev,
                                "Failed to receive msix vector %d\n", i);
@@ -417,11 +486,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
        gve_clear_device_resources_ok(priv);
 }
 
-static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+                        int (*gve_poll)(struct napi_struct *, int))
 {
        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
-       netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+       netif_napi_add(priv->dev, &block->napi, gve_poll,
                       NAPI_POLL_WEIGHT);
 }
 
@@ -512,11 +582,33 @@ static int gve_create_rings(struct gve_priv *priv)
        return 0;
 }
 
+static void add_napi_init_sync_stats(struct gve_priv *priv,
+                                    int (*napi_poll)(struct napi_struct *napi,
+                                                     int budget))
+{
+       int i;
+
+       /* Add tx napi & init sync stats */
+       for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+               int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+
+               u64_stats_init(&priv->tx[i].statss);
+               priv->tx[i].ntfy_id = ntfy_idx;
+               gve_add_napi(priv, ntfy_idx, napi_poll);
+       }
+       /* Add rx napi & init sync stats */
+       for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+               int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+
+               u64_stats_init(&priv->rx[i].statss);
+               priv->rx[i].ntfy_id = ntfy_idx;
+               gve_add_napi(priv, ntfy_idx, napi_poll);
+       }
+}
+
 static int gve_alloc_rings(struct gve_priv *priv)
 {
-       int ntfy_idx;
        int err;
-       int i;
 
        /* Setup tx rings */
        priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
@@ -536,18 +628,11 @@ static int gve_alloc_rings(struct gve_priv *priv)
        err = gve_rx_alloc_rings(priv);
        if (err)
                goto free_rx;
-       /* Add tx napi & init sync stats*/
-       for (i = 0; i < priv->tx_cfg.num_queues; i++) {
-               u64_stats_init(&priv->tx[i].statss);
-               ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
-               gve_add_napi(priv, ntfy_idx);
-       }
-       /* Add rx napi  & init sync stats*/
-       for (i = 0; i < priv->rx_cfg.num_queues; i++) {
-               u64_stats_init(&priv->rx[i].statss);
-               ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
-               gve_add_napi(priv, ntfy_idx);
-       }
+
+       if (gve_is_gqi(priv))
+               add_napi_init_sync_stats(priv, gve_napi_poll);
+       else
+               add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
 
        return 0;
 
@@ -798,9 +883,17 @@ static int gve_open(struct net_device *dev)
        err = gve_register_qpls(priv);
        if (err)
                goto reset;
+
+       if (!gve_is_gqi(priv)) {
+               /* Hard code this for now. This may be tuned in the future for
+                * performance.
+                */
+               priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
+       }
        err = gve_create_rings(priv);
        if (err)
                goto reset;
+
        gve_set_device_rings_ok(priv);
 
        if (gve_get_report_stats(priv))
@@ -970,12 +1063,49 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
        priv->tx_timeo_cnt++;
 }
 
+static int gve_set_features(struct net_device *netdev,
+                           netdev_features_t features)
+{
+       const netdev_features_t orig_features = netdev->features;
+       struct gve_priv *priv = netdev_priv(netdev);
+       int err;
+
+       if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
+               netdev->features ^= NETIF_F_LRO;
+               if (netif_carrier_ok(netdev)) {
+                       /* To make this process as simple as possible we
+                        * teardown the device, set the new configuration,
+                        * and then bring the device up again.
+                        */
+                       err = gve_close(netdev);
+                       /* We have already tried to reset in close, just fail
+                        * at this point.
+                        */
+                       if (err)
+                               goto err;
+
+                       err = gve_open(netdev);
+                       if (err)
+                               goto err;
+               }
+       }
+
+       return 0;
+err:
+       /* Reverts the change on error. */
+       netdev->features = orig_features;
+       netif_err(priv, drv, netdev,
+                 "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
+       return err;
+}
+
 static const struct net_device_ops gve_netdev_ops = {
-       .ndo_start_xmit         =       gve_tx,
+       .ndo_start_xmit         =       gve_start_xmit,
        .ndo_open               =       gve_open,
        .ndo_stop               =       gve_close,
        .ndo_get_stats64        =       gve_get_stats,
        .ndo_tx_timeout         =       gve_tx_timeout,
+       .ndo_set_features       =       gve_set_features,
 };
 
 static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -1019,6 +1149,15 @@ void gve_handle_report_stats(struct gve_priv *priv)
        /* tx stats */
        if (priv->tx) {
                for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+                       u32 last_completion = 0;
+                       u32 tx_frames = 0;
+
+                       /* DQO doesn't currently support these metrics. */
+                       if (gve_is_gqi(priv)) {
+                               last_completion = priv->tx[idx].done;
+                               tx_frames = priv->tx[idx].req;
+                       }
+
                        do {
                                start = u64_stats_fetch_begin(&priv->tx[idx].statss);
                                tx_bytes = priv->tx[idx].bytes_done;
@@ -1035,7 +1174,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_FRAMES_SENT),
-                               .value = cpu_to_be64(priv->tx[idx].req),
+                               .value = cpu_to_be64(tx_frames),
                                .queue_id = cpu_to_be32(idx),
                        };
                        stats[stats_idx++] = (struct stats) {
@@ -1045,7 +1184,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
-                               .value = cpu_to_be64(priv->tx[idx].done),
+                               .value = cpu_to_be64(last_completion),
                                .queue_id = cpu_to_be32(idx),
                        };
                }
@@ -1121,7 +1260,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
                        "Could not get device information: err=%d\n", err);
                goto err;
        }
-       if (priv->dev->max_mtu > PAGE_SIZE) {
+       if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
                priv->dev->max_mtu = PAGE_SIZE;
                err = gve_adminq_set_mtu(priv, priv->dev->mtu);
                if (err) {
@@ -1332,7 +1471,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_drvdata(pdev, dev);
        dev->ethtool_ops = &gve_ethtool_ops;
        dev->netdev_ops = &gve_netdev_ops;
-       /* advertise features */
+
+       /* Set default and supported features.
+        *
+        * Features might be set in other locations as well (such as
+        * `gve_adminq_describe_device`).
+        */
        dev->hw_features = NETIF_F_HIGHDMA;
        dev->hw_features |= NETIF_F_SG;
        dev->hw_features |= NETIF_F_HW_CSUM;
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
new file mode 100644 (file)
index 0000000..808e097
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_dqo.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ip6_checksum.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
+{
+       u32 work_done = 0;
+
+       return work_done;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
new file mode 100644 (file)
index 0000000..4b3319a
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+#include "gve_dqo.h"
+#include <linux/tcp.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+
+netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
+{
+       return NETDEV_TX_OK;
+}
+
+bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
+{
+       return false;
+}