// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	int ring;

	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start = u64_stats_fetch_begin(&priv->rx[ring].statss);
				s->rx_packets += priv->rx[ring].rpackets;
				s->rx_bytes += priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
			do {
				start = u64_stats_fetch_begin(&priv->tx[ring].statss);
				s->tx_packets += priv->tx[ring].pkt_done;
				s->tx_bytes += priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
		}
	}
}
static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}
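
/* NAPI poll handler: drains the TX and RX rings bound to this notification
 * block, and only re-arms the interrupt (GVE_IRQ_ACK | GVE_IRQ_EVENT) once a
 * poll round completes under budget. A final zero-budget re-poll catches work
 * that raced with the unmask; if anything is found, the interrupt is masked
 * again and NAPI rescheduled.
 */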
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx)
		reschedule |= gve_tx_poll(block, budget);
	if (block->rx)
		reschedule |= gve_rx_poll(block, budget);

	if (reschedule)
		return budget;

	napi_complete(napi);
	irq_doorbell = gve_irq_doorbell(priv, block);
	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

	/* Double check we have no extra work.
	 * Ensure unmask synchronizes with checking for work.
	 */
	dma_rmb();
	if (block->tx)
		reschedule |= gve_tx_poll(block, -1);
	if (block->rx)
		reschedule |= gve_rx_poll(block, -1);
	if (reschedule && napi_reschedule(napi))
		iowrite32be(GVE_IRQ_MASK, irq_doorbell);

	return 0;
}
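
/* One MSI-X vector is requested per notification block plus one extra vector
 * for the management interrupt that drives the service task. If fewer vectors
 * are granted than requested, the TX/RX max queue counts are scaled down to
 * fit the vectors that were actually enabled.
 */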
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	char *name = priv->dev->name;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvzalloc(num_vecs_requested *
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
		 name);
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->ntfy_blocks =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->ntfy_blocks),
				   &priv->ntfy_block_bus, GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}
	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
			 name, i);
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_intr, 0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}
static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->ntfy_block_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_ntfy_blocks;
	}
	gve_set_device_resources_ok(priv);
	return 0;
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);
	return err;
}
static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_clear_device_resources_ok(priv);
}
static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
		       NAPI_POLL_WEIGHT);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}
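
/* Queue page lists (QPLs) are arrays of pages that are DMA-mapped and
 * registered with the device over the admin queue; the TX and RX queues
 * reference them for packet data, so every QPL must be registered before the
 * queues that use it are created and unregistered after they are destroyed.
 */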
static int gve_register_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}
static int gve_create_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_adminq_create_tx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
	}
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_adminq_create_rx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		/* Rx data ring has been prefilled with packet buffers at
		 * queue allocation time.
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		gve_rx_write_doorbell(priv, &priv->rx[i]);
		netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
	}

	return 0;
}
static int gve_alloc_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int err;
	int i;

	/* Setup tx rings */
	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;
	err = gve_tx_alloc_rings(priv);
	if (err)
		goto free_tx;
	/* Setup rx rings */
	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}
	err = gve_rx_alloc_rings(priv);
	if (err)
		goto free_rx;
	/* Add tx napi & init sync stats*/
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		u64_stats_init(&priv->tx[i].statss);
		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}
	/* Add rx napi & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		u64_stats_init(&priv->rx[i].statss);
		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv);
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}
static int gve_destroy_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_adminq_destroy_tx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to destroy tx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
	}
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_adminq_destroy_rx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to destroy rx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
	}
	return 0;
}
static void gve_free_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir)
{
	*page = alloc_page(GFP_KERNEL);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
				   GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id));
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}
static void gve_free_queue_page_list(struct gve_priv *priv,
				     int id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
free_pages:
	kvfree(qpl->pages);
	priv->num_registered_pages -= qpl->num_entries;
}
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i, j;
	int err;

	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}
	for (; i < num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	return err;
}
static void gve_free_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i;

	kvfree(priv->qpl_cfg.qpl_id_map);

	for (i = 0; i < num_qpls; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
}
/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
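
/* ndo_open: bring-up order is QPL allocation, ring allocation, QPL
 * registration and ring creation over the admin queue, then gve_turnup() to
 * enable NAPI and unmask interrupts. Failures after the device has been
 * touched fall through to a full reset rather than piecemeal unwinding.
 */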
static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;
	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;
	err = gve_create_rings(priv);
	if (err)
		goto reset;
	gve_set_device_rings_ok(priv);

	gve_turnup(priv);
	netif_carrier_on(dev);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}
729 struct gve_priv *priv = netdev_priv(dev);
732 netif_carrier_off(dev);
733 if (gve_get_device_rings_ok(priv)) {
735 err = gve_destroy_rings(priv);
738 err = gve_unregister_qpls(priv);
741 gve_clear_device_rings_ok(priv);
744 gve_free_rings(priv);
746 priv->interface_down_cnt++;
750 /* This must have been called from a reset due to the rtnl lock
751 * so just return at this point.
753 if (gve_get_reset_in_progress(priv))
755 /* Otherwise reset before returning */
756 gve_reset_and_teardown(priv, true);
757 return gve_reset_recovery(priv, false);
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
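
/* gve_turndown() stops the data path without freeing any resources: it drops
 * the carrier, disables NAPI on every notification block, and stops the TX
 * queues. gve_turnup() reverses this and re-arms the per-block interrupt
 * doorbells by writing 0 to them.
 */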
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
}
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}

	gve_set_napi_enabled(priv);
}
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_priv *priv = netdev_priv(dev);

	gve_schedule_reset(priv);
	priv->tx_timeo_cnt++;
}
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		= gve_tx,
	.ndo_open		= gve_open,
	.ndo_stop		= gve_close,
	.ndo_get_stats64	= gve_get_stats,
	.ndo_tx_timeout		= gve_tx_timeout,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
}
static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}
/* Handle NIC status register changes and reset requests */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);

	gve_handle_status(priv,
			  ioread32be(&priv->reg_bar0->device_status));

	gve_handle_reset(priv);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	if (skip_describe_device)
		goto setup_device;

	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	if (priv->dev->max_mtu > PAGE_SIZE) {
		priv->dev->max_mtu = PAGE_SIZE;
		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
		if (err) {
			netif_err(priv, drv, priv->dev, "Could not set mtu");
			goto err;
		}
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
		   priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
		   priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}
static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}
static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
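
/* Full device reset: either close the interface normally or force a
 * turndown/teardown, release the admin queue to reset the device, then
 * rebuild priv state and reopen the interface if it was up beforehand.
 */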
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	return err;
}
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}
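
/* PCI probe: map the register and doorbell BARs, report the driver version
 * to the device, size the netdev from the advertised queue maximums, then
 * initialize priv state and the admin queue before registering the netdev.
 */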
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return -ENXIO;

	err = pci_request_regions(pdev, "gvnic-cfg");
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev,
			"Failed to set consistent dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	/* advertise features */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_wq;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return -ENXIO;
}
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
};

module_pci_driver(gvnic_driver);
MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);