drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 slots = rx->mask + 1;
        int i;

        if (rx->data.raw_addressing) {
                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                for (i = 0; i < slots; i++)
                        page_ref_sub(rx->data.page_info[i].page,
                                     rx->data.page_info[i].pagecnt_bias - 1);
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

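/* Rx buffer pages are reference-managed with a bias: gve_setup_rx_buffer()
 * takes INT_MAX page references up front and records them in pagecnt_bias.
 * Each fragment handed to the stack consumes one of those prepaid references
 * (gve_dec_pagecnt_bias()), so a page is free for reuse again once
 * page_count() equals the remaining bias (see gve_rx_can_recycle_buffer()).
 */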
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                                dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
        /* The page already has 1 ref */
        page_ref_add(page, INT_MAX - 1);
        page_info->pagecnt_bias = INT_MAX;
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
                             GFP_ATOMIC);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

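/* Buffers are posted in one of two modes: in QPL (queue page list) mode the
 * device may only DMA into a preregistered set of pages (rx->data.qpl), so
 * each ring slot is described as an offset within that list; in raw
 * addressing (GVE_GQI_RDA_FORMAT) mode pages are allocated per slot and
 * their DMA addresses are written directly into the data ring.
 */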
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers; when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing) {
                rx->data.qpl = gve_assign_rx_qpl(priv);
                if (!rx->data.qpl) {
                        kvfree(rx->data.page_info);
                        rx->data.page_info = NULL;
                        return -ENOMEM;
                }
        }
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
{
        ctx->curr_frag_cnt = 0;
        ctx->total_expected_size = 0;
        ctx->expected_frag_cnt = 0;
        ctx->skb_head = NULL;
        ctx->skb_tail = NULL;
        ctx->reuse_frags = false;
}

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;

        /* Allocating half-page buffers allows page-flipping which is faster
         * than copying or allocating new pages.
         */
        rx->packet_buffer_size = PAGE_SIZE / 2;
        gve_rx_ctx_clear(&rx->ctx);
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Free any rings that were allocated before the error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

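/* The doorbell index is read from the queue resources block shared with the
 * device; writing fill_cnt (big endian) to that doorbell tells the device
 * how many buffer slots have been posted so far.
 */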
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

static u16 gve_rx_ctx_padding(struct gve_rx_ctx *ctx)
{
        return (ctx->curr_frag_cnt == 0) ? GVE_RX_PAD : 0;
}

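/* Attach the current packet buffer to the frag-list SKB obtained from
 * napi_get_frags(); the first fragment of a packet skips GVE_RX_PAD bytes of
 * leading padding (see gve_rx_ctx_padding()).
 */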
static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 packet_buffer_size, u16 len,
                                        struct gve_rx_ctx *ctx)
{
        u32 offset = page_info->page_offset + gve_rx_ctx_padding(ctx);
        struct sk_buff *skb;

        if (!ctx->skb_head)
                ctx->skb_head = napi_get_frags(napi);

        if (unlikely(!ctx->skb_head))
                return NULL;

        skb = ctx->skb_head;
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page_info->page,
                        offset, len, packet_buffer_size);

        return skb;
}

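/* Each page backs two half-page packet buffers. XORing PAGE_SIZE / 2 into
 * both page_offset and the slot address toggles between the two halves,
 * e.g. on 4 KiB pages the offset alternates between 0 and 2048, so the
 * device refills the half that the stack is not currently using.
 */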
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= PAGE_SIZE / 2;
        *(slot_addr) ^= offset;
}

static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
{
        int pagecount = page_count(page_info->page);

        /* This page is not being used by any SKBs - reuse */
        if (pagecount == page_info->pagecnt_bias)
                return 1;
        /* This page is still being used by an SKB - we can't reuse */
        else if (pagecount > page_info->pagecnt_bias)
                return 0;
        WARN(pagecount < page_info->pagecnt_bias,
             "Pagecount should never be less than the bias.");
        return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
                      struct gve_rx_slot_page_info *page_info, u16 len,
                      struct napi_struct *napi,
                      union gve_rx_data_slot *data_slot,
                      u16 packet_buffer_size, struct gve_rx_ctx *ctx)
{
        struct sk_buff *skb = gve_rx_add_frags(napi, page_info, packet_buffer_size, len, ctx);

        if (!skb)
                return NULL;

        /* Optimistically stop the kernel from freeing the page.
         * We will check again in refill to determine if we need to alloc a
         * new page.
         */
        gve_dec_pagecnt_bias(page_info);

        return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
           struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
           u16 len, struct napi_struct *napi,
           union gve_rx_data_slot *data_slot)
{
        struct gve_rx_ctx *ctx = &rx->ctx;
        struct sk_buff *skb;

        /* If raw_addressing mode is not enabled, gvnic can only receive into
         * registered segments. If the buffer can't be recycled, our only
         * choice is to copy the data out of it so that we can return it to the
         * device.
         */
        if (ctx->reuse_frags) {
                skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
                /* No point in recycling if we didn't get the skb */
                if (skb) {
                        /* Make sure that the page isn't freed. */
                        gve_dec_pagecnt_bias(page_info);
                        gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
                }
        } else {
                const u16 padding = gve_rx_ctx_padding(ctx);

                skb = gve_rx_copy(netdev, napi, page_info, len, padding, ctx);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_frag_copy_cnt++;
                        u64_stats_update_end(&rx->statss);
                }
        }
        return skb;
}

#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static u16 gve_rx_get_fragment_size(struct gve_rx_ctx *ctx, struct gve_rx_desc *desc)
{
        return be16_to_cpu(desc->len) - gve_rx_ctx_padding(ctx);
}

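/* Walk every descriptor belonging to the next packet before any fragment is
 * processed: count fragments, total the advertised sizes, verify sequence
 * numbers, and (in QPL mode) decide up front whether all of the packet's
 * buffers can be flipped rather than copied. Serious errors schedule a
 * device reset; descriptor errors just drop the packet.
 */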
static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx)
{
        bool qpl_mode = !rx->data.raw_addressing, packet_size_error = false;
        bool buffer_error = false, desc_error = false, seqno_error = false;
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        u32 idx = rx->cnt & rx->mask;
        bool reuse_frags, can_flip;
        struct gve_rx_desc *desc;
        u16 packet_size = 0;
        u16 n_frags = 0;
        int recycle;

        /* In QPL mode, we only flip buffers when all buffers containing the packet
         * can be flipped. RDA can_flip decisions will be made later, per frag.
         */
        can_flip = qpl_mode;
        reuse_frags = can_flip;
        do {
                u16 frag_size;

                n_frags++;
                desc = &rx->desc.desc_ring[idx];
                desc_error = unlikely(desc->flags_seq & GVE_RXF_ERR) || desc_error;
                if (GVE_SEQNO(desc->flags_seq) != rx->desc.seqno) {
                        seqno_error = true;
                        netdev_warn(priv->dev,
                                    "RX seqno error: want=%d, got=%d, dropping packet and scheduling reset.",
                                    rx->desc.seqno, GVE_SEQNO(desc->flags_seq));
                }
                frag_size = be16_to_cpu(desc->len);
                packet_size += frag_size;
                if (frag_size > rx->packet_buffer_size) {
                        packet_size_error = true;
                        netdev_warn(priv->dev,
                                    "RX fragment error: packet_buffer_size=%d, frag_size=%d, dropping packet.",
                                    rx->packet_buffer_size, be16_to_cpu(desc->len));
                }
                page_info = &rx->data.page_info[idx];
                if (can_flip) {
                        recycle = gve_rx_can_recycle_buffer(page_info);
                        reuse_frags = reuse_frags && recycle > 0;
                        buffer_error = buffer_error || unlikely(recycle < 0);
                }
                idx = (idx + 1) & rx->mask;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
        } while (GVE_PKTCONT_BIT_IS_SET(desc->flags_seq));

        prefetch(rx->desc.desc_ring + idx);

        ctx->curr_frag_cnt = 0;
        ctx->total_expected_size = packet_size - GVE_RX_PAD;
        ctx->expected_frag_cnt = n_frags;
        ctx->skb_head = NULL;
        ctx->reuse_frags = reuse_frags;

        if (ctx->expected_frag_cnt > 1) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_cont_packet_cnt++;
                u64_stats_update_end(&rx->statss);
        }
        if (ctx->total_expected_size > priv->rx_copybreak && !ctx->reuse_frags && qpl_mode) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                u64_stats_update_end(&rx->statss);
        }

        if (unlikely(buffer_error || seqno_error || packet_size_error)) {
                gve_schedule_reset(priv);
                return false;
        }

        if (unlikely(desc_error)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }
        return true;
}

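/* Build the SKB for one fragment: single-fragment packets no larger than
 * rx_copybreak are copied into a fresh SKB; anything bigger is attached as a
 * page fragment, either via page flipping (raw addressing, when the buffer
 * is recyclable) or via the QPL path, which may still fall back to copying.
 */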
static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
                                  struct gve_rx_slot_page_info *page_info, struct napi_struct *napi,
                                  u16 len, union gve_rx_data_slot *data_slot)
{
        struct net_device *netdev = priv->dev;
        struct gve_rx_ctx *ctx = &rx->ctx;
        struct sk_buff *skb = NULL;

        if (len <= priv->rx_copybreak && ctx->expected_frag_cnt == 1) {
                /* Just copy small packets */
                skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD, ctx);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copied_pkt++;
                        rx->rx_frag_copy_cnt++;
                        rx->rx_copybreak_pkt++;
                        u64_stats_update_end(&rx->statss);
                }
        } else {
                if (rx->data.raw_addressing) {
                        int recycle = gve_rx_can_recycle_buffer(page_info);

                        if (unlikely(recycle < 0)) {
                                gve_schedule_reset(priv);
                                return NULL;
                        }
                        page_info->can_flip = recycle;
                        if (page_info->can_flip) {
                                u64_stats_update_begin(&rx->statss);
                                rx->rx_frag_flip_cnt++;
                                u64_stats_update_end(&rx->statss);
                        }
                        skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
                                                    page_info, len, napi,
                                                    data_slot,
                                                    rx->packet_buffer_size, ctx);
                } else {
                        if (ctx->reuse_frags) {
                                u64_stats_update_begin(&rx->statss);
                                rx->rx_frag_flip_cnt++;
                                u64_stats_update_end(&rx->statss);
                        }
                        skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
                                         page_info, len, napi, data_slot);
                }
        }
        return skb;
}

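/* Consume one complete packet (all of its fragments). The descriptor context
 * is first initialized by gve_rx_ctx_init(); for each fragment we prefetch
 * the buffer two slots ahead, sync the page for CPU access, build or extend
 * the SKB, then hand the finished packet to GRO with checksum and RSS hash
 * information taken from the first descriptor.
 */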
static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
                   u64 *packet_size_bytes, u32 *work_done)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_rx_ctx *ctx = &rx->ctx;
        union gve_rx_data_slot *data_slot;
        struct gve_priv *priv = rx->gve;
        struct gve_rx_desc *first_desc;
        struct sk_buff *skb = NULL;
        struct gve_rx_desc *desc;
        struct napi_struct *napi;
        dma_addr_t page_bus;
        u32 work_cnt = 0;
        void *va;
        u32 idx;
        u16 len;

        idx = rx->cnt & rx->mask;
        first_desc = &rx->desc.desc_ring[idx];
        desc = first_desc;
        napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

        if (unlikely(!gve_rx_ctx_init(ctx, rx)))
                goto skb_alloc_fail;

        while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) {
                /* Prefetch two packet buffers ahead, we will need them soon. */
                page_info = &rx->data.page_info[(idx + 2) & rx->mask];
                va = page_info->page_address + page_info->page_offset;

                prefetch(page_info->page); /* Kernel page struct. */
                prefetch(va);              /* Packet header. */
                prefetch(va + 64);         /* Next cacheline too. */

                len = gve_rx_get_fragment_size(ctx, desc);

                page_info = &rx->data.page_info[idx];
                data_slot = &rx->data.data_ring[idx];
                page_bus = rx->data.raw_addressing ?
                           be64_to_cpu(data_slot->addr) - page_info->page_offset :
                           rx->data.qpl->page_buses[idx];
                dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, PAGE_SIZE, DMA_FROM_DEVICE);

                skb = gve_rx_skb(priv, rx, page_info, napi, len, data_slot);
                if (!skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_skb_alloc_fail++;
                        u64_stats_update_end(&rx->statss);
                        goto skb_alloc_fail;
                }

                ctx->curr_frag_cnt++;
                rx->cnt++;
                idx = rx->cnt & rx->mask;
                work_cnt++;
                desc = &rx->desc.desc_ring[idx];
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (first_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(first_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(first_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(first_desc->rss_hash),
                             gve_rss_type(first_desc->flags_seq));

        *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0);
        *work_done = work_cnt;
        skb_record_rx_queue(skb, rx->q_num);
        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);

        gve_rx_ctx_clear(ctx);
        return true;

skb_alloc_fail:
        if (napi->skb)
                napi_free_frags(napi);
        *packet_size_bytes = 0;
        *work_done = ctx->expected_frag_cnt;
        while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) {
                rx->cnt++;
                ctx->curr_frag_cnt++;
        }
        gve_rx_ctx_clear(ctx);
        return false;
}

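/* Returns true if the descriptor at rx->cnt already carries the sequence
 * number we expect next, i.e. the device has written at least one more
 * completion that has not been processed yet.
 */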
bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

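/* Repost buffers until fill_cnt catches up with the ring size. A slot marked
 * can_flip switches to the other half of its page; otherwise the whole page
 * is reused if the stack has released it, and failing that the buffer is
 * freed and a fresh page is allocated. A negative recycle check in QPL mode
 * schedules a device reset.
 */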
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                if (page_info->can_flip) {
                        /* The other half of the page is free because it was
                         * free when we processed the descriptor. Flip to it.
                         */
                        union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];

                        gve_rx_flip_buff(page_info, &data_slot->addr);
                        page_info->can_flip = 0;
                } else {
                        /* It is possible that the networking stack has already
                         * finished processing all outstanding packets in the buffer
                         * and it can be reused.
                         * Flipping is unnecessary here - if the networking stack still
                         * owns half the page it is impossible to tell which half. Either
                         * the whole page is free or it needs to be replaced.
                         */
                        int recycle = gve_rx_can_recycle_buffer(page_info);

                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                        if (!recycle) {
                                /* We can't reuse the buffer - alloc a new one */
                                union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];
                                struct device *dev = &priv->pdev->dev;

                                gve_rx_free_buffer(dev, page_info, data_slot);
                                page_info->page = NULL;
                                if (gve_rx_alloc_buffer(priv, dev, page_info,
                                                        data_slot)) {
                                        u64_stats_update_begin(&rx->statss);
                                        rx->rx_buf_alloc_fail++;
                                        u64_stats_update_end(&rx->statss);
                                        break;
                                }
                        }
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

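/* NAPI cleaning loop for one ring: process completed descriptors up to
 * budget, accumulate byte/packet stats, then restock the ring. In QPL mode
 * buffers are recycled inline, so fill_cnt simply advances with work_done;
 * in raw addressing mode buffers are refilled only once the outstanding
 * count drops to db_threshold, and the doorbell is rung with the new
 * fill_cnt.
 */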
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                             netdev_features_t feat)
{
        u32 work_done = 0, total_packet_cnt = 0, ok_packet_cnt = 0;
        struct gve_priv *priv = rx->gve;
        u32 idx = rx->cnt & rx->mask;
        struct gve_rx_desc *desc;
        u64 bytes = 0;

        desc = &rx->desc.desc_ring[idx];
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                u64 packet_size_bytes = 0;
                u32 work_cnt = 0;
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);

                dropped = !gve_rx(rx, feat, &packet_size_bytes, &work_cnt);
                if (!dropped) {
                        bytes += packet_size_bytes;
                        ok_packet_cnt++;
                }
                total_packet_cnt++;
                idx = rx->cnt & rx->mask;
                desc = &rx->desc.desc_ring[idx];
                work_done += work_cnt;
        }

        if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold)
                return 0;

        if (work_done) {
                u64_stats_update_begin(&rx->statss);
                rx->rpackets += ok_packet_cnt;
                rx->rbytes += bytes;
                u64_stats_update_end(&rx->statss);
        }

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffers are refilled as the descriptors are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
                /* In raw addressing mode buffers are only refilled if the
                 * available count falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return 0;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return budget;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return total_packet_cnt;
}

int gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        int work_done = 0;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                work_done = gve_clean_rx_done(rx, budget, feat);

        return work_done;
}