gve: Add support for raw addressing to the rx path
drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
        struct gve_notify_block *block =
                        &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

        block->rx = NULL;
}

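/* Release a raw-addressing buffer: recover the DMA address from the
 * device-visible slot (masking off the in-page offset bits used for page
 * flipping) and hand the page plus its mapping back via gve_free_page().
 */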
static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

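/* Tear down the per-slot buffer state. With raw addressing every slot owns
 * its own DMA-mapped page and is freed individually; in QPL mode the pages
 * belong to the queue page list, so only the QPL assignment is released.
 */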
static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        if (rx->data.raw_addressing) {
                u32 slots = rx->mask + 1;
                int i;

                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

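/* Point an rx slot at a packet buffer. @slot_addr is the device-visible
 * field for this slot: ->addr (a DMA address) with raw addressing, or
 * ->qpl_offset (an offset into the registered queue page list) in QPL mode.
 */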
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                             dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
}

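/* Raw addressing only: allocate and DMA-map a fresh page for a slot and
 * publish its bus address in the data ring slot.
 */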
static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

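/* Populate every slot in the data ring before the queue is started. In QPL
 * mode slots simply reference the pre-registered QPL pages by offset; with
 * raw addressing each slot gets its own freshly mapped page, and any pages
 * already allocated are freed again if an allocation fails part way through.
 * Returns the number of slots filled or a negative errno.
 */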
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers, when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing)
                rx->data.qpl = gve_assign_rx_qpl(priv);
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
        u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
        struct gve_rx_ring *rx = &priv->rx[queue_idx];

        block->rx = rx;
        rx->ntfy_id = ntfy_idx;
}

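/* Allocate one rx ring: the DMA-coherent data and descriptor rings, the
 * queue resources block, and the initial set of packet buffers. On failure
 * everything allocated so far is unwound in reverse order.
 */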
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->raw_addressing;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Unallocate if there was an error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

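/* Tell the device how many buffers have been posted by writing the ring's
 * current fill_cnt to its doorbell register.
 */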
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

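/* Copy receive path: allocate a small linear skb and memcpy the packet out
 * of the half-page buffer. Used for packets at or below rx_copybreak and
 * whenever the buffer itself cannot be handed up the stack.
 */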
static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
                                   struct net_device *dev,
                                   struct napi_struct *napi,
                                   struct gve_rx_slot_page_info *page_info,
                                   u16 len)
{
        struct sk_buff *skb = napi_alloc_skb(napi, len);
        void *va = page_info->page_address + GVE_RX_PAD +
                   (page_info->page_offset ? PAGE_SIZE / 2 : 0);

        if (unlikely(!skb))
                return NULL;

        __skb_put(skb, len);

        skb_copy_to_linear_data(skb, va, len);

        skb->protocol = eth_type_trans(skb, dev);

        u64_stats_update_begin(&rx->statss);
        rx->rx_copied_pkt++;
        u64_stats_update_end(&rx->statss);

        return skb;
}

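/* Zero-copy receive path: attach the active half of the buffer page to a
 * napi frags skb so the page is passed up the stack without copying.
 */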
static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb))
                return NULL;

        skb_add_rx_frag(skb, 0, page_info->page,
                        (page_info->page_offset ? PAGE_SIZE / 2 : 0) +
                        GVE_RX_PAD, len, PAGE_SIZE / 2);

        return skb;
}

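/* Switch a slot to the other half of its page: toggle the page_offset flag
 * and XOR PAGE_SIZE / 2 into the device-visible address. XOR works bytewise,
 * so it can be applied to the __be64 value directly.
 */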
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= 0x1;
        *(slot_addr) ^= offset;
}

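/* Handle one received descriptor. Small packets are copied into a fresh
 * skb; larger ones are passed up as page frags where possible. In QPL mode
 * the page refcount decides whether the buffer can be "flipped" to its
 * other half or must be copied instead. Returns false if the packet was
 * dropped or no skb could be built.
 */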
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
                   netdev_features_t feat, u32 idx)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
        struct net_device *dev = priv->dev;
        union gve_rx_data_slot *data_slot;
        struct sk_buff *skb = NULL;
        dma_addr_t page_bus;
        int pagecount;
        u16 len;

        /* drop this packet */
        if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];

        data_slot = &rx->data.data_ring[idx];
        page_bus = (rx->data.raw_addressing) ?
                        be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
                        rx->data.qpl->page_buses[idx];
        dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
                                PAGE_SIZE, DMA_FROM_DEVICE);

        if (PAGE_SIZE == 4096) {
                if (len <= priv->rx_copybreak) {
                        /* Just copy small packets */
                        skb = gve_rx_copy(rx, dev, napi, page_info, len);
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copybreak_pkt++;
                        u64_stats_update_end(&rx->statss);
                        goto have_skb;
                }
                if (rx->data.raw_addressing) {
                        skb = gve_rx_add_frags(napi, page_info, len);
                        goto have_skb;
                }
                if (unlikely(!gve_can_recycle_pages(dev))) {
                        skb = gve_rx_copy(rx, dev, napi, page_info, len);
                        goto have_skb;
                }
                pagecount = page_count(page_info->page);
                if (pagecount == 1) {
                        /* No part of this page is used by any SKBs; we attach
                         * the page fragment to a new SKB and pass it up the
                         * stack.
                         */
                        skb = gve_rx_add_frags(napi, page_info, len);
                        if (!skb) {
                                u64_stats_update_begin(&rx->statss);
                                rx->rx_skb_alloc_fail++;
                                u64_stats_update_end(&rx->statss);
                                return false;
                        }
                        /* Make sure the kernel stack can't release the page */
                        get_page(page_info->page);
                        /* "flip" to other packet buffer on this page */
                        gve_rx_flip_buff(page_info, &rx->data.data_ring[idx].qpl_offset);
                } else if (pagecount >= 2) {
                        /* We have previously passed the other half of this
                         * page up the stack, but it has not yet been freed.
                         */
                        skb = gve_rx_copy(rx, dev, napi, page_info, len);
                } else {
                        WARN(pagecount < 1, "Pagecount should never be < 1");
                        return false;
                }
        } else {
                if (rx->data.raw_addressing)
                        skb = gve_rx_add_frags(napi, page_info, len);
                else
                        skb = gve_rx_copy(rx, dev, napi, page_info, len);
        }

have_skb:
        /* We didn't manage to allocate an skb but we haven't had any
         * reset worthy failures.
         */
        if (!skb) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_skb_alloc_fail++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (rx_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(rx_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(rx_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
                             gve_rss_type(rx_desc->flags_seq));

        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);
        return true;
}

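/* Return true if the next descriptor carries the sequence number we expect,
 * i.e. the device has written at least one more completion to the ring.
 */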
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;
        /* Make sure we have synchronized the seq no with the device */
        smp_rmb();

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

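/* Raw addressing only: repost buffers for slots the device has consumed by
 * freeing the old page and mapping a fresh one, advancing fill_cnt until the
 * ring is full again or a page allocation fails.
 */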
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                struct device *dev = &priv->pdev->dev;
                union gve_rx_data_slot *data_slot;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                data_slot = &rx->data.data_ring[idx];
                gve_rx_free_buffer(dev, page_info, data_slot);
                page_info->page = NULL;
                if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot)) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_buf_alloc_fail++;
                        u64_stats_update_end(&rx->statss);
                        break;
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

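/* NAPI poll work for one rx ring: process completed descriptors up to
 * @budget, restock buffers (immediately in QPL mode, threshold-driven with
 * raw addressing), ring the doorbell and report whether more work remains.
 */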
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                       netdev_features_t feat)
{
        struct gve_priv *priv = rx->gve;
        u32 work_done = 0, packets = 0;
        struct gve_rx_desc *desc;
        u32 cnt = rx->cnt;
        u32 idx = cnt & rx->mask;
        u64 bytes = 0;

        desc = rx->desc.desc_ring + idx;
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);
                dropped = !gve_rx(rx, desc, feat, idx);
                if (!dropped) {
                        bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
                        packets++;
                }
                cnt++;
                idx = cnt & rx->mask;
                desc = rx->desc.desc_ring + idx;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
                work_done++;
        }

        if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
                return false;

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += packets;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);
        rx->cnt = cnt;

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffs are refilled as the desc are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
                /* In raw addressing mode buffs are only refilled if the avail
                 * falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return false;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return true;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return gve_rx_work_pending(rx);
}

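/* NAPI entry point for an rx notify block; a budget of 0 means "do all the
 * work". Returns true if the queue should be polled again.
 */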
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        bool repoll = false;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                repoll |= gve_clean_rx_done(rx, budget, feat);
        else
                repoll |= gve_rx_work_pending(rx);
        return repoll;
}