1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
4 /* The driver transmit and receive code */
6 #include <linux/prefetch.h>
8 #include <linux/bpf_trace.h>
10 #include "ice_txrx_lib.h"
13 #include "ice_dcb_lib.h"
16 #define ICE_RX_HDR_SIZE 256
18 #define FDIR_DESC_RXDID 0x40
19 #define ICE_FDIR_CLEAN_DELAY 10
22 * ice_prgm_fdir_fltr - Program a Flow Director filter
23 * @vsi: VSI to send dummy packet
24 * @fdir_desc: flow director descriptor
25 * @raw_packet: allocated buffer for flow director
28 ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
31 struct ice_tx_buf *tx_buf, *first;
32 struct ice_fltr_desc *f_desc;
33 struct ice_tx_desc *tx_desc;
34 struct ice_ring *tx_ring;
43 tx_ring = vsi->tx_rings[0];
44 if (!tx_ring || !tx_ring->desc)
48 /* we are using two descriptors to add/del a filter and we can wait */
49 for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) {
52 msleep_interruptible(1);
55 dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE,
58 if (dma_mapping_error(dev, dma))
61 /* grab the next descriptor */
62 i = tx_ring->next_to_use;
63 first = &tx_ring->tx_buf[i];
64 f_desc = ICE_TX_FDIRDESC(tx_ring, i);
65 memcpy(f_desc, fdir_desc, sizeof(*f_desc));
68 i = (i < tx_ring->count) ? i : 0;
69 tx_desc = ICE_TX_DESC(tx_ring, i);
70 tx_buf = &tx_ring->tx_buf[i];
73 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
75 memset(tx_buf, 0, sizeof(*tx_buf));
76 dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE);
77 dma_unmap_addr_set(tx_buf, dma, dma);
79 tx_desc->buf_addr = cpu_to_le64(dma);
80 td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY |
83 tx_buf->tx_flags = ICE_TX_FLAGS_DUMMY_PKT;
84 tx_buf->raw_buf = raw_packet;
86 tx_desc->cmd_type_offset_bsz =
87 ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0);
89 /* Force memory write to complete before letting h/w know
90 * there are new descriptors to fetch.
94 /* mark the data descriptor to be watched */
95 first->next_to_watch = tx_desc;
97 writel(tx_ring->next_to_use, tx_ring->tail);
103 * ice_unmap_and_free_tx_buf - Release a Tx buffer
104 * @ring: the ring that owns the buffer
105 * @tx_buf: the buffer to free
108 ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
111 if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
112 devm_kfree(ring->dev, tx_buf->raw_buf);
113 else if (ice_ring_is_xdp(ring))
114 page_frag_free(tx_buf->raw_buf);
116 dev_kfree_skb_any(tx_buf->skb);
117 if (dma_unmap_len(tx_buf, len))
118 dma_unmap_single(ring->dev,
119 dma_unmap_addr(tx_buf, dma),
120 dma_unmap_len(tx_buf, len),
122 } else if (dma_unmap_len(tx_buf, len)) {
123 dma_unmap_page(ring->dev,
124 dma_unmap_addr(tx_buf, dma),
125 dma_unmap_len(tx_buf, len),
129 tx_buf->next_to_watch = NULL;
131 dma_unmap_len_set(tx_buf, len, 0);
132 /* tx_buf must be completely set up in the transmit path */
135 static struct netdev_queue *txring_txq(const struct ice_ring *ring)
137 return netdev_get_tx_queue(ring->netdev, ring->q_index);
141 * ice_clean_tx_ring - Free all Tx buffers in a ring
142 * @tx_ring: ring to be cleaned
144 void ice_clean_tx_ring(struct ice_ring *tx_ring)
148 if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
149 ice_xsk_clean_xdp_ring(tx_ring);
153 /* ring already cleared, nothing to do */
154 if (!tx_ring->tx_buf)
157 /* Free all the Tx ring sk_buffs */
158 for (i = 0; i < tx_ring->count; i++)
159 ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
162 memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
164 /* Zero out the descriptor ring */
165 memset(tx_ring->desc, 0, tx_ring->size);
167 tx_ring->next_to_use = 0;
168 tx_ring->next_to_clean = 0;
170 if (!tx_ring->netdev)
173 /* cleanup Tx queue statistics */
174 netdev_tx_reset_queue(txring_txq(tx_ring));
178 * ice_free_tx_ring - Free Tx resources per queue
179 * @tx_ring: Tx descriptor ring for a specific queue
181 * Free all transmit software resources
183 void ice_free_tx_ring(struct ice_ring *tx_ring)
185 ice_clean_tx_ring(tx_ring);
186 devm_kfree(tx_ring->dev, tx_ring->tx_buf);
187 tx_ring->tx_buf = NULL;
190 dmam_free_coherent(tx_ring->dev, tx_ring->size,
191 tx_ring->desc, tx_ring->dma);
192 tx_ring->desc = NULL;
197 * ice_clean_tx_irq - Reclaim resources after transmit completes
198 * @tx_ring: Tx ring to clean
199 * @napi_budget: Used to determine if we are in netpoll
201 * Returns true if there's any budget left (i.e. the clean is finished)
203 static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
205 unsigned int total_bytes = 0, total_pkts = 0;
206 unsigned int budget = ICE_DFLT_IRQ_WORK;
207 struct ice_vsi *vsi = tx_ring->vsi;
208 s16 i = tx_ring->next_to_clean;
209 struct ice_tx_desc *tx_desc;
210 struct ice_tx_buf *tx_buf;
212 tx_buf = &tx_ring->tx_buf[i];
213 tx_desc = ICE_TX_DESC(tx_ring, i);
216 prefetch(&vsi->state);
219 struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
221 /* if next_to_watch is not set then there is no work pending */
225 smp_rmb(); /* prevent any other reads prior to eop_desc */
227 /* if the descriptor isn't done, no work yet to do */
228 if (!(eop_desc->cmd_type_offset_bsz &
229 cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
232 /* clear next_to_watch to prevent false hangs */
233 tx_buf->next_to_watch = NULL;
235 /* update the statistics for this packet */
236 total_bytes += tx_buf->bytecount;
237 total_pkts += tx_buf->gso_segs;
239 if (ice_ring_is_xdp(tx_ring))
240 page_frag_free(tx_buf->raw_buf);
243 napi_consume_skb(tx_buf->skb, napi_budget);
245 /* unmap skb header data */
246 dma_unmap_single(tx_ring->dev,
247 dma_unmap_addr(tx_buf, dma),
248 dma_unmap_len(tx_buf, len),
251 /* clear tx_buf data */
253 dma_unmap_len_set(tx_buf, len, 0);
255 /* unmap remaining buffers */
256 while (tx_desc != eop_desc) {
262 tx_buf = tx_ring->tx_buf;
263 tx_desc = ICE_TX_DESC(tx_ring, 0);
266 /* unmap any remaining paged data */
267 if (dma_unmap_len(tx_buf, len)) {
268 dma_unmap_page(tx_ring->dev,
269 dma_unmap_addr(tx_buf, dma),
270 dma_unmap_len(tx_buf, len),
272 dma_unmap_len_set(tx_buf, len, 0);
276 /* move us one more past the eop_desc for start of next pkt */
282 tx_buf = tx_ring->tx_buf;
283 tx_desc = ICE_TX_DESC(tx_ring, 0);
288 /* update budget accounting */
290 } while (likely(budget));
293 tx_ring->next_to_clean = i;
295 ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
297 if (ice_ring_is_xdp(tx_ring))
300 netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
303 #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
304 if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
305 (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
306 /* Make sure that anybody stopping the queue after this
307 * sees the new next_to_clean.
310 if (__netif_subqueue_stopped(tx_ring->netdev,
312 !test_bit(ICE_VSI_DOWN, vsi->state)) {
313 netif_wake_subqueue(tx_ring->netdev,
315 ++tx_ring->tx_stats.restart_q;
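/* Note on the two budgets used above: the local 'budget' (ICE_DFLT_IRQ_WORK)
 * bounds how many completed packets are reclaimed per invocation, while
 * 'napi_budget' is only forwarded to napi_consume_skb() so that skbs are
 * bulk-freed during normal NAPI polling and freed immediately when this is
 * called from netpoll context (budget of 0).
 */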
323 * ice_setup_tx_ring - Allocate the Tx descriptors
324 * @tx_ring: the Tx ring to set up
326 * Return 0 on success, negative on error
328 int ice_setup_tx_ring(struct ice_ring *tx_ring)
330 struct device *dev = tx_ring->dev;
335 /* warn if we are about to overwrite the pointer */
336 WARN_ON(tx_ring->tx_buf);
338 devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count,
340 if (!tx_ring->tx_buf)
343 /* round up to nearest page */
344 tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
346 tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
348 if (!tx_ring->desc) {
349 dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
354 tx_ring->next_to_use = 0;
355 tx_ring->next_to_clean = 0;
356 tx_ring->tx_stats.prev_pkt = -1;
360 devm_kfree(dev, tx_ring->tx_buf);
361 tx_ring->tx_buf = NULL;
366 * ice_clean_rx_ring - Free Rx buffers
367 * @rx_ring: ring to be cleaned
369 void ice_clean_rx_ring(struct ice_ring *rx_ring)
371 struct device *dev = rx_ring->dev;
374 /* ring already cleared, nothing to do */
375 if (!rx_ring->rx_buf)
379 dev_kfree_skb(rx_ring->skb);
383 if (rx_ring->xsk_pool) {
384 ice_xsk_clean_rx_ring(rx_ring);
388 /* Free all the Rx ring sk_buffs */
389 for (i = 0; i < rx_ring->count; i++) {
390 struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
395 /* Invalidate cache lines that may have been written to by
396 * device so that we avoid corrupting memory.
398 dma_sync_single_range_for_cpu(dev, rx_buf->dma,
403 /* free resources associated with mapping */
404 dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
405 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
406 __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
409 rx_buf->page_offset = 0;
413 memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
415 /* Zero out the descriptor ring */
416 memset(rx_ring->desc, 0, rx_ring->size);
418 rx_ring->next_to_alloc = 0;
419 rx_ring->next_to_clean = 0;
420 rx_ring->next_to_use = 0;
424 * ice_free_rx_ring - Free Rx resources
425 * @rx_ring: ring to clean the resources from
427 * Free all receive software resources
429 void ice_free_rx_ring(struct ice_ring *rx_ring)
431 ice_clean_rx_ring(rx_ring);
432 if (rx_ring->vsi->type == ICE_VSI_PF)
433 if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
434 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
435 rx_ring->xdp_prog = NULL;
436 devm_kfree(rx_ring->dev, rx_ring->rx_buf);
437 rx_ring->rx_buf = NULL;
440 dmam_free_coherent(rx_ring->dev, rx_ring->size,
441 rx_ring->desc, rx_ring->dma);
442 rx_ring->desc = NULL;
447 * ice_setup_rx_ring - Allocate the Rx descriptors
448 * @rx_ring: the Rx ring to set up
450 * Return 0 on success, negative on error
452 int ice_setup_rx_ring(struct ice_ring *rx_ring)
454 struct device *dev = rx_ring->dev;
459 /* warn if we are about to overwrite the pointer */
460 WARN_ON(rx_ring->rx_buf);
462 devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count,
464 if (!rx_ring->rx_buf)
467 /* round up to nearest page */
468 rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
470 rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
472 if (!rx_ring->desc) {
473 dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
478 rx_ring->next_to_use = 0;
479 rx_ring->next_to_clean = 0;
481 if (ice_is_xdp_ena_vsi(rx_ring->vsi))
482 WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
484 if (rx_ring->vsi->type == ICE_VSI_PF &&
485 !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
486 if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
487 rx_ring->q_index, rx_ring->q_vector->napi.napi_id))
492 devm_kfree(dev, rx_ring->rx_buf);
493 rx_ring->rx_buf = NULL;
498 ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size)
500 unsigned int truesize;
502 #if (PAGE_SIZE < 8192)
503 truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
505 truesize = rx_ring->rx_offset ?
506 SKB_DATA_ALIGN(rx_ring->rx_offset + size) +
507 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
508 SKB_DATA_ALIGN(size);
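/* Illustrative values (assuming 4 KB pages and a 2048-byte Rx buffer): each
 * buffer owns half a page, so truesize is a fixed 2048 regardless of frame
 * length. On larger pages (e.g. 64 KB) truesize instead tracks the actual
 * frame: rx_offset + size plus the skb_shared_info overhead, all cache-line
 * aligned, or just the aligned size when the ring uses no offset.
 */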
514 * ice_run_xdp - Executes an XDP program on initialized xdp_buff
516 * @xdp: xdp_buff used as input to the XDP program
517 * @xdp_prog: XDP program to run
519 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
522 ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
523 struct bpf_prog *xdp_prog)
525 struct ice_ring *xdp_ring;
529 act = bpf_prog_run_xdp(xdp_prog, xdp);
534 xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
535 result = ice_xmit_xdp_buff(xdp, xdp_ring);
536 if (result == ICE_XDP_CONSUMED)
540 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
543 return ICE_XDP_REDIR;
545 bpf_warn_invalid_xdp_action(act);
549 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
552 return ICE_XDP_CONSUMED;
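/* Summary of the verdict handling above: XDP_PASS maps to ICE_XDP_PASS and
 * the frame continues down the normal skb path; XDP_TX is sent out on the
 * per-CPU XDP Tx ring (falling back to ICE_XDP_CONSUMED if the transmit
 * fails); XDP_REDIRECT becomes ICE_XDP_REDIR via xdp_do_redirect(); any
 * other verdict is treated as a drop and returns ICE_XDP_CONSUMED.
 */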
557 * ice_xdp_xmit - submit packets to XDP ring for transmission
559 * @n: number of XDP frames to be transmitted
560 * @frames: XDP frames to be transmitted
561 * @flags: transmit flags
563 * Returns number of frames successfully sent. Failed frames
564 * will be freed by the XDP core.
565 * For error cases, a negative errno code is returned and no frames
566 * are transmitted (caller must handle freeing frames).
569 ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
572 struct ice_netdev_priv *np = netdev_priv(dev);
573 unsigned int queue_index = smp_processor_id();
574 struct ice_vsi *vsi = np->vsi;
575 struct ice_ring *xdp_ring;
578 if (test_bit(ICE_VSI_DOWN, vsi->state))
581 if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
584 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
587 xdp_ring = vsi->xdp_rings[queue_index];
588 for (i = 0; i < n; i++) {
589 struct xdp_frame *xdpf = frames[i];
592 err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
593 if (err != ICE_XDP_TX)
598 if (unlikely(flags & XDP_XMIT_FLUSH))
599 ice_xdp_ring_update_tail(xdp_ring);
605 * ice_alloc_mapped_page - recycle or make a new page
606 * @rx_ring: ring to use
607 * @bi: rx_buf struct to modify
609 * Returns true if the page was successfully allocated or reused
613 ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
615 struct page *page = bi->page;
618 /* since we are recycling buffers we should seldom need to alloc */
622 /* alloc new page for storage */
623 page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
624 if (unlikely(!page)) {
625 rx_ring->rx_stats.alloc_page_failed++;
629 /* map page for use */
630 dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
631 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
633 /* if mapping failed free memory back to system since
634 * there isn't much point in holding memory we can't use
636 if (dma_mapping_error(rx_ring->dev, dma)) {
637 __free_pages(page, ice_rx_pg_order(rx_ring));
638 rx_ring->rx_stats.alloc_page_failed++;
644 bi->page_offset = rx_ring->rx_offset;
645 page_ref_add(page, USHRT_MAX - 1);
646 bi->pagecnt_bias = USHRT_MAX;
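/* The USHRT_MAX reference trick above: the driver takes roughly 64K page
 * references up front (page_ref_add(page, USHRT_MAX - 1)) and mirrors them
 * in pagecnt_bias. The hot path then only adjusts the local bias instead of
 * performing an atomic page refcount operation per frame; the bias is
 * replenished in ice_can_reuse_rx_page() once it runs low.
 */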
652 * ice_alloc_rx_bufs - Replace used receive buffers
653 * @rx_ring: ring to place buffers on
654 * @cleaned_count: number of buffers to replace
656 * Returns false if all allocations were successful, true if any fail. Returning
657 * true signals to the caller that we didn't replace cleaned_count buffers and
658 * there is more work to do.
660 * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
661 * buffers. Then bump tail at most one time. Grouping like this lets us avoid
662 * multiple tail writes per call.
664 bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
666 union ice_32b_rx_flex_desc *rx_desc;
667 u16 ntu = rx_ring->next_to_use;
668 struct ice_rx_buf *bi;
670 /* do nothing if no valid netdev defined */
671 if ((!rx_ring->netdev && rx_ring->vsi->type != ICE_VSI_CTRL) ||
675 /* get the Rx descriptor and buffer based on next_to_use */
676 rx_desc = ICE_RX_DESC(rx_ring, ntu);
677 bi = &rx_ring->rx_buf[ntu];
680 /* if we fail here, we have work remaining */
681 if (!ice_alloc_mapped_page(rx_ring, bi))
684 /* sync the buffer for use by the device */
685 dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
690 /* Refresh the desc even if buffer_addrs didn't change
691 * because each write-back erases this info.
693 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
698 if (unlikely(ntu == rx_ring->count)) {
699 rx_desc = ICE_RX_DESC(rx_ring, 0);
700 bi = rx_ring->rx_buf;
704 /* clear the status bits for the next_to_use descriptor */
705 rx_desc->wb.status_error0 = 0;
708 } while (cleaned_count);
710 if (rx_ring->next_to_use != ntu)
711 ice_release_rx_desc(rx_ring, ntu);
713 return !!cleaned_count;
717 * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
718 * @rx_buf: Rx buffer to adjust
719 * @size: Size of adjustment
721 * Update the offset within the page so that the Rx buffer is ready to be reused.
722 * For systems with PAGE_SIZE < 8192 this function will flip the page offset
723 * so that the second half of the page assigned to the Rx buffer will be used;
724 * otherwise the offset is moved by "size" bytes
727 ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
729 #if (PAGE_SIZE < 8192)
730 /* flip page offset to other buffer */
731 rx_buf->page_offset ^= size;
733 /* move offset up to the next cache line */
734 rx_buf->page_offset += size;
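/* Worked example: with 4 KB pages the buffer truesize is 2048, so
 * page_offset ^= 2048 simply toggles between offset 0 and offset 2048,
 * alternating between the two halves of the page. On larger pages the
 * offset just advances by the truesize of the frame that was consumed.
 */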
739 * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
740 * @rx_buf: buffer containing the page
741 * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
743 * If page is reusable, we have a green light for calling ice_reuse_rx_page,
744 * which will assign the current buffer to the buffer that next_to_alloc is
745 * pointing to; otherwise, the DMA mapping needs to be destroyed and the page freed
749 ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
751 unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
752 struct page *page = rx_buf->page;
754 /* avoid re-using remote and pfmemalloc pages */
755 if (!dev_page_is_reusable(page))
758 #if (PAGE_SIZE < 8192)
759 /* if we are only owner of page we can reuse it */
760 if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
763 #define ICE_LAST_OFFSET \
764 (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
765 if (rx_buf->page_offset > ICE_LAST_OFFSET)
767 #endif /* PAGE_SIZE < 8192 */
769 /* If we have drained the page fragment pool we need to update
770 * the pagecnt_bias and page count so that we fully restock the
771 * number of references the driver holds.
773 if (unlikely(pagecnt_bias == 1)) {
774 page_ref_add(page, USHRT_MAX - 1);
775 rx_buf->pagecnt_bias = USHRT_MAX;
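/* Recap of the reuse rules checked above: the page must be local and not a
 * pfmemalloc page; with 4 KB pages the driver must be the only holder of the
 * page (page count minus pagecnt_bias no greater than 1); with larger pages
 * reuse stops once page_offset advances past ICE_LAST_OFFSET. When the bias
 * is nearly drained, the page reference count is restocked as shown above.
 */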
782 * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
783 * @rx_ring: Rx descriptor ring to transact packets on
784 * @rx_buf: buffer containing page to add
785 * @skb: sk_buff to place the data into
786 * @size: packet length from rx_desc
788 * This function will add the data contained in rx_buf->page to the skb
789 * by attaching the page as a fragment to the skb.
790 * The function will then update the page offset.
793 ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
794 struct sk_buff *skb, unsigned int size)
796 #if (PAGE_SIZE >= 8192)
797 unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
799 unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
804 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
805 rx_buf->page_offset, size, truesize);
807 /* page is being used so we must update the page offset */
808 ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
812 * ice_reuse_rx_page - page flip buffer and store it back on the ring
813 * @rx_ring: Rx descriptor ring to store buffers on
814 * @old_buf: donor buffer to have page reused
816 * Synchronizes page for reuse by the adapter
819 ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
821 u16 nta = rx_ring->next_to_alloc;
822 struct ice_rx_buf *new_buf;
824 new_buf = &rx_ring->rx_buf[nta];
826 /* update, and store next to alloc */
828 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
830 /* Transfer page from old buffer to new buffer.
831 * Move each member individually to avoid possible store
832 * forwarding stalls and unnecessary copy of skb.
834 new_buf->dma = old_buf->dma;
835 new_buf->page = old_buf->page;
836 new_buf->page_offset = old_buf->page_offset;
837 new_buf->pagecnt_bias = old_buf->pagecnt_bias;
841 * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
842 * @rx_ring: Rx descriptor ring to transact packets on
843 * @size: size of buffer to add to skb
844 * @rx_buf_pgcnt: rx_buf page refcount
846 * This function will pull an Rx buffer from the ring and synchronize it
847 * for use by the CPU.
849 static struct ice_rx_buf *
850 ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size,
853 struct ice_rx_buf *rx_buf;
855 rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
857 #if (PAGE_SIZE < 8192)
858 page_count(rx_buf->page);
862 prefetchw(rx_buf->page);
866 /* we are reusing so sync this buffer for CPU use */
867 dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
868 rx_buf->page_offset, size,
871 /* We have pulled a buffer for use, so decrement pagecnt_bias */
872 rx_buf->pagecnt_bias--;
878 * ice_build_skb - Build skb around an existing buffer
879 * @rx_ring: Rx descriptor ring to transact packets on
880 * @rx_buf: Rx buffer to pull data from
881 * @xdp: xdp_buff pointing to the data
883 * This function builds an skb around an existing Rx buffer, taking care
884 * to set up the skb correctly and avoid any memcpy overhead.
886 static struct sk_buff *
887 ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
888 struct xdp_buff *xdp)
890 u8 metasize = xdp->data - xdp->data_meta;
891 #if (PAGE_SIZE < 8192)
892 unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
894 unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
895 SKB_DATA_ALIGN(xdp->data_end -
896 xdp->data_hard_start);
900 /* Prefetch first cache line of first page. If xdp->data_meta
901 * is unused, this points exactly to xdp->data, otherwise we
902 * likely have a consumer accessing first few bytes of meta
903 * data, and then actual data.
905 net_prefetch(xdp->data_meta);
906 /* build an skb around the page buffer */
907 skb = build_skb(xdp->data_hard_start, truesize);
911 /* we must record the Rx queue, otherwise OS features such as
912 * symmetric queues won't work
914 skb_record_rx_queue(skb, rx_ring->q_index);
916 /* update pointers within the skb to store the data */
917 skb_reserve(skb, xdp->data - xdp->data_hard_start);
918 __skb_put(skb, xdp->data_end - xdp->data);
920 skb_metadata_set(skb, metasize);
922 /* buffer is used by skb, update page_offset */
923 ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
929 * ice_construct_skb - Allocate skb and populate it
930 * @rx_ring: Rx descriptor ring to transact packets on
931 * @rx_buf: Rx buffer to pull data from
932 * @xdp: xdp_buff pointing to the data
934 * This function allocates an skb. It then populates it with the page
935 * data from the current receive descriptor, taking care to set up the skb correctly
938 static struct sk_buff *
939 ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
940 struct xdp_buff *xdp)
942 unsigned int size = xdp->data_end - xdp->data;
943 unsigned int headlen;
946 /* prefetch first cache line of first page */
947 net_prefetch(xdp->data);
949 /* allocate a skb to store the frags */
950 skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
951 GFP_ATOMIC | __GFP_NOWARN);
955 skb_record_rx_queue(skb, rx_ring->q_index);
956 /* Determine available headroom for copy */
958 if (headlen > ICE_RX_HDR_SIZE)
959 headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
961 /* align pull length to size of long to optimize memcpy performance */
962 memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
965 /* if we exhaust the linear part then add what is left as a frag */
968 #if (PAGE_SIZE >= 8192)
969 unsigned int truesize = SKB_DATA_ALIGN(size);
971 unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
973 skb_add_rx_frag(skb, 0, rx_buf->page,
974 rx_buf->page_offset + headlen, size, truesize);
975 /* buffer is used by skb, update page_offset */
976 ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
978 /* buffer is unused, reset bias back to rx_buf; data was copied
979 * onto skb's linear part so there's no need for adjusting
980 * page offset and we can reuse this buffer as-is
982 rx_buf->pagecnt_bias++;
989 * ice_put_rx_buf - Clean up used buffer and either recycle or free
990 * @rx_ring: Rx descriptor ring to transact packets on
991 * @rx_buf: Rx buffer to pull data from
992 * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
994 * This function will update next_to_clean and then clean up the contents
995 * of the rx_buf. It will either recycle the buffer or unmap it and free
996 * the associated resources.
999 ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
1002 u16 ntc = rx_ring->next_to_clean + 1;
1004 /* fetch, update, and store next to clean */
1005 ntc = (ntc < rx_ring->count) ? ntc : 0;
1006 rx_ring->next_to_clean = ntc;
1011 if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
1012 /* hand second half of page back to the ring */
1013 ice_reuse_rx_page(rx_ring, rx_buf);
1015 /* we are not reusing the buffer so unmap it */
1016 dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
1017 ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1019 __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
1022 /* clear contents of buffer_info */
1023 rx_buf->page = NULL;
1027 * ice_is_non_eop - process handling of non-EOP buffers
1028 * @rx_ring: Rx ring being processed
1029 * @rx_desc: Rx descriptor for current buffer
1031 * If the buffer is an EOP buffer, this function exits returning false,
1032 * otherwise return true indicating that this is in fact a non-EOP buffer.
1035 ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
1037 /* if we are the last buffer then there is nothing else to do */
1038 #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
1039 if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
1042 rx_ring->rx_stats.non_eop_descs++;
1048 * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
1049 * @rx_ring: Rx descriptor ring to transact packets on
1050 * @budget: Total limit on number of packets to process
1052 * This function provides a "bounce buffer" approach to Rx interrupt
1053 * processing. The advantage to this is that on systems that have
1054 * expensive overhead for IOMMU access this provides a means of avoiding
1055 * it by maintaining the mapping of the page to the system.
1057 * Returns amount of work completed
1059 int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
1061 unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0;
1062 u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
1063 unsigned int offset = rx_ring->rx_offset;
1064 unsigned int xdp_res, xdp_xmit = 0;
1065 struct sk_buff *skb = rx_ring->skb;
1066 struct bpf_prog *xdp_prog = NULL;
1067 struct xdp_buff xdp;
1070 /* Frame size depends on rx_ring setup when PAGE_SIZE=4K */
1071 #if (PAGE_SIZE < 8192)
1072 frame_sz = ice_rx_frame_truesize(rx_ring, 0);
1074 xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
1076 /* start the loop to process Rx packets bounded by 'budget' */
1077 while (likely(total_rx_pkts < (unsigned int)budget)) {
1078 union ice_32b_rx_flex_desc *rx_desc;
1079 struct ice_rx_buf *rx_buf;
1080 unsigned char *hard_start;
1087 /* get the Rx desc from Rx ring based on 'next_to_clean' */
1088 rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
1090 /* status_error_len will always be zero for unused descriptors
1091 * because it's cleared in cleanup, and overlaps with hdr_addr
1092 * which is always zero because packet split isn't used. If the
1093 * hardware wrote DD then it will be non-zero
1095 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
1096 if (!ice_test_staterr(rx_desc, stat_err_bits))
1099 /* This memory barrier is needed to keep us from reading
1100 * any other fields out of the rx_desc until we know the descriptor has been written back
1105 if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
1106 struct ice_vsi *ctrl_vsi = rx_ring->vsi;
1108 if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
1109 ctrl_vsi->vf_id != ICE_INVAL_VFID)
1110 ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
1111 ice_put_rx_buf(rx_ring, NULL, 0);
1116 size = le16_to_cpu(rx_desc->wb.pkt_len) &
1117 ICE_RX_FLX_DESC_PKT_LEN_M;
1119 /* retrieve a buffer from the ring */
1120 rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt);
1124 xdp.data_end = NULL;
1125 xdp.data_hard_start = NULL;
1126 xdp.data_meta = NULL;
1130 hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
1132 xdp_prepare_buff(&xdp, hard_start, offset, size, true);
1133 #if (PAGE_SIZE > 4096)
1134 /* At larger PAGE_SIZE, frame_sz depends on the frame length */
1135 xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
1139 xdp_prog = READ_ONCE(rx_ring->xdp_prog);
1145 xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
1149 if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
1150 xdp_xmit |= xdp_res;
1151 ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
1153 rx_buf->pagecnt_bias++;
1155 total_rx_bytes += size;
1159 ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
1163 ice_add_rx_frag(rx_ring, rx_buf, skb, size);
1164 } else if (likely(xdp.data)) {
1165 if (ice_ring_uses_build_skb(rx_ring))
1166 skb = ice_build_skb(rx_ring, rx_buf, &xdp);
1168 skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
1170 /* exit if we failed to retrieve a buffer */
1172 rx_ring->rx_stats.alloc_buf_failed++;
1174 rx_buf->pagecnt_bias++;
1178 ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
1181 /* skip if it is NOP desc */
1182 if (ice_is_non_eop(rx_ring, rx_desc))
1185 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
1186 if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
1187 dev_kfree_skb_any(skb);
1191 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
1192 if (ice_test_staterr(rx_desc, stat_err_bits))
1193 vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
1195 /* pad the skb if needed, to make a valid ethernet frame */
1196 if (eth_skb_pad(skb)) {
1201 /* probably a little skewed due to removing CRC */
1202 total_rx_bytes += skb->len;
1204 /* populate checksum, VLAN, and protocol */
1205 rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
1206 ICE_RX_FLEX_DESC_PTYPE_M;
1208 ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
1210 /* send completed skb up the stack */
1211 ice_receive_skb(rx_ring, skb, vlan_tag);
1214 /* update budget accounting */
1218 /* return up to cleaned_count buffers to hardware */
1219 failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
1222 ice_finalize_xdp_rx(rx_ring, xdp_xmit);
1225 ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
1227 /* guarantee a trip back through this routine if there was a failure */
1228 return failure ? budget : (int)total_rx_pkts;
1232 * ice_net_dim - Update net DIM algorithm
1233 * @q_vector: the vector associated with the interrupt
1235 * Create a DIM sample and notify net_dim() so that it can possibly decide
1236 * a new ITR value based on incoming packets, bytes, and interrupts.
1238 * This function is a no-op if the ring is not configured for dynamic ITR.
1240 static void ice_net_dim(struct ice_q_vector *q_vector)
1242 struct ice_ring_container *tx = &q_vector->tx;
1243 struct ice_ring_container *rx = &q_vector->rx;
1245 if (ITR_IS_DYNAMIC(tx)) {
1246 struct dim_sample dim_sample = {};
1247 u64 packets = 0, bytes = 0;
1248 struct ice_ring *ring;
1250 ice_for_each_ring(ring, q_vector->tx) {
1251 packets += ring->stats.pkts;
1252 bytes += ring->stats.bytes;
1255 dim_update_sample(q_vector->total_events, packets, bytes,
1258 net_dim(&tx->dim, dim_sample);
1261 if (ITR_IS_DYNAMIC(rx)) {
1262 struct dim_sample dim_sample = {};
1263 u64 packets = 0, bytes = 0;
1264 struct ice_ring *ring;
1266 ice_for_each_ring(ring, q_vector->rx) {
1267 packets += ring->stats.pkts;
1268 bytes += ring->stats.bytes;
1271 dim_update_sample(q_vector->total_events, packets, bytes,
1274 net_dim(&rx->dim, dim_sample);
1279 * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
1280 * @itr_idx: interrupt throttling index
1281 * @itr: interrupt throttling value in usecs
1283 static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
1285 /* The ITR value is reported in microseconds, and the register value is
1286 * recorded in 2 microsecond units. For this reason we only need to
1287 * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
1288 * granularity as a shift instead of division. The mask makes sure the
1289 * ITR value is never odd so we don't accidentally write into the field
1290 * prior to the ITR field.
1292 itr &= ICE_ITR_MASK;
1294 return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
1295 (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
1296 (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
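/* Illustrative example (assuming the 2 usec register granularity described
 * above): an ITR of 50 usecs is first masked to an even value and then ends
 * up as 25 in the interval field, alongside the INTENA and CLEARPBA bits and
 * the requested ITR index.
 */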
1300 * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt
1301 * @q_vector: the vector associated with the interrupt to enable
1303 * Update the net_dim() algorithm and re-enable the interrupt associated with this vector.
1306 * If the VSI is down, the interrupt will not be re-enabled.
1308 static void ice_update_ena_itr(struct ice_q_vector *q_vector)
1310 struct ice_vsi *vsi = q_vector->vsi;
1311 bool wb_en = q_vector->wb_on_itr;
1314 if (test_bit(ICE_DOWN, vsi->state))
1317 /* When exiting WB_ON_ITR, let ITR resume its normal
1318 * interrupts-enabled path.
1321 q_vector->wb_on_itr = false;
1323 /* This will do nothing if dynamic updates are not enabled. */
1324 ice_net_dim(q_vector);
1326 /* net_dim() updates ITR out-of-band using a work item */
1327 itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
1328 /* trigger an immediate software interrupt when exiting
1329 * busy poll, to make sure to catch any pending cleanups
1330 * that might have been missed due to interrupt state transition.
1334 itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
1335 GLINT_DYN_CTL_SW_ITR_INDX_M |
1336 GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
1338 wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
1342 * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
1343 * @q_vector: q_vector to set WB_ON_ITR on
1345 * We need to tell hardware to write-back completed descriptors even when
1346 * interrupts are disabled. Descriptors will be written back on cache line
1347 * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
1348 * descriptors may not be written back if they don't fill a cache line until
1349 * the next interrupt.
1351 * This sets the write-back frequency to whatever was set previously for the
1352 * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
1353 * aren't meddling with the INTENA_M bit.
1355 static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
1357 struct ice_vsi *vsi = q_vector->vsi;
1359 /* already in wb_on_itr mode no need to change it */
1360 if (q_vector->wb_on_itr)
1363 /* use previously set ITR values for all of the ITR indices by
1364 * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
1365 * be static in non-adaptive mode (user configured)
1367 wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
1368 ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) &
1369 GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
1370 GLINT_DYN_CTL_WB_ON_ITR_M);
1372 q_vector->wb_on_itr = true;
1376 * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
1377 * @napi: napi struct with our devices info in it
1378 * @budget: amount of work driver is allowed to do this pass, in packets
1380 * This function will clean all queues associated with a q_vector.
1382 * Returns the amount of work done
1384 int ice_napi_poll(struct napi_struct *napi, int budget)
1386 struct ice_q_vector *q_vector =
1387 container_of(napi, struct ice_q_vector, napi);
1388 bool clean_complete = true;
1389 struct ice_ring *ring;
1390 int budget_per_ring;
1393 /* Since the actual Tx work is minimal, we can give the Tx a larger
1394 * budget and be more aggressive about cleaning up the Tx descriptors.
1396 ice_for_each_ring(ring, q_vector->tx) {
1397 bool wd = ring->xsk_pool ?
1398 ice_clean_tx_irq_zc(ring, budget) :
1399 ice_clean_tx_irq(ring, budget);
1402 clean_complete = false;
1405 /* Handle case where we are called by netpoll with a budget of 0 */
1406 if (unlikely(budget <= 0))
1409 /* normally we have 1 Rx ring per q_vector */
1410 if (unlikely(q_vector->num_ring_rx > 1))
1411 /* We attempt to distribute budget to each Rx queue fairly, but
1412 * don't allow the budget to go below 1 because that would exit polling early.
1415 budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1);
1417 /* Max of 1 Rx ring in this q_vector so give it the budget */
1418 budget_per_ring = budget;
1420 ice_for_each_ring(ring, q_vector->rx) {
1423 /* A dedicated path for zero-copy allows making a single
1424 * comparison in the irq context instead of many inside the
1425 * ice_clean_rx_irq function and makes the codebase cleaner.
1427 cleaned = ring->xsk_pool ?
1428 ice_clean_rx_irq_zc(ring, budget_per_ring) :
1429 ice_clean_rx_irq(ring, budget_per_ring);
1430 work_done += cleaned;
1431 /* if we clean as many as budgeted, we must not be done */
1432 if (cleaned >= budget_per_ring)
1433 clean_complete = false;
1436 /* If work is not completed, return the full budget so polling will continue */
1437 if (!clean_complete) {
1438 /* Set the writeback on ITR so partial completions of
1439 * cache-lines will still continue even if we're polling.
1441 ice_set_wb_on_itr(q_vector);
1445 /* Exit the polling mode, but don't re-enable interrupts if stack might
1446 * poll us due to busy-polling
1448 if (likely(napi_complete_done(napi, work_done)))
1449 ice_update_ena_itr(q_vector);
1451 ice_set_wb_on_itr(q_vector);
1453 return min_t(int, work_done, budget - 1);
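/* The min_t(int, work_done, budget - 1) above is deliberate: once
 * napi_complete_done() has been called we must report strictly less than
 * the full budget, since returning 'budget' tells the NAPI core that more
 * work remains and that it should keep polling this vector.
 */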
1457 * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
1458 * @tx_ring: the ring to be checked
1459 * @size: the number of Tx descriptors that must be available
1461 * Returns -EBUSY if a stop is needed, else 0
1463 static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
1465 netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);
1466 /* Memory barrier before checking head and tail */
1469 /* Check again in a case another CPU has just made room available. */
1470 if (likely(ICE_DESC_UNUSED(tx_ring) < size))
1473 /* A reprieve! - use start_subqueue because it doesn't call schedule */
1474 netif_start_subqueue(tx_ring->netdev, tx_ring->q_index);
1475 ++tx_ring->tx_stats.restart_q;
1480 * ice_maybe_stop_tx - 1st level check for Tx stop conditions
1481 * @tx_ring: the ring to be checked
1482 * @size: the number of Tx descriptors that must be available
1484 * Returns 0 if stop is not needed
1486 static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
1488 if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
1491 return __ice_maybe_stop_tx(tx_ring, size);
1495 * ice_tx_map - Build the Tx descriptor
1496 * @tx_ring: ring to send buffer on
1497 * @first: first buffer info buffer to use
1498 * @off: pointer to struct that holds offload parameters
1500 * This function loops over the skb data pointed to by *first
1501 * and gets a physical address for each memory location and programs
1502 * it and the length into the transmit descriptor.
1505 ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
1506 struct ice_tx_offload_params *off)
1508 u64 td_offset, td_tag, td_cmd;
1509 u16 i = tx_ring->next_to_use;
1510 unsigned int data_len, size;
1511 struct ice_tx_desc *tx_desc;
1512 struct ice_tx_buf *tx_buf;
1513 struct sk_buff *skb;
1517 td_tag = off->td_l2tag1;
1518 td_cmd = off->td_cmd;
1519 td_offset = off->td_offset;
1522 data_len = skb->data_len;
1523 size = skb_headlen(skb);
1525 tx_desc = ICE_TX_DESC(tx_ring, i);
1527 if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
1528 td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
1529 td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
1530 ICE_TX_FLAGS_VLAN_S;
1533 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1537 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1538 unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
1540 if (dma_mapping_error(tx_ring->dev, dma))
1543 /* record length, and DMA address */
1544 dma_unmap_len_set(tx_buf, len, size);
1545 dma_unmap_addr_set(tx_buf, dma, dma);
1547 /* align size to end of page */
1548 max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
1549 tx_desc->buf_addr = cpu_to_le64(dma);
1551 /* account for data chunks larger than the hardware can handle
1554 while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
1555 tx_desc->cmd_type_offset_bsz =
1556 ice_build_ctob(td_cmd, td_offset, max_data,
1562 if (i == tx_ring->count) {
1563 tx_desc = ICE_TX_DESC(tx_ring, 0);
1570 max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
1571 tx_desc->buf_addr = cpu_to_le64(dma);
1574 if (likely(!data_len))
1577 tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
1583 if (i == tx_ring->count) {
1584 tx_desc = ICE_TX_DESC(tx_ring, 0);
1588 size = skb_frag_size(frag);
1591 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
1594 tx_buf = &tx_ring->tx_buf[i];
1597 /* record bytecount for BQL */
1598 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1600 /* record SW timestamp if HW timestamp is not available */
1601 skb_tx_timestamp(first->skb);
1604 if (i == tx_ring->count)
1607 /* write last descriptor with RS and EOP bits */
1608 td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
1609 tx_desc->cmd_type_offset_bsz =
1610 ice_build_ctob(td_cmd, td_offset, size, td_tag);
1612 /* Force memory writes to complete before letting h/w know there
1613 * are new descriptors to fetch.
1615 * We also use this memory barrier to make certain all of the
1616 * status bits have been updated before next_to_watch is written.
1620 /* set next_to_watch value indicating a packet is present */
1621 first->next_to_watch = tx_desc;
1623 tx_ring->next_to_use = i;
1625 ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
1627 /* notify HW of packet */
1628 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
1629 writel(i, tx_ring->tail);
1634 /* clear DMA mappings for failed tx_buf map */
1636 tx_buf = &tx_ring->tx_buf[i];
1637 ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
1638 if (tx_buf == first)
1645 tx_ring->next_to_use = i;
1649 * ice_tx_csum - Enable Tx checksum offloads
1650 * @first: pointer to the first descriptor
1651 * @off: pointer to struct that holds offload parameters
1653 * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
1656 int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
1658 u32 l4_len = 0, l3_len = 0, l2_len = 0;
1659 struct sk_buff *skb = first->skb;
1669 __be16 frag_off, protocol;
1670 unsigned char *exthdr;
1671 u32 offset, cmd = 0;
1674 if (skb->ip_summed != CHECKSUM_PARTIAL)
1677 ip.hdr = skb_network_header(skb);
1678 l4.hdr = skb_transport_header(skb);
1680 /* compute outer L2 header size */
1681 l2_len = ip.hdr - skb->data;
1682 offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
1684 protocol = vlan_get_protocol(skb);
1686 if (protocol == htons(ETH_P_IP))
1687 first->tx_flags |= ICE_TX_FLAGS_IPV4;
1688 else if (protocol == htons(ETH_P_IPV6))
1689 first->tx_flags |= ICE_TX_FLAGS_IPV6;
1691 if (skb->encapsulation) {
1692 bool gso_ena = false;
1695 /* define outer network header type */
1696 if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
1697 tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ?
1698 ICE_TX_CTX_EIPT_IPV4 :
1699 ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
1700 l4_proto = ip.v4->protocol;
1701 } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
1704 tunnel |= ICE_TX_CTX_EIPT_IPV6;
1705 exthdr = ip.hdr + sizeof(*ip.v6);
1706 l4_proto = ip.v6->nexthdr;
1707 ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
1708 &l4_proto, &frag_off);
1713 /* define outer transport */
1716 tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
1717 first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
1720 tunnel |= ICE_TXD_CTX_GRE_TUNNELING;
1721 first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
1725 first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
1726 l4.hdr = skb_inner_network_header(skb);
1729 if (first->tx_flags & ICE_TX_FLAGS_TSO)
1732 skb_checksum_help(skb);
1736 /* compute outer L3 header size */
1737 tunnel |= ((l4.hdr - ip.hdr) / 4) <<
1738 ICE_TXD_CTX_QW0_EIPLEN_S;
1740 /* switch IP header pointer from outer to inner header */
1741 ip.hdr = skb_inner_network_header(skb);
1743 /* compute tunnel header size */
1744 tunnel |= ((ip.hdr - l4.hdr) / 2) <<
1745 ICE_TXD_CTX_QW0_NATLEN_S;
1747 gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
1748 /* indicate if we need to offload outer UDP header */
1749 if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena &&
1750 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
1751 tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M;
1753 /* record tunnel offload values */
1754 off->cd_tunnel_params |= tunnel;
1756 /* set DTYP=1 to indicate that it's a Tx context descriptor
1757 * in IPsec tunnel mode with Tx offloads in Quad word 1
1759 off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX;
1761 /* switch L4 header pointer from outer to inner */
1762 l4.hdr = skb_inner_transport_header(skb);
1765 /* reset type as we transition from outer to inner headers */
1766 first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6);
1767 if (ip.v4->version == 4)
1768 first->tx_flags |= ICE_TX_FLAGS_IPV4;
1769 if (ip.v6->version == 6)
1770 first->tx_flags |= ICE_TX_FLAGS_IPV6;
1773 /* Enable IP checksum offloads */
1774 if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
1775 l4_proto = ip.v4->protocol;
1776 /* the stack already computed the IP header checksum; the only time we
1777 * need the hardware to recompute it is in the case of TSO.
1779 if (first->tx_flags & ICE_TX_FLAGS_TSO)
1780 cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
1782 cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
1784 } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
1785 cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
1786 exthdr = ip.hdr + sizeof(*ip.v6);
1787 l4_proto = ip.v6->nexthdr;
1788 if (l4.hdr != exthdr)
1789 ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
1795 /* compute inner L3 header size */
1796 l3_len = l4.hdr - ip.hdr;
1797 offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;
1799 /* Enable L4 checksum offloads */
1802 /* enable TCP checksum offload */
1803 cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
1804 l4_len = l4.tcp->doff;
1805 offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
1808 /* enable UDP checksum offload */
1809 cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
1810 l4_len = (sizeof(struct udphdr) >> 2);
1811 offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
1814 /* enable SCTP checksum offload */
1815 cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
1816 l4_len = sizeof(struct sctphdr) >> 2;
1817 offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
1821 if (first->tx_flags & ICE_TX_FLAGS_TSO)
1823 skb_checksum_help(skb);
1828 off->td_offset |= offset;
1833 * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
1834 * @tx_ring: ring to send buffer on
1835 * @first: pointer to struct ice_tx_buf
1837 * Checks the skb and sets up the corresponding generic transmit flags
1838 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
1841 ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first)
1843 struct sk_buff *skb = first->skb;
1845 /* nothing left to do, software offloaded VLAN */
1846 if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
1849 /* currently, we always assume 802.1Q for VLAN insertion as VLAN
1850 * insertion for 802.1AD is not supported
1852 if (skb_vlan_tag_present(skb)) {
1853 first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
1854 first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
1857 ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
1861 * ice_tso - computes mss and TSO length to prepare for TSO
1862 * @first: pointer to struct ice_tx_buf
1863 * @off: pointer to struct that holds offload parameters
1865 * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
1868 int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
1870 struct sk_buff *skb = first->skb;
1881 u64 cd_mss, cd_tso_len;
1886 if (skb->ip_summed != CHECKSUM_PARTIAL)
1889 if (!skb_is_gso(skb))
1892 err = skb_cow_head(skb, 0);
1896 /* cppcheck-suppress unreadVariable */
1897 ip.hdr = skb_network_header(skb);
1898 l4.hdr = skb_transport_header(skb);
1900 /* initialize outer IP header fields */
1901 if (ip.v4->version == 4) {
1905 ip.v6->payload_len = 0;
1908 if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
1912 SKB_GSO_UDP_TUNNEL |
1913 SKB_GSO_UDP_TUNNEL_CSUM)) {
1914 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
1915 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
1918 /* determine offset of outer transport header */
1919 l4_start = (u8)(l4.hdr - skb->data);
1921 /* remove payload length from outer checksum */
1922 paylen = skb->len - l4_start;
1923 csum_replace_by_diff(&l4.udp->check,
1924 (__force __wsum)htonl(paylen));
1927 /* reset pointers to inner headers */
1929 /* cppcheck-suppress unreadVariable */
1930 ip.hdr = skb_inner_network_header(skb);
1931 l4.hdr = skb_inner_transport_header(skb);
1933 /* initialize inner IP header fields */
1934 if (ip.v4->version == 4) {
1938 ip.v6->payload_len = 0;
1942 /* determine offset of transport header */
1943 l4_start = (u8)(l4.hdr - skb->data);
1945 /* remove payload length from checksum */
1946 paylen = skb->len - l4_start;
1948 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
1949 csum_replace_by_diff(&l4.udp->check,
1950 (__force __wsum)htonl(paylen));
1951 /* compute length of UDP segmentation header */
1952 off->header_len = (u8)sizeof(struct udphdr) + l4_start;
1954 csum_replace_by_diff(&l4.tcp->check,
1955 (__force __wsum)htonl(paylen));
1956 /* compute length of TCP segmentation header */
1957 off->header_len = (u8)((l4.tcp->doff * 4) + l4_start);
1960 /* update gso_segs and bytecount */
1961 first->gso_segs = skb_shinfo(skb)->gso_segs;
1962 first->bytecount += (first->gso_segs - 1) * off->header_len;
1964 cd_tso_len = skb->len - off->header_len;
1965 cd_mss = skb_shinfo(skb)->gso_size;
1967 /* record cdesc_qw1 with TSO parameters */
1968 off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
1969 (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
1970 (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
1971 (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
1972 first->tx_flags |= ICE_TX_FLAGS_TSO;
1977 * ice_txd_use_count - estimate the number of descriptors needed for Tx
1978 * @size: transmit request size in bytes
1980 * Due to hardware alignment restrictions (4K alignment), we need to
1981 * assume that we can have no more than 12K of data per descriptor, even
1982 * though each descriptor can take up to 16K - 1 bytes of aligned memory.
1983 * Thus, we need to divide by 12K. But division is slow! Instead,
1984 * we decompose the operation into shifts and one relatively cheap
1985 * multiply operation.
1987 * To divide by 12K, we first divide by 4K, then divide by 3:
1988 * To divide by 4K, shift right by 12 bits
1989 * To divide by 3, multiply by 85, then divide by 256
1990 * (Divide by 256 is done by shifting right by 8 bits)
1991 * Finally, we add one to round up. Because 256 isn't an exact multiple of
1992 * 3, we'll underestimate near each multiple of 12K. This is actually more
1993 * accurate as we have 4K - 1 of wiggle room that we can fit into the last
1994 * segment. For our purposes this is accurate out to 1M which is orders of
1995 * magnitude greater than our largest possible GSO size.
1997 * This would then be implemented as:
1998 * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
2000 * Since multiplication and division are commutative, we can reorder the operations into:
2002 * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
2004 static unsigned int ice_txd_use_count(unsigned int size)
2006 return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
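/* Worked examples of the approximation above: size = 4096 gives
 * (4096 * 85) >> 20 = 0, so one descriptor once ICE_DESCS_FOR_SKB_DATA_PTR
 * (the "+1 to round up") is added; size = 60000 gives
 * (60000 * 85) >> 20 = 4, i.e. 5 descriptors, matching ceil(60000 / 12K).
 */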
2010 * ice_xmit_desc_count - calculate number of Tx descriptors needed
2013 * Returns number of data descriptors needed for this skb.
2015 static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
2017 const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
2018 unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
2019 unsigned int count = 0, size = skb_headlen(skb);
2022 count += ice_txd_use_count(size);
2027 size = skb_frag_size(frag++);
2034 * __ice_chk_linearize - Check if there are more than 8 buffers per packet
2037 * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
2038 * and so we need to figure out the cases where we need to linearize the skb.
2040 * For TSO we need to count the TSO header and segment payload separately.
2041 * As such we need to check cases where we have 7 fragments or more as we
2042 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
2043 * the segment payload in the first descriptor, and another 7 for the fragments.
2046 static bool __ice_chk_linearize(struct sk_buff *skb)
2048 const skb_frag_t *frag, *stale;
2051 /* no need to check if number of frags is less than 7 */
2052 nr_frags = skb_shinfo(skb)->nr_frags;
2053 if (nr_frags < (ICE_MAX_BUF_TXD - 1))
2056 /* We need to walk through the list and validate that each group
2057 * of 6 fragments totals at least gso_size.
2059 nr_frags -= ICE_MAX_BUF_TXD - 2;
2060 frag = &skb_shinfo(skb)->frags[0];
2062 /* Initialize size to the negative value of gso_size minus 1. We
2063 * use this as the worst case scenario in which the frag ahead
2064 * of us only provides one byte which is why we are limited to 6
2065 * descriptors for a single transmit as the header and previous
2066 * fragment are already consuming 2 descriptors.
2068 sum = 1 - skb_shinfo(skb)->gso_size;
2070 /* Add size of frags 0 through 4 to create our initial sum */
2071 sum += skb_frag_size(frag++);
2072 sum += skb_frag_size(frag++);
2073 sum += skb_frag_size(frag++);
2074 sum += skb_frag_size(frag++);
2075 sum += skb_frag_size(frag++);
2077 /* Walk through fragments adding latest fragment, testing it, and
2078 * then removing stale fragments from the sum.
2080 for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
2081 int stale_size = skb_frag_size(stale);
2083 sum += skb_frag_size(frag++);
2085 /* The stale fragment may present us with a smaller
2086 * descriptor than the actual fragment size. To account
2087 * for that we need to remove all the data on the front and
2088 * figure out what the remainder would be in the last
2089 * descriptor associated with the fragment.
2091 if (stale_size > ICE_MAX_DATA_PER_TXD) {
2092 int align_pad = -(skb_frag_off(stale)) &
2093 (ICE_MAX_READ_REQ_SIZE - 1);
2096 stale_size -= align_pad;
2099 sum -= ICE_MAX_DATA_PER_TXD_ALIGNED;
2100 stale_size -= ICE_MAX_DATA_PER_TXD_ALIGNED;
2101 } while (stale_size > ICE_MAX_DATA_PER_TXD);
2104 /* if sum is negative we failed to make sufficient progress */
2118 * ice_chk_linearize - Check if there are more than 8 fragments per packet
2120 * @count: number of buffers used
2122 * Note: Our HW can't scatter-gather more than 8 fragments to build
2123 * a packet on the wire and so we need to figure out the cases where we
2124 * need to linearize the skb.
2126 static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
2128 /* Both TSO and single send will work if count is less than 8 */
2129 if (likely(count < ICE_MAX_BUF_TXD))
2132 if (skb_is_gso(skb))
2133 return __ice_chk_linearize(skb);
2135 /* we can support up to 8 data buffers for a single send */
2136 return count != ICE_MAX_BUF_TXD;
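/* Putting the two checks together: a non-TSO send may use at most
 * ICE_MAX_BUF_TXD (8) buffers, so any count of 8 or more forces
 * linearization; for TSO, __ice_chk_linearize() only allows the send if
 * every window of 6 consecutive fragments carries at least gso_size bytes,
 * since a single segment spread over more fragments than that, plus its
 * header, would exceed the 8-buffer limit.
 */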
2140 * ice_tstamp - set up context descriptor for hardware timestamp
2141 * @tx_ring: pointer to the Tx ring to send buffer on
2142 * @skb: pointer to the SKB we're sending
2144 * @off: Tx offload parameters
2147 ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb,
2148 struct ice_tx_buf *first, struct ice_tx_offload_params *off)
2152 /* only timestamp the outbound packet if the user has requested it */
2153 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2156 if (!tx_ring->ptp_tx)
2159 /* Tx timestamps cannot be sampled when doing TSO */
2160 if (first->tx_flags & ICE_TX_FLAGS_TSO)
2163 /* Grab an open timestamp slot */
2164 idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb);
2168 off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
2169 (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) |
2170 ((u64)idx << ICE_TXD_CTX_QW1_TSO_LEN_S));
2171 first->tx_flags |= ICE_TX_FLAGS_TSYN;
2175 * ice_xmit_frame_ring - Sends buffer on Tx ring
2177 * @tx_ring: ring to send buffer on
2179 * Returns NETDEV_TX_OK if sent, else an error code
2182 ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
2184 struct ice_tx_offload_params offload = { 0 };
2185 struct ice_vsi *vsi = tx_ring->vsi;
2186 struct ice_tx_buf *first;
2191 count = ice_xmit_desc_count(skb);
2192 if (ice_chk_linearize(skb, count)) {
2193 if (__skb_linearize(skb))
2195 count = ice_txd_use_count(skb->len);
2196 tx_ring->tx_stats.tx_linearize++;
2199 /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
2200 * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
2201 * + 4 desc gap to avoid the cache line where head is,
2202 * + 1 desc for context descriptor,
2203 * otherwise try next time
2205 if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
2206 ICE_DESCS_FOR_CTX_DESC)) {
2207 tx_ring->tx_stats.tx_busy++;
2208 return NETDEV_TX_BUSY;
2211 offload.tx_ring = tx_ring;
2213 /* record the location of the first descriptor for this packet */
2214 first = &tx_ring->tx_buf[tx_ring->next_to_use];
2216 first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
2217 first->gso_segs = 1;
2218 first->tx_flags = 0;
2220 /* prepare the VLAN tagging flags for Tx */
2221 ice_tx_prepare_vlan_flags(tx_ring, first);
2223 /* set up TSO offload */
2224 tso = ice_tso(first, &offload);
2228 /* always set up Tx checksum offload */
2229 csum = ice_tx_csum(first, &offload);
2233 /* allow CONTROL frames egress from main VSI if FW LLDP disabled */
2234 eth = (struct ethhdr *)skb_mac_header(skb);
2235 if (unlikely((skb->priority == TC_PRIO_CONTROL ||
2236 eth->h_proto == htons(ETH_P_LLDP)) &&
2237 vsi->type == ICE_VSI_PF &&
2238 vsi->port_info->qos_cfg.is_sw_lldp))
2239 offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
2240 ICE_TX_CTX_DESC_SWTCH_UPLINK <<
2241 ICE_TXD_CTX_QW1_CMD_S);
2243 ice_tstamp(tx_ring, skb, first, &offload);
2245 if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
2246 struct ice_tx_ctx_desc *cdesc;
2247 u16 i = tx_ring->next_to_use;
2249 /* grab the next descriptor */
2250 cdesc = ICE_TX_CTX_DESC(tx_ring, i);
2252 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2254 /* setup context descriptor */
2255 cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
2256 cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
2257 cdesc->rsvd = cpu_to_le16(0);
2258 cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
2261 ice_tx_map(tx_ring, first, &offload);
2262 return NETDEV_TX_OK;
2265 dev_kfree_skb_any(skb);
2266 return NETDEV_TX_OK;
2270 * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
2272 * @netdev: network interface device structure
2274 * Returns NETDEV_TX_OK if sent, else an error code
2276 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
2278 struct ice_netdev_priv *np = netdev_priv(netdev);
2279 struct ice_vsi *vsi = np->vsi;
2280 struct ice_ring *tx_ring;
2282 tx_ring = vsi->tx_rings[skb->queue_mapping];
2284 /* hardware can't handle really short frames, hardware padding works beyond this point */
2287 if (skb_put_padto(skb, ICE_MIN_TX_LEN))
2288 return NETDEV_TX_OK;
2290 return ice_xmit_frame_ring(skb, tx_ring);
2294 * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
2295 * @tx_ring: tx_ring to clean
2297 void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring)
2299 struct ice_vsi *vsi = tx_ring->vsi;
2300 s16 i = tx_ring->next_to_clean;
2301 int budget = ICE_DFLT_IRQ_WORK;
2302 struct ice_tx_desc *tx_desc;
2303 struct ice_tx_buf *tx_buf;
2305 tx_buf = &tx_ring->tx_buf[i];
2306 tx_desc = ICE_TX_DESC(tx_ring, i);
2307 i -= tx_ring->count;
2310 struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
2312 /* if next_to_watch is not set then there is no pending work */
2316 /* prevent any other reads prior to eop_desc */
2319 /* if the descriptor isn't done, no work to do */
2320 if (!(eop_desc->cmd_type_offset_bsz &
2321 cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
2324 /* clear next_to_watch to prevent false hangs */
2325 tx_buf->next_to_watch = NULL;
2326 tx_desc->buf_addr = 0;
2327 tx_desc->cmd_type_offset_bsz = 0;
2329 /* move past filter desc */
2334 i -= tx_ring->count;
2335 tx_buf = tx_ring->tx_buf;
2336 tx_desc = ICE_TX_DESC(tx_ring, 0);
2339 /* unmap the data header */
2340 if (dma_unmap_len(tx_buf, len))
2341 dma_unmap_single(tx_ring->dev,
2342 dma_unmap_addr(tx_buf, dma),
2343 dma_unmap_len(tx_buf, len),
2345 if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
2346 devm_kfree(tx_ring->dev, tx_buf->raw_buf);
2348 /* clear next_to_watch to prevent false hangs */
2349 tx_buf->raw_buf = NULL;
2350 tx_buf->tx_flags = 0;
2351 tx_buf->next_to_watch = NULL;
2352 dma_unmap_len_set(tx_buf, len, 0);
2353 tx_desc->buf_addr = 0;
2354 tx_desc->cmd_type_offset_bsz = 0;
2356 /* move past eop_desc for start of next FD desc */
2361 i -= tx_ring->count;
2362 tx_buf = tx_ring->tx_buf;
2363 tx_desc = ICE_TX_DESC(tx_ring, 0);
2367 } while (likely(budget));
2369 i += tx_ring->count;
2370 tx_ring->next_to_clean = i;
2372 /* re-enable interrupt if needed */
2373 ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]);