1 /*******************************************************************************
3 * Intel Ethernet Controller XL710 Family Linux Driver
4 * Copyright(c) 2013 - 2014 Intel Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along
16 * with this program. If not, see <http://www.gnu.org/licenses/>.
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
21 * Contact Information:
22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25 ******************************************************************************/
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
30 #include "i40e_prototype.h"
/* Pack the command, offset, buffer size and L2 tag fields, together with the
 * DATA descriptor type, into one little-endian 64-bit Tx descriptor word.
 * Each field is cast to u64 before it is shifted into place.
 * td_tag is used below but its declaration is not visible in this listing.
 */
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
35 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
37 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
45 * i40e_program_fdir_filter - Program a Flow Director filter
46 * @fdir_data: Packet data that will be filter parameters
47 * @raw_packet: the pre-allocated packet buffer for FDir
49 * @add: True for add/update, False for remove
/* Queue a Flow Director programming request on Tx ring 0 of the FDIR VSI.
 * Two descriptors are consumed: a filter-programming descriptor built from
 * fdir_data, followed by a dummy data descriptor that points at raw_packet.
 * Several declarations and early-exit lines are not visible in this listing.
 */
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 struct i40e_pf *pf, bool add)
54 struct i40e_filter_program_desc *fdir_desc;
55 struct i40e_tx_buffer *tx_buf, *first;
56 struct i40e_tx_desc *tx_desc;
57 struct i40e_ring *tx_ring;
58 unsigned int fpt, dcc;
66 /* find existing FDIR VSI */
68 for (i = 0; i < pf->num_alloc_vsi; i++)
69 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
/* the request always goes out on the first Tx ring of that VSI */
74 tx_ring = vsi->tx_rings[0];
77 /* we need two descriptors to add/del a filter and we can wait */
/* sleep between checks; the retry loop is bounded by I40E_FD_CLEAN_DELAY */
79 if (I40E_DESC_UNUSED(tx_ring) > 1)
81 msleep_interruptible(1);
83 } while (delay < I40E_FD_CLEAN_DELAY);
/* re-check after waiting: true when fewer than two descriptors are free */
85 if (!(I40E_DESC_UNUSED(tx_ring) > 1))
/* map the whole raw packet buffer for reading by the device */
88 dma = dma_map_single(dev, raw_packet,
89 I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 if (dma_mapping_error(dev, dma))
93 /* grab the next descriptor */
94 i = tx_ring->next_to_use;
95 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 first = &tx_ring->tx_bi[i];
97 memset(first, 0, sizeof(struct i40e_tx_buffer));
/* advance next_to_use, wrapping to 0 at the end of the ring */
99 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
/* first dword: queue index, flex offset, packet classifier type and
 * destination VSI, each shifted into its field
 */
101 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 I40E_TXD_FLTR_QW0_QINDEX_MASK;
104 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
107 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 I40E_TXD_FLTR_QW0_PCTYPE_MASK;
110 /* Use LAN VSI Id if not programmed by user */
111 if (fdir_data->dest_vsi == 0)
112 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
115 fpt |= ((u32)fdir_data->dest_vsi <<
116 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
/* second dword: descriptor type, add/update or remove command,
 * destination control, status reporting and an optional counter index
 */
119 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
122 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
125 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
128 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 I40E_TXD_FLTR_QW1_DEST_MASK;
131 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
/* a non-zero counter index also sets the counter-enable bit */
134 if (fdir_data->cnt_index != 0) {
135 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 dcc |= ((u32)fdir_data->cnt_index <<
137 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
/* store the assembled words in the descriptor in little-endian form */
141 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 fdir_desc->rsvd = cpu_to_le32(0);
143 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
146 /* Now program a dummy descriptor */
147 i = tx_ring->next_to_use;
148 tx_desc = I40E_TX_DESC(tx_ring, i);
149 tx_buf = &tx_ring->tx_bi[i];
151 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
153 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
155 /* record length, and DMA address */
156 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 dma_unmap_addr_set(tx_buf, dma, dma);
159 tx_desc->buffer_addr = cpu_to_le64(dma);
/* I40E_TXD_CMD is EOP | RS; the DUMMY command bit is added on top */
160 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
/* tag the buffer as sideband FD and remember raw_packet;
 * i40e_unmap_and_free_tx_resource() kfree()s raw_buf for tagged buffers
 */
162 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 tx_buf->raw_buf = (void *)raw_packet;
165 tx_desc->cmd_type_offset_bsz =
166 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
168 /* Force memory writes to complete before letting h/w
169 * know there are new descriptors to fetch.
173 /* Mark the data descriptor to be watched */
174 first->next_to_watch = tx_desc;
/* publish the new next_to_use index through the ring tail register */
176 writel(tx_ring->next_to_use, tx_ring->tail);
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
186 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187 * @vsi: pointer to the targeted VSI
188 * @fd_data: the flow director data required for the FDir descriptor
189 * @add: true adds a filter, false removes it
191 * Returns 0 if the filters were successfully added or removed
/* Build a dummy UDP/IPv4 frame carrying the filter addresses and ports and
 * pass it to i40e_program_fdir_filter() to add or remove the filter.
 * Returns 0, or -EOPNOTSUPP when err is set; the line that sets err is not
 * visible in this listing (presumably the programming-failure branch).
 */
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 struct i40e_fdir_filter *fd_data,
197 struct i40e_pf *pf = vsi->back;
/* frame template: ethertype 0x0800 at offset 12, then an IPv4 header
 * (0x45, total length 0x1c, TTL 0x40, protocol 0x11); all else is zero
 */
203 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/* zeroed heap buffer of the maximum raw packet size, seeded from the template */
207 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
210 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
/* locate the IP header and the UDP header that follows it */
212 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 + sizeof(struct iphdr));
/* copy the match fields from the filter into the headers */
216 ip->daddr = fd_data->dst_ip[0];
217 udp->dest = fd_data->dst_port;
218 ip->saddr = fd_data->src_ip[0];
219 udp->source = fd_data->src_port;
/* NOTE(review): no kfree(raw_packet) is visible after a failed
 * i40e_program_fdir_filter() call; confirm the buffer is not leaked
 */
221 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
224 dev_info(&pf->pdev->dev,
225 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 fd_data->pctype, fd_data->fd_id, ret);
/* success messages are only logged when FD debugging is enabled */
228 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
230 dev_info(&pf->pdev->dev,
231 "Filter OK for PCTYPE %d loc = %d\n",
232 fd_data->pctype, fd_data->fd_id);
234 dev_info(&pf->pdev->dev,
235 "Filter deleted for PCTYPE %d loc = %d\n",
236 fd_data->pctype, fd_data->fd_id);
/* the value of ret is logged above but not returned to the caller */
238 return err ? -EOPNOTSUPP : 0;
241 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
243 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
244 * @vsi: pointer to the targeted VSI
245 * @fd_data: the flow director data required for the FDir descriptor
246 * @add: true adds a filter, false removes it
248 * Returns 0 if the filters were successfully added or removed
/* Build a dummy TCP/IPv4 frame carrying the filter addresses and ports and
 * pass it to i40e_program_fdir_filter() to add or remove the filter.
 * Also updates pf->fd_tcp_rule and the ATR-enabled flag in pf->flags.
 * Returns 0, or -EOPNOTSUPP when err is set; the line that sets err is not
 * visible in this listing.
 */
250 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
251 struct i40e_fdir_filter *fd_data,
254 struct i40e_pf *pf = vsi->back;
/* frame template: ethertype 0x0800 at offset 12, then an IPv4 header
 * (0x45, total length 0x28, TTL 0x40, protocol 0x6) and TCP header bytes
 */
261 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
262 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
263 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
264 0x0, 0x72, 0, 0, 0, 0};
/* zeroed heap buffer of the maximum raw packet size, seeded from the template */
266 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
269 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
/* locate the IP header and the TCP header that follows it */
271 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
272 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
273 + sizeof(struct iphdr));
/* copy the match fields from the filter into the headers */
275 ip->daddr = fd_data->dst_ip[0];
276 tcp->dest = fd_data->dst_port;
277 ip->saddr = fd_data->src_ip[0];
278 tcp->source = fd_data->src_port;
/* an enabled ATR is switched off here; the log text ties this to sideband
 * TCP/IPv4 rules being applied (the guarding add/remove test is not
 * visible in this listing)
 */
282 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
283 if (I40E_DEBUG_FD & pf->hw.debug_mask)
284 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
285 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
/* decrement the TCP rule count without going below 0; once it reaches 0
 * the ATR flag is set again
 */
288 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
289 (pf->fd_tcp_rule - 1) : 0;
290 if (pf->fd_tcp_rule == 0) {
291 pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
292 if (I40E_DEBUG_FD & pf->hw.debug_mask)
293 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
/* NOTE(review): no kfree(raw_packet) is visible after a failed
 * i40e_program_fdir_filter() call; confirm the buffer is not leaked
 */
297 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
301 dev_info(&pf->pdev->dev,
302 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 fd_data->pctype, fd_data->fd_id, ret);
/* success messages are only logged when FD debugging is enabled */
305 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
/* NOTE(review): this format string ends with a stray ')' that the same
 * message in the UDP and IPv4 variants does not have
 */
307 dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
308 fd_data->pctype, fd_data->fd_id);
310 dev_info(&pf->pdev->dev,
311 "Filter deleted for PCTYPE %d loc = %d\n",
312 fd_data->pctype, fd_data->fd_id);
/* the value of ret is logged above but not returned to the caller */
315 return err ? -EOPNOTSUPP : 0;
319 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320 * a specific flow spec
321 * @vsi: pointer to the targeted VSI
322 * @fd_data: the flow director data required for the FDir descriptor
323 * @add: true adds a filter, false removes it
325 * Always returns -EOPNOTSUPP
/* Only the start of the parameter list is visible in this listing; the
 * function body is not shown, so its behaviour is not described here.
 */
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 struct i40e_fdir_filter *fd_data,
334 #define I40E_IP_DUMMY_PACKET_LEN 34
336 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337 * a specific flow spec
338 * @vsi: pointer to the targeted VSI
339 * @fd_data: the flow director data required for the FDir descriptor
340 * @add: true adds a filter, false removes it
342 * Returns 0 if the filters were successfully added or removed
/* Program one filter per packet classifier type, walking from
 * I40E_FILTER_PCTYPE_NONF_IPV4_OTHER through I40E_FILTER_PCTYPE_FRAG_IPV4.
 * Every iteration allocates its own raw packet copy.
 * Returns 0, or -EOPNOTSUPP when err is set; the line that sets err is not
 * visible in this listing.
 */
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 struct i40e_fdir_filter *fd_data,
348 struct i40e_pf *pf = vsi->back;
/* frame template: ethertype 0x0800 at offset 12, then an IPv4 header
 * (0x45, total length 0x14, TTL 0x40, protocol 0x10); the end of the
 * initializer is not visible in this listing
 */
354 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
358 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
/* fresh zeroed buffer per classifier type, seeded from the template */
360 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
363 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
/* only the addresses are matched; there is no L4 header to fill in */
366 ip->saddr = fd_data->src_ip[0];
367 ip->daddr = fd_data->dst_ip[0];
/* NOTE(review): no kfree(raw_packet) is visible after a failed
 * i40e_program_fdir_filter() call; confirm the buffer is not leaked
 */
371 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
374 dev_info(&pf->pdev->dev,
375 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 fd_data->pctype, fd_data->fd_id, ret);
/* success messages are only logged when FD debugging is enabled */
378 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
380 dev_info(&pf->pdev->dev,
381 "Filter OK for PCTYPE %d loc = %d\n",
382 fd_data->pctype, fd_data->fd_id);
384 dev_info(&pf->pdev->dev,
385 "Filter deleted for PCTYPE %d loc = %d\n",
386 fd_data->pctype, fd_data->fd_id);
/* the value of ret is logged above but not returned to the caller */
390 return err ? -EOPNOTSUPP : 0;
394 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395 * @vsi: pointer to the targeted VSI
396 * @input: the flow director filter to add or remove
397 * @add: true adds a filter, false removes it
/* Pick the protocol-specific add/remove helper for a filter.
 * The outer switch keys on input->flow_type with the FLOW_EXT bit masked
 * off; a nested switch keys on input->ip4_proto. The case labels are not
 * visible in this listing, so the label-to-helper mapping is not restated.
 */
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 struct i40e_fdir_filter *input, bool add)
403 struct i40e_pf *pf = vsi->back;
406 switch (input->flow_type & ~FLOW_EXT) {
408 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
411 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
414 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
417 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
/* second-level dispatch on the IPv4 protocol field of the filter */
420 switch (input->ip4_proto) {
422 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
425 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
428 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
431 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
436 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
441 /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
446 * i40e_fd_handle_status - check the Programming Status for FD
447 * @rx_ring: the Rx ring for this descriptor
448 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
449 * @prog_id: the id originally used for programming
451 * This is used to verify if the FD programming or invalidation
452 * requested by SW to the HW is successful or not and take actions accordingly.
/* React to a Flow Director programming status descriptor.
 * The error field of the status qword selects the action: a table-full
 * error logs a warning, may auto-disable ATR and request a flush, and may
 * auto-disable sideband when the table is nearly full; a no-entry error is
 * logged when FD debugging is enabled.
 */
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 union i40e_rx_desc *rx_desc, u8 prog_id)
457 struct i40e_pf *pf = rx_ring->vsi->back;
458 struct pci_dev *pdev = pf->pdev;
459 u32 fcnt_prog, fcnt_avail;
/* extract the error field from the status qword */
463 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
/* table-full error: warn when the descriptor carries a non-zero fd_id or
 * FD debugging is enabled
 */
467 if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 (I40E_DEBUG_FD & pf->hw.debug_mask))
470 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 rx_desc->wb.qword0.hi_dword.fd_id);
473 /* Check if the programming error is for ATR.
474 * If so, auto disable ATR and set a state for
475 * flush in progress. Next time we come here if flush is in
476 * progress do nothing, once flush is complete the state will
479 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
483 /* store the current atr filter count */
484 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
/* fd_id of 0 while sideband is already auto-disabled: auto-disable ATR
 * as well and request a flush
 */
486 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
487 (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
488 pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
489 set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
492 /* filter programming failed most likely due to table full */
493 fcnt_prog = i40e_get_global_fd_count(pf);
494 fcnt_avail = pf->fdir_pf_filter_count;
495 /* If ATR is running fcnt_prog can quickly change,
496 * if we are very close to full, it makes sense to disable
497 * FD ATR/SB and then re-enable it when there is room.
499 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
500 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
501 !(pf->auto_disable_flags &
502 I40E_FLAG_FD_SB_ENABLED)) {
503 if (I40E_DEBUG_FD & pf->hw.debug_mask)
504 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
505 pf->auto_disable_flags |=
506 I40E_FLAG_FD_SB_ENABLED;
510 "FD filter programming failed due to incorrect filter parameters\n");
512 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
513 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
515 rx_desc->wb.qword0.hi_dword.fd_id);
520 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
521 * @ring: the ring that owns the buffer
522 * @tx_buffer: the buffer to free
/* Release whatever one Tx buffer slot still holds: the skb or sideband raw
 * packet, any live DMA mapping, and the bookkeeping fields.
 */
524 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
525 struct i40e_tx_buffer *tx_buffer)
527 if (tx_buffer->skb) {
/* sideband FD slots carry a kzalloc-allocated raw packet, not an skb;
 * presumably raw_buf and skb share storage -- TODO confirm in
 * struct i40e_tx_buffer
 */
528 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
529 kfree(tx_buffer->raw_buf);
531 dev_kfree_skb_any(tx_buffer->skb);
/* a non-zero recorded length means a single mapping is still live */
533 if (dma_unmap_len(tx_buffer, len))
534 dma_unmap_single(ring->dev,
535 dma_unmap_addr(tx_buffer, dma),
536 dma_unmap_len(tx_buffer, len),
/* no skb attached: a live mapping is released as a page mapping */
538 } else if (dma_unmap_len(tx_buffer, len)) {
539 dma_unmap_page(ring->dev,
540 dma_unmap_addr(tx_buffer, dma),
541 dma_unmap_len(tx_buffer, len),
/* reset the slot so it reads as empty */
544 tx_buffer->next_to_watch = NULL;
545 tx_buffer->skb = NULL;
546 dma_unmap_len_set(tx_buffer, len, 0);
547 /* tx_buffer must be completely set up in the transmit path */
551 * i40e_clean_tx_ring - Free all Tx buffers held by the ring
552 * @tx_ring: ring to be cleaned
/* Return a Tx ring to its freshly initialised state: every buffer slot is
 * released, the buffer-info array and descriptor memory are zeroed, and both
 * ring indices are reset to 0.
 */
554 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
556 unsigned long bi_size;
559 /* ring already cleared, nothing to do */
563 /* Free all the Tx ring sk_buffs */
564 for (i = 0; i < tx_ring->count; i++)
565 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
/* wipe the whole buffer-info array in one go */
567 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
568 memset(tx_ring->tx_bi, 0, bi_size);
570 /* Zero out the descriptor ring */
571 memset(tx_ring->desc, 0, tx_ring->size);
573 tx_ring->next_to_use = 0;
574 tx_ring->next_to_clean = 0;
/* the netdev queue reset below needs a netdev; rings without one are
 * tested for here
 */
576 if (!tx_ring->netdev)
579 /* cleanup Tx queue statistics */
580 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
581 tx_ring->queue_index));
585 * i40e_free_tx_resources - Free Tx resources per queue
586 * @tx_ring: Tx descriptor ring for a specific queue
588 * Free all transmit software resources
/* Undo i40e_setup_tx_descriptors(): clean the ring, free the tx_bi array
 * and release the coherent descriptor memory, clearing both pointers.
 */
590 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
592 i40e_clean_tx_ring(tx_ring);
593 kfree(tx_ring->tx_bi);
594 tx_ring->tx_bi = NULL;
/* size and dma are the values recorded when the area was allocated */
597 dma_free_coherent(tx_ring->dev, tx_ring->size,
598 tx_ring->desc, tx_ring->dma);
599 tx_ring->desc = NULL;
604 * i40e_get_head - Retrieve head from head writeback
605 * @tx_ring: tx ring to fetch head of
607 * Returns value of Tx ring head based on value stored
608 * in head write-back location
/* Read the head index from the write-back word that sits directly after the
 * last Tx descriptor (desc + count); i40e_setup_tx_descriptors() reserves a
 * u32 for it. The read goes through a volatile pointer and converts from
 * little-endian.
 */
610 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
612 void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
614 return le32_to_cpu(*(volatile __le32 *)head);
618 * i40e_get_tx_pending - how many tx descriptors not processed
619 * @ring: the ring of descriptors
621 * Since there is no access to the ring head register
622 * in XL710, we need to use our local copies
/* Count the descriptors between the written-back head and the tail register,
 * adding ring->count when the tail has wrapped behind the head.
 * NOTE(review): the handling of head == tail is not visible in this listing.
 */
624 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
628 head = i40e_get_head(ring);
629 tail = readl(ring->tail);
632 return (head < tail) ?
633 tail - head : (tail + ring->count - head);
639 * i40e_check_tx_hang - Is there a hang in the Tx queue
640 * @tx_ring: the ring of descriptors
/* Decide whether a Tx ring looks hung. The completed-packet counter is
 * compared with the value saved at the previous check; no progress while
 * descriptors are pending arms __I40E_HANG_CHECK_ARMED, and the previous
 * state of that bit is captured in ret by test_and_set_bit().
 */
642 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
/* the 64-bit-or-wider counters are compared here as u32 snapshots */
644 u32 tx_done = tx_ring->stats.packets;
645 u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
646 u32 tx_pending = i40e_get_tx_pending(tx_ring);
647 struct i40e_pf *pf = tx_ring->vsi->back;
650 clear_check_for_tx_hang(tx_ring);
652 /* Check for a hung queue, but be thorough. This verifies
653 * that a transmit has been completed since the previous
654 * check AND there is at least one packet pending. The
655 * ARMED bit is set to indicate a potential hang. The
656 * bit is cleared if a pause frame is received to remove
657 * false hang detection due to PFC or 802.3x frames. By
658 * requiring this to fail twice we avoid races with
659 * PFC clearing the ARMED bit and conditions where we
660 * run the check_tx_hang logic with a transmit completion
661 * pending but without time to complete it yet.
663 if ((tx_done_old == tx_done) && tx_pending) {
664 /* make sure it is true for two checks in a row */
665 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
/* NOTE(review): this branch requires tx_done_old == tx_done with
 * tx_pending > 0, which the first test already catches, so it appears
 * to be unreachable
 */
667 } else if (tx_done_old == tx_done &&
668 (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
669 if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
670 dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
671 tx_pending, tx_ring->queue_index);
672 pf->tx_sluggish_count++;
674 /* update completed stats and disarm the hang check */
675 tx_ring->tx_stats.tx_done_old = tx_done;
676 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
682 #define WB_STRIDE 0x3
685 * i40e_clean_tx_irq - Reclaim resources after transmit completes
686 * @tx_ring: tx ring to clean
687 * @budget: how many cleans we're allowed
689 * Returns true if there's any budget left (i.e. the clean is finished)
/* Reclaim completed Tx buffers starting at next_to_clean. Cleaning stops when
 * the descriptor at the hardware head write-back position is reached, when a
 * buffer has no next_to_watch, or when the budget runs out. Afterwards the
 * ring and q_vector statistics are updated, a hang check may run, and the
 * subqueue may be woken. Loop-control and index-update lines are not all
 * visible in this listing.
 */
691 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
693 u16 i = tx_ring->next_to_clean;
694 struct i40e_tx_buffer *tx_buf;
695 struct i40e_tx_desc *tx_head;
696 struct i40e_tx_desc *tx_desc;
697 unsigned int total_packets = 0;
698 unsigned int total_bytes = 0;
700 tx_buf = &tx_ring->tx_bi[i];
701 tx_desc = I40E_TX_DESC(tx_ring, i);
/* descriptor that the hardware head write-back currently points at */
704 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
707 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
709 /* if next_to_watch is not set then there is no work pending */
713 /* prevent any other reads prior to eop_desc */
714 read_barrier_depends();
716 /* we have caught up to head, no work left to do */
717 if (tx_head == tx_desc)
720 /* clear next_to_watch to prevent false hangs */
721 tx_buf->next_to_watch = NULL;
723 /* update the statistics for this packet */
724 total_bytes += tx_buf->bytecount;
725 total_packets += tx_buf->gso_segs;
/* the packet completed normally, so the skb is consumed, not dropped */
728 dev_consume_skb_any(tx_buf->skb);
730 /* unmap skb header data */
731 dma_unmap_single(tx_ring->dev,
732 dma_unmap_addr(tx_buf, dma),
733 dma_unmap_len(tx_buf, len),
736 /* clear tx_buffer data */
738 dma_unmap_len_set(tx_buf, len, 0);
740 /* unmap remaining buffers */
741 while (tx_desc != eop_desc) {
/* restart at slot 0; presumably the wrap case, whose test is not
 * visible in this listing
 */
748 tx_buf = tx_ring->tx_bi;
749 tx_desc = I40E_TX_DESC(tx_ring, 0);
752 /* unmap any remaining paged data */
753 if (dma_unmap_len(tx_buf, len)) {
754 dma_unmap_page(tx_ring->dev,
755 dma_unmap_addr(tx_buf, dma),
756 dma_unmap_len(tx_buf, len),
758 dma_unmap_len_set(tx_buf, len, 0);
762 /* move us one more past the eop_desc for start of next pkt */
768 tx_buf = tx_ring->tx_bi;
769 tx_desc = I40E_TX_DESC(tx_ring, 0);
774 /* update budget accounting */
776 } while (likely(budget));
/* store the new clean position, then fold this call's totals into the
 * ring counters (inside the u64 stats update section) and the q_vector
 */
779 tx_ring->next_to_clean = i;
780 u64_stats_update_begin(&tx_ring->syncp);
781 tx_ring->stats.bytes += total_bytes;
782 tx_ring->stats.packets += total_packets;
783 u64_stats_update_end(&tx_ring->syncp);
784 tx_ring->q_vector->tx.total_bytes += total_bytes;
785 tx_ring->q_vector->tx.total_packets += total_packets;
787 /* check to see if there are any non-cache aligned descriptors
788 * waiting to be written back, and kick the hardware to force
789 * them to be written back in case of napi polling
792 !((i & WB_STRIDE) == WB_STRIDE) &&
793 !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
794 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
795 tx_ring->arm_wb = true;
797 tx_ring->arm_wb = false;
799 if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
800 /* schedule immediate reset if we believe we hung */
801 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
804 " next_to_use <%x>\n"
805 " next_to_clean <%x>\n",
807 tx_ring->queue_index,
808 tx_ring->next_to_use, i);
/* stop feeding the queue that is believed to be hung */
810 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
812 dev_info(tx_ring->dev,
813 "tx hang detected on queue %d, reset requested\n",
814 tx_ring->queue_index);
816 /* do not fire the reset immediately, wait for the stack to
817 * decide we are truly stuck, also prevents every queue from
818 * simultaneously requesting a reset
821 /* the adapter is about to reset, no point in enabling polling */
/* report the completed packets and bytes for this netdev Tx queue */
825 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
826 tx_ring->queue_index),
827 total_packets, total_bytes);
/* wake-up needs completed work, carrier up and at least
 * TX_WAKE_THRESHOLD unused descriptors
 */
829 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
830 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
831 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
832 /* Make sure that anybody stopping the queue after this
833 * sees the new next_to_clean.
836 if (__netif_subqueue_stopped(tx_ring->netdev,
837 tx_ring->queue_index) &&
838 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
839 netif_wake_subqueue(tx_ring->netdev,
840 tx_ring->queue_index);
841 ++tx_ring->tx_stats.restart_queue;
849 * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
850 * @vsi: the VSI we care about
851 * @q_vector: the vector on which to force writeback
/* Ask the hardware to write back pending descriptors for one vector.
 * Three paths, chosen in order: rings flagged WB_ON_ITR use the WB_ON_ITR
 * bit of the per-vector DYN_CTLN register; otherwise, with MSI-X enabled, a
 * software interrupt is triggered through DYN_CTLN; otherwise the same
 * trigger is written to DYN_CTL0. The register writes of the first two
 * paths are only partly visible in this listing.
 */
854 static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
/* the flags of the first Tx ring on the vector select the path */
856 u16 flags = q_vector->tx.ring[0].flags;
858 if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
/* arm_wb_state records that write-back was already armed */
861 if (q_vector->arm_wb_state)
864 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
/* per-vector register index: v_idx + base_vector - 1 */
867 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
868 vsi->base_vector - 1),
870 q_vector->arm_wb_state = true;
871 } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
872 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
873 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
874 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
875 I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
876 /* allow 00 to be written to the index */
879 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
880 vsi->base_vector - 1), val);
/* remaining case: the same bit set is written to the CTL0 register */
882 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
883 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
884 I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
885 I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
886 /* allow 00 to be written to the index */
888 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
893 * i40e_set_new_dynamic_itr - Find new ITR level
894 * @rc: structure containing ring performance data
896 * Stores a new ITR value based on packets and byte counts during
897 * the last interrupt. The advantage of per interrupt computation
898 * is faster updates and more accurate ITR for the current traffic
899 * pattern. Constants in this function were computed based on
900 * theoretical maximum wire speed and thresholds were set based on
901 * testing data as well as attempting to minimize response time
902 * while increasing bulk throughput.
/* Re-evaluate the latency range of a ring container and pick the matching
 * ITR value. The metric is total_bytes / itr. From the lowest range a value
 * above 10 moves to low; from low a value above 20 moves to bulk and a value
 * of 10 or less moves back to lowest; from bulk a value of 20 or less moves
 * to low. The ranges map to I40E_ITR_100K, I40E_ITR_20K and I40E_ITR_8K.
 * The packet counter is reset at the end.
 */
904 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
906 enum i40e_latency_range new_latency_range = rc->latency_range;
907 u32 new_itr = rc->itr;
/* no traffic or a zero ITR; the zero check also guards the division below */
910 if (rc->total_packets == 0 || !rc->itr)
913 /* simple throttlerate management
914 * 0-10MB/s lowest (100000 ints/s)
915 * 10-20MB/s low (20000 ints/s)
916 * 20-1249MB/s bulk (8000 ints/s)
918 bytes_per_int = rc->total_bytes / rc->itr;
919 switch (new_latency_range) {
920 case I40E_LOWEST_LATENCY:
921 if (bytes_per_int > 10)
922 new_latency_range = I40E_LOW_LATENCY;
924 case I40E_LOW_LATENCY:
925 if (bytes_per_int > 20)
926 new_latency_range = I40E_BULK_LATENCY;
927 else if (bytes_per_int <= 10)
928 new_latency_range = I40E_LOWEST_LATENCY;
930 case I40E_BULK_LATENCY:
931 if (bytes_per_int <= 20)
932 new_latency_range = I40E_LOW_LATENCY;
935 if (bytes_per_int <= 20)
936 new_latency_range = I40E_LOW_LATENCY;
939 rc->latency_range = new_latency_range;
941 switch (new_latency_range) {
942 case I40E_LOWEST_LATENCY:
943 new_itr = I40E_ITR_100K;
945 case I40E_LOW_LATENCY:
946 new_itr = I40E_ITR_20K;
948 case I40E_BULK_LATENCY:
949 new_itr = I40E_ITR_8K;
955 if (new_itr != rc->itr)
959 rc->total_packets = 0;
963 * i40e_clean_programming_status - clean the programming status descriptor
964 * @rx_ring: the rx ring that has this descriptor
965 * @rx_desc: the rx descriptor written back by HW
967 * Flow director should handle FD_FILTER_STATUS to check its filter programming
968 * status being successful or not and take actions accordingly. FCoE should
969 * handle its context/filter programming/invalidation status and take actions.
/* Route a programming status descriptor to its handler. The programming id
 * is taken from the status qword: FD filter status goes to
 * i40e_fd_handle_status(); FCoE context program or invalidate status goes to
 * i40e_fcoe_handle_status(). Other ids are ignored by the visible code.
 */
972 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
973 union i40e_rx_desc *rx_desc)
/* extract the programming id field */
978 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
979 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
980 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
982 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
983 i40e_fd_handle_status(rx_ring, rx_desc, id);
985 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
986 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
987 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
992 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
993 * @tx_ring: the tx ring to set up
995 * Return 0 on success, negative on error
/* Allocate the software and hardware state of a Tx ring: a zeroed tx_bi
 * array with one entry per descriptor, and a coherent DMA area sized for
 * count descriptors plus a u32 head write-back word, rounded up to 4096
 * bytes. Both ring indices start at 0. The trailing lines free tx_bi and
 * clear the pointer; presumably that is the error path, whose label is not
 * visible in this listing.
 */
997 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
999 struct device *dev = tx_ring->dev;
1005 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
1006 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
1007 if (!tx_ring->tx_bi)
1010 /* round up to nearest 4K */
1011 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
1012 /* add u32 for head writeback, align after this takes care of
1013 * guaranteeing this is at least one cache line in size
1015 tx_ring->size += sizeof(u32);
1016 tx_ring->size = ALIGN(tx_ring->size, 4096);
1017 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1018 &tx_ring->dma, GFP_KERNEL);
1019 if (!tx_ring->desc) {
1020 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1025 tx_ring->next_to_use = 0;
1026 tx_ring->next_to_clean = 0;
1030 kfree(tx_ring->tx_bi);
1031 tx_ring->tx_bi = NULL;
1036 * i40e_clean_rx_ring - Free Rx buffers
1037 * @rx_ring: ring to be cleaned
/* Release every buffer attached to an Rx ring and reset the ring: header
 * buffers (packet-split rings), skbs and their mappings, and pages. The
 * buffer-info array and descriptor memory are then zeroed and both ring
 * indices set to 0. Several argument and test lines are not visible in this
 * listing.
 */
1039 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1041 struct device *dev = rx_ring->dev;
1042 struct i40e_rx_buffer *rx_bi;
1043 unsigned long bi_size;
1046 /* ring already cleared, nothing to do */
1047 if (!rx_ring->rx_bi)
/* packet-split rings: bufsz matches the single coherent allocation made
 * by i40e_alloc_rx_headers(), whose start is recorded in rx_bi[0]
 */
1050 if (ring_is_ps_enabled(rx_ring)) {
1051 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1053 rx_bi = &rx_ring->rx_bi[0];
1054 if (rx_bi->hdr_buf) {
1055 dma_free_coherent(dev,
/* drop every per-slot pointer into the freed header area */
1059 for (i = 0; i < rx_ring->count; i++) {
1060 rx_bi = &rx_ring->rx_bi[i];
1062 rx_bi->hdr_buf = NULL;
1066 /* Free all the Rx ring sk_buffs */
1067 for (i = 0; i < rx_ring->count; i++) {
1068 rx_bi = &rx_ring->rx_bi[i];
1070 dma_unmap_single(dev,
1072 rx_ring->rx_buf_len,
1077 dev_kfree_skb(rx_bi->skb);
/* page-based buffers: clear the mapping, then free the page */
1081 if (rx_bi->page_dma) {
1086 rx_bi->page_dma = 0;
1088 __free_page(rx_bi->page);
1090 rx_bi->page_offset = 0;
/* wipe the whole buffer-info array in one go */
1094 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1095 memset(rx_ring->rx_bi, 0, bi_size);
1097 /* Zero out the descriptor ring */
1098 memset(rx_ring->desc, 0, rx_ring->size);
1100 rx_ring->next_to_clean = 0;
1101 rx_ring->next_to_use = 0;
1105 * i40e_free_rx_resources - Free Rx resources
1106 * @rx_ring: ring to clean the resources from
1108 * Free all receive software resources
/* Undo i40e_setup_rx_descriptors(): clean the ring, free the rx_bi array
 * and, when descriptor memory exists, release it; both pointers are cleared.
 */
1110 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1112 i40e_clean_rx_ring(rx_ring);
1113 kfree(rx_ring->rx_bi);
1114 rx_ring->rx_bi = NULL;
/* skip the DMA free when no descriptor area is present */
1116 if (rx_ring->desc) {
1117 dma_free_coherent(rx_ring->dev, rx_ring->size,
1118 rx_ring->desc, rx_ring->dma);
1119 rx_ring->desc = NULL;
1124 * i40e_alloc_rx_headers - allocate rx header buffers
1125 * @rx_ring: ring to alloc buffers
1127 * Allocate rx header buffers for the entire ring. As these are static,
1128 * this is only called when setting up a new ring.
/* Allocate header buffers for a whole Rx ring in one coherent allocation of
 * count * buf_size bytes, then give slot i the slice at offset i * buf_size
 * (both its DMA address and its CPU pointer). buf_size is rx_hdr_len rounded
 * up to a multiple of 256.
 */
1130 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1132 struct device *dev = rx_ring->dev;
1133 struct i40e_rx_buffer *rx_bi;
/* slot 0 already has a header buffer; presumably an early return follows,
 * but that line is not visible in this listing
 */
1139 if (rx_ring->rx_bi[0].hdr_buf)
1141 /* Make sure the buffers don't cross cache line boundaries. */
1142 buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1143 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
/* carve the allocation into one equally sized slice per ring slot */
1147 for (i = 0; i < rx_ring->count; i++) {
1148 rx_bi = &rx_ring->rx_bi[i];
1149 rx_bi->dma = dma + (i * buf_size);
1150 rx_bi->hdr_buf = buffer + (i * buf_size);
1155 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1156 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1158 * Returns 0 on success, negative on failure
/* Allocate the software and hardware state of an Rx ring: a zeroed rx_bi
 * array with one entry per descriptor, and a coherent DMA area for count
 * descriptors rounded up to 4096 bytes. Both ring indices start at 0. The
 * trailing lines free rx_bi and clear the pointer; presumably that is the
 * error path, whose label is not visible in this listing.
 */
1160 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1162 struct device *dev = rx_ring->dev;
1165 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1166 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1167 if (!rx_ring->rx_bi)
1170 u64_stats_init(&rx_ring->syncp);
1172 /* Round up to nearest 4K */
/* the per-descriptor size depends on 16-byte versus 32-byte mode */
1173 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1174 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1175 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1176 rx_ring->size = ALIGN(rx_ring->size, 4096);
1177 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1178 &rx_ring->dma, GFP_KERNEL);
1180 if (!rx_ring->desc) {
1181 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1186 rx_ring->next_to_clean = 0;
1187 rx_ring->next_to_use = 0;
1191 kfree(rx_ring->rx_bi);
1192 rx_ring->rx_bi = NULL;
1197 * i40e_release_rx_desc - Store the new tail and head values
1198 * @rx_ring: ring to bump
1199 * @val: new head index
/* Record val as the ring's next_to_use index and write the same value to the
 * ring tail register.
 */
1201 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1203 rx_ring->next_to_use = val;
1204 /* Force memory writes to complete before letting h/w
1205 * know there are new descriptors to fetch. (Only
1206 * applicable for weak-ordered memory model archs,
1210 writel(val, rx_ring->tail);
1214 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1215 * @rx_ring: ring to place buffers on
1216 * @cleaned_count: number of buffers to replace
// Refills up to cleaned_count descriptors of a packet-split ring, starting
// at next_to_use. Each refilled descriptor receives the page DMA address
// as pkt_addr and the buffer DMA address bi->dma as hdr_addr.
// NOTE(review): the embedded numbering has gaps; the bodies of the early
// exit and of the failure branches are not visible in this view.
1218 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1220 u16 i = rx_ring->next_to_use;
1221 union i40e_rx_desc *rx_desc;
1222 struct i40e_rx_buffer *bi;
1224 /* do nothing if no valid netdev defined */
1225 if (!rx_ring->netdev || !cleaned_count)
1228 while (cleaned_count--) {
1229 rx_desc = I40E_RX_DESC(rx_ring, i);
1230 bi = &rx_ring->rx_bi[i];
1232 if (bi->skb) /* desc is in use */
// A page is requested with GFP_ATOMIC; the condition guarding this
// allocation and the failure test are on lines not visible here.
1235 bi->page = alloc_page(GFP_ATOMIC);
1237 rx_ring->rx_stats.alloc_page_failed++;
// No DMA mapping is recorded for the page: switch page_offset to the other
// half of the page and create a mapping.
1242 if (!bi->page_dma) {
1243 /* use a half page if we're re-using */
1244 bi->page_offset ^= PAGE_SIZE / 2;
1245 bi->page_dma = dma_map_page(rx_ring->dev,
1250 if (dma_mapping_error(rx_ring->dev,
1252 rx_ring->rx_stats.alloc_page_failed++;
// Sync rx_hdr_len bytes back to the device; presumably this is the header
// buffer region - the address arguments are not visible here.
1258 dma_sync_single_range_for_device(rx_ring->dev,
1261 rx_ring->rx_hdr_len,
1263 /* Refresh the desc even if buffer_addrs didn't change
1264 * because each write-back erases this info.
1266 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1267 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
// Ring index wrap check; the index increment is presumably on a line not
// visible in this view.
1269 if (i == rx_ring->count)
// Publish the new index to hardware only if it actually moved.
1274 if (rx_ring->next_to_use != i)
1275 i40e_release_rx_desc(rx_ring, i);
1279 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1280 * @rx_ring: ring to place buffers on
1281 * @cleaned_count: number of buffers to replace
// Refills up to cleaned_count descriptors of a single-buffer ring, starting
// at next_to_use. Each refilled descriptor receives the DMA address of an
// skb data buffer as pkt_addr and 0 as hdr_addr.
// NOTE(review): the embedded numbering has gaps; the bodies of the early
// exit and of the failure branches are not visible in this view.
1283 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1285 u16 i = rx_ring->next_to_use;
1286 union i40e_rx_desc *rx_desc;
1287 struct i40e_rx_buffer *bi;
1288 struct sk_buff *skb;
1290 /* do nothing if no valid netdev defined */
1291 if (!rx_ring->netdev || !cleaned_count)
1294 while (cleaned_count--) {
1295 rx_desc = I40E_RX_DESC(rx_ring, i);
1296 bi = &rx_ring->rx_bi[i];
// The skb is sized to rx_buf_len; the condition guarding this allocation
// and the failure test are on lines not visible here.
1300 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1301 rx_ring->rx_buf_len);
1303 rx_ring->rx_stats.alloc_buff_failed++;
1306 /* initialize queue mapping */
1307 skb_record_rx_queue(skb, rx_ring->queue_index);
// Map rx_buf_len bytes for DMA and record the handle in bi->dma; a mapping
// error is counted in the same alloc_buff_failed statistic.
1312 bi->dma = dma_map_single(rx_ring->dev,
1314 rx_ring->rx_buf_len,
1316 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1317 rx_ring->rx_stats.alloc_buff_failed++;
// Single-buffer mode: no separate header buffer, so hdr_addr is 0.
1323 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1324 rx_desc->read.hdr_addr = 0;
// Ring index wrap check; the index increment is presumably on a line not
// visible in this view.
1326 if (i == rx_ring->count)
// Publish the new index to hardware only if it actually moved.
1331 if (rx_ring->next_to_use != i)
1332 i40e_release_rx_desc(rx_ring, i);
1336 * i40e_receive_skb - Send a completed packet up the stack
1337 * @rx_ring: rx ring in play
1338 * @skb: packet to send up
1339 * @vlan_tag: vlan tag for packet
// Delivers a completed Rx skb to the network stack. A VLAN tag whose VID
// bits are non-zero is first attached as an 802.1Q accelerated tag.
1341 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1342 struct sk_buff *skb, u16 vlan_tag)
1344 struct i40e_q_vector *q_vector = rx_ring->q_vector;
1345 struct i40e_vsi *vsi = rx_ring->vsi;
1346 u64 flags = vsi->back->flags;
// Only the VID bits are tested, so a tag with VID 0 is not attached even
// if other bits of vlan_tag are set.
1348 if (vlan_tag & VLAN_VID_MASK)
1349 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
// NOTE(review): the statement taken when the netpoll flag is set is not
// visible in this view; the visible delivery path is GRO on the NAPI
// context of this queue vector.
1351 if (flags & I40E_FLAG_IN_NETPOLL)
1354 napi_gro_receive(&q_vector->napi, skb);
1358 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1359 * @vsi: the VSI we care about
1360 * @skb: skb currently being received and modified
1361 * @rx_status: status value of last descriptor in packet
1362 * @rx_error: error value of last descriptor in packet
1363 * @rx_ptype: ptype value of last descriptor in packet
// Decides the ip_summed state of a received skb from the descriptor status,
// error and ptype fields. The skb starts as CHECKSUM_NONE and is upgraded
// to CHECKSUM_UNNECESSARY only after the visible checks pass; for tunnelled
// ptypes the outer UDP checksum is verified in software first.
// NOTE(review): the embedded numbering has gaps; the statements executed
// by most of the if conditions below (returns or jumps) and several
// declarations are not visible in this view.
1365 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1366 struct sk_buff *skb,
1371 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1372 bool ipv4 = false, ipv6 = false;
1373 bool ipv4_tunnel, ipv6_tunnel;
// Tunnel detection is a plain range check on the raw ptype value.
1378 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1379 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1380 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1381 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
// Default: let the stack verify the checksum unless proven otherwise.
1383 skb->ip_summed = CHECKSUM_NONE;
1385 /* Rx csum enabled and ip headers found? */
1386 if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1389 /* did the hardware decode the packet and checksum? */
1390 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1393 /* both known and outer_ip must be set for the below code to work */
1394 if (!(decoded.known && decoded.outer_ip))
// Classify the outer IP version from the decoded ptype.
1397 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1398 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1400 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1401 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
// IPE and EIPE error bits are tested together; the leading part of this
// condition is on a line not visible here.
1405 (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1406 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1409 /* likely incorrect csum if alternate IP extension headers found */
1411 rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1412 /* don't increment checksum err here, non-fatal err */
1415 /* there was some L4 error, count error and punt packet to the stack */
1416 if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1419 /* handle packets that were not able to be checksummed due
1420 * to arrival speed, in this case the stack can compute
1423 if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1426 /* If VXLAN traffic has an outer UDPv4 checksum we need to check
1427 * it in the driver, hardware does not do it for us.
1428 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1429 * so the total length of IPv4 header is IHL*4 bytes
1430 * The UDP_0 bit *may* be set if the *inner* header is UDP
// Locate the outer transport header: MAC header + Ethernet header + IPv4
// header length taken from the IHL field.
1433 skb->transport_header = skb->mac_header +
1434 sizeof(struct ethhdr) +
1435 (ip_hdr(skb)->ihl * 4);
1437 /* Add 4 bytes for VLAN tagged packets */
1438 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1439 skb->protocol == htons(ETH_P_8021AD))
// A zero UDP checksum field is skipped; otherwise the checksum is
// recomputed over the pseudo header plus payload and compared with the
// value carried in the packet.
1442 if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1443 (udp_hdr(skb)->check != 0)) {
1444 rx_udp_csum = udp_csum(skb);
1446 csum = csum_tcpudp_magic(
1447 iph->saddr, iph->daddr,
1448 (skb->len - skb_transport_offset(skb)),
1449 IPPROTO_UDP, rx_udp_csum);
1451 if (udp_hdr(skb)->check != csum)
1454 } /* else its GRE and so no outer UDP header */
// csum_level receives the boolean tunnel indication converted to an
// integer: 1 for a tunnelled ptype, 0 otherwise.
1457 skb->ip_summed = CHECKSUM_UNNECESSARY;
1458 skb->csum_level = ipv4_tunnel || ipv6_tunnel;
// Error accounting on the PF-wide counter; the label or branch that leads
// here is not visible in this view.
1463 vsi->back->hw_csum_rx_error++;
1467 * i40e_rx_hash - returns the hash value from the Rx descriptor
1468 * @ring: descriptor ring
1469 * @rx_desc: specific descriptor
// Returns the RSS hash stored in the write-back descriptor when the netdev
// has NETIF_F_RXHASH enabled and every bit of rss_mask is set in the
// descriptor status word.
// NOTE(review): the fallback return statement is on a line not visible in
// this view.
1471 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1472 union i40e_rx_desc *rx_desc)
// The mask is converted to little-endian once so it can be compared
// directly with the raw descriptor field.
1474 const __le64 rss_mask =
1475 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1476 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1478 if ((ring->netdev->features & NETIF_F_RXHASH) &&
1479 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1480 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1486 * i40e_ptype_to_hash - get a hash type
1487 * @ptype: the ptype value from the descriptor
1489 * Returns a hash type to be used by skb_set_hash
// Maps a descriptor ptype to a pkt_hash_types value: L4 when the decoded
// ptype has an outer IP header and a PAY4 payload layer, L3 when it has an
// outer IP header and a PAY3 payload layer, and L2 otherwise.
// NOTE(review): the condition guarding the PKT_HASH_TYPE_NONE return is on
// a line not visible in this view.
1491 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1493 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1496 return PKT_HASH_TYPE_NONE;
1498 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1499 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1500 return PKT_HASH_TYPE_L4;
1501 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1502 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1503 return PKT_HASH_TYPE_L3;
1505 return PKT_HASH_TYPE_L2;
1509 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1510 * @rx_ring: rx ring to clean
1511 * @budget: how many cleans we're allowed
1513 * Returns number of packets cleaned
// Packet-split Rx cleanup. Walks descriptors from next_to_clean, building
// an skb from the header buffer and, when present, the page buffer, then
// passes completed frames up through i40e_receive_skb. The loop runs while
// total_rx_packets is below budget; the return value is total_rx_packets.
// NOTE(review): the embedded numbering has gaps; several declarations,
// loop-control statements (break or continue) and closing braces are not
// visible in this view.
1515 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1517 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1518 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1519 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1520 const int current_node = numa_node_id();
1521 struct i40e_vsi *vsi = rx_ring->vsi;
1522 u16 i = rx_ring->next_to_clean;
1523 union i40e_rx_desc *rx_desc;
1524 u32 rx_error, rx_status;
1532 struct i40e_rx_buffer *rx_bi;
1533 struct sk_buff *skb;
1535 /* return some buffers to hardware, one at a time is too slow */
1536 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1537 i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
// Read the status word of the descriptor at next_to_clean; processing only
// continues when hardware has set the DD bit.
1541 i = rx_ring->next_to_clean;
1542 rx_desc = I40E_RX_DESC(rx_ring, i);
1543 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1544 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1545 I40E_RXD_QW1_STATUS_SHIFT;
1547 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1550 /* This memory barrier is needed to keep us from reading
1551 * any other fields out of the rx_desc until we know the
// Programming-status descriptors carry no packet data: they are handed to
// i40e_clean_programming_status and the ring index is advanced.
1555 if (i40e_rx_is_programming_status(qword)) {
1556 i40e_clean_programming_status(rx_ring, rx_desc);
1557 I40E_RX_INCREMENT(rx_ring, i);
1560 rx_bi = &rx_ring->rx_bi[i];
// An skb sized for the header (rx_hdr_len) is allocated; the condition
// guarding the allocation is on a line not visible here.
1563 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1564 rx_ring->rx_hdr_len);
1566 rx_ring->rx_stats.alloc_buff_failed++;
1570 /* initialize queue mapping */
1571 skb_record_rx_queue(skb, rx_ring->queue_index);
1572 /* we are reusing so sync this buffer for CPU use */
1573 dma_sync_single_range_for_cpu(rx_ring->dev,
1576 rx_ring->rx_hdr_len,
// Unpack the length, split-header, error and ptype fields from the status
// word read above.
1579 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1580 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1581 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1582 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1583 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1584 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
// The HBO bit is extracted into rx_hbo and then removed from rx_error, so
// later error tests do not see it.
1586 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1587 I40E_RXD_QW1_ERROR_SHIFT;
1588 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1589 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1591 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1592 I40E_RXD_QW1_PTYPE_SHIFT;
1593 prefetch(rx_bi->page);
// Header overflow or split header: header bytes are copied out of hdr_buf.
// Otherwise, for an empty skb, the start of the packet is copied out of
// the page and page_offset and rx_packet_len are adjusted by the copied
// length.
1596 if (rx_hbo || rx_sph) {
1599 len = I40E_RX_HDR_SIZE;
1601 len = rx_header_len;
1602 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1603 } else if (skb->len == 0) {
1606 len = (rx_packet_len > skb_headlen(skb) ?
1607 skb_headlen(skb) : rx_packet_len);
1608 memcpy(__skb_put(skb, len),
1609 rx_bi->page + rx_bi->page_offset,
1611 rx_bi->page_offset += len;
1612 rx_packet_len -= len;
1615 /* Get the rest of the data if this was a header split */
1616 if (rx_packet_len) {
1617 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1622 skb->len += rx_packet_len;
1623 skb->data_len += rx_packet_len;
1624 skb->truesize += rx_packet_len;
// An extra page reference is taken only when this is the sole reference
// and the page belongs to the NUMA node this code is running on;
// presumably this keeps the page available for reuse - confirm.
1626 if ((page_count(rx_bi->page) == 1) &&
1627 (page_to_nid(rx_bi->page) == current_node))
1628 get_page(rx_bi->page);
1632 dma_unmap_page(rx_ring->dev,
1636 rx_bi->page_dma = 0;
1638 I40E_RX_INCREMENT(rx_ring, i);
// Frame not finished (EOF bit clear): the partial skb is stored on the next
// ring slot and the non_eop_descs statistic is bumped.
1641 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1642 struct i40e_rx_buffer *next_buffer;
1644 next_buffer = &rx_ring->rx_bi[i];
1645 next_buffer->skb = skb;
1646 rx_ring->rx_stats.non_eop_descs++;
1650 /* ERR_MASK will only have valid bits if EOP set */
1651 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1652 dev_kfree_skb_any(skb);
1656 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1657 i40e_ptype_to_hash(rx_ptype));
// A valid timestamp indication passes the timestamp index from the status
// word to the PTP code and records the time of this event in jiffies.
1658 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1659 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1660 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1661 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1662 rx_ring->last_rx_timestamp = jiffies;
1665 /* probably a little skewed due to removing CRC */
1666 total_rx_bytes += skb->len;
1669 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1671 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
// The VLAN tag is read from the descriptor only when the L2TAG1P status
// bit is set; the alternative value is on a line not visible here.
1673 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1674 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
// The skb is freed when the FCoE offload handler returns 0.
1677 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1678 dev_kfree_skb_any(skb);
1682 skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1683 i40e_receive_skb(rx_ring, skb, vlan_tag);
// Clear the write-back status word of the descriptor just processed.
1685 rx_desc->wb.qword1.status_error_len = 0;
1687 } while (likely(total_rx_packets < budget));
// Ring counters are updated inside the u64_stats sequence; the per-vector
// totals are updated outside it.
1689 u64_stats_update_begin(&rx_ring->syncp);
1690 rx_ring->stats.packets += total_rx_packets;
1691 rx_ring->stats.bytes += total_rx_bytes;
1692 u64_stats_update_end(&rx_ring->syncp);
1693 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1694 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1696 return total_rx_packets;
1700 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1701 * @rx_ring: rx ring to clean
1702 * @budget: how many cleans we're allowed
1704 * Returns number of packets cleaned
// Single-buffer Rx cleanup. Walks descriptors from next_to_clean, sets the
// skb length from the descriptor, unmaps the buffer and passes completed
// frames up through i40e_receive_skb. The loop runs while total_rx_packets
// is below budget; the return value is total_rx_packets.
// NOTE(review): the embedded numbering has gaps; several declarations,
// loop-control statements (break or continue), the statement that loads
// skb from the ring buffer and closing braces are not visible in this view.
1706 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1708 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1709 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1710 struct i40e_vsi *vsi = rx_ring->vsi;
1711 union i40e_rx_desc *rx_desc;
1712 u32 rx_error, rx_status;
1719 struct i40e_rx_buffer *rx_bi;
1720 struct sk_buff *skb;
1722 /* return some buffers to hardware, one at a time is too slow */
1723 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1724 i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
// Read the status word of the descriptor at next_to_clean; processing only
// continues when hardware has set the DD bit.
1728 i = rx_ring->next_to_clean;
1729 rx_desc = I40E_RX_DESC(rx_ring, i);
1730 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1731 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1732 I40E_RXD_QW1_STATUS_SHIFT;
1734 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1737 /* This memory barrier is needed to keep us from reading
1738 * any other fields out of the rx_desc until we know the
// Programming-status descriptors carry no packet data: they are handed to
// i40e_clean_programming_status and the ring index is advanced.
1743 if (i40e_rx_is_programming_status(qword)) {
1744 i40e_clean_programming_status(rx_ring, rx_desc);
1745 I40E_RX_INCREMENT(rx_ring, i);
1748 rx_bi = &rx_ring->rx_bi[i];
1750 prefetch(skb->data);
1752 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1753 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
// The HBO bit is removed from rx_error so later error tests do not see it.
1755 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1756 I40E_RXD_QW1_ERROR_SHIFT;
1757 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1759 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1760 I40E_RXD_QW1_PTYPE_SHIFT;
1764 /* Get the header and possibly the whole packet
1765 * If this is an skb from previous receive dma will be 0
1767 skb_put(skb, rx_packet_len);
1768 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1772 I40E_RX_INCREMENT(rx_ring, i);
// Frame not finished (EOF bit clear): only the non_eop_descs statistic is
// visible in this branch.
1775 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1776 rx_ring->rx_stats.non_eop_descs++;
1780 /* ERR_MASK will only have valid bits if EOP set */
1781 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1782 dev_kfree_skb_any(skb);
1783 /* TODO: shouldn't we increment a counter indicating the
1789 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1790 i40e_ptype_to_hash(rx_ptype));
// A valid timestamp indication passes the timestamp index from the status
// word to the PTP code and records the time of this event in jiffies.
1791 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1792 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1793 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1794 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1795 rx_ring->last_rx_timestamp = jiffies;
1798 /* probably a little skewed due to removing CRC */
1799 total_rx_bytes += skb->len;
1802 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1804 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
// The VLAN tag is read from the descriptor only when the L2TAG1P status
// bit is set; the alternative value is on a line not visible here.
1806 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1807 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
// The skb is freed when the FCoE offload handler returns 0.
1810 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1811 dev_kfree_skb_any(skb);
1815 i40e_receive_skb(rx_ring, skb, vlan_tag);
// Clear the write-back status word of the descriptor just processed.
1817 rx_desc->wb.qword1.status_error_len = 0;
1818 } while (likely(total_rx_packets < budget));
// Ring counters are updated inside the u64_stats sequence; the per-vector
// totals are updated outside it.
1820 u64_stats_update_begin(&rx_ring->syncp);
1821 rx_ring->stats.packets += total_rx_packets;
1822 rx_ring->stats.bytes += total_rx_bytes;
1823 u64_stats_update_end(&rx_ring->syncp);
1824 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1825 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1827 return total_rx_packets;
1831 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1832 * @vsi: the VSI we care about
1833 * @q_vector: q_vector for which itr is being updated and interrupt enabled
// Recomputes the dynamic interrupt throttling values for the Rx and Tx
// sides of a queue vector and re-enables its MSI-X interrupt. For each
// side with a dynamic ITR setting, a register value containing the enable
// and clear-PBA bits is built; the interval field is included only when the
// ITR value changed. The value is written to the DYN_CTLN register unless
// the VSI is marked down. i40e_irq_dynamic_enable is the alternative path.
// NOTE(review): the embedded numbering has gaps; declarations, else lines
// and the ITR index operands are not visible in this view.
1836 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1837 struct i40e_q_vector *q_vector)
1839 struct i40e_hw *hw = &vsi->back->hw;
// Absolute vector number: per-VSI index plus the VSI base vector.
1844 vector = (q_vector->v_idx + vsi->base_vector);
1845 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1846 old_itr = q_vector->rx.itr;
1847 i40e_set_new_dynamic_itr(&q_vector->rx);
1848 if (old_itr != q_vector->rx.itr) {
1849 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1850 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1852 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1853 (q_vector->rx.itr <<
1854 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1856 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1857 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1859 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
// The register is indexed with vector - 1 and is left untouched while the
// VSI is down.
1861 if (!test_bit(__I40E_DOWN, &vsi->state))
1862 wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
1864 i40e_irq_dynamic_enable(vsi,
1865 q_vector->v_idx + vsi->base_vector);
// Same sequence for the Tx side of the vector.
1867 if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1868 old_itr = q_vector->tx.itr;
1869 i40e_set_new_dynamic_itr(&q_vector->tx);
1870 if (old_itr != q_vector->tx.itr) {
1871 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1872 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1874 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1875 (q_vector->tx.itr <<
1876 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1878 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1879 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1881 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1883 if (!test_bit(__I40E_DOWN, &vsi->state))
1884 wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
1885 vsi->base_vector - 1), val);
1887 i40e_irq_dynamic_enable(vsi,
1888 q_vector->v_idx + vsi->base_vector);
1893 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1894 * @napi: napi struct with our devices info in it
1895 * @budget: amount of work driver is allowed to do this pass, in packets
1897 * This function will clean all queues associated with a q_vector.
1899 * Returns the amount of work done
// NAPI poll handler for one queue vector. Cleans every Tx ring on the
// vector with vsi->work_limit as its limit, then cleans every Rx ring with
// an equal share of budget (never less than 1). When all rings finish
// within their share, NAPI is completed and interrupts are re-enabled:
// through i40e_update_enable_itr in MSI-X mode, or through the queue 0
// cause-enable bits plus ICR0 in legacy mode.
// NOTE(review): the embedded numbering has gaps; return statements, the
// declaration of cleaned and some conditions are not visible in this view.
1901 int i40e_napi_poll(struct napi_struct *napi, int budget)
1903 struct i40e_q_vector *q_vector =
1904 container_of(napi, struct i40e_q_vector, napi);
1905 struct i40e_vsi *vsi = q_vector->vsi;
1906 struct i40e_ring *ring;
1907 bool clean_complete = true;
1908 bool arm_wb = false;
1909 int budget_per_ring;
// A VSI that is going down completes NAPI immediately.
1912 if (test_bit(__I40E_DOWN, &vsi->state)) {
1913 napi_complete(napi);
1917 /* Since the actual Tx work is minimal, we can give the Tx a larger
1918 * budget and be more aggressive about cleaning up the Tx descriptors.
1920 i40e_for_each_ring(ring, q_vector->tx) {
1921 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1922 arm_wb |= ring->arm_wb;
1925 /* We attempt to distribute budget to each Rx queue fairly, but don't
1926 * allow the budget to go below 1 because that would exit polling early.
// NOTE(review): no guard against num_ringpairs being 0 is visible before
// this division - confirm it is always at least 1 for a polled vector.
1928 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1930 i40e_for_each_ring(ring, q_vector->rx) {
1931 if (ring_is_ps_enabled(ring))
1932 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1934 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1935 /* if we didn't clean as many as budgeted, we must be done */
1936 clean_complete &= (budget_per_ring != cleaned);
1939 /* If work not completed, return budget and polling will return */
1940 if (!clean_complete) {
1942 i40e_force_wb(vsi, q_vector);
// NOTE(review): the constant name suggests a Tx ring flag, yet it is
// tested against the PF flags word - confirm the intended flag.
1946 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
1947 q_vector->arm_wb_state = false;
1949 /* Work is done so exit the polling mode and re-enable the interrupt */
1950 napi_complete(napi);
1951 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1952 i40e_update_enable_itr(vsi, q_vector);
1953 } else { /* Legacy mode */
1954 struct i40e_hw *hw = &vsi->back->hw;
1955 /* We re-enable the queue 0 cause, but
1956 * don't worry about dynamic_enable
1957 * because we left it on for the other
1958 * possible interrupts during napi
// Read-modify-write of the Rx and Tx queue 0 cause control registers to
// set their cause-enable bits, followed by re-enabling ICR0.
1960 u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
1961 I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1963 wr32(hw, I40E_QINT_RQCTL(0), qval);
1964 qval = rd32(hw, I40E_QINT_TQCTL(0)) |
1965 I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1966 wr32(hw, I40E_QINT_TQCTL(0), qval);
1967 i40e_irq_dynamic_enable_icr0(vsi->back);
1973 * i40e_atr - Add a Flow Director ATR filter
1974 * @tx_ring: ring to add programming descriptor to
1976 * @tx_flags: send tx flags
1977 * @protocol: wire protocol
// Builds a Flow Director filter programming descriptor on the Tx ring for
// a sampled TCP flow (the ATR feature). The filter steers the flow to this
// ring queue index: it is an add or update normally and a removal when the
// TCP header has FIN or RST set. The descriptor is taken from next_to_use,
// which is advanced with wrap-around.
// NOTE(review): the embedded numbering has gaps; the return statements
// that follow the guard conditions, the hdr union declaration, the index
// increment and part of the sampling condition are not visible here.
1979 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1980 u32 tx_flags, __be16 protocol)
1982 struct i40e_filter_program_desc *fdir_desc;
1983 struct i40e_pf *pf = tx_ring->vsi->back;
1985 unsigned char *network;
1987 struct ipv6hdr *ipv6;
1991 u32 flex_ptype, dtype_cmd;
1994 /* make sure ATR is enabled */
1995 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1998 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2001 /* if sampling is disabled do nothing */
2002 if (!tx_ring->atr_sample_rate)
// Only packets flagged as IPv4 or IPv6 are considered.
2005 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
// Without the VXLAN tunnel flag the outer network header is inspected;
// otherwise the inner network header and its length are used.
2008 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
2009 /* snag network header to get L4 type and address */
2010 hdr.network = skb_network_header(skb);
2012 /* Currently only IPv4/IPv6 with TCP is supported
2013 * access ihl as u8 to avoid unaligned access on ia64
2015 if (tx_flags & I40E_TX_FLAGS_IPV4)
2016 hlen = (hdr.network[0] & 0x0F) << 2;
2017 else if (protocol == htons(ETH_P_IPV6))
2018 hlen = sizeof(struct ipv6hdr);
2022 hdr.network = skb_inner_network_header(skb);
2023 hlen = skb_inner_network_header_len(skb);
2026 /* Currently only IPv4/IPv6 with TCP is supported
2027 * Note: tx_flags gets modified to reflect inner protocols in
2028 * tx_enable_csum function if encap is enabled.
2030 if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
2031 (hdr.ipv4->protocol != IPPROTO_TCP))
2033 else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
2034 (hdr.ipv6->nexthdr != IPPROTO_TCP))
// The TCP header is assumed to start hlen bytes after the network header.
2037 th = (struct tcphdr *)(hdr.network + hlen);
2039 /* Due to lack of space, no more new filters can be programmed */
2040 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2043 tx_ring->atr_count++;
2045 /* sample on all syn/fin/rst packets or once every atr sample rate */
2049 (tx_ring->atr_count < tx_ring->atr_sample_rate))
2052 tx_ring->atr_count = 0;
2054 /* grab the next descriptor */
2055 i = tx_ring->next_to_use;
2056 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
// Wrap next_to_use back to 0 at the end of the ring.
2059 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
// First descriptor word: destination queue index, packet classifier type
// (IPv4 or IPv6 TCP, chosen from the wire protocol) and destination VSI.
2061 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2062 I40E_TXD_FLTR_QW0_QINDEX_MASK;
2063 flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2064 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2065 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2066 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2067 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2069 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2071 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
// FIN or RST ends the flow, so the filter is removed; any other sampled
// packet adds or updates it.
2073 dtype_cmd |= (th->fin || th->rst) ?
2074 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2075 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2076 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2077 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2079 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2080 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2082 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2083 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
// Counting is enabled; the counter index differs for tunnelled and
// non-tunnelled traffic and is derived from the PF id.
2085 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2086 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
2088 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2089 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2090 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2093 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2094 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2095 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
// All descriptor fields are stored in little-endian form.
2097 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2098 fdir_desc->rsvd = cpu_to_le32(0);
2099 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2100 fdir_desc->fd_id = cpu_to_le32(0);
2104 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2106 * @tx_ring: ring to send buffer on
2107 * @flags: the tx flags to be set
2109 * Checks the skb and sets up the corresponding generic transmit flags
2110 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2112 * Returns an error code when the frame should be dropped; otherwise
2113 * returns 0 to indicate the flags have been set properly.
// Derives the VLAN related Tx flags for an skb. A tag carried in skb
// metadata becomes a HW VLAN flag; an in-band 802.1Q header becomes a SW
// VLAN flag with the encapsulated protocol taken from that header. With
// DCB enabled, the 802.1p priority from skb->priority is inserted into the
// flags and, for SW VLAN frames, written back into the frame itself.
// NOTE(review): two alternative signatures appear below; the preprocessor
// lines that select between them, the flags parameter line, the local
// declarations and the return statements are not visible in this view.
2116 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2117 struct i40e_ring *tx_ring,
2120 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2121 struct i40e_ring *tx_ring,
2125 __be16 protocol = skb->protocol;
2128 if (protocol == htons(ETH_P_8021Q) &&
2129 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2130 /* When HW VLAN acceleration is turned off by the user the
2131 * stack sets the protocol to 8021q so that the driver
2132 * can take any steps required to support the SW only
2133 * VLAN handling. In our case the driver doesn't need
2134 * to take any further steps so just set the protocol
2135 * to the encapsulated ethertype.
2137 skb->protocol = vlan_get_protocol(skb);
2141 /* if we have a HW VLAN tag being added, default to the HW one */
2142 if (skb_vlan_tag_present(skb)) {
2143 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2144 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2145 /* else if it is a SW VLAN, check the next protocol and store the tag */
2146 } else if (protocol == htons(ETH_P_8021Q)) {
2147 struct vlan_hdr *vhdr, _vhdr;
// skb_header_pointer copies into _vhdr when the header is not in the
// linear data area; the check of its result is not visible here.
2148 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2152 protocol = vhdr->h_vlan_encapsulated_proto;
2153 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2154 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
// Priority insertion below applies only when DCB is enabled on the PF.
2157 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2160 /* Insert 802.1p priority into VLAN header */
2161 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2162 (skb->priority != TC_PRIO_CONTROL)) {
// Replace the priority bits in the flags with the low 3 bits of
// skb->priority.
2163 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2164 tx_flags |= (skb->priority & 0x7) <<
2165 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2166 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2167 struct vlan_ethhdr *vhdr;
// skb_cow_head is called before the TCI in the frame is rewritten;
// presumably to obtain a private writable header - confirm.
2170 rc = skb_cow_head(skb, 0);
2173 vhdr = (struct vlan_ethhdr *)skb->data;
2174 vhdr->h_vlan_TCI = htons(tx_flags >>
2175 I40E_TX_FLAGS_VLAN_SHIFT);
2177 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2187 * i40e_tso - set up the tso context descriptor
2188 * @tx_ring: ptr to the ring to send
2189 * @skb: ptr to the skb we're sending
2190 * @hdr_len: ptr to the size of the packet header
2191 * @cd_tunneling: ptr to context descriptor bits
2193 * Returns 0 if no TSO can happen, 1 if tso is going, or error
// Prepares a GSO skb for hardware TCP segmentation. The inner headers are
// used when skb->encapsulation is set. The TCP checksum field is seeded
// with the complemented pseudo-header checksum, the total header length is
// stored through hdr_len, and the TSO command, TSO payload length and MSS
// are ORed into *cd_type_cmd_tso_mss.
// NOTE(review): the embedded numbering has gaps; the last parameter line,
// several declarations, continuation lines of the checksum calls and the
// return statements are not visible in this view.
2195 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2196 u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
2199 u32 cd_cmd, cd_tso_len, cd_mss;
2200 struct ipv6hdr *ipv6h;
2201 struct tcphdr *tcph;
2206 if (!skb_is_gso(skb))
// The headers are modified below, so skb_cow_head is called first; the
// check of err is on a line not visible here.
2209 err = skb_cow_head(skb, 0);
// Both header views point at the same network header; the version field
// decides which one is used.
2213 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2214 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2216 if (iph->version == 4) {
2217 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2220 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2222 } else if (ipv6h->version == 6) {
2223 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2224 ipv6h->payload_len = 0;
2225 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
// Header length: offset of the (inner) transport header from skb->data
// plus the TCP header length. The result is stored in a u8.
2229 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2230 *hdr_len = (skb->encapsulation
2231 ? (skb_inner_transport_header(skb) - skb->data)
2232 : skb_transport_offset(skb)) + l4len;
2234 /* find the field values */
2235 cd_cmd = I40E_TX_CTX_DESC_TSO;
// TSO length is the payload only: total skb length minus all headers.
2236 cd_tso_len = skb->len - *hdr_len;
2237 cd_mss = skb_shinfo(skb)->gso_size;
2238 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2240 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2241 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2246 * i40e_tsyn - set up the tsyn context descriptor
2247 * @tx_ring: ptr to the ring to send
2248 * @skb: ptr to the skb we're sending
2249 * @tx_flags: the collected send information
2251 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
// Arms a hardware Tx timestamp for an skb that requested one. Nothing is
// done unless SKBTX_HW_TSTAMP is set on the skb. TSO frames and PFs without
// the PTP flag are rejected by the visible checks. When the in-progress bit
// can be claimed, the skb is marked SKBTX_IN_PROGRESS, an extra reference
// is stored in pf->ptp_tx_skb and the TSYN command bit is ORed into
// *cd_type_cmd_tso_mss.
// NOTE(review): the embedded numbering has gaps; the pf declaration, the
// return statements and part of the final condition are not visible here.
2253 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2254 u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2258 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2261 /* Tx timestamps cannot be sampled when doing TSO */
2262 if (tx_flags & I40E_TX_FLAGS_TSO)
2265 /* only timestamp the outbound packet if the user has requested it and
2266 * we are not already transmitting a packet to be timestamped
2268 pf = i40e_netdev_to_pf(tx_ring->netdev);
2269 if (!(pf->flags & I40E_FLAG_PTP))
// test_and_set_bit_lock claims the single in-progress slot atomically, so
// only one skb at a time is held in pf->ptp_tx_skb.
2273 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2274 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
// skb_get takes an additional reference for the stored pointer.
2275 pf->ptp_tx_skb = skb_get(skb);
2280 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2281 I40E_TXD_CTX_QW1_CMD_SHIFT;
2287 * i40e_tx_enable_csum - Enable Tx checksum offloads
2289 * @tx_flags: pointer to Tx flags currently set
2290 * @td_cmd: Tx descriptor command bits to set
2291 * @td_offset: Tx descriptor header offsets to set
2292 * @cd_tunneling: ptr to context desc bits
// Fills in the Tx descriptor command bits (td_cmd), header length offsets
// (td_offset) and, for encapsulated frames, the context descriptor
// tunnelling word (cd_tunneling) needed for checksum offload. For
// encapsulated frames the inner headers drive the offload settings and
// tx_flags is rewritten to IPv6 when the inner IP version is 6.
// NOTE(review): the embedded numbering has gaps; the last parameter line,
// some declarations, the case labels of both switch statements, else lines
// and closing braces are not visible in this view.
2294 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2295 u32 *td_cmd, u32 *td_offset,
2296 struct i40e_ring *tx_ring,
2299 struct ipv6hdr *this_ipv6_hdr;
2300 unsigned int this_tcp_hdrlen;
2301 struct iphdr *this_ip_hdr;
2302 u32 network_hdr_len;
2306 if (skb->encapsulation) {
// The outer IP protocol selects the tunnel type; the visible case marks
// the frame as a UDP tunnel and sets the VXLAN tunnel Tx flag.
2307 switch (ip_hdr(skb)->protocol) {
2309 l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2310 *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
// From here on the inner headers are the ones being offloaded.
2315 network_hdr_len = skb_inner_network_header_len(skb);
2316 this_ip_hdr = inner_ip_hdr(skb);
2317 this_ipv6_hdr = inner_ipv6_hdr(skb);
2318 this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2320 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2321 if (*tx_flags & I40E_TX_FLAGS_TSO) {
2322 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2323 ip_hdr(skb)->check = 0;
2326 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2328 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2329 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
// NOTE(review): this branch is taken for an outer header flagged as IPv6,
// yet it writes the check field through ip_hdr(), an IPv4 header view -
// confirm this is intended.
2330 if (*tx_flags & I40E_TX_FLAGS_TSO)
2331 ip_hdr(skb)->check = 0;
2334 /* Now set the ctx descriptor fields */
// The outer IP header length is stored in units of 4 bytes; the tunnel
// header length (outer transport header to inner network header) is
// stored in units of 2 bytes.
2335 *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2336 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
2338 ((skb_inner_network_offset(skb) -
2339 skb_transport_offset(skb)) >> 1) <<
2340 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
// Flags are switched to describe the inner protocol when it is IPv6.
2341 if (this_ip_hdr->version == 6) {
2342 *tx_flags &= ~I40E_TX_FLAGS_IPV4;
2343 *tx_flags |= I40E_TX_FLAGS_IPV6;
// Non-encapsulated frames use the regular header accessors.
2346 network_hdr_len = skb_network_header_len(skb);
2347 this_ip_hdr = ip_hdr(skb);
2348 this_ipv6_hdr = ipv6_hdr(skb);
2349 this_tcp_hdrlen = tcp_hdrlen(skb);
2352 /* Enable IP checksum offloads */
2353 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2354 l4_hdr = this_ip_hdr->protocol;
2355 /* the stack computes the IP header already, the only time we
2356 * need the hardware to recompute it is in the case of TSO.
2358 if (*tx_flags & I40E_TX_FLAGS_TSO) {
2359 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2360 this_ip_hdr->check = 0;
2362 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2364 /* Now set the td_offset for IP header length */
2365 *td_offset = (network_hdr_len >> 2) <<
2366 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2367 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2368 l4_hdr = this_ipv6_hdr->nexthdr;
2369 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2370 /* Now set the td_offset for IP header length */
2371 *td_offset = (network_hdr_len >> 2) <<
2372 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2374 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2375 *td_offset |= (skb_network_offset(skb) >> 1) <<
2376 I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2378 /* Enable L4 checksum offloads */
// Each visible L4 case sets its command bit and stores the L4 header
// length in units of 4 bytes. TCP uses the actual header length; SCTP and
// UDP use the fixed size of their header structs.
2381 /* enable checksum offloads */
2382 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2383 *td_offset |= (this_tcp_hdrlen >> 2) <<
2384 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2387 /* enable SCTP checksum offload */
2388 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2389 *td_offset |= (sizeof(struct sctphdr) >> 2) <<
2390 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2393 /* enable UDP checksum offload */
2394 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2395 *td_offset |= (sizeof(struct udphdr) >> 2) <<
2396 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2404 * i40e_create_tx_ctx - Build the Tx context descriptor
2405 * @tx_ring: ring to create the descriptor on
2406 * @cd_type_cmd_tso_mss: Quad Word 1
2407 * @cd_tunneling: Quad Word 0 - bits 0-31
2408 * @cd_l2tag2: Quad Word 0 - bits 32-63
/* Fill one Tx context descriptor, taken at tx_ring->next_to_use, with the
 * three caller-supplied words and update tx_ring->next_to_use.
 *
 * NOTE(review): the embedded line numbers in this listing skip values
 * (2413, 2416, 2419-2420, 2423-2424, ...), so braces and short statements
 * appear to be missing here; confirm details against the original file.
 */
2410 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2411 const u64 cd_type_cmd_tso_mss,
2412 const u32 cd_tunneling, const u32 cd_l2tag2)
2414 struct i40e_tx_context_desc *context_desc;
2415 int i = tx_ring->next_to_use;
/* All three inputs still hold their "nothing requested" values: the
 * type/cmd word is only the bare context dtype and both other words are 0.
 * NOTE(review): the statement guarded by this test is not visible
 * (presumably an early return, leaving the ring untouched) - confirm.
 */
2417 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2418 !cd_tunneling && !cd_l2tag2)
2421 /* grab the next descriptor */
2422 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
/* Store the index back, wrapping to 0 once it is no longer below the ring
 * size. NOTE(review): the increment of i is not visible in this listing.
 */
2425 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2427 /* cpu_to_le32 and assign to struct fields */
2428 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2429 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2430 context_desc->rsvd = cpu_to_le16(0);
2431 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2435 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2436 * @tx_ring: the ring to be checked
2437 * @size: the size buffer we want to assure is available
2439 * Returns -EBUSY if a stop is needed, else 0
/* Slow path of the Tx stop check: stop the subqueue first, then look at the
 * free descriptor count again and restart the subqueue if enough room has
 * appeared in the meantime.
 *
 * NOTE(review): several short lines (barrier call, return statements,
 * braces) are not visible in this listing; the embedded numbering jumps
 * over 2442, 2445-2446, 2449-2450 and 2454-2456.
 */
2441 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
/* Stop unconditionally; the queue is started again below when the re-check
 * finds room.
 */
2443 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2444 /* Memory barrier before checking head and tail */
2447 /* Check again in a case another CPU has just made room available. */
2448 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2451 /* A reprieve! - use start_queue because it doesn't call schedule */
2452 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
/* Account for the stop-then-restart event in the ring statistics. */
2453 ++tx_ring->tx_stats.restart_queue;
2458 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2459 * @tx_ring: the ring to be checked
2460 * @size: the size buffer we want to assure is available
2462 * Returns 0 if stop is not needed
/* Fast-path wrapper: only falls through to __i40e_maybe_stop_tx() when the
 * ring does not have at least 'size' unused descriptors.
 *
 * NOTE(review): two alternative signatures (non-static and static) appear
 * back to back; presumably they are selected by preprocessor conditionals
 * that are not visible in this listing - confirm against the original file.
 */
2465 inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2467 static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
/* Enough room is the expected case. NOTE(review): the guarded statement
 * (line 2471) is not visible; the header comment says 0 is returned when no
 * stop is needed.
 */
2470 if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
/* Not enough room: let the second-level check stop the queue and re-test. */
2472 return __i40e_maybe_stop_tx(tx_ring, size);
2476 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2478 * @tx_flags: collected send information
2480 * Note: Our HW can't scatter-gather more than 8 fragments to build
2481 * a packet on the wire and so we need to figure out the cases where we
2482 * need to linearize the skb.
/* Decide whether the skb carries more fragments than the hardware can
 * gather; 'linearize' starts out false and the checks below decide whether
 * it changes.
 *
 * NOTE(review): this listing is missing short lines (declarations of
 * num_frags, gso_segs and j, assignments to 'linearize', the loop header,
 * the exit label and the return); the embedded numbering skips 2485,
 * 2489-2491, 2503, 2508, 2514-2515, 2517-2520 and 2524 onwards. Confirm
 * details against the original file.
 */
2484 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2486 struct skb_frag_struct *frag;
2487 bool linearize = false;
2488 unsigned int size = 0;
2492 num_frags = skb_shinfo(skb)->nr_frags;
2493 gso_segs = skb_shinfo(skb)->gso_segs;
/* Segmentation offload requested (TSO or FSO): the fragment count has to be
 * judged per segment rather than for the skb as a whole.
 */
2495 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
/* Fewer fragments in total than I40E_MAX_BUFFER_TXD: skip the remaining
 * checks.
 */
2498 if (num_frags < (I40E_MAX_BUFFER_TXD))
2499 goto linearize_chk_done;
2500 /* try the simple math, if we have too many frags per segment */
2501 if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2502 I40E_MAX_BUFFER_TXD) {
2504 goto linearize_chk_done;
2506 frag = &skb_shinfo(skb)->frags[0];
2507 /* we might still have more fragments per segment */
/* Accumulate fragment sizes one fragment at a time. */
2509 size += skb_frag_size(frag);
/* The accumulated bytes reached gso_size while j is still under the buffer
 * limit: keep only the remainder for the next round.
 * NOTE(review): how j and num_frags are updated is not visible here.
 */
2511 if ((size >= skb_shinfo(skb)->gso_size) &&
2512 (j < I40E_MAX_BUFFER_TXD)) {
2513 size = (size % skb_shinfo(skb)->gso_size);
/* j hit the buffer limit. NOTE(review): the body of this branch is not
 * visible in this listing.
 */
2516 if (j == I40E_MAX_BUFFER_TXD) {
2521 } while (num_frags);
/* Plain comparison of the total fragment count with the limit.
 * NOTE(review): presumably the non-offload branch; the 'else' and the
 * guarded statement are not visible - confirm.
 */
2523 if (num_frags >= I40E_MAX_BUFFER_TXD)
2532 * i40e_tx_map - Build the Tx descriptor
2533 * @tx_ring: ring to send buffer on
2535 * @first: first buffer info buffer to use
2536 * @tx_flags: collected send information
2537 * @hdr_len: size of the packet header
2538 * @td_cmd: the command field in the descriptor
2539 * @td_offset: offset for checksum or crc
/* DMA-map the skb head and its page fragments and write one data descriptor
 * per mapped chunk, starting at tx_ring->next_to_use; afterwards update
 * next_to_use and, when required, the tail register.
 *
 * NOTE(review): two alternative signatures (non-static and static) appear
 * back to back, presumably selected by preprocessor conditionals that are
 * not visible. The listing is also missing many short lines (declarations
 * of dma, td_tag and gso_segs, index increments, break/goto statements,
 * labels, braces, comment terminators); confirm against the original file.
 */
2542 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2543 struct i40e_tx_buffer *first, u32 tx_flags,
2544 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2546 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2547 struct i40e_tx_buffer *first, u32 tx_flags,
2548 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2551 unsigned int data_len = skb->data_len;
2552 unsigned int size = skb_headlen(skb);
2553 struct skb_frag_struct *frag;
2554 struct i40e_tx_buffer *tx_bi;
2555 struct i40e_tx_desc *tx_desc;
2556 u16 i = tx_ring->next_to_use;
/* Hardware VLAN insertion requested: set the L2TAG1 command bit and pull the
 * tag value out of tx_flags.
 */
2561 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2562 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2563 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2564 I40E_TX_FLAGS_VLAN_SHIFT;
/* With segmentation offload the segment count comes from the skb.
 * NOTE(review): the value used otherwise is not visible in this listing.
 */
2567 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2568 gso_segs = skb_shinfo(skb)->gso_segs;
2572 /* multiply data chunks by size of headers */
2573 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2574 first->gso_segs = gso_segs;
2576 first->tx_flags = tx_flags;
/* Map the linear (head) part of the skb; 'size' is skb_headlen() here. */
2578 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2580 tx_desc = I40E_TX_DESC(tx_ring, i);
/* One pass per mapped buffer: first the head mapped above, then each page
 * fragment mapped at the bottom of the loop body.
 */
2583 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
/* NOTE(review): the statement taken on a mapping error is not visible;
 * presumably a jump to the error path further down - confirm.
 */
2584 if (dma_mapping_error(tx_ring->dev, dma))
2587 /* record length, and DMA address */
2588 dma_unmap_len_set(tx_bi, len, size);
2589 dma_unmap_addr_set(tx_bi, dma, dma);
2591 tx_desc->buffer_addr = cpu_to_le64(dma);
/* A buffer larger than I40E_MAX_DATA_PER_TXD is spread over several
 * descriptors, each one carrying exactly that many bytes until the rest
 * fits.
 */
2593 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2594 tx_desc->cmd_type_offset_bsz =
2595 build_ctob(td_cmd, td_offset,
2596 I40E_MAX_DATA_PER_TXD, td_tag);
/* Wrap to descriptor 0 when the index reaches the ring size. */
2600 if (i == tx_ring->count) {
2601 tx_desc = I40E_TX_DESC(tx_ring, 0);
2605 dma += I40E_MAX_DATA_PER_TXD;
2606 size -= I40E_MAX_DATA_PER_TXD;
2608 tx_desc->buffer_addr = cpu_to_le64(dma);
/* No paged data left. NOTE(review): the guarded statement is not visible;
 * presumably it leaves the loop - confirm.
 */
2611 if (likely(!data_len))
2614 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2619 if (i == tx_ring->count) {
2620 tx_desc = I40E_TX_DESC(tx_ring, 0);
2624 size = skb_frag_size(frag);
2627 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2630 tx_bi = &tx_ring->tx_bi[i];
2633 /* Place RS bit on last descriptor of any packet that spans across the
2634 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
2636 if (((i & WB_STRIDE) != WB_STRIDE) &&
2637 (first <= &tx_ring->tx_bi[i]) &&
2638 (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2639 tx_desc->cmd_type_offset_bsz =
2640 build_ctob(td_cmd, td_offset, size, td_tag) |
2641 cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2642 I40E_TXD_QW1_CMD_SHIFT);
2644 tx_desc->cmd_type_offset_bsz =
2645 build_ctob(td_cmd, td_offset, size, td_tag) |
2646 cpu_to_le64((u64)I40E_TXD_CMD <<
2647 I40E_TXD_QW1_CMD_SHIFT);
2650 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2651 tx_ring->queue_index),
2654 /* Force memory writes to complete before letting h/w
2655 * know there are new descriptors to fetch. (Only
2656 * applicable for weak-ordered memory model archs,
2661 /* set next_to_watch value indicating a packet is present */
2662 first->next_to_watch = tx_desc;
2665 if (i == tx_ring->count)
2668 tx_ring->next_to_use = i;
/* Stop the queue now if fewer than DESC_NEEDED descriptors remain. */
2670 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2671 /* notify HW of packet */
/* The tail write is skipped only when more packets are announced
 * (xmit_more) and the queue has not been stopped.
 */
2672 if (!skb->xmit_more ||
2673 netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2674 tx_ring->queue_index)))
2675 writel(i, tx_ring->tail);
2677 prefetchw(tx_desc + 1);
/* DMA mapping failure handling. NOTE(review): the label and the loop that
 * walks back over the already-mapped buffers are not visible in this
 * listing; only the log message, one unmap call and the final next_to_use
 * store can be seen.
 */
2682 dev_info(tx_ring->dev, "TX DMA map failed\n");
2684 /* clear dma mappings for failed tx_bi map */
2686 tx_bi = &tx_ring->tx_bi[i];
2687 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2695 tx_ring->next_to_use = i;
2699 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2701 * @tx_ring: ring to send buffer on
2703 * Returns number of data descriptors needed for this skb. Returns 0 to indicate
2704 * there are not enough descriptors available in this ring, since we need at least one descriptor.
/* Add up TXD_USE_COUNT() for every page fragment and for the skb head, then
 * ask i40e_maybe_stop_tx() for that many descriptors plus 4 + 1; when that
 * call reports a stop, tx_stats.tx_busy is incremented.
 *
 * NOTE(review): two alternative signatures (non-static and static) appear
 * back to back, presumably selected by preprocessor conditionals that are
 * not visible. The declarations of f and count, the return statements and
 * the terminator of the comment inside the body are also missing from this
 * listing; confirm against the original file.
 */
2708 inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2709 struct i40e_ring *tx_ring)
2711 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2712 struct i40e_ring *tx_ring)
2718 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2719 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2720 * + 4 desc gap to avoid the cache line where head is,
2721 * + 1 desc for context descriptor,
2722 * otherwise try next time
2724 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2725 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2727 count += TXD_USE_COUNT(skb_headlen(skb));
2728 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2729 tx_ring->tx_stats.tx_busy++;
2736 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2738 * @tx_ring: ring to send buffer on
2740 * Returns NETDEV_TX_OK if sent, else an error code
2742 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2743 struct i40e_ring *tx_ring)
2745 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2746 u32 cd_tunneling = 0, cd_l2tag2 = 0;
2747 struct i40e_tx_buffer *first;
2755 if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
2756 return NETDEV_TX_BUSY;
2758 /* prepare the xmit flags */
2759 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
/* NOTE(review): this listing is missing short lines throughout the function
 * (remaining local declarations, the statement taken when the VLAN flag
 * preparation above returns non-zero, the tests on the tso/tsyn results,
 * the error label, braces); the embedded numbering skips 2748-2754,
 * 2760-2761, 2776-2779, 2783-2784, 2789-2790 and others. Confirm details
 * against the original file.
 */
2762 /* obtain protocol of skb */
2763 protocol = vlan_get_protocol(skb);
2765 /* record the location of the first descriptor for this packet */
2766 first = &tx_ring->tx_bi[tx_ring->next_to_use];
2768 /* setup IPv4/IPv6 offloads */
2769 if (protocol == htons(ETH_P_IP))
2770 tx_flags |= I40E_TX_FLAGS_IPV4;
2771 else if (protocol == htons(ETH_P_IPV6))
2772 tx_flags |= I40E_TX_FLAGS_IPV6;
/* i40e_tso() is handed pointers to hdr_len and to two of the context
 * descriptor words, presumably so it can fill them in.
 * NOTE(review): the condition under which the TSO flag below is set is not
 * visible here.
 */
2774 tso = i40e_tso(tx_ring, skb, &hdr_len,
2775 &cd_type_cmd_tso_mss, &cd_tunneling);
2780 tx_flags |= I40E_TX_FLAGS_TSO;
/* Same pattern for i40e_tsyn(); NOTE(review): the test on 'tsyn' that guards
 * the flag update is not visible.
 */
2782 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2785 tx_flags |= I40E_TX_FLAGS_TSYN;
/* Linearize the skb when i40e_chk_linearize() asks for it.
 * NOTE(review): the statement taken when skb_linearize() fails is not
 * visible in this listing.
 */
2787 if (i40e_chk_linearize(skb, tx_flags))
2788 if (skb_linearize(skb))
2791 skb_tx_timestamp(skb);
2793 /* always enable CRC insertion offload */
2794 td_cmd |= I40E_TX_DESC_CMD_ICRC;
2796 /* Always offload the checksum, since it's in the data descriptor */
2797 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2798 tx_flags |= I40E_TX_FLAGS_CSUM;
2800 i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2801 tx_ring, &cd_tunneling);
/* Hand the collected context words to i40e_create_tx_ctx(). */
2804 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2805 cd_tunneling, cd_l2tag2);
2807 /* Add Flow Director ATR if it's enabled.
2809 * NOTE: this must always be directly before the data descriptor.
2811 i40e_atr(tx_ring, skb, tx_flags, protocol);
2813 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2816 return NETDEV_TX_OK;
2819 dev_kfree_skb_any(skb);
2820 return NETDEV_TX_OK;
2824 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2826 * @netdev: network interface device structure
2828 * Returns NETDEV_TX_OK if sent, else an error code
2830 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2832 struct i40e_netdev_priv *np = netdev_priv(netdev);
2833 struct i40e_vsi *vsi = np->vsi;
2834 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2836 /* hardware can't handle really short frames, hardware padding works
2839 if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2840 return NETDEV_TX_OK;
2842 return i40e_xmit_frame_ring(skb, tx_ring);