diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index a2d0aad..b7dc25d 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -375,6 +375,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
        if (!rx_ring->rx_buf)
                return;
 
+       if (rx_ring->skb) {
+               dev_kfree_skb(rx_ring->skb);
+               rx_ring->skb = NULL;
+       }
+
        if (rx_ring->xsk_pool) {
                ice_xsk_clean_rx_ring(rx_ring);
                goto rx_skip_free;
@@ -384,10 +389,6 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
        for (i = 0; i < rx_ring->count; i++) {
                struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
 
-               if (rx_buf->skb) {
-                       dev_kfree_skb(rx_buf->skb);
-                       rx_buf->skb = NULL;
-               }
                if (!rx_buf->page)
                        continue;
 
@@ -442,6 +443,22 @@ void ice_free_rx_ring(struct ice_ring *rx_ring)
        }
 }
 
+/**
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
+{
+       if (ice_ring_uses_build_skb(rx_ring))
+               return ICE_SKB_PAD;
+       else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+               return XDP_PACKET_HEADROOM;
+
+       return 0;
+}
+
 /**
  * ice_setup_rx_ring - Allocate the Rx descriptors
  * @rx_ring: the Rx ring to set up
@@ -476,6 +493,7 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
 
        rx_ring->next_to_use = 0;
        rx_ring->next_to_clean = 0;
+       rx_ring->rx_offset = ice_rx_offset(rx_ring);
 
        if (ice_is_xdp_ena_vsi(rx_ring->vsi))
                WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
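
A minimal sketch of the intent here, using the helper and field names from this patch: the offset is computed once at ring setup, and the hot-path users changed in the later hunks read the cached value instead of calling ice_rx_offset() per packet.

	rx_ring->rx_offset = ice_rx_offset(rx_ring);		/* once, at ring setup */

	/* ... later, in the Rx hot path, consumers read the cached field ... */
	bi->page_offset = rx_ring->rx_offset;
	truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
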
@@ -493,22 +511,6 @@ err:
        return -ENOMEM;
 }
 
-/**
- * ice_rx_offset - Return expected offset into page to access data
- * @rx_ring: Ring we are requesting offset of
- *
- * Returns the offset value for ring into the data buffer.
- */
-static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
-{
-       if (ice_ring_uses_build_skb(rx_ring))
-               return ICE_SKB_PAD;
-       else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
-               return XDP_PACKET_HEADROOM;
-
-       return 0;
-}
-
 static unsigned int
 ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size)
 {
@@ -517,8 +519,8 @@ ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size
 #if (PAGE_SIZE < 8192)
        truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
 #else
-       truesize = ice_rx_offset(rx_ring) ?
-               SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) +
+       truesize = rx_ring->rx_offset ?
+               SKB_DATA_ALIGN(rx_ring->rx_offset + size) +
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
                SKB_DATA_ALIGN(size);
 #endif
@@ -537,22 +539,20 @@ static int
 ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
            struct bpf_prog *xdp_prog)
 {
-       int err, result = ICE_XDP_PASS;
        struct ice_ring *xdp_ring;
+       int err;
        u32 act;
 
        act = bpf_prog_run_xdp(xdp_prog, xdp);
        switch (act) {
        case XDP_PASS:
-               break;
+               return ICE_XDP_PASS;
        case XDP_TX:
                xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
-               result = ice_xmit_xdp_buff(xdp, xdp_ring);
-               break;
+               return ice_xmit_xdp_buff(xdp, xdp_ring);
        case XDP_REDIRECT:
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-               result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
-               break;
+               return !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
@@ -560,11 +560,8 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
-               result = ICE_XDP_CONSUMED;
-               break;
+               return ICE_XDP_CONSUMED;
        }
-
-       return result;
 }
 
 /**
@@ -656,7 +653,7 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
 
        bi->dma = dma;
        bi->page = page;
-       bi->page_offset = ice_rx_offset(rx_ring);
+       bi->page_offset = rx_ring->rx_offset;
        page_ref_add(page, USHRT_MAX - 1);
        bi->pagecnt_bias = USHRT_MAX;
 
@@ -728,15 +725,6 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
        return !!cleaned_count;
 }
 
-/**
- * ice_page_is_reserved - check if reuse is possible
- * @page: page struct to check
- */
-static bool ice_page_is_reserved(struct page *page)
-{
-       return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
-}
-
 /**
  * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
  * @rx_buf: Rx buffer to adjust
@@ -775,8 +763,8 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
        unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
        struct page *page = rx_buf->page;
 
-       /* avoid re-using remote pages */
-       if (unlikely(ice_page_is_reserved(page)))
+       /* avoid re-using remote and pfmemalloc pages */
+       if (!dev_page_is_reusable(page))
                return false;
 
 #if (PAGE_SIZE < 8192)
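
For reference, the generic helper this hunk switches to folds the same NUMA-locality and pfmemalloc checks that the removed ice_page_is_reserved() open-coded, with the polarity inverted; roughly (a sketch, the authoritative definition lives in include/linux/skbuff.h):

	static inline bool dev_page_is_reusable(const struct page *page)
	{
		/* reusable only if local to this node and not a pfmemalloc page */
		return likely(page_to_nid(page) == numa_mem_id() &&
			      !page_is_pfmemalloc(page));
	}
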
@@ -818,7 +806,7 @@ ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
                struct sk_buff *skb, unsigned int size)
 {
 #if (PAGE_SIZE >= 8192)
-       unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
+       unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
 #else
        unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
@@ -864,7 +852,6 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
 /**
  * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
  * @rx_ring: Rx descriptor ring to transact packets on
- * @skb: skb to be used
  * @size: size of buffer to add to skb
  * @rx_buf_pgcnt: rx_buf page refcount
  *
@@ -872,8 +859,8 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
  * for use by the CPU.
  */
 static struct ice_rx_buf *
-ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
-              const unsigned int size, int *rx_buf_pgcnt)
+ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size,
+              int *rx_buf_pgcnt)
 {
        struct ice_rx_buf *rx_buf;
 
@@ -885,7 +872,6 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
                0;
 #endif
        prefetchw(rx_buf->page);
-       *skb = rx_buf->skb;
 
        if (!size)
                return rx_buf;
@@ -1047,29 +1033,24 @@ ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 
        /* clear contents of buffer_info */
        rx_buf->page = NULL;
-       rx_buf->skb = NULL;
 }
 
 /**
  * ice_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
  * @rx_desc: Rx descriptor for current buffer
- * @skb: Current socket buffer containing buffer in progress
  *
  * If the buffer is an EOP buffer, this function exits returning false,
  * otherwise return true indicating that this is in fact a non-EOP buffer.
  */
 static bool
-ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
-              struct sk_buff *skb)
+ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
 {
        /* if we are the last buffer then there is nothing else to do */
 #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
        if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
                return false;
 
-       /* place skb in next buffer to be received */
-       rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
        rx_ring->rx_stats.non_eop_descs++;
 
        return true;
@@ -1089,24 +1070,26 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
  */
 int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 {
-       unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+       unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0;
        u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+       unsigned int offset = rx_ring->rx_offset;
        unsigned int xdp_res, xdp_xmit = 0;
+       struct sk_buff *skb = rx_ring->skb;
        struct bpf_prog *xdp_prog = NULL;
        struct xdp_buff xdp;
        bool failure;
 
-       xdp.rxq = &rx_ring->xdp_rxq;
        /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
 #if (PAGE_SIZE < 8192)
-       xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0);
+       frame_sz = ice_rx_frame_truesize(rx_ring, 0);
 #endif
+       xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
 
        /* start the loop to process Rx packets bounded by 'budget' */
        while (likely(total_rx_pkts < (unsigned int)budget)) {
                union ice_32b_rx_flex_desc *rx_desc;
                struct ice_rx_buf *rx_buf;
-               struct sk_buff *skb;
+               unsigned char *hard_start;
                unsigned int size;
                u16 stat_err_bits;
                int rx_buf_pgcnt;
@@ -1141,7 +1124,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                        ICE_RX_FLX_DESC_PKT_LEN_M;
 
                /* retrieve a buffer from the ring */
-               rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt);
+               rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt);
 
                if (!size) {
                        xdp.data = NULL;
@@ -1151,10 +1134,9 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                        goto construct_skb;
                }
 
-               xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
-               xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
-               xdp.data_meta = xdp.data;
-               xdp.data_end = xdp.data + size;
+               hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
+                            offset;
+               xdp_prepare_buff(&xdp, hard_start, offset, size, true);
 #if (PAGE_SIZE > 4096)
                /* At larger PAGE_SIZE, frame_sz depend on len size */
                xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
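
The removed open-coded field assignments are replaced by the xdp_buff init helpers; roughly, per include/net/xdp.h (a sketch, not quoted verbatim), the two new calls expand to:

	static __always_inline void
	xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
	{
		xdp->frame_sz = frame_sz;
		xdp->rxq = rxq;
	}

	static __always_inline void
	xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
			 int headroom, int data_len, const bool meta_valid)
	{
		unsigned char *data = hard_start + headroom;

		xdp->data_hard_start = hard_start;	/* page address minus headroom */
		xdp->data = data;			/* start of frame data */
		xdp->data_end = data + data_len;
		xdp->data_meta = meta_valid ? data : data + 1;
	}

so passing true for meta_valid preserves the old xdp.data_meta = xdp.data behaviour.
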
@@ -1204,7 +1186,7 @@ construct_skb:
                cleaned_count++;
 
                /* skip if it is NOP desc */
-               if (ice_is_non_eop(rx_ring, rx_desc, skb))
+               if (ice_is_non_eop(rx_ring, rx_desc))
                        continue;
 
                stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
@@ -1234,6 +1216,7 @@ construct_skb:
 
                /* send completed skb up the stack */
                ice_receive_skb(rx_ring, skb, vlan_tag);
+               skb = NULL;
 
                /* update budget accounting */
                total_rx_pkts++;
@@ -1244,6 +1227,7 @@ construct_skb:
 
        if (xdp_prog)
                ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+       rx_ring->skb = skb;
 
        ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
 
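Taken together with the first hunk, the in-progress skb now lives on the ring rather than in ice_rx_buf; a condensed, non-literal sketch of the resulting flow across NAPI polls, using only names from this patch:

	struct sk_buff *skb = rx_ring->skb;	/* resume a partial frame, if any */

	while (total_rx_pkts < budget) {
		/* ... build skb or add a frag from the current descriptor ... */
		if (ice_is_non_eop(rx_ring, rx_desc))
			continue;		/* more fragments of this frame follow */
		ice_receive_skb(rx_ring, skb, vlan_tag);
		skb = NULL;
	}
	rx_ring->skb = skb;	/* park an unfinished frame for the next poll */
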
@@ -1505,22 +1489,11 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
        struct ice_vsi *vsi = q_vector->vsi;
        u32 itr_val;
 
-       /* when exiting WB_ON_ITR lets set a low ITR value and trigger
-        * interrupts to expire right away in case we have more work ready to go
-        * already
+       /* when exiting WB_ON_ITR just reset the countdown and let ITR
+        * resume its normal "interrupts-enabled" path
         */
-       if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) {
-               itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS);
-               wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
-               /* set target back to last user set value */
-               rx->target_itr = rx->itr_setting;
-               /* set current to what we just wrote and dynamic if needed */
-               rx->current_itr = ICE_WB_ON_ITR_USECS |
-                       (rx->itr_setting & ICE_ITR_DYNAMIC);
-               /* allow normal interrupt flow to start */
+       if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
                q_vector->itr_countdown = 0;
-               return;
-       }
 
        /* This will do nothing if dynamic updates are not enabled */
        ice_update_itr(q_vector, tx);
@@ -1560,10 +1533,8 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
                        q_vector->itr_countdown--;
        }
 
-       if (!test_bit(__ICE_DOWN, q_vector->vsi->state))
-               wr32(&q_vector->vsi->back->hw,
-                    GLINT_DYN_CTL(q_vector->reg_idx),
-                    itr_val);
+       if (!test_bit(__ICE_DOWN, vsi->state))
+               wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
 }
 
 /**
@@ -1573,30 +1544,29 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
  * We need to tell hardware to write-back completed descriptors even when
  * interrupts are disabled. Descriptors will be written back on cache line
  * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
- * descriptors may not be written back if they don't fill a cache line until the
- * next interrupt.
+ * descriptors may not be written back if they don't fill a cache line until
+ * the next interrupt.
  *
- * This sets the write-back frequency to 2 microseconds as that is the minimum
- * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to
- * make sure hardware knows we aren't meddling with the INTENA_M bit.
+ * This sets the write-back frequency to whatever was set previously for the
+ * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
+ * aren't meddling with the INTENA_M bit.
  */
 static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
 {
        struct ice_vsi *vsi = q_vector->vsi;
 
-       /* already in WB_ON_ITR mode no need to change it */
+       /* already in wb_on_itr mode no need to change it */
        if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
                return;
 
-       if (q_vector->num_ring_rx)
-               wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
-                    ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
-                                                ICE_RX_ITR));
-
-       if (q_vector->num_ring_tx)
-               wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
-                    ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
-                                                ICE_TX_ITR));
+       /* use previously set ITR values for all of the ITR indices by
+        * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
+        * be static in non-adaptive mode (user configured)
+        */
+       wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+            ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) &
+             GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
+            GLINT_DYN_CTL_WB_ON_ITR_M);
 
        q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
 }
@@ -1663,8 +1633,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
        }
 
        /* If work not completed, return budget and polling will return */
-       if (!clean_complete)
+       if (!clean_complete) {
+               /* Set the writeback on ITR so partial completions of
+                * cache-lines will still continue even if we're polling.
+                */
+               ice_set_wb_on_itr(q_vector);
                return budget;
+       }
 
        /* Exit the polling mode, but don't re-enable interrupts if stack might
         * poll us due to busy-polling
@@ -1923,12 +1898,15 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
                                  ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
                        l4_proto = ip.v4->protocol;
                } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+                       int ret;
+
                        tunnel |= ICE_TX_CTX_EIPT_IPV6;
                        exthdr = ip.hdr + sizeof(*ip.v6);
                        l4_proto = ip.v6->nexthdr;
-                       if (l4.hdr != exthdr)
-                               ipv6_skip_exthdr(skb, exthdr - skb->data,
-                                                &l4_proto, &frag_off);
+                       ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
+                                              &l4_proto, &frag_off);
+                       if (ret < 0)
+                               return -1;
                }
 
                /* define outer transport */
@@ -2418,7 +2396,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
        /* allow CONTROL frames egress from main VSI if FW LLDP disabled */
        if (unlikely(skb->priority == TC_PRIO_CONTROL &&
                     vsi->type == ICE_VSI_PF &&
-                    vsi->port_info->is_sw_lldp))
+                    vsi->port_info->qos_cfg.is_sw_lldp))
                offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
                                        ICE_TX_CTX_DESC_SWTCH_UPLINK <<
                                        ICE_TXD_CTX_QW1_CMD_S);