// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "ice.h"
#include "ice_base.h"
#include "ice_type.h"
#include "ice_xsk.h"
#include "ice_txrx.h"
#include "ice_txrx_lib.h"
#include "ice_lib.h"
static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
{
        return &rx_ring->xdp_buf[idx];
}
/**
 * ice_qp_reset_stats - Resets all stats for rings of given index
 * @vsi: VSI that contains rings of interest
 * @q_idx: ring index in array
 */
static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
{
        memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
               sizeof(vsi->rx_rings[q_idx]->rx_stats));
        memset(&vsi->tx_rings[q_idx]->stats, 0,
               sizeof(vsi->tx_rings[q_idx]->stats));
        if (ice_is_xdp_ena_vsi(vsi))
                memset(&vsi->xdp_rings[q_idx]->stats, 0,
                       sizeof(vsi->xdp_rings[q_idx]->stats));
}
/**
 * ice_qp_clean_rings - Cleans all the rings of a given index
 * @vsi: VSI that contains rings of interest
 * @q_idx: ring index in array
 */
static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
{
        ice_clean_tx_ring(vsi->tx_rings[q_idx]);
        if (ice_is_xdp_ena_vsi(vsi))
                ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
        ice_clean_rx_ring(vsi->rx_rings[q_idx]);
}
/**
 * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
 * @vsi: VSI that has netdev
 * @q_vector: q_vector that has NAPI context
 * @enable: true for enable, false for disable
 */
static void
ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
                     bool enable)
{
        if (!vsi->netdev || !q_vector)
                return;

        if (enable)
                napi_enable(&q_vector->napi);
        else
                napi_disable(&q_vector->napi);
}
/**
 * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
 * @vsi: the VSI that contains queue vector being un-configured
 * @rx_ring: Rx ring that will have its IRQ disabled
 * @q_vector: queue vector
 */
static void
ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
                 struct ice_q_vector *q_vector)
{
        struct ice_pf *pf = vsi->back;
        struct ice_hw *hw = &pf->hw;
        int base = vsi->base_vector;
        u16 reg;
        u32 val;

        /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
         * here only QINT_RQCTL
         */
        reg = rx_ring->reg_idx;
        val = rd32(hw, QINT_RQCTL(reg));
        val &= ~QINT_RQCTL_CAUSE_ENA_M;
        wr32(hw, QINT_RQCTL(reg), val);

        if (q_vector) {
                u16 v_idx = q_vector->v_idx;

                wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
                ice_flush(hw);
                synchronize_irq(pf->msix_entries[v_idx + base].vector);
        }
}
/**
 * ice_qvec_cfg_msix - Enable IRQ for given queue vector
 * @vsi: the VSI that contains queue vector
 * @q_vector: queue vector
 */
static void
ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
{
        u16 reg_idx = q_vector->reg_idx;
        struct ice_pf *pf = vsi->back;
        struct ice_hw *hw = &pf->hw;
        struct ice_tx_ring *tx_ring;
        struct ice_rx_ring *rx_ring;

        ice_cfg_itr(hw, q_vector);

        ice_for_each_tx_ring(tx_ring, q_vector->tx)
                ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
                                      q_vector->tx.itr_idx);

        ice_for_each_rx_ring(rx_ring, q_vector->rx)
                ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
                                      q_vector->rx.itr_idx);

        ice_flush(hw);
}
/**
 * ice_qvec_ena_irq - Enable IRQ for given queue vector
 * @vsi: the VSI that contains queue vector
 * @q_vector: queue vector
 */
static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
{
        struct ice_pf *pf = vsi->back;
        struct ice_hw *hw = &pf->hw;

        ice_irq_dynamic_ena(hw, vsi, q_vector);

        ice_flush(hw);
}
/**
 * ice_qp_dis - Disables a queue pair
 * @vsi: VSI of interest
 * @q_idx: ring index in array
 *
 * Returns 0 on success, negative on failure.
 */
static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
{
        struct ice_txq_meta txq_meta = { };
        struct ice_q_vector *q_vector;
        struct ice_tx_ring *tx_ring;
        struct ice_rx_ring *rx_ring;
        int timeout = 50;
        int err;

        if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
                return -EINVAL;

        tx_ring = vsi->tx_rings[q_idx];
        rx_ring = vsi->rx_rings[q_idx];
        q_vector = rx_ring->q_vector;
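
        /* Serialize against other queue-pair reconfiguration: take the
         * ICE_CFG_BUSY bit, sleeping between attempts for a bounded number
         * of retries.
         */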
        while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
                timeout--;
                if (!timeout)
                        return -EBUSY;
                usleep_range(1000, 2000);
        }
        netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));

        ice_qvec_dis_irq(vsi, rx_ring, q_vector);

        ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
        err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
        if (err)
                return err;
        if (ice_is_xdp_ena_vsi(vsi)) {
                struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];

                memset(&txq_meta, 0, sizeof(txq_meta));
                ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
                err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
                                           &txq_meta);
                if (err)
                        return err;
        }
        err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
        if (err)
                return err;

        ice_qvec_toggle_napi(vsi, q_vector, false);
        ice_qp_clean_rings(vsi, q_idx);
        ice_qp_reset_stats(vsi, q_idx);

        return 0;
}
/**
 * ice_qp_ena - Enables a queue pair
 * @vsi: VSI of interest
 * @q_idx: ring index in array
 *
 * Returns 0 on success, negative on failure.
 */
static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
{
        struct ice_aqc_add_tx_qgrp *qg_buf;
        struct ice_q_vector *q_vector;
        struct ice_tx_ring *tx_ring;
        struct ice_rx_ring *rx_ring;
        u16 size;
        int err;

        if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
                return -EINVAL;

        size = struct_size(qg_buf, txqs, 1);
        qg_buf = kzalloc(size, GFP_KERNEL);
        if (!qg_buf)
                return -ENOMEM;

        qg_buf->num_txqs = 1;

        tx_ring = vsi->tx_rings[q_idx];
        rx_ring = vsi->rx_rings[q_idx];
        q_vector = rx_ring->q_vector;
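
        /* Bring the queue pair back up in order: Tx ring, the optional XDP
         * Tx ring, then the Rx ring, and only afterwards interrupts, NAPI
         * and the stack Tx queue.
         */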
        err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
        if (err)
                goto free_buf;

        if (ice_is_xdp_ena_vsi(vsi)) {
                struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];

                memset(qg_buf, 0, size);
                qg_buf->num_txqs = 1;
                err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
                if (err)
                        goto free_buf;
                ice_set_ring_xdp(xdp_ring);
                xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
        }

        err = ice_vsi_cfg_rxq(rx_ring);
        if (err)
                goto free_buf;

        ice_qvec_cfg_msix(vsi, q_vector);

        err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
        if (err)
                goto free_buf;

        clear_bit(ICE_CFG_BUSY, vsi->state);
        ice_qvec_toggle_napi(vsi, q_vector, true);
        ice_qvec_ena_irq(vsi, q_vector);

        netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));

free_buf:
        kfree(qg_buf);
        return err;
}
/**
 * ice_xsk_pool_disable - disable a buffer pool region
 * @vsi: Current VSI
 * @qid: queue ID
 *
 * Returns 0 on success, negative on failure
 */
static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
{
        struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);

        if (!pool)
                return -EINVAL;

        clear_bit(qid, vsi->af_xdp_zc_qps);
        xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);

        return 0;
}
/**
 * ice_xsk_pool_enable - enable a buffer pool region
 * @vsi: Current VSI
 * @pool: pointer to a requested buffer pool region
 * @qid: queue ID
 *
 * Returns 0 on success, negative on failure
 */
static int
ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
        int err;

        if (vsi->type != ICE_VSI_PF)
                return -EINVAL;

        if (qid >= vsi->netdev->real_num_rx_queues ||
            qid >= vsi->netdev->real_num_tx_queues)
                return -EINVAL;

        err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
                               ICE_RX_DMA_ATTR);
        if (err)
                return err;

        set_bit(qid, vsi->af_xdp_zc_qps);

        return 0;
}
/**
 * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
 * @vsi: Current VSI
 * @pool: buffer pool to enable/associate to a ring, NULL to disable
 * @qid: queue ID
 *
 * Returns 0 on success, negative on failure
 */
int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
        bool if_running, pool_present = !!pool;
        int ret = 0, pool_failure = 0;

        if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
            !is_power_of_2(vsi->tx_rings[qid]->count)) {
                netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
                pool_failure = -EINVAL;
                goto failure;
        }

        if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
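
        /* If the interface is up with XDP enabled, the queue pair has to be
         * quiesced before the pool can be (un)mapped and is re-enabled
         * afterwards.
         */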
        if (if_running) {
                ret = ice_qp_dis(vsi, qid);
                if (ret) {
                        netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
                        goto xsk_pool_if_up;
                }
        }

        pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
                                      ice_xsk_pool_disable(vsi, qid);

xsk_pool_if_up:
        if (if_running) {
                ret = ice_qp_ena(vsi, qid);
                if (!ret && pool_present)
                        napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
                else if (ret)
                        netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
        }

failure:
        if (pool_failure) {
                netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
                           pool_present ? "en" : "dis", pool_failure);
                return pool_failure;
        }

        return ret;
}
/**
 * ice_fill_rx_descs - pick buffers from the XSK buffer pool and use them
 * @pool: XSK Buffer pool to pull the buffers from
 * @xdp: SW ring of xdp_buff that will hold the buffers
 * @rx_desc: Pointer to Rx descriptors that will be filled
 * @count: The number of buffers to allocate
 *
 * This function allocates a number of Rx buffers from the fill ring
 * or the internal recycle mechanism and places them on the Rx ring.
 *
 * Note that ring wrap should be handled by caller of this function.
 *
 * Returns the amount of allocated Rx descriptors
 */
static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
                             union ice_32b_rx_flex_desc *rx_desc, u16 count)
{
        dma_addr_t dma;
        u16 buffs;
        int i;

        buffs = xsk_buff_alloc_batch(pool, xdp, count);
        for (i = 0; i < buffs; i++) {
                dma = xsk_buff_xdp_get_dma(*xdp);
                rx_desc->read.pkt_addr = cpu_to_le64(dma);
                rx_desc->wb.status_error0 = 0;

                rx_desc++;
                xdp++;
        }

        return buffs;
}
/**
 * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
 * @rx_ring: Rx ring
 * @count: The number of buffers to allocate
 *
 * Place the @count of descriptors onto Rx ring. Handle the ring wrap
 * for case where space from next_to_use up to the end of ring is less
 * than @count. Finally do a tail bump.
 *
 * Returns true if all allocations were successful, false if any fail.
 */
static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
{
        union ice_32b_rx_flex_desc *rx_desc;
        u32 nb_buffs_extra = 0, nb_buffs;
        u16 ntu = rx_ring->next_to_use;
        u16 total_count = count;
        struct xdp_buff **xdp;

        rx_desc = ICE_RX_DESC(rx_ring, ntu);
        xdp = ice_xdp_buf(rx_ring, ntu);
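
        /* If the request would run past the end of the ring, fill the tail
         * part first, bump the tail for it, and wrap the descriptor and
         * xdp_buff cursors back to index 0 for the remainder.
         */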
        if (ntu + count >= rx_ring->count) {
                nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
                                                   rx_desc,
                                                   rx_ring->count - ntu);
                rx_desc = ICE_RX_DESC(rx_ring, 0);
                xdp = ice_xdp_buf(rx_ring, 0);
                ntu = 0;
                count -= nb_buffs_extra;
                ice_release_rx_desc(rx_ring, 0);
        }

        nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);

        ntu += nb_buffs;
        if (ntu == rx_ring->count)
                ntu = 0;

        if (rx_ring->next_to_use != ntu)
                ice_release_rx_desc(rx_ring, ntu);

        return total_count == (nb_buffs_extra + nb_buffs);
}
/**
 * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
 * @rx_ring: Rx ring
 * @count: The number of buffers to allocate
 *
 * Wrapper for internal allocation routine; figure out how many tail
 * bumps should take place based on the given threshold
 *
 * Returns true if all calls to internal alloc routine succeeded
 */
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
{
        u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
        u16 batched, leftover, i, tail_bumps;

        batched = ALIGN_DOWN(count, rx_thresh);
        tail_bumps = batched / rx_thresh;
        leftover = count & (rx_thresh - 1);
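
        /* rx_thresh is a quarter of a power-of-two ring, so the mask above
         * is count % rx_thresh; each full threshold-sized batch gets its own
         * tail bump and the leftover gets one more below.
         */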
        for (i = 0; i < tail_bumps; i++)
                if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
                        return false;
        return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
}
/**
 * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
 * @rx_ring: Rx ring
 */
static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
{
        int ntc = rx_ring->next_to_clean + 1;

        ntc = (ntc < rx_ring->count) ? ntc : 0;
        rx_ring->next_to_clean = ntc;
        prefetch(ICE_RX_DESC(rx_ring, ntc));
}
/**
 * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
 * @rx_ring: Rx ring
 * @xdp: Pointer to XDP buffer
 *
 * This function allocates a new skb from a zero-copy Rx buffer.
 *
 * Returns the skb on success, NULL on failure.
 */
static struct sk_buff *
ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
        unsigned int totalsize = xdp->data_end - xdp->data_meta;
        unsigned int metasize = xdp->data - xdp->data_meta;
        struct sk_buff *skb;

        net_prefetch(xdp->data_meta);

        skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
                               GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(!skb))
                return NULL;
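
        /* Copy the frame, including any XDP metadata in front of xdp->data,
         * out of the zero-copy buffer so the underlying umem frame can be
         * returned to the pool right away.
         */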
        memcpy(__skb_put(skb, totalsize), xdp->data_meta,
               ALIGN(totalsize, sizeof(long)));

        if (metasize) {
                skb_metadata_set(skb, metasize);
                __skb_pull(skb, metasize);
        }

        xsk_buff_free(xdp);
        return skb;
}
/**
 * ice_run_xdp_zc - Executes an XDP program in zero-copy path
 * @rx_ring: Rx ring
 * @xdp: xdp_buff used as input to the XDP program
 * @xdp_prog: XDP program to run
 * @xdp_ring: ring to be used for XDP_TX action
 *
 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
 */
static int
ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
               struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
{
        int err, result = ICE_XDP_PASS;
        u32 act;

        act = bpf_prog_run_xdp(xdp_prog, xdp);

        if (likely(act == XDP_REDIRECT)) {
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
                if (err)
                        goto out_failure;
                return ICE_XDP_REDIR;
        }

        switch (act) {
        case XDP_PASS:
                break;
        case XDP_TX:
                result = ice_xmit_xdp_buff(xdp, xdp_ring);
                if (result == ICE_XDP_CONSUMED)
                        goto out_failure;
                break;
        default:
                bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_ABORTED:
out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
                result = ICE_XDP_CONSUMED;
        }

        return result;
}
/**
 * ice_clean_rx_irq_zc - consumes packets from the hardware ring
 * @rx_ring: AF_XDP Rx ring
 * @budget: NAPI budget
 *
 * Returns number of processed packets on success, remaining budget on failure.
 */
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
{
        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
        struct ice_tx_ring *xdp_ring;
        unsigned int xdp_xmit = 0;
        struct bpf_prog *xdp_prog;
        bool failure = false;

        /* ZC path is enabled only when XDP program is set,
         * so here it cannot be NULL
         */
        xdp_prog = READ_ONCE(rx_ring->xdp_prog);
        xdp_ring = rx_ring->xdp_ring;
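
        /* Process descriptors until the budget is exhausted or a descriptor
         * without the DD bit is found. Each buffer is run through the XDP
         * program; TX/REDIRECT/DROP verdicts consume the zero-copy buffer,
         * while XDP_PASS builds an skb for the stack.
         */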
        while (likely(total_rx_packets < (unsigned int)budget)) {
                union ice_32b_rx_flex_desc *rx_desc;
                unsigned int size, xdp_res = 0;
                struct xdp_buff *xdp;
                struct sk_buff *skb;
                u16 stat_err_bits;
                u16 vlan_tag = 0;
                u16 rx_ptype;

                rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);

                stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
                if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
                        break;

                /* This memory barrier is needed to keep us from reading
                 * any other fields out of the rx_desc until we have
                 * verified the descriptor has been written back.
                 */
                dma_rmb();

                xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);

                size = le16_to_cpu(rx_desc->wb.pkt_len) &
                                   ICE_RX_FLX_DESC_PKT_LEN_M;
                if (!size) {
                        xdp->data = NULL;
                        xdp->data_end = NULL;
                        xdp->data_hard_start = NULL;
                        xdp->data_meta = NULL;
                        goto construct_skb;
                }

                xsk_buff_set_size(xdp, size);
                xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);

                xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
                if (xdp_res) {
                        if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
                                xdp_xmit |= xdp_res;
                        else
                                xsk_buff_free(xdp);

                        total_rx_bytes += size;
                        total_rx_packets++;

                        ice_bump_ntc(rx_ring);
                        continue;
                }
construct_skb:
                /* XDP_PASS path */
                skb = ice_construct_skb_zc(rx_ring, xdp);
                if (!skb) {
                        rx_ring->rx_stats.alloc_buf_failed++;
                        break;
                }

                ice_bump_ntc(rx_ring);

                if (eth_skb_pad(skb)) {
                        skb = NULL;
                        continue;
                }

                total_rx_bytes += skb->len;
                total_rx_packets++;

                vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);

                rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
                                       ICE_RX_FLEX_DESC_PTYPE_M;

                ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
                ice_receive_skb(rx_ring, skb, vlan_tag);
        }
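
        /* Refill the ring with as many buffers as were consumed, then tell
         * user space via the need_wakeup flag whether it has to kick the
         * driver again to make further progress.
         */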
        failure = !ice_alloc_rx_bufs_zc(rx_ring, ICE_DESC_UNUSED(rx_ring));

        ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
        ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);

        if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
                if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
                        xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
                else
                        xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);

                return (int)total_rx_packets;
        }

        return failure ? budget : (int)total_rx_packets;
}
/**
 * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
 * @xdp_ring: XDP Tx ring
 * @tx_buf: Tx buffer to clean
 */
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
        xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
        xdp_ring->xdp_tx_active--;
        dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
                         dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
        dma_unmap_len_set(tx_buf, len, 0);
}
/**
 * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
 * @xdp_ring: XDP ring to clean
 * @napi_budget: amount of descriptors that NAPI allows us to clean
 *
 * Returns count of cleaned descriptors
 */
static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
{
        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
        int budget = napi_budget / tx_thresh;
        u16 next_dd = xdp_ring->next_dd;
        u16 ntc, cleared_dds = 0;
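
        /* Completions are tracked with one RS descriptor per tx_thresh
         * entries; walk the ring one threshold-sized chunk at a time for as
         * long as the descriptor at next_dd reports DD.
         */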
        do {
                struct ice_tx_desc *next_dd_desc;
                u16 desc_cnt = xdp_ring->count;
                struct ice_tx_buf *tx_buf;
                u32 xsk_frames;
                u16 i;

                next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
                if (!(next_dd_desc->cmd_type_offset_bsz &
                    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
                        break;

                cleared_dds++;
                xsk_frames = 0;
                if (likely(!xdp_ring->xdp_tx_active)) {
                        xsk_frames = tx_thresh;
                        goto skip;
                }

                ntc = xdp_ring->next_to_clean;

                for (i = 0; i < tx_thresh; i++) {
                        tx_buf = &xdp_ring->tx_buf[ntc];

                        if (tx_buf->raw_buf) {
                                ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
                                tx_buf->raw_buf = NULL;
                        } else {
                                xsk_frames++;
                        }

                        ntc++;
                        if (ntc >= xdp_ring->count)
                                ntc = 0;
                }
skip:
                xdp_ring->next_to_clean += tx_thresh;
                if (xdp_ring->next_to_clean >= desc_cnt)
                        xdp_ring->next_to_clean -= desc_cnt;
                if (xsk_frames)
                        xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
                next_dd_desc->cmd_type_offset_bsz = 0;
                next_dd = next_dd + tx_thresh;
                if (next_dd >= desc_cnt)
                        next_dd = tx_thresh - 1;
        } while (budget--);

        xdp_ring->next_dd = next_dd;

        return cleared_dds * tx_thresh;
}
/**
 * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
 * @xdp_ring: XDP ring to produce the HW Tx descriptor on
 * @desc: AF_XDP descriptor to pull the DMA address and length from
 * @total_bytes: bytes accumulator that will be used for stats update
 */
static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
                         unsigned int *total_bytes)
{
        struct ice_tx_desc *tx_desc;
        dma_addr_t dma;

        dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
        xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);

        tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
        tx_desc->buf_addr = cpu_to_le64(dma);
        tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
                                                      0, desc->len, 0);

        *total_bytes += desc->len;
}
/**
 * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
 * @total_bytes: bytes accumulator that will be used for stats update
 */
static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
                               unsigned int *total_bytes)
{
        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
        u16 ntu = xdp_ring->next_to_use;
        struct ice_tx_desc *tx_desc;
        u32 i;

        loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
                dma_addr_t dma;

                dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
                xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);

                tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
                tx_desc->buf_addr = cpu_to_le64(dma);
                tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
                                                              0, descs[i].len, 0);

                *total_bytes += descs[i].len;
        }

        xdp_ring->next_to_use = ntu;
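
        /* Once enough descriptors have been produced, request a completion
         * writeback by setting the RS bit on the descriptor at next_rs and
         * move the next_rs marker one threshold further ahead.
         */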
        if (xdp_ring->next_to_use > xdp_ring->next_rs) {
                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
                tx_desc->cmd_type_offset_bsz |=
                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
                xdp_ring->next_rs += tx_thresh;
        }
}
/**
 * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
 * @nb_pkts: count of packets to be sent
 * @total_bytes: bytes accumulator that will be used for stats update
 */
static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
                                u32 nb_pkts, unsigned int *total_bytes)
{
        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
        u32 batched, leftover, i;
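
        /* Split the work into unrolled batches of PKTS_PER_BATCH descriptors
         * and a remainder that is produced one packet at a time.
         */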
        batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
        leftover = nb_pkts & (PKTS_PER_BATCH - 1);

        for (i = 0; i < batched; i += PKTS_PER_BATCH)
                ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
        for (; i < batched + leftover; i++)
                ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);

        if (xdp_ring->next_to_use > xdp_ring->next_rs) {
                struct ice_tx_desc *tx_desc;

                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
                tx_desc->cmd_type_offset_bsz |=
                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
                xdp_ring->next_rs += tx_thresh;
        }
}
/**
 * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 * @budget: number of free descriptors on HW Tx ring that can be used
 * @napi_budget: amount of descriptors that NAPI allows us to clean
 *
 * Returns true if there is no more work that needs to be done, false otherwise
 */
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
{
        struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
        u32 nb_pkts, nb_processed = 0;
        unsigned int total_bytes = 0;

        if (budget < tx_thresh)
                budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);

        nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
        if (!nb_pkts)
                return true;
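
        /* If the grabbed descriptors would cross the end of the ring, fill
         * up to the ring boundary first, close that chunk with an RS bit,
         * wrap next_to_use back to 0 and then produce the rest.
         */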
        if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
                struct ice_tx_desc *tx_desc;

                nb_processed = xdp_ring->count - xdp_ring->next_to_use;
                ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
                tx_desc->cmd_type_offset_bsz |=
                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
                xdp_ring->next_rs = tx_thresh - 1;
                xdp_ring->next_to_use = 0;
        }

        ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
                            &total_bytes);

        ice_xdp_ring_update_tail(xdp_ring);
        ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);

        if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
                xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);

        return nb_pkts < budget;
}
/**
 * ice_xsk_wakeup - Implements ndo_xsk_wakeup
 * @netdev: net_device
 * @queue_id: queue to wake up
 * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
 *
 * Returns negative on error, zero otherwise.
 */
int
ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
               u32 __always_unused flags)
{
        struct ice_netdev_priv *np = netdev_priv(netdev);
        struct ice_q_vector *q_vector;
        struct ice_vsi *vsi = np->vsi;
        struct ice_tx_ring *ring;

        if (test_bit(ICE_DOWN, vsi->state))
                return -ENETDOWN;

        if (!ice_is_xdp_ena_vsi(vsi))
                return -EINVAL;

        if (queue_id >= vsi->num_txq)
                return -EINVAL;

        if (!vsi->xdp_rings[queue_id]->xsk_pool)
                return -EINVAL;

        ring = vsi->xdp_rings[queue_id];

        /* The idea here is that if NAPI is running, mark a miss, so
         * it will run again. If not, trigger an interrupt and
         * schedule the NAPI from interrupt context. If NAPI would be
         * scheduled here, the interrupt affinity would not be
         * honored.
         */
        q_vector = ring->q_vector;
        if (!napi_if_scheduled_mark_missed(&q_vector->napi))
                ice_trigger_sw_intr(&vsi->back->hw, q_vector);

        return 0;
}
/**
 * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
 * @vsi: VSI to be checked
 *
 * Returns true if any of the Rx rings has an AF_XDP buff pool attached
 */
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
{
        int i;

        ice_for_each_rxq(vsi, i) {
                if (xsk_get_pool_from_qid(vsi->netdev, i))
                        return true;
        }

        return false;
}
/**
 * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
 * @rx_ring: ring to be cleaned
 */
void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
{
        u16 count_mask = rx_ring->count - 1;
        u16 ntc = rx_ring->next_to_clean;
        u16 ntu = rx_ring->next_to_use;

        for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
                struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);

                xsk_buff_free(xdp);
        }
}
/**
 * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
 * @xdp_ring: XDP_Tx ring
 */
void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
{
        u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
        u32 xsk_frames = 0;

        while (ntc != ntu) {
                struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];

                if (tx_buf->raw_buf)
                        ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
                else
                        xsk_frames++;

                tx_buf->raw_buf = NULL;

                ntc++;
                if (ntc >= xdp_ring->count)
                        ntc = 0;
        }

        if (xsk_frames)
                xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}