Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[linux-2.6-microblaze.git] drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c6f24ab..a9a984c 100644
@@ -28,6 +28,7 @@
 #include <linux/if_vlan.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 #include <linux/prefetch.h>
 #include <linux/pinctrl/consumer.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/net_tstamp.h>
 #include <linux/phylink.h>
 #include <linux/udp.h>
+#include <linux/bpf_trace.h>
 #include <net/pkt_cls.h>
+#include <net/xdp_sock_drv.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
+#include "stmmac_xdp.h"
 #include <linux/reset.h>
 #include <linux/of_mdio.h>
 #include "dwmac1000.h"
@@ -66,6 +70,16 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 #define STMMAC_TX_THRESH(x)    ((x)->dma_tx_size / 4)
 #define STMMAC_RX_THRESH(x)    ((x)->dma_rx_size / 4)
 
+/* Limit to make sure XDP TX and slow path can coexist */
+#define STMMAC_XSK_TX_BUDGET_MAX       256
+#define STMMAC_TX_XSK_AVAIL            16
+#define STMMAC_RX_FILL_BATCH           16
+
+#define STMMAC_XDP_PASS                0
+#define STMMAC_XDP_CONSUMED    BIT(0)
+#define STMMAC_XDP_TX          BIT(1)
+#define STMMAC_XDP_REDIRECT    BIT(2)
+
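STMMAC_XDP_PASS is deliberately 0 while the other verdicts are distinct bits,
so an RX NAPI cycle can OR each frame's verdict into a single status word and
do the expensive follow-up work (XDP_TX descriptor flush, redirect flush) only
once per cycle. A minimal sketch of that pattern, assuming the surrounding RX
loop accumulates the verdicts into xdp_status; the RX path itself is not part
of this excerpt, and only stmmac_flush_tx_descriptors() and xdp_do_flush()
are taken from the driver:

    /* Illustrative sketch, not the driver's actual finalize helper. */
    static void stmmac_finalize_xdp_rx_sketch(struct stmmac_priv *priv,
                                              unsigned int xdp_status,
                                              int queue)
    {
            if (xdp_status & STMMAC_XDP_TX)
                    /* one descriptor flush/tail kick for all XDP_TX frames */
                    stmmac_flush_tx_descriptors(priv, queue);

            if (xdp_status & STMMAC_XDP_REDIRECT)
                    /* one flush for all xdp_do_redirect() calls this cycle */
                    xdp_do_flush();
    }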
 static int flow_ctrl = FLOW_AUTO;
 module_param(flow_ctrl, int, 0644);
 MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
@@ -104,6 +118,13 @@ module_param(chain_mode, int, 0444);
 MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode");
 
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
+/* For MSI interrupt handling */
+static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id);
+static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id);
+static irqreturn_t stmmac_msi_intr_tx(int irq, void *data);
+static irqreturn_t stmmac_msi_intr_rx(int irq, void *data);
+static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue);
+static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue);
 
 #ifdef CONFIG_DEBUG_FS
 static const struct net_device_ops stmmac_netdev_ops;
@@ -113,6 +134,38 @@ static void stmmac_exit_fs(struct net_device *dev);
 
 #define STMMAC_COAL_TIMER(x) (ns_to_ktime((x) * NSEC_PER_USEC))
 
+int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled)
+{
+       int ret = 0;
+
+       if (enabled) {
+               ret = clk_prepare_enable(priv->plat->stmmac_clk);
+               if (ret)
+                       return ret;
+               ret = clk_prepare_enable(priv->plat->pclk);
+               if (ret) {
+                       clk_disable_unprepare(priv->plat->stmmac_clk);
+                       return ret;
+               }
+               if (priv->plat->clks_config) {
+                       ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+                       if (ret) {
+                               clk_disable_unprepare(priv->plat->stmmac_clk);
+                               clk_disable_unprepare(priv->plat->pclk);
+                               return ret;
+                       }
+               }
+       } else {
+               clk_disable_unprepare(priv->plat->stmmac_clk);
+               clk_disable_unprepare(priv->plat->pclk);
+               if (priv->plat->clks_config)
+                       priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(stmmac_bus_clks_config);
+
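stmmac_bus_clks_config() enables the clocks in dependency order (stmmac_clk,
then pclk, then the optional platform clks_config hook) and unwinds the
already-enabled ones on failure; the disable path tears everything down
unconditionally. Together with the new <linux/pm_runtime.h> include, the
natural consumer is the runtime-PM glue. A hedged sketch of such a caller
(function names below are illustrative; the real callbacks live in the
platform/PCI glue, not in this file):

    static int __maybe_unused stmmac_runtime_suspend_sketch(struct device *dev)
    {
            struct net_device *ndev = dev_get_drvdata(dev);
            struct stmmac_priv *priv = netdev_priv(ndev);

            /* gate stmmac_clk/pclk while the interface is runtime-idle */
            return stmmac_bus_clks_config(priv, false);
    }

    static int __maybe_unused stmmac_runtime_resume_sketch(struct device *dev)
    {
            struct net_device *ndev = dev_get_drvdata(dev);
            struct stmmac_priv *priv = netdev_priv(ndev);

            /* re-enable bus clocks before any register access */
            return stmmac_bus_clks_config(priv, true);
    }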
 /**
  * stmmac_verify_args - verify the driver parameters.
  * Description: it checks the driver parameters and set a default in case of
@@ -134,11 +187,7 @@ static void stmmac_verify_args(void)
                eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 }
 
-/**
- * stmmac_disable_all_queues - Disable all queues
- * @priv: driver private structure
- */
-static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
        u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
        u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
@@ -148,6 +197,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
        for (queue = 0; queue < maxq; queue++) {
                struct stmmac_channel *ch = &priv->channel[queue];
 
+               if (stmmac_xdp_is_enabled(priv) &&
+                   test_bit(queue, priv->af_xdp_zc_qps)) {
+                       napi_disable(&ch->rxtx_napi);
+                       continue;
+               }
+
                if (queue < rx_queues_cnt)
                        napi_disable(&ch->rx_napi);
                if (queue < tx_queues_cnt)
@@ -155,6 +210,28 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
        }
 }
 
+/**
+ * stmmac_disable_all_queues - Disable all queues
+ * @priv: driver private structure
+ */
+static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+{
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       struct stmmac_rx_queue *rx_q;
+       u32 queue;
+
+       /* synchronize_rcu() needed for pending XDP buffers to drain */
+       for (queue = 0; queue < rx_queues_cnt; queue++) {
+               rx_q = &priv->rx_queue[queue];
+               if (rx_q->xsk_pool) {
+                       synchronize_rcu();
+                       break;
+               }
+       }
+
+       __stmmac_disable_all_queues(priv);
+}
+
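A single synchronize_rcu() is enough even when several queues have XSK pools
attached: the loop breaks after the first one it finds because one grace
period waits for every CPU, and therefore for every in-flight RCU read-side
section of the XDP RX fast path, before the NAPIs are disabled. A sketch of
the reader side this pairs with (the RX path and the xdp_prog field name are
assumptions, not part of this excerpt):

    struct bpf_prog *prog;
    u32 act = XDP_PASS;

    rcu_read_lock();
    prog = READ_ONCE(priv->xdp_prog);       /* field name assumed */
    if (prog)
            act = bpf_prog_run_xdp(prog, &xdp);
    rcu_read_unlock();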
 /**
  * stmmac_enable_all_queues - Enable all queues
  * @priv: driver private structure
@@ -169,6 +246,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
        for (queue = 0; queue < maxq; queue++) {
                struct stmmac_channel *ch = &priv->channel[queue];
 
+               if (stmmac_xdp_is_enabled(priv) &&
+                   test_bit(queue, priv->af_xdp_zc_qps)) {
+                       napi_enable(&ch->rxtx_napi);
+                       continue;
+               }
+
                if (queue < rx_queues_cnt)
                        napi_enable(&ch->rx_napi);
                if (queue < tx_queues_cnt)
@@ -433,6 +516,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 {
        struct skb_shared_hwtstamps shhwtstamp;
        bool found = false;
+       s64 adjust = 0;
        u64 ns = 0;
 
        if (!priv->hwts_tx_en)
@@ -451,6 +535,13 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
        }
 
        if (found) {
+               /* Correct the clk domain crossing (CDC) error */
+               if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+                       adjust += -(2 * (NSEC_PER_SEC /
+                                        priv->plat->clk_ptp_rate));
+                       ns += adjust;
+               }
+
                memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
                shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
@@ -474,6 +565,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 {
        struct skb_shared_hwtstamps *shhwtstamp = NULL;
        struct dma_desc *desc = p;
+       u64 adjust = 0;
        u64 ns = 0;
 
        if (!priv->hwts_rx_en)
@@ -485,6 +577,13 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
        /* Check if timestamp is available */
        if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
                stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
+
+               /* Correct the clk domain crossing (CDC) error */
+               if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+                       adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate);
+                       ns -= adjust;
+               }
+
                netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
                memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
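Both timestamp paths remove the same two-PTP-clock-cycle clock domain
crossing (CDC) latency; the TX path adds a negative adjust while the RX path
subtracts a positive one, so the net correction is identical. A worked
example (the 250 MHz clk_ptp_rate is an assumption, not taken from the
patch):

    /* One PTP cycle = NSEC_PER_SEC / 250000000 = 4 ns, CDC error = 2 cycles. */
    u64 ns = 1000000008;                            /* raw HW timestamp */
    s64 adjust = -(2 * (NSEC_PER_SEC / 250000000)); /* = -8 ns */

    ns += adjust;                                   /* corrected: 1000000000 */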
@@ -922,6 +1021,21 @@ static void stmmac_mac_an_restart(struct phylink_config *config)
        /* Not Supported */
 }
 
+static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up)
+{
+       struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+       enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+       enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+       bool *hs_enable = &fpe_cfg->hs_enable;
+
+       if (is_up && *hs_enable) {
+               stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY);
+       } else {
+               *lo_state = FPE_EVENT_UNKNOWN;
+               *lp_state = FPE_EVENT_UNKNOWN;
+       }
+}
+
 static void stmmac_mac_link_down(struct phylink_config *config,
                                 unsigned int mode, phy_interface_t interface)
 {
@@ -932,6 +1046,9 @@ static void stmmac_mac_link_down(struct phylink_config *config,
        priv->tx_lpi_enabled = false;
        stmmac_eee_init(priv);
        stmmac_set_eee_pls(priv, priv->hw, false);
+
+       if (priv->dma_cap.fpesel)
+               stmmac_fpe_link_state_handle(priv, false);
 }
 
 static void stmmac_mac_link_up(struct phylink_config *config,
@@ -1030,6 +1147,9 @@ static void stmmac_mac_link_up(struct phylink_config *config,
                priv->tx_lpi_enabled = priv->eee_enabled;
                stmmac_set_eee_pls(priv, priv->hw, true);
        }
+
+       if (priv->dma_cap.fpesel)
+               stmmac_fpe_link_state_handle(priv, true);
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
@@ -1117,6 +1237,8 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
        priv->phylink_config.dev = &priv->dev->dev;
        priv->phylink_config.type = PHYLINK_NETDEV;
        priv->phylink_config.pcs_poll = true;
+       priv->phylink_config.ovr_an_inband =
+               priv->plat->mdio_bus_data->xpcs_an_inband;
 
        if (!fwnode)
                fwnode = dev_fwnode(priv->device);
@@ -1304,11 +1426,14 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
        struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
 
-       buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
-       if (!buf->page)
-               return -ENOMEM;
+       if (!buf->page) {
+               buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
+               if (!buf->page)
+                       return -ENOMEM;
+               buf->page_offset = stmmac_rx_offset(priv);
+       }
 
-       if (priv->sph) {
+       if (priv->sph && !buf->sec_page) {
                buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
                if (!buf->sec_page)
                        return -ENOMEM;
@@ -1320,7 +1445,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
                stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
        }
 
-       buf->addr = page_pool_get_dma_addr(buf->page);
+       buf->addr = page_pool_get_dma_addr(buf->page) + buf->page_offset;
+
        stmmac_set_desc_addr(priv, p, buf->addr);
        if (priv->dma_buf_sz == BUF_SIZE_16KiB)
                stmmac_init_desc3(priv, p);
@@ -1358,7 +1484,8 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 {
        struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 
-       if (tx_q->tx_skbuff_dma[i].buf) {
+       if (tx_q->tx_skbuff_dma[i].buf &&
+           tx_q->tx_skbuff_dma[i].buf_type != STMMAC_TXBUF_T_XDP_TX) {
                if (tx_q->tx_skbuff_dma[i].map_as_page)
                        dma_unmap_page(priv->device,
                                       tx_q->tx_skbuff_dma[i].buf,
@@ -1371,84 +1498,227 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
                                         DMA_TO_DEVICE);
        }
 
-       if (tx_q->tx_skbuff[i]) {
+       if (tx_q->xdpf[i] &&
+           (tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX ||
+            tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_NDO)) {
+               xdp_return_frame(tx_q->xdpf[i]);
+               tx_q->xdpf[i] = NULL;
+       }
+
+       if (tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XSK_TX)
+               tx_q->xsk_frames_done++;
+
+       if (tx_q->tx_skbuff[i] &&
+           tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) {
                dev_kfree_skb_any(tx_q->tx_skbuff[i]);
                tx_q->tx_skbuff[i] = NULL;
-               tx_q->tx_skbuff_dma[i].buf = 0;
-               tx_q->tx_skbuff_dma[i].map_as_page = false;
        }
+
+       tx_q->tx_skbuff_dma[i].buf = 0;
+       tx_q->tx_skbuff_dma[i].map_as_page = false;
 }
 
 /**
- * init_dma_rx_desc_rings - init the RX descriptor rings
- * @dev: net device structure
+ * dma_free_rx_skbufs - free RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
+{
+       int i;
+
+       for (i = 0; i < priv->dma_rx_size; i++)
+               stmmac_free_rx_buffer(priv, queue, i);
+}
+
+static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, u32 queue,
+                                  gfp_t flags)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       int i;
+
+       for (i = 0; i < priv->dma_rx_size; i++) {
+               struct dma_desc *p;
+               int ret;
+
+               if (priv->extend_desc)
+                       p = &((rx_q->dma_erx + i)->basic);
+               else
+                       p = rx_q->dma_rx + i;
+
+               ret = stmmac_init_rx_buffers(priv, p, i, flags,
+                                            queue);
+               if (ret)
+                       return ret;
+
+               rx_q->buf_alloc_num++;
+       }
+
+       return 0;
+}
+
+/**
+ * dma_free_rx_xskbufs - free RX dma buffers from XSK pool
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_free_rx_xskbufs(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       int i;
+
+       for (i = 0; i < priv->dma_rx_size; i++) {
+               struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+
+               if (!buf->xdp)
+                       continue;
+
+               xsk_buff_free(buf->xdp);
+               buf->xdp = NULL;
+       }
+}
+
+static int stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       int i;
+
+       for (i = 0; i < priv->dma_rx_size; i++) {
+               struct stmmac_rx_buffer *buf;
+               dma_addr_t dma_addr;
+               struct dma_desc *p;
+
+               if (priv->extend_desc)
+                       p = (struct dma_desc *)(rx_q->dma_erx + i);
+               else
+                       p = rx_q->dma_rx + i;
+
+               buf = &rx_q->buf_pool[i];
+
+               buf->xdp = xsk_buff_alloc(rx_q->xsk_pool);
+               if (!buf->xdp)
+                       return -ENOMEM;
+
+               dma_addr = xsk_buff_xdp_get_dma(buf->xdp);
+               stmmac_set_desc_addr(priv, p, dma_addr);
+               rx_q->buf_alloc_num++;
+       }
+
+       return 0;
+}
+
+static struct xsk_buff_pool *stmmac_get_xsk_pool(struct stmmac_priv *priv, u32 queue)
+{
+       if (!stmmac_xdp_is_enabled(priv) || !test_bit(queue, priv->af_xdp_zc_qps))
+               return NULL;
+
+       return xsk_get_pool_from_qid(priv->dev, queue);
+}
+
+/**
+ * __init_dma_rx_desc_rings - init the RX descriptor ring (per queue)
+ * @priv: driver private structure
+ * @queue: RX queue index
  * @flags: gfp flag.
  * Description: this function initializes the DMA RX descriptors
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
+static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t flags)
 {
-       struct stmmac_priv *priv = netdev_priv(dev);
-       u32 rx_count = priv->plat->rx_queues_to_use;
-       int ret = -ENOMEM;
-       int queue;
-       int i;
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       int ret;
 
-       /* RX INITIALIZATION */
        netif_dbg(priv, probe, priv->dev,
-                 "SKB addresses:\nskb\t\tskb data\tdma data\n");
+                 "(%s) dma_rx_phy=0x%08x\n", __func__,
+                 (u32)rx_q->dma_rx_phy);
 
-       for (queue = 0; queue < rx_count; queue++) {
-               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       stmmac_clear_rx_descriptors(priv, queue);
 
-               netif_dbg(priv, probe, priv->dev,
-                         "(%s) dma_rx_phy=0x%08x\n", __func__,
-                         (u32)rx_q->dma_rx_phy);
+       xdp_rxq_info_unreg_mem_model(&rx_q->xdp_rxq);
 
-               stmmac_clear_rx_descriptors(priv, queue);
+       rx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue);
 
-               for (i = 0; i < priv->dma_rx_size; i++) {
-                       struct dma_desc *p;
+       if (rx_q->xsk_pool) {
+               WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+                                                  MEM_TYPE_XSK_BUFF_POOL,
+                                                  NULL));
+               netdev_info(priv->dev,
+                           "Register MEM_TYPE_XSK_BUFF_POOL RxQ-%d\n",
+                           rx_q->queue_index);
+               xsk_pool_set_rxq_info(rx_q->xsk_pool, &rx_q->xdp_rxq);
+       } else {
+               WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+                                                  MEM_TYPE_PAGE_POOL,
+                                                  rx_q->page_pool));
+               netdev_info(priv->dev,
+                           "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
+                           rx_q->queue_index);
+       }
 
-                       if (priv->extend_desc)
-                               p = &((rx_q->dma_erx + i)->basic);
-                       else
-                               p = rx_q->dma_rx + i;
+       if (rx_q->xsk_pool) {
+               /* RX XDP ZC buffer pool may not be populated, e.g.
+                * xdpsock TX-only.
+                */
+               stmmac_alloc_rx_buffers_zc(priv, queue);
+       } else {
+               ret = stmmac_alloc_rx_buffers(priv, queue, flags);
+               if (ret < 0)
+                       return -ENOMEM;
+       }
 
-                       ret = stmmac_init_rx_buffers(priv, p, i, flags,
-                                                    queue);
-                       if (ret)
-                               goto err_init_rx_buffers;
-               }
+       rx_q->cur_rx = 0;
+       rx_q->dirty_rx = 0;
 
-               rx_q->cur_rx = 0;
-               rx_q->dirty_rx = (unsigned int)(i - priv->dma_rx_size);
-
-               /* Setup the chained descriptor addresses */
-               if (priv->mode == STMMAC_CHAIN_MODE) {
-                       if (priv->extend_desc)
-                               stmmac_mode_init(priv, rx_q->dma_erx,
-                                                rx_q->dma_rx_phy,
-                                                priv->dma_rx_size, 1);
-                       else
-                               stmmac_mode_init(priv, rx_q->dma_rx,
-                                                rx_q->dma_rx_phy,
-                                                priv->dma_rx_size, 0);
-               }
+       /* Setup the chained descriptor addresses */
+       if (priv->mode == STMMAC_CHAIN_MODE) {
+               if (priv->extend_desc)
+                       stmmac_mode_init(priv, rx_q->dma_erx,
+                                        rx_q->dma_rx_phy,
+                                        priv->dma_rx_size, 1);
+               else
+                       stmmac_mode_init(priv, rx_q->dma_rx,
+                                        rx_q->dma_rx_phy,
+                                        priv->dma_rx_size, 0);
+       }
+
+       return 0;
+}
+
+static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       int ret;
+
+       /* RX INITIALIZATION */
+       netif_dbg(priv, probe, priv->dev,
+                 "SKB addresses:\nskb\t\tskb data\tdma data\n");
+
+       for (queue = 0; queue < rx_count; queue++) {
+               ret = __init_dma_rx_desc_rings(priv, queue, flags);
+               if (ret)
+                       goto err_init_rx_buffers;
        }
 
        return 0;
 
 err_init_rx_buffers:
        while (queue >= 0) {
-               while (--i >= 0)
-                       stmmac_free_rx_buffer(priv, queue, i);
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               if (rx_q->xsk_pool)
+                       dma_free_rx_xskbufs(priv, queue);
+               else
+                       dma_free_rx_skbufs(priv, queue);
+
+               rx_q->buf_alloc_num = 0;
+               rx_q->xsk_pool = NULL;
 
                if (queue == 0)
                        break;
 
-               i = priv->dma_rx_size;
                queue--;
        }
 
@@ -1456,63 +1726,75 @@ err_init_rx_buffers:
 }
 
 /**
- * init_dma_tx_desc_rings - init the TX descriptor rings
- * @dev: net device structure.
+ * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue)
+ * @priv: driver private structure
+ * @queue : TX queue index
  * Description: this function initializes the DMA TX descriptors
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_tx_desc_rings(struct net_device *dev)
+static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, u32 queue)
 {
-       struct stmmac_priv *priv = netdev_priv(dev);
-       u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
-       u32 queue;
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        int i;
 
-       for (queue = 0; queue < tx_queue_cnt; queue++) {
-               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       netif_dbg(priv, probe, priv->dev,
+                 "(%s) dma_tx_phy=0x%08x\n", __func__,
+                 (u32)tx_q->dma_tx_phy);
 
-               netif_dbg(priv, probe, priv->dev,
-                         "(%s) dma_tx_phy=0x%08x\n", __func__,
-                        (u32)tx_q->dma_tx_phy);
-
-               /* Setup the chained descriptor addresses */
-               if (priv->mode == STMMAC_CHAIN_MODE) {
-                       if (priv->extend_desc)
-                               stmmac_mode_init(priv, tx_q->dma_etx,
-                                                tx_q->dma_tx_phy,
-                                                priv->dma_tx_size, 1);
-                       else if (!(tx_q->tbs & STMMAC_TBS_AVAIL))
-                               stmmac_mode_init(priv, tx_q->dma_tx,
-                                                tx_q->dma_tx_phy,
-                                                priv->dma_tx_size, 0);
-               }
+       /* Setup the chained descriptor addresses */
+       if (priv->mode == STMMAC_CHAIN_MODE) {
+               if (priv->extend_desc)
+                       stmmac_mode_init(priv, tx_q->dma_etx,
+                                        tx_q->dma_tx_phy,
+                                        priv->dma_tx_size, 1);
+               else if (!(tx_q->tbs & STMMAC_TBS_AVAIL))
+                       stmmac_mode_init(priv, tx_q->dma_tx,
+                                        tx_q->dma_tx_phy,
+                                        priv->dma_tx_size, 0);
+       }
 
-               for (i = 0; i < priv->dma_tx_size; i++) {
-                       struct dma_desc *p;
-                       if (priv->extend_desc)
-                               p = &((tx_q->dma_etx + i)->basic);
-                       else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-                               p = &((tx_q->dma_entx + i)->basic);
-                       else
-                               p = tx_q->dma_tx + i;
+       tx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue);
 
-                       stmmac_clear_desc(priv, p);
+       for (i = 0; i < priv->dma_tx_size; i++) {
+               struct dma_desc *p;
 
-                       tx_q->tx_skbuff_dma[i].buf = 0;
-                       tx_q->tx_skbuff_dma[i].map_as_page = false;
-                       tx_q->tx_skbuff_dma[i].len = 0;
-                       tx_q->tx_skbuff_dma[i].last_segment = false;
-                       tx_q->tx_skbuff[i] = NULL;
-               }
+               if (priv->extend_desc)
+                       p = &((tx_q->dma_etx + i)->basic);
+               else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+                       p = &((tx_q->dma_entx + i)->basic);
+               else
+                       p = tx_q->dma_tx + i;
 
-               tx_q->dirty_tx = 0;
-               tx_q->cur_tx = 0;
-               tx_q->mss = 0;
+               stmmac_clear_desc(priv, p);
 
-               netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
+               tx_q->tx_skbuff_dma[i].buf = 0;
+               tx_q->tx_skbuff_dma[i].map_as_page = false;
+               tx_q->tx_skbuff_dma[i].len = 0;
+               tx_q->tx_skbuff_dma[i].last_segment = false;
+               tx_q->tx_skbuff[i] = NULL;
        }
 
+       tx_q->dirty_tx = 0;
+       tx_q->cur_tx = 0;
+       tx_q->mss = 0;
+
+       netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
+
+       return 0;
+}
+
+static int init_dma_tx_desc_rings(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       u32 tx_queue_cnt;
+       u32 queue;
+
+       tx_queue_cnt = priv->plat->tx_queues_to_use;
+
+       for (queue = 0; queue < tx_queue_cnt; queue++)
+               __init_dma_tx_desc_rings(priv, queue);
+
        return 0;
 }
 
@@ -1543,19 +1825,6 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
        return ret;
 }
 
-/**
- * dma_free_rx_skbufs - free RX dma buffers
- * @priv: private structure
- * @queue: RX queue index
- */
-static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
-{
-       int i;
-
-       for (i = 0; i < priv->dma_rx_size; i++)
-               stmmac_free_rx_buffer(priv, queue, i);
-}
-
 /**
  * dma_free_tx_skbufs - free TX dma buffers
  * @priv: private structure
@@ -1563,10 +1832,19 @@ static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
  */
 static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        int i;
 
+       tx_q->xsk_frames_done = 0;
+
        for (i = 0; i < priv->dma_tx_size; i++)
                stmmac_free_tx_buffer(priv, queue, i);
+
+       if (tx_q->xsk_pool && tx_q->xsk_frames_done) {
+               xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done);
+               tx_q->xsk_frames_done = 0;
+               tx_q->xsk_pool = NULL;
+       }
 }
 
 /**
@@ -1583,137 +1861,186 @@ static void stmmac_free_tx_skbufs(struct stmmac_priv *priv)
 }
 
 /**
- * free_dma_rx_desc_resources - free RX dma desc resources
+ * __free_dma_rx_desc_resources - free RX dma desc resources (per queue)
  * @priv: private structure
+ * @queue: RX queue index
  */
-static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
-       u32 rx_count = priv->plat->rx_queues_to_use;
-       u32 queue;
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-       /* Free RX queue resources */
-       for (queue = 0; queue < rx_count; queue++) {
-               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-
-               /* Release the DMA RX socket buffers */
+       /* Release the DMA RX socket buffers */
+       if (rx_q->xsk_pool)
+               dma_free_rx_xskbufs(priv, queue);
+       else
                dma_free_rx_skbufs(priv, queue);
 
-               /* Free DMA regions of consistent memory previously allocated */
-               if (!priv->extend_desc)
-                       dma_free_coherent(priv->device, priv->dma_rx_size *
-                                         sizeof(struct dma_desc),
-                                         rx_q->dma_rx, rx_q->dma_rx_phy);
-               else
-                       dma_free_coherent(priv->device, priv->dma_rx_size *
-                                         sizeof(struct dma_extended_desc),
-                                         rx_q->dma_erx, rx_q->dma_rx_phy);
+       rx_q->buf_alloc_num = 0;
+       rx_q->xsk_pool = NULL;
 
-               kfree(rx_q->buf_pool);
-               if (rx_q->page_pool)
-                       page_pool_destroy(rx_q->page_pool);
-       }
+       /* Free DMA regions of consistent memory previously allocated */
+       if (!priv->extend_desc)
+               dma_free_coherent(priv->device, priv->dma_rx_size *
+                                 sizeof(struct dma_desc),
+                                 rx_q->dma_rx, rx_q->dma_rx_phy);
+       else
+               dma_free_coherent(priv->device, priv->dma_rx_size *
+                                 sizeof(struct dma_extended_desc),
+                                 rx_q->dma_erx, rx_q->dma_rx_phy);
+
+       if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq))
+               xdp_rxq_info_unreg(&rx_q->xdp_rxq);
+
+       kfree(rx_q->buf_pool);
+       if (rx_q->page_pool)
+               page_pool_destroy(rx_q->page_pool);
+}
+
+static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       /* Free RX queue resources */
+       for (queue = 0; queue < rx_count; queue++)
+               __free_dma_rx_desc_resources(priv, queue);
 }
 
 /**
- * free_dma_tx_desc_resources - free TX dma desc resources
+ * __free_dma_tx_desc_resources - free TX dma desc resources (per queue)
  * @priv: private structure
+ * @queue: TX queue index
  */
-static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
+static void __free_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
-       u32 tx_count = priv->plat->tx_queues_to_use;
-       u32 queue;
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       size_t size;
+       void *addr;
 
-       /* Free TX queue resources */
-       for (queue = 0; queue < tx_count; queue++) {
-               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
-               size_t size;
-               void *addr;
+       /* Release the DMA TX socket buffers */
+       dma_free_tx_skbufs(priv, queue);
+
+       if (priv->extend_desc) {
+               size = sizeof(struct dma_extended_desc);
+               addr = tx_q->dma_etx;
+       } else if (tx_q->tbs & STMMAC_TBS_AVAIL) {
+               size = sizeof(struct dma_edesc);
+               addr = tx_q->dma_entx;
+       } else {
+               size = sizeof(struct dma_desc);
+               addr = tx_q->dma_tx;
+       }
 
-               /* Release the DMA TX socket buffers */
-               dma_free_tx_skbufs(priv, queue);
+       size *= priv->dma_tx_size;
 
-               if (priv->extend_desc) {
-                       size = sizeof(struct dma_extended_desc);
-                       addr = tx_q->dma_etx;
-               } else if (tx_q->tbs & STMMAC_TBS_AVAIL) {
-                       size = sizeof(struct dma_edesc);
-                       addr = tx_q->dma_entx;
-               } else {
-                       size = sizeof(struct dma_desc);
-                       addr = tx_q->dma_tx;
-               }
+       dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy);
 
-               size *= priv->dma_tx_size;
+       kfree(tx_q->tx_skbuff_dma);
+       kfree(tx_q->tx_skbuff);
+}
 
-               dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy);
+static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 queue;
 
-               kfree(tx_q->tx_skbuff_dma);
-               kfree(tx_q->tx_skbuff);
-       }
+       /* Free TX queue resources */
+       for (queue = 0; queue < tx_count; queue++)
+               __free_dma_tx_desc_resources(priv, queue);
 }
 
 /**
- * alloc_dma_rx_desc_resources - alloc RX resources.
+ * __alloc_dma_rx_desc_resources - alloc RX resources (per queue).
  * @priv: private structure
+ * @queue: RX queue index
  * Description: according to which descriptor can be used (extended or basic)
  * this function allocates the resources for the TX and RX paths. In case of
  * reception, for example, it pre-allocates the RX socket buffers in order to
  * allow the zero-copy mechanism.
  */
+static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       struct stmmac_channel *ch = &priv->channel[queue];
+       bool xdp_prog = stmmac_xdp_is_enabled(priv);
+       struct page_pool_params pp_params = { 0 };
+       unsigned int num_pages;
+       unsigned int napi_id;
+       int ret;
+
+       rx_q->queue_index = queue;
+       rx_q->priv_data = priv;
+
+       pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+       pp_params.pool_size = priv->dma_rx_size;
+       num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+       pp_params.order = ilog2(num_pages);
+       pp_params.nid = dev_to_node(priv->device);
+       pp_params.dev = priv->device;
+       pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+       pp_params.offset = stmmac_rx_offset(priv);
+       pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages);
+
+       rx_q->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(rx_q->page_pool)) {
+               ret = PTR_ERR(rx_q->page_pool);
+               rx_q->page_pool = NULL;
+               return ret;
+       }
+
+       rx_q->buf_pool = kcalloc(priv->dma_rx_size,
+                                sizeof(*rx_q->buf_pool),
+                                GFP_KERNEL);
+       if (!rx_q->buf_pool)
+               return -ENOMEM;
+
+       if (priv->extend_desc) {
+               rx_q->dma_erx = dma_alloc_coherent(priv->device,
+                                                  priv->dma_rx_size *
+                                                  sizeof(struct dma_extended_desc),
+                                                  &rx_q->dma_rx_phy,
+                                                  GFP_KERNEL);
+               if (!rx_q->dma_erx)
+                       return -ENOMEM;
+
+       } else {
+               rx_q->dma_rx = dma_alloc_coherent(priv->device,
+                                                 priv->dma_rx_size *
+                                                 sizeof(struct dma_desc),
+                                                 &rx_q->dma_rx_phy,
+                                                 GFP_KERNEL);
+               if (!rx_q->dma_rx)
+                       return -ENOMEM;
+       }
+
+       if (stmmac_xdp_is_enabled(priv) &&
+           test_bit(queue, priv->af_xdp_zc_qps))
+               napi_id = ch->rxtx_napi.napi_id;
+       else
+               napi_id = ch->rx_napi.napi_id;
+
+       ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev,
+                              rx_q->queue_index,
+                              napi_id);
+       if (ret) {
+               netdev_err(priv->dev, "Failed to register xdp rxq info\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
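The pool sizing follows directly from the configured buffer size:
num_pages = DIV_ROUND_UP(dma_buf_sz, PAGE_SIZE) and the page order is
ilog2(num_pages), while the DMA direction becomes bidirectional as soon as an
XDP program may send the same pages back out via XDP_TX. A small worked
sketch of the arithmetic (buffer sizes below are examples, not values taken
from the patch):

    /* Default 1536-byte buffers on a 4 KiB page system (example values). */
    unsigned int dma_buf_sz = 1536;
    unsigned int num_pages = DIV_ROUND_UP(dma_buf_sz, PAGE_SIZE); /* = 1 */
    unsigned int order = ilog2(num_pages);                        /* = 0 */

    /* An 8 KiB buffer would give num_pages = 2 and order = 1 instead. */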
 static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 {
        u32 rx_count = priv->plat->rx_queues_to_use;
-       int ret = -ENOMEM;
        u32 queue;
+       int ret;
 
        /* RX queues buffers and DMA */
        for (queue = 0; queue < rx_count; queue++) {
-               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-               struct page_pool_params pp_params = { 0 };
-               unsigned int num_pages;
-
-               rx_q->queue_index = queue;
-               rx_q->priv_data = priv;
-
-               pp_params.flags = PP_FLAG_DMA_MAP;
-               pp_params.pool_size = priv->dma_rx_size;
-               num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
-               pp_params.order = ilog2(num_pages);
-               pp_params.nid = dev_to_node(priv->device);
-               pp_params.dev = priv->device;
-               pp_params.dma_dir = DMA_FROM_DEVICE;
-
-               rx_q->page_pool = page_pool_create(&pp_params);
-               if (IS_ERR(rx_q->page_pool)) {
-                       ret = PTR_ERR(rx_q->page_pool);
-                       rx_q->page_pool = NULL;
-                       goto err_dma;
-               }
-
-               rx_q->buf_pool = kcalloc(priv->dma_rx_size,
-                                        sizeof(*rx_q->buf_pool),
-                                        GFP_KERNEL);
-               if (!rx_q->buf_pool)
+               ret = __alloc_dma_rx_desc_resources(priv, queue);
+               if (ret)
                        goto err_dma;
-
-               if (priv->extend_desc) {
-                       rx_q->dma_erx = dma_alloc_coherent(priv->device,
-                                                          priv->dma_rx_size *
-                                                          sizeof(struct dma_extended_desc),
-                                                          &rx_q->dma_rx_phy,
-                                                          GFP_KERNEL);
-                       if (!rx_q->dma_erx)
-                               goto err_dma;
-
-               } else {
-                       rx_q->dma_rx = dma_alloc_coherent(priv->device,
-                                                         priv->dma_rx_size *
-                                                         sizeof(struct dma_desc),
-                                                         &rx_q->dma_rx_phy,
-                                                         GFP_KERNEL);
-                       if (!rx_q->dma_rx)
-                               goto err_dma;
-               }
        }
 
        return 0;
@@ -1725,60 +2052,70 @@ err_dma:
 }
 
 /**
- * alloc_dma_tx_desc_resources - alloc TX resources.
+ * __alloc_dma_tx_desc_resources - alloc TX resources (per queue).
  * @priv: private structure
+ * @queue: TX queue index
  * Description: according to which descriptor can be used (extended or basic)
  * this function allocates the resources for the TX and RX paths. In case of
  * reception, for example, it pre-allocates the RX socket buffers in order to
  * allow the zero-copy mechanism.
  */
-static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
+static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
-       u32 tx_count = priv->plat->tx_queues_to_use;
-       int ret = -ENOMEM;
-       u32 queue;
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       size_t size;
+       void *addr;
 
-       /* TX queues buffers and DMA */
-       for (queue = 0; queue < tx_count; queue++) {
-               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
-               size_t size;
-               void *addr;
+       tx_q->queue_index = queue;
+       tx_q->priv_data = priv;
 
-               tx_q->queue_index = queue;
-               tx_q->priv_data = priv;
+       tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size,
+                                     sizeof(*tx_q->tx_skbuff_dma),
+                                     GFP_KERNEL);
+       if (!tx_q->tx_skbuff_dma)
+               return -ENOMEM;
 
-               tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size,
-                                             sizeof(*tx_q->tx_skbuff_dma),
-                                             GFP_KERNEL);
-               if (!tx_q->tx_skbuff_dma)
-                       goto err_dma;
+       tx_q->tx_skbuff = kcalloc(priv->dma_tx_size,
+                                 sizeof(struct sk_buff *),
+                                 GFP_KERNEL);
+       if (!tx_q->tx_skbuff)
+               return -ENOMEM;
 
-               tx_q->tx_skbuff = kcalloc(priv->dma_tx_size,
-                                         sizeof(struct sk_buff *),
-                                         GFP_KERNEL);
-               if (!tx_q->tx_skbuff)
-                       goto err_dma;
+       if (priv->extend_desc)
+               size = sizeof(struct dma_extended_desc);
+       else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+               size = sizeof(struct dma_edesc);
+       else
+               size = sizeof(struct dma_desc);
 
-               if (priv->extend_desc)
-                       size = sizeof(struct dma_extended_desc);
-               else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-                       size = sizeof(struct dma_edesc);
-               else
-                       size = sizeof(struct dma_desc);
+       size *= priv->dma_tx_size;
 
-               size *= priv->dma_tx_size;
+       addr = dma_alloc_coherent(priv->device, size,
+                                 &tx_q->dma_tx_phy, GFP_KERNEL);
+       if (!addr)
+               return -ENOMEM;
 
-               addr = dma_alloc_coherent(priv->device, size,
-                                         &tx_q->dma_tx_phy, GFP_KERNEL);
-               if (!addr)
-                       goto err_dma;
+       if (priv->extend_desc)
+               tx_q->dma_etx = addr;
+       else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+               tx_q->dma_entx = addr;
+       else
+               tx_q->dma_tx = addr;
 
-               if (priv->extend_desc)
-                       tx_q->dma_etx = addr;
-               else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-                       tx_q->dma_entx = addr;
-               else
-                       tx_q->dma_tx = addr;
+       return 0;
+}
+
+static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 queue;
+       int ret;
+
+       /* TX queues buffers and DMA */
+       for (queue = 0; queue < tx_count; queue++) {
+               ret = __alloc_dma_tx_desc_resources(priv, queue);
+               if (ret)
+                       goto err_dma;
        }
 
        return 0;
@@ -1815,11 +2152,13 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv)
  */
 static void free_dma_desc_resources(struct stmmac_priv *priv)
 {
-       /* Release the DMA RX socket buffers */
-       free_dma_rx_desc_resources(priv);
-
        /* Release the DMA TX socket buffers */
        free_dma_tx_desc_resources(priv);
+
+       /* Release the DMA RX socket buffers later
+        * to ensure all pending XDP_TX buffers are returned.
+        */
+       free_dma_rx_desc_resources(priv);
 }
 
 /**
@@ -1976,12 +2315,24 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 
        /* configure all channels */
        for (chan = 0; chan < rx_channels_count; chan++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+               u32 buf_size;
+
                qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
 
                stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
                                rxfifosz, qmode);
-               stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz,
-                               chan);
+
+               if (rx_q->xsk_pool) {
+                       buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool);
+                       stmmac_set_dma_bfsize(priv, priv->ioaddr,
+                                             buf_size,
+                                             chan);
+               } else {
+                       stmmac_set_dma_bfsize(priv, priv->ioaddr,
+                                             priv->dma_buf_sz,
+                                             chan);
+               }
        }
 
        for (chan = 0; chan < tx_channels_count; chan++) {
@@ -1992,6 +2343,101 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
        }
 }
 
+static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
+{
+       struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue);
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       struct xsk_buff_pool *pool = tx_q->xsk_pool;
+       unsigned int entry = tx_q->cur_tx;
+       struct dma_desc *tx_desc = NULL;
+       struct xdp_desc xdp_desc;
+       bool work_done = true;
+
+       /* Avoid a TX time-out, as we share this queue with the slow path */
+       nq->trans_start = jiffies;
+
+       budget = min(budget, stmmac_tx_avail(priv, queue));
+
+       while (budget-- > 0) {
+               dma_addr_t dma_addr;
+               bool set_ic;
+
+               /* We share the TX ring with the slow path, so stop XSK TX
+                * descriptor submission when the available ring space drops
+                * below the threshold.
+                */
+               if (unlikely(stmmac_tx_avail(priv, queue) < STMMAC_TX_XSK_AVAIL) ||
+                   !netif_carrier_ok(priv->dev)) {
+                       work_done = false;
+                       break;
+               }
+
+               if (!xsk_tx_peek_desc(pool, &xdp_desc))
+                       break;
+
+               if (likely(priv->extend_desc))
+                       tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry);
+               else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+                       tx_desc = &tx_q->dma_entx[entry].basic;
+               else
+                       tx_desc = tx_q->dma_tx + entry;
+
+               dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
+               xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len);
+
+               tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX;
+
+               /* To return the XDP buffer to the XSK pool, we simply call
+                * xsk_tx_completed(), so there is no need to fill in
+                * 'buf' and 'xdpf'.
+                */
+               tx_q->tx_skbuff_dma[entry].buf = 0;
+               tx_q->xdpf[entry] = NULL;
+
+               tx_q->tx_skbuff_dma[entry].map_as_page = false;
+               tx_q->tx_skbuff_dma[entry].len = xdp_desc.len;
+               tx_q->tx_skbuff_dma[entry].last_segment = true;
+               tx_q->tx_skbuff_dma[entry].is_jumbo = false;
+
+               stmmac_set_desc_addr(priv, tx_desc, dma_addr);
+
+               tx_q->tx_count_frames++;
+
+               if (!priv->tx_coal_frames[queue])
+                       set_ic = false;
+               else if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0)
+                       set_ic = true;
+               else
+                       set_ic = false;
+
+               if (set_ic) {
+                       tx_q->tx_count_frames = 0;
+                       stmmac_set_tx_ic(priv, tx_desc);
+                       priv->xstats.tx_set_ic_bit++;
+               }
+
+               stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len,
+                                      true, priv->mode, true, true,
+                                      xdp_desc.len);
+
+               stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+               tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_tx_size);
+               entry = tx_q->cur_tx;
+       }
+
+       if (tx_desc) {
+               stmmac_flush_tx_descriptors(priv, queue);
+               xsk_tx_release(pool);
+       }
+
+       /* Return true if both of these conditions are met:
+        *  a) TX budget is still available
+        *  b) work_done == true, meaning the XSK TX desc peek found no more
+        *     pending XSK TX frames to transmit
+        */
+       return !!budget && work_done;
+}
+
 /**
  * stmmac_tx_clean - to manage the transmission completion
  * @priv: driver private structure
@@ -2003,18 +2449,35 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 {
        struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        unsigned int bytes_compl = 0, pkts_compl = 0;
-       unsigned int entry, count = 0;
+       unsigned int entry, xmits = 0, count = 0;
 
        __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
 
        priv->xstats.tx_clean++;
 
+       tx_q->xsk_frames_done = 0;
+
        entry = tx_q->dirty_tx;
-       while ((entry != tx_q->cur_tx) && (count < budget)) {
-               struct sk_buff *skb = tx_q->tx_skbuff[entry];
+
+       /* Try to clean all completed TX frames in one shot */
+       while ((entry != tx_q->cur_tx) && count < priv->dma_tx_size) {
+               struct xdp_frame *xdpf;
+               struct sk_buff *skb;
                struct dma_desc *p;
                int status;
 
+               if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX ||
+                   tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_NDO) {
+                       xdpf = tx_q->xdpf[entry];
+                       skb = NULL;
+               } else if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
+                       xdpf = NULL;
+                       skb = tx_q->tx_skbuff[entry];
+               } else {
+                       xdpf = NULL;
+                       skb = NULL;
+               }
+
                if (priv->extend_desc)
                        p = (struct dma_desc *)(tx_q->dma_etx + entry);
                else if (tx_q->tbs & STMMAC_TBS_AVAIL)
@@ -2044,10 +2507,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
                                priv->dev->stats.tx_packets++;
                                priv->xstats.tx_pkt_n++;
                        }
-                       stmmac_get_tx_hwtstamp(priv, p, skb);
+                       if (skb)
+                               stmmac_get_tx_hwtstamp(priv, p, skb);
                }
 
-               if (likely(tx_q->tx_skbuff_dma[entry].buf)) {
+               if (likely(tx_q->tx_skbuff_dma[entry].buf &&
+                          tx_q->tx_skbuff_dma[entry].buf_type != STMMAC_TXBUF_T_XDP_TX)) {
                        if (tx_q->tx_skbuff_dma[entry].map_as_page)
                                dma_unmap_page(priv->device,
                                               tx_q->tx_skbuff_dma[entry].buf,
@@ -2068,11 +2533,28 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
                tx_q->tx_skbuff_dma[entry].last_segment = false;
                tx_q->tx_skbuff_dma[entry].is_jumbo = false;
 
-               if (likely(skb != NULL)) {
-                       pkts_compl++;
-                       bytes_compl += skb->len;
-                       dev_consume_skb_any(skb);
-                       tx_q->tx_skbuff[entry] = NULL;
+               if (xdpf &&
+                   tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       tx_q->xdpf[entry] = NULL;
+               }
+
+               if (xdpf &&
+                   tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_NDO) {
+                       xdp_return_frame(xdpf);
+                       tx_q->xdpf[entry] = NULL;
+               }
+
+               if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XSK_TX)
+                       tx_q->xsk_frames_done++;
+
+               if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
+                       if (likely(skb)) {
+                               pkts_compl++;
+                               bytes_compl += skb->len;
+                               dev_consume_skb_any(skb);
+                               tx_q->tx_skbuff[entry] = NULL;
+                       }
                }
 
                stmmac_release_tx_desc(priv, p, priv->mode);
@@ -2093,6 +2575,28 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
                netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
+       if (tx_q->xsk_pool) {
+               bool work_done;
+
+               if (tx_q->xsk_frames_done)
+                       xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done);
+
+               if (xsk_uses_need_wakeup(tx_q->xsk_pool))
+                       xsk_set_tx_need_wakeup(tx_q->xsk_pool);
+
+               /* For XSK TX, we try to send as many frames as possible.
+                * If the XSK work is done (XSK TX descriptors are empty and
+                * budget is still available), return "budget - 1" so the TX
+                * IRQ is re-enabled. Otherwise, return "budget" so NAPI
+                * keeps polling.
+                */
+               work_done = stmmac_xdp_xmit_zc(priv, queue,
+                                              STMMAC_XSK_TX_BUDGET_MAX);
+               if (work_done)
+                       xmits = budget - 1;
+               else
+                       xmits = budget;
+       }
+
        if (priv->eee_enabled && !priv->tx_path_in_lpi_mode &&
            priv->eee_sw_timer_en) {
                stmmac_enable_eee_mode(priv);
@@ -2101,12 +2605,14 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
        /* We still have pending packets, let's call for a new scheduling */
        if (tx_q->dirty_tx != tx_q->cur_tx)
-               hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer),
+               hrtimer_start(&tx_q->txtimer,
+                             STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
                              HRTIMER_MODE_REL);
 
        __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
 
-       return count;
+       /* Combine decisions from TX clean and XSK TX */
+       return max(count, xmits);
 }
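
Returning max(count, xmits) keeps the standard NAPI contract intact: a poll
that reports strictly less than its budget is considered done and may re-arm
the interrupt, while reporting the full budget keeps the NAPI scheduled. That
is why a completed XSK pass yields "budget - 1" and an unfinished one yields
"budget". A hedged sketch of how a combined rx/tx poll routine would consume
this value (the real poll function is outside this excerpt; RX work and the
channel locking around the IRQ re-enable are omitted):

    static int stmmac_napi_poll_rxtx_sketch(struct napi_struct *napi, int budget)
    {
            struct stmmac_channel *ch =
                    container_of(napi, struct stmmac_channel, rxtx_napi);
            struct stmmac_priv *priv = ch->priv_data;
            int tx_done = stmmac_tx_clean(priv, budget, ch->index);

            if (tx_done >= budget)
                    return budget;  /* keep polling, IRQ stays masked */

            if (napi_complete_done(napi, tx_done))
                    /* all work done this cycle: re-enable the channel IRQ */
                    stmmac_enable_dma_irq(priv, priv->ioaddr, ch->index, 1, 1);

            return tx_done;
    }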
 
 /**
@@ -2184,28 +2690,35 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
        return false;
 }
 
-static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
+static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir)
 {
        int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
-                                                &priv->xstats, chan);
+                                                &priv->xstats, chan, dir);
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
        struct stmmac_channel *ch = &priv->channel[chan];
+       struct napi_struct *rx_napi;
+       struct napi_struct *tx_napi;
        unsigned long flags;
 
+       rx_napi = rx_q->xsk_pool ? &ch->rxtx_napi : &ch->rx_napi;
+       tx_napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
+
        if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
-               if (napi_schedule_prep(&ch->rx_napi)) {
+               if (napi_schedule_prep(rx_napi)) {
                        spin_lock_irqsave(&ch->lock, flags);
                        stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0);
                        spin_unlock_irqrestore(&ch->lock, flags);
-                       __napi_schedule(&ch->rx_napi);
+                       __napi_schedule(rx_napi);
                }
        }
 
        if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
-               if (napi_schedule_prep(&ch->tx_napi)) {
+               if (napi_schedule_prep(tx_napi)) {
                        spin_lock_irqsave(&ch->lock, flags);
                        stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1);
                        spin_unlock_irqrestore(&ch->lock, flags);
-                       __napi_schedule(&ch->tx_napi);
+                       __napi_schedule(tx_napi);
                }
        }
 
@@ -2233,7 +2746,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
                channels_to_check = ARRAY_SIZE(status);
 
        for (chan = 0; chan < channels_to_check; chan++)
-               status[chan] = stmmac_napi_check(priv, chan);
+               status[chan] = stmmac_napi_check(priv, chan,
+                                                DMA_DIR_RXTX);
 
        for (chan = 0; chan < tx_channel_count; chan++) {
                if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
@@ -2361,7 +2875,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
                                    rx_q->dma_rx_phy, chan);
 
                rx_q->rx_tail_addr = rx_q->dma_rx_phy +
-                                    (priv->dma_rx_size *
+                                    (rx_q->buf_alloc_num *
                                      sizeof(struct dma_desc));
                stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
                                       rx_q->rx_tail_addr, chan);
@@ -2386,7 +2900,8 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
 {
        struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 
-       hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer),
+       hrtimer_start(&tx_q->txtimer,
+                     STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
                      HRTIMER_MODE_REL);
 }
 
@@ -2401,16 +2916,18 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t)
        struct stmmac_tx_queue *tx_q = container_of(t, struct stmmac_tx_queue, txtimer);
        struct stmmac_priv *priv = tx_q->priv_data;
        struct stmmac_channel *ch;
+       struct napi_struct *napi;
 
        ch = &priv->channel[tx_q->queue_index];
+       napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
 
-       if (likely(napi_schedule_prep(&ch->tx_napi))) {
+       if (likely(napi_schedule_prep(napi))) {
                unsigned long flags;
 
                spin_lock_irqsave(&ch->lock, flags);
                stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1);
                spin_unlock_irqrestore(&ch->lock, flags);
-               __napi_schedule(&ch->tx_napi);
+               __napi_schedule(napi);
        }
 
        return HRTIMER_NORESTART;
@@ -2427,18 +2944,21 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t)
 static void stmmac_init_coalesce(struct stmmac_priv *priv)
 {
        u32 tx_channel_count = priv->plat->tx_queues_to_use;
+       u32 rx_channel_count = priv->plat->rx_queues_to_use;
        u32 chan;
 
-       priv->tx_coal_frames = STMMAC_TX_FRAMES;
-       priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-       priv->rx_coal_frames = STMMAC_RX_FRAMES;
-
        for (chan = 0; chan < tx_channel_count; chan++) {
                struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
 
+               priv->tx_coal_frames[chan] = STMMAC_TX_FRAMES;
+               priv->tx_coal_timer[chan] = STMMAC_COAL_TX_TIMER;
+
                hrtimer_init(&tx_q->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                tx_q->txtimer.function = stmmac_tx_timer;
        }
+
+       for (chan = 0; chan < rx_channel_count; chan++)
+               priv->rx_coal_frames[chan] = STMMAC_RX_FRAMES;
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2655,6 +3175,26 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
        }
 }
 
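+/* Create the single-threaded workqueue used to run the Frame Preemption
+ * (FPE) handshake task. It is named "<netdev>-fpe" and torn down again
+ * in stmmac_fpe_stop_wq().
+ */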
+static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
+{
+       char *name;
+
+       clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+
+       name = priv->wq_name;
+       sprintf(name, "%s-fpe", priv->dev->name);
+
+       priv->fpe_wq = create_singlethread_workqueue(name);
+       if (!priv->fpe_wq) {
+               netdev_err(priv->dev, "%s: Failed to create workqueue\n", name);
+
+               return -ENOMEM;
+       }
+       netdev_info(priv->dev, "FPE workqueue start");
+
+       return 0;
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -2673,6 +3213,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        struct stmmac_priv *priv = netdev_priv(dev);
        u32 rx_cnt = priv->plat->rx_queues_to_use;
        u32 tx_cnt = priv->plat->tx_queues_to_use;
+       bool sph_en;
        u32 chan;
        int ret;
 
@@ -2743,10 +3284,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
                priv->tx_lpi_timer = eee_timer * 1000;
 
        if (priv->use_riwt) {
-               if (!priv->rx_riwt)
-                       priv->rx_riwt = DEF_DMA_RIWT;
+               u32 queue;
+
+               for (queue = 0; queue < rx_cnt; queue++) {
+                       if (!priv->rx_riwt[queue])
+                               priv->rx_riwt[queue] = DEF_DMA_RIWT;
 
-               ret = stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+                       stmmac_rx_watchdog(priv, priv->ioaddr,
+                                          priv->rx_riwt[queue], queue);
+               }
        }
 
        if (priv->hw->pcs)
@@ -2769,10 +3315,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        }
 
        /* Enable Split Header */
-       if (priv->sph && priv->hw->rx_csum) {
-               for (chan = 0; chan < rx_cnt; chan++)
-                       stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
-       }
+       sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+       for (chan = 0; chan < rx_cnt; chan++)
+               stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
 
        /* VLAN Tag Insertion */
        if (priv->dma_cap.vlins)
@@ -2793,6 +3339,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        /* Start the ball rolling... */
        stmmac_start_all_dma(priv);
 
+       if (priv->dma_cap.fpesel) {
+               stmmac_fpe_start_wq(priv);
+
+               if (priv->plat->fpe_cfg->enable)
+                       stmmac_fpe_handshake(priv, true);
+       }
+
        return 0;
 }
 
@@ -2803,37 +3356,308 @@ static void stmmac_hw_teardown(struct net_device *dev)
        clk_disable_unprepare(priv->plat->clk_ptp_ref);
 }
 
-/**
- *  stmmac_open - open entry point of the driver
- *  @dev : pointer to the device structure.
- *  Description:
- *  This function is the open entry point of the driver.
- *  Return value:
- *  0 on success and an appropriate (-)ve integer as defined in errno.h
- *  file on failure.
- */
-static int stmmac_open(struct net_device *dev)
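+/* Release the IRQ lines requested so far. @irq_err records how far the
+ * request sequence got, so each case falls through and frees everything
+ * below that point; @irq_idx bounds the per-queue RX/TX loops.
+ */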
+static void stmmac_free_irq(struct net_device *dev,
+                           enum request_irq_err irq_err, int irq_idx)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
-       int bfsize = 0;
-       u32 chan;
+       int j;
+
+       switch (irq_err) {
+       case REQ_IRQ_ERR_ALL:
+               irq_idx = priv->plat->tx_queues_to_use;
+               fallthrough;
+       case REQ_IRQ_ERR_TX:
+               for (j = irq_idx - 1; j >= 0; j--) {
+                       if (priv->tx_irq[j] > 0) {
+                               irq_set_affinity_hint(priv->tx_irq[j], NULL);
+                               free_irq(priv->tx_irq[j], &priv->tx_queue[j]);
+                       }
+               }
+               irq_idx = priv->plat->rx_queues_to_use;
+               fallthrough;
+       case REQ_IRQ_ERR_RX:
+               for (j = irq_idx - 1; j >= 0; j--) {
+                       if (priv->rx_irq[j] > 0) {
+                               irq_set_affinity_hint(priv->rx_irq[j], NULL);
+                               free_irq(priv->rx_irq[j], &priv->rx_queue[j]);
+                       }
+               }
+
+               if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq)
+                       free_irq(priv->sfty_ue_irq, dev);
+               fallthrough;
+       case REQ_IRQ_ERR_SFTY_UE:
+               if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq)
+                       free_irq(priv->sfty_ce_irq, dev);
+               fallthrough;
+       case REQ_IRQ_ERR_SFTY_CE:
+               if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq)
+                       free_irq(priv->lpi_irq, dev);
+               fallthrough;
+       case REQ_IRQ_ERR_LPI:
+               if (priv->wol_irq > 0 && priv->wol_irq != dev->irq)
+                       free_irq(priv->wol_irq, dev);
+               fallthrough;
+       case REQ_IRQ_ERR_WOL:
+               free_irq(dev->irq, dev);
+               fallthrough;
+       case REQ_IRQ_ERR_MAC:
+       case REQ_IRQ_ERR_NO:
+               /* If the MAC IRQ request failed, there is nothing more to free */
+               break;
+       }
+}
+
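+/* Request the MAC, WoL, LPI and safety IRQs plus one MSI vector per RX
+ * and TX queue, spreading the per-queue vectors across the online CPUs
+ * via affinity hints. On failure, the lines already requested are freed
+ * through stmmac_free_irq().
+ */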
+static int stmmac_request_irq_multi_msi(struct net_device *dev)
+{
+       enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
+       struct stmmac_priv *priv = netdev_priv(dev);
+       cpumask_t cpu_mask;
+       int irq_idx = 0;
+       char *int_name;
        int ret;
+       int i;
 
-       if (priv->hw->pcs != STMMAC_PCS_TBI &&
-           priv->hw->pcs != STMMAC_PCS_RTBI &&
-           priv->hw->xpcs == NULL) {
-               ret = stmmac_init_phy(dev);
-               if (ret) {
+       /* For common interrupt */
+       int_name = priv->int_name_mac;
+       sprintf(int_name, "%s:%s", dev->name, "mac");
+       ret = request_irq(dev->irq, stmmac_mac_interrupt,
+                         0, int_name, dev);
+       if (unlikely(ret < 0)) {
+               netdev_err(priv->dev,
+                          "%s: alloc mac MSI %d (error: %d)\n",
+                          __func__, dev->irq, ret);
+               irq_err = REQ_IRQ_ERR_MAC;
+               goto irq_error;
+       }
+
+       /* Request the Wake IRQ in case another line is used for WoL */
+       if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) {
+               int_name = priv->int_name_wol;
+               sprintf(int_name, "%s:%s", dev->name, "wol");
+               ret = request_irq(priv->wol_irq,
+                                 stmmac_mac_interrupt,
+                                 0, int_name, dev);
+               if (unlikely(ret < 0)) {
                        netdev_err(priv->dev,
-                                  "%s: Cannot attach to PHY (error: %d)\n",
-                                  __func__, ret);
-                       return ret;
+                                  "%s: alloc wol MSI %d (error: %d)\n",
+                                  __func__, priv->wol_irq, ret);
+                       irq_err = REQ_IRQ_ERR_WOL;
+                       goto irq_error;
                }
        }
 
-       /* Extra statistics */
-       memset(&priv->xstats, 0, sizeof(struct stmmac_extra_stats));
-       priv->xstats.threshold = tc;
+       /* Request the LPI IRQ in case another line is used for LPI */
+       if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) {
+               int_name = priv->int_name_lpi;
+               sprintf(int_name, "%s:%s", dev->name, "lpi");
+               ret = request_irq(priv->lpi_irq,
+                                 stmmac_mac_interrupt,
+                                 0, int_name, dev);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: alloc lpi MSI %d (error: %d)\n",
+                                  __func__, priv->lpi_irq, ret);
+                       irq_err = REQ_IRQ_ERR_LPI;
+                       goto irq_error;
+               }
+       }
+
+       /* Request the Safety Feature Correctable Error line in case
+        * another line is used
+        */
+       if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) {
+               int_name = priv->int_name_sfty_ce;
+               sprintf(int_name, "%s:%s", dev->name, "safety-ce");
+               ret = request_irq(priv->sfty_ce_irq,
+                                 stmmac_safety_interrupt,
+                                 0, int_name, dev);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: alloc sfty ce MSI %d (error: %d)\n",
+                                  __func__, priv->sfty_ce_irq, ret);
+                       irq_err = REQ_IRQ_ERR_SFTY_CE;
+                       goto irq_error;
+               }
+       }
+
+       /* Request the Safety Feature Uncorrectable Error line in case
+        * another line is used
+        */
+       if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) {
+               int_name = priv->int_name_sfty_ue;
+               sprintf(int_name, "%s:%s", dev->name, "safety-ue");
+               ret = request_irq(priv->sfty_ue_irq,
+                                 stmmac_safety_interrupt,
+                                 0, int_name, dev);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: alloc sfty ue MSI %d (error: %d)\n",
+                                  __func__, priv->sfty_ue_irq, ret);
+                       irq_err = REQ_IRQ_ERR_SFTY_UE;
+                       goto irq_error;
+               }
+       }
+
+       /* Request Rx MSI irq */
+       for (i = 0; i < priv->plat->rx_queues_to_use; i++) {
+               if (priv->rx_irq[i] == 0)
+                       continue;
+
+               int_name = priv->int_name_rx_irq[i];
+               sprintf(int_name, "%s:%s-%d", dev->name, "rx", i);
+               ret = request_irq(priv->rx_irq[i],
+                                 stmmac_msi_intr_rx,
+                                 0, int_name, &priv->rx_queue[i]);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: alloc rx-%d  MSI %d (error: %d)\n",
+                                  __func__, i, priv->rx_irq[i], ret);
+                       irq_err = REQ_IRQ_ERR_RX;
+                       irq_idx = i;
+                       goto irq_error;
+               }
+               cpumask_clear(&cpu_mask);
+               cpumask_set_cpu(i % num_online_cpus(), &cpu_mask);
+               irq_set_affinity_hint(priv->rx_irq[i], &cpu_mask);
+       }
+
+       /* Request Tx MSI irq */
+       for (i = 0; i < priv->plat->tx_queues_to_use; i++) {
+               if (priv->tx_irq[i] == 0)
+                       continue;
+
+               int_name = priv->int_name_tx_irq[i];
+               sprintf(int_name, "%s:%s-%d", dev->name, "tx", i);
+               ret = request_irq(priv->tx_irq[i],
+                                 stmmac_msi_intr_tx,
+                                 0, int_name, &priv->tx_queue[i]);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: alloc tx-%d  MSI %d (error: %d)\n",
+                                  __func__, i, priv->tx_irq[i], ret);
+                       irq_err = REQ_IRQ_ERR_TX;
+                       irq_idx = i;
+                       goto irq_error;
+               }
+               cpumask_clear(&cpu_mask);
+               cpumask_set_cpu(i % num_online_cpus(), &cpu_mask);
+               irq_set_affinity_hint(priv->tx_irq[i], &cpu_mask);
+       }
+
+       return 0;
+
+irq_error:
+       stmmac_free_irq(dev, irq_err, irq_idx);
+       return ret;
+}
+
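+/* Legacy path with a single shared interrupt line for the MAC, plus the
+ * optional dedicated WoL and LPI lines when they differ from dev->irq.
+ */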
+static int stmmac_request_irq_single(struct net_device *dev)
+{
+       enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int ret;
+
+       ret = request_irq(dev->irq, stmmac_interrupt,
+                         IRQF_SHARED, dev->name, dev);
+       if (unlikely(ret < 0)) {
+               netdev_err(priv->dev,
+                          "%s: ERROR: allocating the IRQ %d (error: %d)\n",
+                          __func__, dev->irq, ret);
+               irq_err = REQ_IRQ_ERR_MAC;
+               return ret;
+       }
+
+       /* Request the Wake IRQ in case another line is used for WoL */
+       if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) {
+               ret = request_irq(priv->wol_irq, stmmac_interrupt,
+                                 IRQF_SHARED, dev->name, dev);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
+                                  __func__, priv->wol_irq, ret);
+                       irq_err = REQ_IRQ_ERR_WOL;
+                       return ret;
+               }
+       }
+
+       /* Request the LPI IRQ in case another line is used for LPI */
+       if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) {
+               ret = request_irq(priv->lpi_irq, stmmac_interrupt,
+                                 IRQF_SHARED, dev->name, dev);
+               if (unlikely(ret < 0)) {
+                       netdev_err(priv->dev,
+                                  "%s: ERROR: allocating the LPI IRQ %d (%d)\n",
+                                  __func__, priv->lpi_irq, ret);
+                       irq_err = REQ_IRQ_ERR_LPI;
+                       goto irq_error;
+               }
+       }
+
+       return 0;
+
+irq_error:
+       stmmac_free_irq(dev, irq_err, 0);
+       return ret;
+}
+
+static int stmmac_request_irq(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int ret;
+
+       /* Request the IRQ lines */
+       if (priv->plat->multi_msi_en)
+               ret = stmmac_request_irq_multi_msi(dev);
+       else
+               ret = stmmac_request_irq_single(dev);
+
+       return ret;
+}
+
+/**
+ *  stmmac_open - open entry point of the driver
+ *  @dev : pointer to the device structure.
+ *  Description:
+ *  This function is the open entry point of the driver.
+ *  Return value:
+ *  0 on success and an appropriate (-)ve integer as defined in errno.h
+ *  file on failure.
+ */
+int stmmac_open(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int bfsize = 0;
+       u32 chan;
+       int ret;
+
+       ret = pm_runtime_get_sync(priv->device);
+       if (ret < 0) {
+               pm_runtime_put_noidle(priv->device);
+               return ret;
+       }
+
+       if (priv->hw->pcs != STMMAC_PCS_TBI &&
+           priv->hw->pcs != STMMAC_PCS_RTBI &&
+           priv->hw->xpcs_args.an_mode != DW_AN_C73) {
+               ret = stmmac_init_phy(dev);
+               if (ret) {
+                       netdev_err(priv->dev,
+                                  "%s: Cannot attach to PHY (error: %d)\n",
+                                  __func__, ret);
+                       goto init_phy_error;
+               }
+       }
+
+       /* Extra statistics */
+       memset(&priv->xstats, 0, sizeof(struct stmmac_extra_stats));
+       priv->xstats.threshold = tc;
 
        bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu);
        if (bfsize < 0)
@@ -2887,50 +3711,15 @@ static int stmmac_open(struct net_device *dev)
        /* We may have called phylink_speed_down before */
        phylink_speed_up(priv->phylink);
 
-       /* Request the IRQ lines */
-       ret = request_irq(dev->irq, stmmac_interrupt,
-                         IRQF_SHARED, dev->name, dev);
-       if (unlikely(ret < 0)) {
-               netdev_err(priv->dev,
-                          "%s: ERROR: allocating the IRQ %d (error: %d)\n",
-                          __func__, dev->irq, ret);
+       ret = stmmac_request_irq(dev);
+       if (ret)
                goto irq_error;
-       }
-
-       /* Request the Wake IRQ in case of another line is used for WoL */
-       if (priv->wol_irq != dev->irq) {
-               ret = request_irq(priv->wol_irq, stmmac_interrupt,
-                                 IRQF_SHARED, dev->name, dev);
-               if (unlikely(ret < 0)) {
-                       netdev_err(priv->dev,
-                                  "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
-                                  __func__, priv->wol_irq, ret);
-                       goto wolirq_error;
-               }
-       }
-
-       /* Request the IRQ lines */
-       if (priv->lpi_irq > 0) {
-               ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED,
-                                 dev->name, dev);
-               if (unlikely(ret < 0)) {
-                       netdev_err(priv->dev,
-                                  "%s: ERROR: allocating the LPI IRQ %d (%d)\n",
-                                  __func__, priv->lpi_irq, ret);
-                       goto lpiirq_error;
-               }
-       }
 
        stmmac_enable_all_queues(priv);
        netif_tx_start_all_queues(priv->dev);
 
        return 0;
 
-lpiirq_error:
-       if (priv->wol_irq != dev->irq)
-               free_irq(priv->wol_irq, dev);
-wolirq_error:
-       free_irq(dev->irq, dev);
 irq_error:
        phylink_stop(priv->phylink);
 
@@ -2942,16 +3731,28 @@ init_error:
        free_dma_desc_resources(priv);
 dma_desc_error:
        phylink_disconnect_phy(priv->phylink);
+init_phy_error:
+       pm_runtime_put(priv->device);
        return ret;
 }
 
+static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
+{
+       set_bit(__FPE_REMOVING, &priv->fpe_task_state);
+
+       if (priv->fpe_wq)
+               destroy_workqueue(priv->fpe_wq);
+
+       netdev_info(priv->dev, "FPE workqueue stop");
+}
+
 /**
  *  stmmac_release - close entry point of the driver
  *  @dev : device pointer.
  *  Description:
  *  This is the stop entry point of the driver.
  */
-static int stmmac_release(struct net_device *dev)
+int stmmac_release(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
        u32 chan;
@@ -2968,11 +3769,7 @@ static int stmmac_release(struct net_device *dev)
                hrtimer_cancel(&priv->tx_queue[chan].txtimer);
 
        /* Free the IRQ lines */
-       free_irq(dev->irq, dev);
-       if (priv->wol_irq != dev->irq)
-               free_irq(priv->wol_irq, dev);
-       if (priv->lpi_irq > 0)
-               free_irq(priv->lpi_irq, dev);
+       stmmac_free_irq(dev, REQ_IRQ_ERR_ALL, 0);
 
        if (priv->eee_enabled) {
                priv->tx_path_in_lpi_mode = false;
@@ -2992,6 +3789,11 @@ static int stmmac_release(struct net_device *dev)
 
        stmmac_release_ptp(priv);
 
+       pm_runtime_put(priv->device);
+
+       if (priv->dma_cap.fpesel)
+               stmmac_fpe_stop_wq(priv);
+
        return 0;
 }
 
@@ -3077,6 +3879,28 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des,
        }
 }
 
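+/* Make the prepared TX descriptors visible to the hardware and advance
+ * the queue tail pointer so the DMA engine starts fetching them.
+ */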
+static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue)
+{
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       int desc_size;
+
+       if (likely(priv->extend_desc))
+               desc_size = sizeof(struct dma_extended_desc);
+       else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+               desc_size = sizeof(struct dma_edesc);
+       else
+               desc_size = sizeof(struct dma_desc);
+
+       /* The own bit must be the latest setting done when preparing the
+        * descriptor, and then a barrier is needed to make sure that
+        * all is coherent before granting control to the DMA engine.
+        */
+       wmb();
+
+       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size);
+       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+}
+
 /**
  *  stmmac_tso_xmit - Tx entry point of the driver for oversized frames (TSO)
  *  @skb : the socket buffer
@@ -3108,10 +3932,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct dma_desc *desc, *first, *mss_desc = NULL;
        struct stmmac_priv *priv = netdev_priv(dev);
-       int desc_size, tmp_pay_len = 0, first_tx;
        int nfrags = skb_shinfo(skb)->nr_frags;
        u32 queue = skb_get_queue_mapping(skb);
        unsigned int first_entry, tx_packets;
+       int tmp_pay_len = 0, first_tx;
        struct stmmac_tx_queue *tx_q;
        bool has_vlan, set_ic;
        u8 proto_hdr_len, hdr;
@@ -3193,6 +4017,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        tx_q->tx_skbuff_dma[first_entry].buf = des;
        tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
+       tx_q->tx_skbuff_dma[first_entry].map_as_page = false;
+       tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB;
 
        if (priv->dma_cap.addr64 <= 32) {
                first->des0 = cpu_to_le32(des);
@@ -3228,12 +4054,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
                tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
                tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
        }
 
        tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
 
        /* Only the last descriptor gets to point to the skb. */
        tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+       tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
 
        /* Manage tx mitigation */
        tx_packets = (tx_q->cur_tx + 1) - first_tx;
@@ -3241,11 +4069,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
                set_ic = true;
-       else if (!priv->tx_coal_frames)
+       else if (!priv->tx_coal_frames[queue])
                set_ic = false;
-       else if (tx_packets > priv->tx_coal_frames)
+       else if (tx_packets > priv->tx_coal_frames[queue])
                set_ic = true;
-       else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
+       else if ((tx_q->tx_count_frames %
+                 priv->tx_coal_frames[queue]) < tx_packets)
                set_ic = true;
        else
                set_ic = false;
@@ -3308,12 +4137,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                stmmac_set_tx_owner(priv, mss_desc);
        }
 
-       /* The own bit must be the latest setting done when prepare the
-        * descriptor and then barrier is needed to make sure that
-        * all is coherent before granting the DMA engine.
-        */
-       wmb();
-
        if (netif_msg_pktdata(priv)) {
                pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
                        __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
@@ -3324,13 +4147,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
-       if (tx_q->tbs & STMMAC_TBS_AVAIL)
-               desc_size = sizeof(struct dma_edesc);
-       else
-               desc_size = sizeof(struct dma_desc);
-
-       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size);
-       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+       stmmac_flush_tx_descriptors(priv, queue);
        stmmac_tx_timer_arm(priv, queue);
 
        return NETDEV_TX_OK;
@@ -3360,10 +4177,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
        int nfrags = skb_shinfo(skb)->nr_frags;
        int gso = skb_shinfo(skb)->gso_type;
        struct dma_edesc *tbs_desc = NULL;
-       int entry, desc_size, first_tx;
        struct dma_desc *desc, *first;
        struct stmmac_tx_queue *tx_q;
        bool has_vlan, set_ic;
+       int entry, first_tx;
        dma_addr_t des;
 
        tx_q = &priv->tx_queue[queue];
@@ -3451,6 +4268,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                tx_q->tx_skbuff_dma[entry].map_as_page = true;
                tx_q->tx_skbuff_dma[entry].len = len;
                tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
+               tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
 
                /* Prepare the descriptor and set the own bit too */
                stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion,
@@ -3459,6 +4277,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* Only the last descriptor gets to point to the skb. */
        tx_q->tx_skbuff[entry] = skb;
+       tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
 
        /* According to the coalesce parameter the IC bit for the latest
         * segment is reset and the timer re-started to clean the tx status.
@@ -3470,11 +4289,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
                set_ic = true;
-       else if (!priv->tx_coal_frames)
+       else if (!priv->tx_coal_frames[queue])
                set_ic = false;
-       else if (tx_packets > priv->tx_coal_frames)
+       else if (tx_packets > priv->tx_coal_frames[queue])
                set_ic = true;
-       else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
+       else if ((tx_q->tx_count_frames %
+                 priv->tx_coal_frames[queue]) < tx_packets)
                set_ic = true;
        else
                set_ic = false;
@@ -3536,6 +4356,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                        goto dma_map_err;
 
                tx_q->tx_skbuff_dma[first_entry].buf = des;
+               tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB;
+               tx_q->tx_skbuff_dma[first_entry].map_as_page = false;
 
                stmmac_set_desc_addr(priv, first, des);
 
@@ -3564,25 +4386,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        stmmac_set_tx_owner(priv, first);
 
-       /* The own bit must be the latest setting done when prepare the
-        * descriptor and then barrier is needed to make sure that
-        * all is coherent before granting the DMA engine.
-        */
-       wmb();
-
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
        stmmac_enable_dma_transmission(priv, priv->ioaddr);
 
-       if (likely(priv->extend_desc))
-               desc_size = sizeof(struct dma_extended_desc);
-       else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-               desc_size = sizeof(struct dma_edesc);
-       else
-               desc_size = sizeof(struct dma_desc);
-
-       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size);
-       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+       stmmac_flush_tx_descriptors(priv, queue);
        stmmac_tx_timer_arm(priv, queue);
 
        return NETDEV_TX_OK;
@@ -3625,11 +4433,9 @@ static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb)
 static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 {
        struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-       int len, dirty = stmmac_rx_dirty(priv, queue);
+       int dirty = stmmac_rx_dirty(priv, queue);
        unsigned int entry = rx_q->dirty_rx;
 
-       len = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
-
        while (dirty-- > 0) {
                struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry];
                struct dma_desc *p;
@@ -3652,18 +4458,9 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
                                break;
 
                        buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
-
-                       dma_sync_single_for_device(priv->device, buf->sec_addr,
-                                                  len, DMA_FROM_DEVICE);
                }
 
-               buf->addr = page_pool_get_dma_addr(buf->page);
-
-               /* Sync whole allocation to device. This will invalidate old
-                * data.
-                */
-               dma_sync_single_for_device(priv->device, buf->addr, len,
-                                          DMA_FROM_DEVICE);
+               buf->addr = page_pool_get_dma_addr(buf->page) + buf->page_offset;
 
                stmmac_set_desc_addr(priv, p, buf->addr);
                if (priv->sph)
@@ -3673,11 +4470,11 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
                stmmac_refill_desc3(priv, rx_q, p);
 
                rx_q->rx_count_frames++;
-               rx_q->rx_count_frames += priv->rx_coal_frames;
-               if (rx_q->rx_count_frames > priv->rx_coal_frames)
+               rx_q->rx_count_frames += priv->rx_coal_frames[queue];
+               if (rx_q->rx_count_frames > priv->rx_coal_frames[queue])
                        rx_q->rx_count_frames = 0;
 
-               use_rx_wd = !priv->rx_coal_frames;
+               use_rx_wd = !priv->rx_coal_frames[queue];
                use_rx_wd |= rx_q->rx_count_frames > 0;
                if (!priv->use_riwt)
                        use_rx_wd = false;
@@ -3742,62 +4539,550 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv,
        return plen - len;
 }
 
-/**
- * stmmac_rx - manage the receive process
- * @priv: driver private structure
- * @limit: napi bugget
- * @queue: RX queue index.
- * Description :  this the function called by the napi poll method.
- * It gets all the frames inside the ring.
- */
-static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
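+/* Place one xdp_frame on the @queue TX ring. @dma_map is true when the
+ * frame needs a fresh DMA mapping (NDO transmit path) and false for
+ * XDP_TX frames that are already backed by the RX page pool.
+ */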
+static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
+                               struct xdp_frame *xdpf, bool dma_map)
 {
-       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-       struct stmmac_channel *ch = &priv->channel[queue];
-       unsigned int count = 0, error = 0, len = 0;
-       int status = 0, coe = priv->hw->rx_csum;
-       unsigned int next_entry = rx_q->cur_rx;
-       unsigned int desc_size;
-       struct sk_buff *skb = NULL;
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       unsigned int entry = tx_q->cur_tx;
+       struct dma_desc *tx_desc;
+       dma_addr_t dma_addr;
+       bool set_ic;
 
-       if (netif_msg_rx_status(priv)) {
-               void *rx_head;
+       if (stmmac_tx_avail(priv, queue) < STMMAC_TX_THRESH(priv))
+               return STMMAC_XDP_CONSUMED;
 
-               netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
-               if (priv->extend_desc) {
-                       rx_head = (void *)rx_q->dma_erx;
-                       desc_size = sizeof(struct dma_extended_desc);
-               } else {
-                       rx_head = (void *)rx_q->dma_rx;
-                       desc_size = sizeof(struct dma_desc);
-               }
+       if (likely(priv->extend_desc))
+               tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry);
+       else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+               tx_desc = &tx_q->dma_entx[entry].basic;
+       else
+               tx_desc = tx_q->dma_tx + entry;
 
-               stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true,
-                                   rx_q->dma_rx_phy, desc_size);
+       if (dma_map) {
+               dma_addr = dma_map_single(priv->device, xdpf->data,
+                                         xdpf->len, DMA_TO_DEVICE);
+               if (dma_mapping_error(priv->device, dma_addr))
+                       return STMMAC_XDP_CONSUMED;
+
+               tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO;
+       } else {
+               struct page *page = virt_to_page(xdpf->data);
+
+               dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) +
+                          xdpf->headroom;
+               dma_sync_single_for_device(priv->device, dma_addr,
+                                          xdpf->len, DMA_BIDIRECTIONAL);
+
+               tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX;
        }
-       while (count < limit) {
-               unsigned int buf1_len = 0, buf2_len = 0;
-               enum pkt_hash_types hash_type;
-               struct stmmac_rx_buffer *buf;
-               struct dma_desc *np, *p;
-               int entry;
-               u32 hash;
 
-               if (!count && rx_q->state_saved) {
-                       skb = rx_q->state.skb;
-                       error = rx_q->state.error;
-                       len = rx_q->state.len;
-               } else {
-                       rx_q->state_saved = false;
-                       skb = NULL;
-                       error = 0;
-                       len = 0;
-               }
+       tx_q->tx_skbuff_dma[entry].buf = dma_addr;
+       tx_q->tx_skbuff_dma[entry].map_as_page = false;
+       tx_q->tx_skbuff_dma[entry].len = xdpf->len;
+       tx_q->tx_skbuff_dma[entry].last_segment = true;
+       tx_q->tx_skbuff_dma[entry].is_jumbo = false;
 
-               if (count >= limit)
-                       break;
+       tx_q->xdpf[entry] = xdpf;
 
-read_again:
+       stmmac_set_desc_addr(priv, tx_desc, dma_addr);
+
+       stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len,
+                              true, priv->mode, true, true,
+                              xdpf->len);
+
+       tx_q->tx_count_frames++;
+
+       if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0)
+               set_ic = true;
+       else
+               set_ic = false;
+
+       if (set_ic) {
+               tx_q->tx_count_frames = 0;
+               stmmac_set_tx_ic(priv, tx_desc);
+               priv->xstats.tx_set_ic_bit++;
+       }
+
+       stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+       entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size);
+       tx_q->cur_tx = entry;
+
+       return STMMAC_XDP_TX;
+}
+
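+/* Map the current CPU to a TX queue index by wrapping around the number
+ * of TX queues in use.
+ */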
+static int stmmac_xdp_get_tx_queue(struct stmmac_priv *priv,
+                                  int cpu)
+{
+       int index = cpu;
+
+       if (unlikely(index < 0))
+               index = 0;
+
+       while (index >= priv->plat->tx_queues_to_use)
+               index -= priv->plat->tx_queues_to_use;
+
+       return index;
+}
+
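+/* Handle the XDP_TX verdict: convert the buffer to an xdp_frame and
+ * transmit it on the queue selected for this CPU, taking the netdev TX
+ * lock so XDP TX can coexist with the slow path.
+ */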
+static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
+                               struct xdp_buff *xdp)
+{
+       struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+       int cpu = smp_processor_id();
+       struct netdev_queue *nq;
+       int queue;
+       int res;
+
+       if (unlikely(!xdpf))
+               return STMMAC_XDP_CONSUMED;
+
+       queue = stmmac_xdp_get_tx_queue(priv, cpu);
+       nq = netdev_get_tx_queue(priv->dev, queue);
+
+       __netif_tx_lock(nq, cpu);
+       /* Avoids TX time-out as we are sharing with slow path */
+       nq->trans_start = jiffies;
+
+       res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf, false);
+       if (res == STMMAC_XDP_TX)
+               stmmac_flush_tx_descriptors(priv, queue);
+
+       __netif_tx_unlock(nq);
+
+       return res;
+}
+
+/* This function assumes rcu_read_lock() is held by the caller. */
+static int __stmmac_xdp_run_prog(struct stmmac_priv *priv,
+                                struct bpf_prog *prog,
+                                struct xdp_buff *xdp)
+{
+       u32 act;
+       int res;
+
+       act = bpf_prog_run_xdp(prog, xdp);
+       switch (act) {
+       case XDP_PASS:
+               res = STMMAC_XDP_PASS;
+               break;
+       case XDP_TX:
+               res = stmmac_xdp_xmit_back(priv, xdp);
+               break;
+       case XDP_REDIRECT:
+               if (xdp_do_redirect(priv->dev, xdp, prog) < 0)
+                       res = STMMAC_XDP_CONSUMED;
+               else
+                       res = STMMAC_XDP_REDIRECT;
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               fallthrough;
+       case XDP_ABORTED:
+               trace_xdp_exception(priv->dev, prog, act);
+               fallthrough;
+       case XDP_DROP:
+               res = STMMAC_XDP_CONSUMED;
+               break;
+       }
+
+       return res;
+}
+
+static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
+                                          struct xdp_buff *xdp)
+{
+       struct bpf_prog *prog;
+       int res;
+
+       rcu_read_lock();
+
+       prog = READ_ONCE(priv->xdp_prog);
+       if (!prog) {
+               res = STMMAC_XDP_PASS;
+               goto unlock;
+       }
+
+       res = __stmmac_xdp_run_prog(priv, prog, xdp);
+unlock:
+       rcu_read_unlock();
+       return ERR_PTR(-res);
+}
+
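+/* Run once per RX NAPI pass: arm the TX timer if frames were queued via
+ * XDP_TX and flush any pending XDP_REDIRECT work.
+ */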
+static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv,
+                                  int xdp_status)
+{
+       int cpu = smp_processor_id();
+       int queue;
+
+       queue = stmmac_xdp_get_tx_queue(priv, cpu);
+
+       if (xdp_status & STMMAC_XDP_TX)
+               stmmac_tx_timer_arm(priv, queue);
+
+       if (xdp_status & STMMAC_XDP_REDIRECT)
+               xdp_do_flush();
+}
+
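+/* Copy path for XDP_PASS in zero-copy mode: allocate an skb and copy the
+ * frame (and its metadata) out of the XSK buffer.
+ */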
+static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch,
+                                              struct xdp_buff *xdp)
+{
+       unsigned int metasize = xdp->data - xdp->data_meta;
+       unsigned int datasize = xdp->data_end - xdp->data;
+       struct sk_buff *skb;
+
+       skb = __napi_alloc_skb(&ch->rxtx_napi,
+                              xdp->data_end - xdp->data_hard_start,
+                              GFP_ATOMIC | __GFP_NOWARN);
+       if (unlikely(!skb))
+               return NULL;
+
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
+       memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+       if (metasize)
+               skb_metadata_set(skb, metasize);
+
+       return skb;
+}
+
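+/* Build an skb from the XSK buffer and hand it to the stack, applying
+ * timestamp, VLAN, checksum and RX hash handling on the rxtx NAPI.
+ */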
+static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
+                                  struct dma_desc *p, struct dma_desc *np,
+                                  struct xdp_buff *xdp)
+{
+       struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned int len = xdp->data_end - xdp->data;
+       enum pkt_hash_types hash_type;
+       int coe = priv->hw->rx_csum;
+       struct sk_buff *skb;
+       u32 hash;
+
+       skb = stmmac_construct_skb_zc(ch, xdp);
+       if (!skb) {
+               priv->dev->stats.rx_dropped++;
+               return;
+       }
+
+       stmmac_get_rx_hwtstamp(priv, p, np, skb);
+       stmmac_rx_vlan(priv->dev, skb);
+       skb->protocol = eth_type_trans(skb, priv->dev);
+
+       if (unlikely(!coe))
+               skb_checksum_none_assert(skb);
+       else
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
+               skb_set_hash(skb, hash, hash_type);
+
+       skb_record_rx_queue(skb, queue);
+       napi_gro_receive(&ch->rxtx_napi, skb);
+
+       priv->dev->stats.rx_packets++;
+       priv->dev->stats.rx_bytes += len;
+}
+
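+/* Refill up to @budget RX descriptors with buffers taken from the XSK
+ * pool. Returns false if the pool ran out of buffers.
+ */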
+static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       unsigned int entry = rx_q->dirty_rx;
+       struct dma_desc *rx_desc = NULL;
+       bool ret = true;
+
+       budget = min(budget, stmmac_rx_dirty(priv, queue));
+
+       while (budget-- > 0 && entry != rx_q->cur_rx) {
+               struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry];
+               dma_addr_t dma_addr;
+               bool use_rx_wd;
+
+               if (!buf->xdp) {
+                       buf->xdp = xsk_buff_alloc(rx_q->xsk_pool);
+                       if (!buf->xdp) {
+                               ret = false;
+                               break;
+                       }
+               }
+
+               if (priv->extend_desc)
+                       rx_desc = (struct dma_desc *)(rx_q->dma_erx + entry);
+               else
+                       rx_desc = rx_q->dma_rx + entry;
+
+               dma_addr = xsk_buff_xdp_get_dma(buf->xdp);
+               stmmac_set_desc_addr(priv, rx_desc, dma_addr);
+               stmmac_set_desc_sec_addr(priv, rx_desc, 0, false);
+               stmmac_refill_desc3(priv, rx_q, rx_desc);
+
+               rx_q->rx_count_frames++;
+               rx_q->rx_count_frames += priv->rx_coal_frames[queue];
+               if (rx_q->rx_count_frames > priv->rx_coal_frames[queue])
+                       rx_q->rx_count_frames = 0;
+
+               use_rx_wd = !priv->rx_coal_frames[queue];
+               use_rx_wd |= rx_q->rx_count_frames > 0;
+               if (!priv->use_riwt)
+                       use_rx_wd = false;
+
+               dma_wmb();
+               stmmac_set_rx_owner(priv, rx_desc, use_rx_wd);
+
+               entry = STMMAC_GET_ENTRY(entry, priv->dma_rx_size);
+       }
+
+       if (rx_desc) {
+               rx_q->dirty_rx = entry;
+               rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+                                    (rx_q->dirty_rx * sizeof(struct dma_desc));
+               stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue);
+       }
+
+       return ret;
+}
+
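+/* Zero-copy RX path used when an XSK pool is bound to the queue: run the
+ * XDP program on each XSK buffer and pass, transmit, redirect or drop
+ * the frame according to the verdict.
+ */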
+static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       unsigned int count = 0, error = 0, len = 0;
+       int dirty = stmmac_rx_dirty(priv, queue);
+       unsigned int next_entry = rx_q->cur_rx;
+       unsigned int desc_size;
+       struct bpf_prog *prog;
+       bool failure = false;
+       int xdp_status = 0;
+       int status = 0;
+
+       if (netif_msg_rx_status(priv)) {
+               void *rx_head;
+
+               netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
+               if (priv->extend_desc) {
+                       rx_head = (void *)rx_q->dma_erx;
+                       desc_size = sizeof(struct dma_extended_desc);
+               } else {
+                       rx_head = (void *)rx_q->dma_rx;
+                       desc_size = sizeof(struct dma_desc);
+               }
+
+               stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true,
+                                   rx_q->dma_rx_phy, desc_size);
+       }
+       while (count < limit) {
+               struct stmmac_rx_buffer *buf;
+               unsigned int buf1_len = 0;
+               struct dma_desc *np, *p;
+               int entry;
+               int res;
+
+               if (!count && rx_q->state_saved) {
+                       error = rx_q->state.error;
+                       len = rx_q->state.len;
+               } else {
+                       rx_q->state_saved = false;
+                       error = 0;
+                       len = 0;
+               }
+
+               if (count >= limit)
+                       break;
+
+read_again:
+               buf1_len = 0;
+               entry = next_entry;
+               buf = &rx_q->buf_pool[entry];
+
+               if (dirty >= STMMAC_RX_FILL_BATCH) {
+                       failure = failure ||
+                                 !stmmac_rx_refill_zc(priv, queue, dirty);
+                       dirty = 0;
+               }
+
+               if (priv->extend_desc)
+                       p = (struct dma_desc *)(rx_q->dma_erx + entry);
+               else
+                       p = rx_q->dma_rx + entry;
+
+               /* read the status of the incoming frame */
+               status = stmmac_rx_status(priv, &priv->dev->stats,
+                                         &priv->xstats, p);
+               /* check if managed by the DMA otherwise go ahead */
+               if (unlikely(status & dma_own))
+                       break;
+
+               /* Prefetch the next RX descriptor */
+               rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx,
+                                               priv->dma_rx_size);
+               next_entry = rx_q->cur_rx;
+
+               if (priv->extend_desc)
+                       np = (struct dma_desc *)(rx_q->dma_erx + next_entry);
+               else
+                       np = rx_q->dma_rx + next_entry;
+
+               prefetch(np);
+
+               if (priv->extend_desc)
+                       stmmac_rx_extended_status(priv, &priv->dev->stats,
+                                                 &priv->xstats,
+                                                 rx_q->dma_erx + entry);
+               if (unlikely(status == discard_frame)) {
+                       xsk_buff_free(buf->xdp);
+                       buf->xdp = NULL;
+                       dirty++;
+                       error = 1;
+                       if (!priv->hwts_rx_en)
+                               priv->dev->stats.rx_errors++;
+               }
+
+               if (unlikely(error && (status & rx_not_ls)))
+                       goto read_again;
+               if (unlikely(error)) {
+                       count++;
+                       continue;
+               }
+
+               /* Ensure a valid XSK buffer before proceeding */
+               if (!buf->xdp)
+                       break;
+
+               /* XSK pool expects RX frame 1:1 mapped to XSK buffer */
+               if (likely(status & rx_not_ls)) {
+                       xsk_buff_free(buf->xdp);
+                       buf->xdp = NULL;
+                       dirty++;
+                       count++;
+                       goto read_again;
+               }
+
+               /* XDP ZC frames only support primary buffers for now */
+               buf1_len = stmmac_rx_buf1_len(priv, p, status, len);
+               len += buf1_len;
+
+               /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
+                * Type frames (LLC/LLC-SNAP)
+                *
+                * llc_snap is never checked in GMAC >= 4, so this ACS
+                * feature is always disabled and packets need to be
+                * stripped manually.
+                */
+               if (likely(!(status & rx_not_ls)) &&
+                   (likely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
+                    unlikely(status != llc_snap))) {
+                       buf1_len -= ETH_FCS_LEN;
+                       len -= ETH_FCS_LEN;
+               }
+
+               /* RX buffer is good and fits into an XSK pool buffer */
+               buf->xdp->data_end = buf->xdp->data + buf1_len;
+               xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool);
+
+               rcu_read_lock();
+               prog = READ_ONCE(priv->xdp_prog);
+               res = __stmmac_xdp_run_prog(priv, prog, buf->xdp);
+               rcu_read_unlock();
+
+               switch (res) {
+               case STMMAC_XDP_PASS:
+                       stmmac_dispatch_skb_zc(priv, queue, p, np, buf->xdp);
+                       xsk_buff_free(buf->xdp);
+                       break;
+               case STMMAC_XDP_CONSUMED:
+                       xsk_buff_free(buf->xdp);
+                       priv->dev->stats.rx_dropped++;
+                       break;
+               case STMMAC_XDP_TX:
+               case STMMAC_XDP_REDIRECT:
+                       xdp_status |= res;
+                       break;
+               }
+
+               buf->xdp = NULL;
+               dirty++;
+               count++;
+       }
+
+       if (status & rx_not_ls) {
+               rx_q->state_saved = true;
+               rx_q->state.error = error;
+               rx_q->state.len = len;
+       }
+
+       stmmac_finalize_xdp_rx(priv, xdp_status);
+
+       if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
+               if (failure || stmmac_rx_dirty(priv, queue) > 0)
+                       xsk_set_rx_need_wakeup(rx_q->xsk_pool);
+               else
+                       xsk_clear_rx_need_wakeup(rx_q->xsk_pool);
+
+               return (int)count;
+       }
+
+       return failure ? limit : (int)count;
+}
+
+/**
+ * stmmac_rx - manage the receive process
+ * @priv: driver private structure
+ * @limit: napi budget
+ * @queue: RX queue index.
+ * Description : this is the function called by the napi poll method.
+ * It gets all the frames inside the ring.
+ */
+static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned int count = 0, error = 0, len = 0;
+       int status = 0, coe = priv->hw->rx_csum;
+       unsigned int next_entry = rx_q->cur_rx;
+       enum dma_data_direction dma_dir;
+       unsigned int desc_size;
+       struct sk_buff *skb = NULL;
+       struct xdp_buff xdp;
+       int xdp_status = 0;
+       int buf_sz;
+
+       dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
+       buf_sz = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
+
+       if (netif_msg_rx_status(priv)) {
+               void *rx_head;
+
+               netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
+               if (priv->extend_desc) {
+                       rx_head = (void *)rx_q->dma_erx;
+                       desc_size = sizeof(struct dma_extended_desc);
+               } else {
+                       rx_head = (void *)rx_q->dma_rx;
+                       desc_size = sizeof(struct dma_desc);
+               }
+
+               stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true,
+                                   rx_q->dma_rx_phy, desc_size);
+       }
+       while (count < limit) {
+               unsigned int buf1_len = 0, buf2_len = 0;
+               enum pkt_hash_types hash_type;
+               struct stmmac_rx_buffer *buf;
+               struct dma_desc *np, *p;
+               int entry;
+               u32 hash;
+
+               if (!count && rx_q->state_saved) {
+                       skb = rx_q->state.skb;
+                       error = rx_q->state.error;
+                       len = rx_q->state.len;
+               } else {
+                       rx_q->state_saved = false;
+                       skb = NULL;
+                       error = 0;
+                       len = 0;
+               }
+
+               if (count >= limit)
+                       break;
+
+read_again:
                buf1_len = 0;
                buf2_len = 0;
                entry = next_entry;
@@ -3876,6 +5161,64 @@ read_again:
                }
 
                if (!skb) {
+                       unsigned int pre_len, sync_len;
+
+                       dma_sync_single_for_cpu(priv->device, buf->addr,
+                                               buf1_len, dma_dir);
+
+                       xdp.data = page_address(buf->page) + buf->page_offset;
+                       xdp.data_end = xdp.data + buf1_len;
+                       xdp.data_hard_start = page_address(buf->page);
+                       xdp_set_data_meta_invalid(&xdp);
+                       xdp.frame_sz = buf_sz;
+                       xdp.rxq = &rx_q->xdp_rxq;
+
+                       pre_len = xdp.data_end - xdp.data_hard_start -
+                                 buf->page_offset;
+                       skb = stmmac_xdp_run_prog(priv, &xdp);
+                       /* Due to xdp_adjust_tail: the DMA sync for_device
+                        * must cover the max length the CPU touched
+                        */
+                       sync_len = xdp.data_end - xdp.data_hard_start -
+                                  buf->page_offset;
+                       sync_len = max(sync_len, pre_len);
+
+                       /* For verdicts other than XDP_PASS */
+                       if (IS_ERR(skb)) {
+                               unsigned int xdp_res = -PTR_ERR(skb);
+
+                               if (xdp_res & STMMAC_XDP_CONSUMED) {
+                                       page_pool_put_page(rx_q->page_pool,
+                                                          virt_to_head_page(xdp.data),
+                                                          sync_len, true);
+                                       buf->page = NULL;
+                                       priv->dev->stats.rx_dropped++;
+
+                                       /* Clear skb as it was set as a
+                                        * status by the XDP program.
+                                        */
+                                       skb = NULL;
+
+                                       if (unlikely((status & rx_not_ls)))
+                                               goto read_again;
+
+                                       count++;
+                                       continue;
+                               } else if (xdp_res & (STMMAC_XDP_TX |
+                                                     STMMAC_XDP_REDIRECT)) {
+                                       xdp_status |= xdp_res;
+                                       buf->page = NULL;
+                                       skb = NULL;
+                                       count++;
+                                       continue;
+                               }
+                       }
+               }
+
+               if (!skb) {
+                       /* XDP program may expand or reduce tail */
+                       buf1_len = xdp.data_end - xdp.data;
+
                        skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
                        if (!skb) {
                                priv->dev->stats.rx_dropped++;
@@ -3883,10 +5226,8 @@ read_again:
                                goto drain_data;
                        }
 
-                       dma_sync_single_for_cpu(priv->device, buf->addr,
-                                               buf1_len, DMA_FROM_DEVICE);
-                       skb_copy_to_linear_data(skb, page_address(buf->page),
-                                               buf1_len);
+                       /* XDP program may adjust header */
+                       skb_copy_to_linear_data(skb, xdp.data, buf1_len);
                        skb_put(skb, buf1_len);
 
                        /* Data payload copied into SKB, page ready for recycle */
@@ -3894,9 +5235,9 @@ read_again:
                        buf->page = NULL;
                } else if (buf1_len) {
                        dma_sync_single_for_cpu(priv->device, buf->addr,
-                                               buf1_len, DMA_FROM_DEVICE);
+                                               buf1_len, dma_dir);
                        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                                       buf->page, 0, buf1_len,
+                                       buf->page, buf->page_offset, buf1_len,
                                        priv->dma_buf_sz);
 
                        /* Data payload appended into SKB */
@@ -3906,7 +5247,7 @@ read_again:
 
                if (buf2_len) {
                        dma_sync_single_for_cpu(priv->device, buf->sec_addr,
-                                               buf2_len, DMA_FROM_DEVICE);
+                                               buf2_len, dma_dir);
                        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                                        buf->sec_page, 0, buf2_len,
                                        priv->dma_buf_sz);
@@ -3952,6 +5293,8 @@ drain_data:
                rx_q->state.len = len;
        }
 
+       stmmac_finalize_xdp_rx(priv, xdp_status);
+
        stmmac_rx_refill(priv, queue);
 
        priv->xstats.rx_pkt_n += count;
@@ -3991,7 +5334,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 
        priv->xstats.napi_poll++;
 
-       work_done = stmmac_tx_clean(priv, priv->dma_tx_size, chan);
+       work_done = stmmac_tx_clean(priv, budget, chan);
        work_done = min(work_done, budget);
 
        if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -4005,6 +5348,42 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
        return work_done;
 }
 
+static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
+{
+       struct stmmac_channel *ch =
+               container_of(napi, struct stmmac_channel, rxtx_napi);
+       struct stmmac_priv *priv = ch->priv_data;
+       int rx_done, tx_done;
+       u32 chan = ch->index;
+
+       priv->xstats.napi_poll++;
+
+       tx_done = stmmac_tx_clean(priv, budget, chan);
+       tx_done = min(tx_done, budget);
+
+       rx_done = stmmac_rx_zc(priv, budget, chan);
+
+       /* If either TX or RX work is not complete, return budget
+        * and keep polling
+        */
+       if (tx_done >= budget || rx_done >= budget)
+               return budget;
+
+       /* all work done, exit the polling mode */
+       if (napi_complete_done(napi, rx_done)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&ch->lock, flags);
+               /* Both RX and TX work are complete,
+                * so enable both RX & TX IRQs.
+                */
+               stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
+               spin_unlock_irqrestore(&ch->lock, flags);
+       }
+
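+       /* NAPI contract: once napi_complete_done() has been called, the poll
+        * routine must report strictly less than budget, hence the cap at
+        * budget - 1.
+        */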
+       return min(rx_done, budget - 1);
+}
+
 /**
  *  stmmac_tx_timeout
  *  @dev : Pointer to net device structure
@@ -4064,6 +5443,11 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
                return -EBUSY;
        }
 
+       if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) {
+               netdev_dbg(priv->dev, "Jumbo frames not supported for XDP\n");
+               return -EINVAL;
+       }
+
        new_mtu = STMMAC_ALIGN(new_mtu);
 
        /* If condition true, FIFO is too small or MTU too large */
@@ -4125,27 +5509,57 @@ static int stmmac_set_features(struct net_device *netdev,
        stmmac_rx_ipc(priv, priv->hw);
 
        sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+
        for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
                stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
 
        return 0;
 }
 
-/**
- *  stmmac_interrupt - main ISR
- *  @irq: interrupt number.
- *  @dev_id: to pass the net device pointer (must be valid).
- *  Description: this is the main driver interrupt service routine.
- *  It can call:
- *  o DMA service routine (to manage incoming frame reception and transmission
- *    status)
- *  o Core interrupts to manage: remote wake-up, management counter, LPI
- *    interrupts.
- */
-static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
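+/* Track the frame preemption (FPE) handshake state: verify mPackets mark a
+ * station as FPE capable, response mPackets move it towards FPE ON, and the
+ * fpe_task worker completes the transition outside IRQ context.
+ */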
+static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status)
+{
+       struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+       enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+       enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+       bool *hs_enable = &fpe_cfg->hs_enable;
+
+       if (status == FPE_EVENT_UNKNOWN || !*hs_enable)
+               return;
+
+       /* If LP has sent verify mPacket, LP is FPE capable */
+       if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) {
+               if (*lp_state < FPE_STATE_CAPABLE)
+                       *lp_state = FPE_STATE_CAPABLE;
+
+               /* If the user has requested FPE enable, respond quickly */
+               if (*hs_enable)
+                       stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+                                               MPACKET_RESPONSE);
+       }
+
+       /* If Local has sent verify mPacket, Local is FPE capable */
+       if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) {
+               if (*lo_state < FPE_STATE_CAPABLE)
+                       *lo_state = FPE_STATE_CAPABLE;
+       }
+
+       /* If LP has sent response mPacket, LP is entering FPE ON */
+       if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP)
+               *lp_state = FPE_STATE_ENTERING_ON;
+
+       /* If Local has sent response mPacket, Local is entering FPE ON */
+       if ((status & FPE_EVENT_TRSP) == FPE_EVENT_TRSP)
+               *lo_state = FPE_STATE_ENTERING_ON;
+
+       if (!test_bit(__FPE_REMOVING, &priv->fpe_task_state) &&
+           !test_and_set_bit(__FPE_TASK_SCHED, &priv->fpe_task_state) &&
+           priv->fpe_wq) {
+               queue_work(priv->fpe_wq, &priv->fpe_task);
+       }
+}
+
+static void stmmac_common_interrupt(struct stmmac_priv *priv)
 {
-       struct net_device *dev = (struct net_device *)dev_id;
-       struct stmmac_priv *priv = netdev_priv(dev);
        u32 rx_cnt = priv->plat->rx_queues_to_use;
        u32 tx_cnt = priv->plat->tx_queues_to_use;
        u32 queues_count;
@@ -4158,12 +5572,16 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
        if (priv->irq_wake)
                pm_wakeup_event(priv->device, 0);
 
-       /* Check if adapter is up */
-       if (test_bit(STMMAC_DOWN, &priv->state))
-               return IRQ_HANDLED;
-       /* Check if a fatal error happened */
-       if (stmmac_safety_feat_interrupt(priv))
-               return IRQ_HANDLED;
+       if (priv->dma_cap.estsel)
+               stmmac_est_irq_status(priv, priv->ioaddr, priv->dev,
+                                     &priv->xstats, tx_cnt);
+
+       if (priv->dma_cap.fpesel) {
+               int status = stmmac_fpe_irq_status(priv, priv->ioaddr,
+                                                  priv->dev);
+
+               stmmac_fpe_event_status(priv, status);
+       }
 
        /* To handle GMAC own interrupts */
        if ((priv->plat->has_gmac) || xmac) {
@@ -4195,11 +5613,41 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
                /* PCS link status */
                if (priv->hw->pcs) {
                        if (priv->xstats.pcs_link)
-                               netif_carrier_on(dev);
+                               netif_carrier_on(priv->dev);
                        else
-                               netif_carrier_off(dev);
+                               netif_carrier_off(priv->dev);
                }
+
+               stmmac_timestamp_interrupt(priv, priv);
        }
+}
+
+/**
+ *  stmmac_interrupt - main ISR
+ *  @irq: interrupt number.
+ *  @dev_id: to pass the net device pointer.
+ *  Description: this is the main driver interrupt service routine.
+ *  It can call:
+ *  o DMA service routine (to manage incoming frame reception and transmission
+ *    status)
+ *  o Core interrupts to manage: remote wake-up, management counter, LPI
+ *    interrupts.
+ */
+static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = (struct net_device *)dev_id;
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       /* Check if adapter is up */
+       if (test_bit(STMMAC_DOWN, &priv->state))
+               return IRQ_HANDLED;
+
+       /* Check if a fatal error happened */
+       if (stmmac_safety_feat_interrupt(priv))
+               return IRQ_HANDLED;
+
+       /* To handle Common interrupts */
+       stmmac_common_interrupt(priv);
 
        /* To handle DMA interrupts */
        stmmac_dma_interrupt(priv);
@@ -4207,15 +5655,136 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = (struct net_device *)dev_id;
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       if (unlikely(!dev)) {
+               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+               return IRQ_NONE;
+       }
+
+       /* Check if adapter is up */
+       if (test_bit(STMMAC_DOWN, &priv->state))
+               return IRQ_HANDLED;
+
+       /* To handle Common interrupts */
+       stmmac_common_interrupt(priv);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = (struct net_device *)dev_id;
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       if (unlikely(!dev)) {
+               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+               return IRQ_NONE;
+       }
+
+       /* Check if adapter is up */
+       if (test_bit(STMMAC_DOWN, &priv->state))
+               return IRQ_HANDLED;
+
+       /* Check if a fatal error happened */
+       stmmac_safety_feat_interrupt(priv);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t stmmac_msi_intr_tx(int irq, void *data)
+{
+       struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)data;
+       int chan = tx_q->queue_index;
+       struct stmmac_priv *priv;
+       int status;
+
+       priv = container_of(tx_q, struct stmmac_priv, tx_queue[chan]);
+
+       if (unlikely(!data)) {
+               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+               return IRQ_NONE;
+       }
+
+       /* Check if adapter is up */
+       if (test_bit(STMMAC_DOWN, &priv->state))
+               return IRQ_HANDLED;
+
+       status = stmmac_napi_check(priv, chan, DMA_DIR_TX);
+
+       if (unlikely(status & tx_hard_error_bump_tc)) {
+               /* Try to bump up the dma threshold on this failure */
+               if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
+                   tc <= 256) {
+                       tc += 64;
+                       if (priv->plat->force_thresh_dma_mode)
+                               stmmac_set_dma_operation_mode(priv,
+                                                             tc,
+                                                             tc,
+                                                             chan);
+                       else
+                               stmmac_set_dma_operation_mode(priv,
+                                                             tc,
+                                                             SF_DMA_MODE,
+                                                             chan);
+                       priv->xstats.threshold = tc;
+               }
+       } else if (unlikely(status == tx_hard_error)) {
+               stmmac_tx_err(priv, chan);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t stmmac_msi_intr_rx(int irq, void *data)
+{
+       struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)data;
+       int chan = rx_q->queue_index;
+       struct stmmac_priv *priv;
+
+       priv = container_of(rx_q, struct stmmac_priv, rx_queue[chan]);
+
+       if (unlikely(!data)) {
+               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+               return IRQ_NONE;
+       }
+
+       /* Check if adapter is up */
+       if (test_bit(STMMAC_DOWN, &priv->state))
+               return IRQ_HANDLED;
+
+       stmmac_napi_check(priv, chan, DMA_DIR_RX);
+
+       return IRQ_HANDLED;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /* Polling receive - used by NETCONSOLE and other diagnostic tools
  * to allow network I/O with interrupts disabled.
  */
 static void stmmac_poll_controller(struct net_device *dev)
 {
-       disable_irq(dev->irq);
-       stmmac_interrupt(dev->irq, dev);
-       enable_irq(dev->irq);
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int i;
+
+       /* If adapter is down, do nothing */
+       if (test_bit(STMMAC_DOWN, &priv->state))
+               return;
+
+       if (priv->plat->multi_msi_en) {
+               for (i = 0; i < priv->plat->rx_queues_to_use; i++)
+                       stmmac_msi_intr_rx(0, &priv->rx_queue[i]);
+
+               for (i = 0; i < priv->plat->tx_queues_to_use; i++)
+                       stmmac_msi_intr_tx(0, &priv->tx_queue[i]);
+       } else {
+               disable_irq(dev->irq);
+               stmmac_interrupt(dev->irq, dev);
+               enable_irq(dev->irq);
+       }
 }
 #endif
 
@@ -4264,7 +5833,7 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
        if (!tc_cls_can_offload_and_chain0(priv->dev, type_data))
                return ret;
 
-       stmmac_disable_all_queues(priv);
+       __stmmac_disable_all_queues(priv);
 
        switch (type) {
        case TC_SETUP_CLSU32:
@@ -4628,6 +6197,12 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
        bool is_double = false;
        int ret;
 
+       ret = pm_runtime_get_sync(priv->device);
+       if (ret < 0) {
+               pm_runtime_put_noidle(priv->device);
+               return ret;
+       }
+
        if (be16_to_cpu(proto) == ETH_P_8021AD)
                is_double = true;
 
@@ -4661,10 +6236,222 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
        if (priv->hw->num_vlan) {
                ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
                if (ret)
-                       return ret;
+                       goto del_vlan_error;
+       }
+
+       ret = stmmac_vlan_update(priv, is_double);
+
+del_vlan_error:
+       pm_runtime_put(priv->device);
+
+       return ret;
+}
+
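+/* .ndo_bpf entry point: attach/detach an XDP program or bind an AF_XDP
+ * buffer pool to a queue, depending on bpf->command.
+ */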
+static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       switch (bpf->command) {
+       case XDP_SETUP_PROG:
+               return stmmac_xdp_set_prog(priv, bpf->prog, bpf->extack);
+       case XDP_SETUP_XSK_POOL:
+               return stmmac_xdp_setup_pool(priv, bpf->xsk.pool,
+                                            bpf->xsk.queue_id);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int stmmac_xdp_xmit(struct net_device *dev, int num_frames,
+                          struct xdp_frame **frames, u32 flags)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int cpu = smp_processor_id();
+       struct netdev_queue *nq;
+       int i, nxmit = 0;
+       int queue;
+
+       if (unlikely(test_bit(STMMAC_DOWN, &priv->state)))
+               return -ENETDOWN;
+
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+               return -EINVAL;
+
+       queue = stmmac_xdp_get_tx_queue(priv, cpu);
+       nq = netdev_get_tx_queue(priv->dev, queue);
+
+       __netif_tx_lock(nq, cpu);
+       /* Avoid TX time-outs since this queue is shared with the slow path */
+       nq->trans_start = jiffies;
+
+       for (i = 0; i < num_frames; i++) {
+               int res;
+
+               res = stmmac_xdp_xmit_xdpf(priv, queue, frames[i], true);
+               if (res == STMMAC_XDP_CONSUMED)
+                       break;
+
+               nxmit++;
+       }
+
+       if (flags & XDP_XMIT_FLUSH) {
+               stmmac_flush_tx_descriptors(priv, queue);
+               stmmac_tx_timer_arm(priv, queue);
+       }
+
+       __netif_tx_unlock(nq);
+
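+       /* Per the ndo_xdp_xmit contract, nxmit tells the XDP core how many
+        * frames were consumed; the caller frees any frames not transmitted.
+        */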
+       return nxmit;
+}
+
+void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned long flags;
+
+       spin_lock_irqsave(&ch->lock, flags);
+       stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 1, 0);
+       spin_unlock_irqrestore(&ch->lock, flags);
+
+       stmmac_stop_rx_dma(priv, queue);
+       __free_dma_rx_desc_resources(priv, queue);
+}
+
+void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned long flags;
+       u32 buf_size;
+       int ret;
+
+       ret = __alloc_dma_rx_desc_resources(priv, queue);
+       if (ret) {
+               netdev_err(priv->dev, "Failed to alloc RX desc.\n");
+               return;
+       }
+
+       ret = __init_dma_rx_desc_rings(priv, queue, GFP_KERNEL);
+       if (ret) {
+               __free_dma_rx_desc_resources(priv, queue);
+               netdev_err(priv->dev, "Failed to init RX desc.\n");
+               return;
+       }
+
+       stmmac_clear_rx_descriptors(priv, queue);
+
+       stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+                           rx_q->dma_rx_phy, rx_q->queue_index);
+
+       rx_q->rx_tail_addr = rx_q->dma_rx_phy + (rx_q->buf_alloc_num *
+                            sizeof(struct dma_desc));
+       stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+                              rx_q->rx_tail_addr, rx_q->queue_index);
+
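+       /* An attached XSK pool dictates the RX buffer size; otherwise fall
+        * back to the driver's default DMA buffer size.
+        */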
+       if (rx_q->xsk_pool && rx_q->buf_alloc_num) {
+               buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool);
+               stmmac_set_dma_bfsize(priv, priv->ioaddr,
+                                     buf_size,
+                                     rx_q->queue_index);
+       } else {
+               stmmac_set_dma_bfsize(priv, priv->ioaddr,
+                                     priv->dma_buf_sz,
+                                     rx_q->queue_index);
+       }
+
+       stmmac_start_rx_dma(priv, queue);
+
+       spin_lock_irqsave(&ch->lock, flags);
+       stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 1, 0);
+       spin_unlock_irqrestore(&ch->lock, flags);
+}
+
+void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned long flags;
+
+       spin_lock_irqsave(&ch->lock, flags);
+       stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 0, 1);
+       spin_unlock_irqrestore(&ch->lock, flags);
+
+       stmmac_stop_tx_dma(priv, queue);
+       __free_dma_tx_desc_resources(priv, queue);
+}
+
+void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned long flags;
+       int ret;
+
+       ret = __alloc_dma_tx_desc_resources(priv, queue);
+       if (ret) {
+               netdev_err(priv->dev, "Failed to alloc TX desc.\n");
+               return;
+       }
+
+       ret = __init_dma_tx_desc_rings(priv, queue);
+       if (ret) {
+               __free_dma_tx_desc_resources(priv, queue);
+               netdev_err(priv->dev, "Failed to init TX desc.\n");
+               return;
+       }
+
+       stmmac_clear_tx_descriptors(priv, queue);
+
+       stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+                           tx_q->dma_tx_phy, tx_q->queue_index);
+
+       if (tx_q->tbs & STMMAC_TBS_AVAIL)
+               stmmac_enable_tbs(priv, priv->ioaddr, 1, tx_q->queue_index);
+
+       tx_q->tx_tail_addr = tx_q->dma_tx_phy;
+       stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+                              tx_q->tx_tail_addr, tx_q->queue_index);
+
+       stmmac_start_tx_dma(priv, queue);
+
+       spin_lock_irqsave(&ch->lock, flags);
+       stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 0, 1);
+       spin_unlock_irqrestore(&ch->lock, flags);
+}
+
+int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       struct stmmac_rx_queue *rx_q;
+       struct stmmac_tx_queue *tx_q;
+       struct stmmac_channel *ch;
+
+       if (test_bit(STMMAC_DOWN, &priv->state) ||
+           !netif_carrier_ok(priv->dev))
+               return -ENETDOWN;
+
+       if (!stmmac_xdp_is_enabled(priv))
+               return -ENXIO;
+
+       if (queue >= priv->plat->rx_queues_to_use ||
+           queue >= priv->plat->tx_queues_to_use)
+               return -EINVAL;
+
+       rx_q = &priv->rx_queue[queue];
+       tx_q = &priv->tx_queue[queue];
+       ch = &priv->channel[queue];
+
+       if (!rx_q->xsk_pool && !tx_q->xsk_pool)
+               return -ENXIO;
+
+       if (!napi_if_scheduled_mark_missed(&ch->rxtx_napi)) {
+               /* EQoS does not have a per-DMA-channel SW interrupt,
+                * so schedule the RX/TX NAPI straight away.
+                */
+               if (likely(napi_schedule_prep(&ch->rxtx_napi)))
+                       __napi_schedule(&ch->rxtx_napi);
        }
 
-       return stmmac_vlan_update(priv, is_double);
+       return 0;
 }
 
 static const struct net_device_ops stmmac_netdev_ops = {
@@ -4685,6 +6472,9 @@ static const struct net_device_ops stmmac_netdev_ops = {
        .ndo_set_mac_address = stmmac_set_mac_address,
        .ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid,
+       .ndo_bpf = stmmac_bpf,
+       .ndo_xdp_xmit = stmmac_xdp_xmit,
+       .ndo_xsk_wakeup = stmmac_xsk_wakeup,
 };
 
 static void stmmac_reset_subtask(struct stmmac_priv *priv)
@@ -4843,6 +6633,12 @@ static void stmmac_napi_add(struct net_device *dev)
                                          stmmac_napi_poll_tx,
                                          NAPI_POLL_WEIGHT);
                }
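+               /* The combined rxtx NAPI below services channels running in
+                * XDP zero-copy (AF_XDP) mode, where TX cleaning and RX via
+                * stmmac_rx_zc() are handled together.
+                */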
+               if (queue < priv->plat->rx_queues_to_use &&
+                   queue < priv->plat->tx_queues_to_use) {
+                       netif_napi_add(dev, &ch->rxtx_napi,
+                                      stmmac_napi_poll_rxtx,
+                                      NAPI_POLL_WEIGHT);
+               }
        }
 }
 
@@ -4860,6 +6656,10 @@ static void stmmac_napi_del(struct net_device *dev)
                        netif_napi_del(&ch->rx_napi);
                if (queue < priv->plat->tx_queues_to_use)
                        netif_napi_del(&ch->tx_napi);
+               if (queue < priv->plat->rx_queues_to_use &&
+                   queue < priv->plat->tx_queues_to_use) {
+                       netif_napi_del(&ch->rxtx_napi);
+               }
        }
 }
 
@@ -4901,6 +6701,68 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size)
        return ret;
 }
 
+#define SEND_VERIFY_MPACKET_FMT "Send Verify mPacket lo_state=%d lp_state=%d\n"
+static void stmmac_fpe_lp_task(struct work_struct *work)
+{
+       struct stmmac_priv *priv = container_of(work, struct stmmac_priv,
+                                               fpe_task);
+       struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+       enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+       enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+       bool *hs_enable = &fpe_cfg->hs_enable;
+       bool *enable = &fpe_cfg->enable;
+       int retries = 20;
+
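+       /* Retry the verify exchange for up to ~10 seconds
+        * (20 attempts, 500 ms apart) before giving up.
+        */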
+       while (retries-- > 0) {
+               /* Bail out immediately if FPE handshake is OFF */
+               if (*lo_state == FPE_STATE_OFF || !*hs_enable)
+                       break;
+
+               if (*lo_state == FPE_STATE_ENTERING_ON &&
+                   *lp_state == FPE_STATE_ENTERING_ON) {
+                       stmmac_fpe_configure(priv, priv->ioaddr,
+                                            priv->plat->tx_queues_to_use,
+                                            priv->plat->rx_queues_to_use,
+                                            *enable);
+
+                       netdev_info(priv->dev, "configured FPE\n");
+
+                       *lo_state = FPE_STATE_ON;
+                       *lp_state = FPE_STATE_ON;
+                       netdev_info(priv->dev, "!!! BOTH FPE stations ON\n");
+                       break;
+               }
+
+               if ((*lo_state == FPE_STATE_CAPABLE ||
+                    *lo_state == FPE_STATE_ENTERING_ON) &&
+                    *lp_state != FPE_STATE_ON) {
+                       netdev_info(priv->dev, SEND_VERIFY_MPACKET_FMT,
+                                   *lo_state, *lp_state);
+                       stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+                                               MPACKET_VERIFY);
+               }
+               /* Sleep then retry */
+               msleep(500);
+       }
+
+       clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+}
+
+void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable)
+{
+       if (priv->plat->fpe_cfg->hs_enable != enable) {
+               if (enable) {
+                       stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+                                               MPACKET_VERIFY);
+               } else {
+                       priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF;
+                       priv->plat->fpe_cfg->lp_fpe_state = FPE_STATE_OFF;
+               }
+
+               priv->plat->fpe_cfg->hs_enable = enable;
+       }
+}
+
 /**
  * stmmac_dvr_probe
  * @device: device pointer
@@ -4936,12 +6798,19 @@ int stmmac_dvr_probe(struct device *device,
        priv->plat = plat_dat;
        priv->ioaddr = res->addr;
        priv->dev->base_addr = (unsigned long)res->addr;
+       priv->plat->dma_cfg->multi_msi_en = priv->plat->multi_msi_en;
 
        priv->dev->irq = res->irq;
        priv->wol_irq = res->wol_irq;
        priv->lpi_irq = res->lpi_irq;
-
-       if (!IS_ERR_OR_NULL(res->mac))
+       priv->sfty_ce_irq = res->sfty_ce_irq;
+       priv->sfty_ue_irq = res->sfty_ue_irq;
+       for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
+               priv->rx_irq[i] = res->rx_irq[i];
+       for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
+               priv->tx_irq[i] = res->tx_irq[i];
+
+       if (!is_zero_ether_addr(res->mac))
                memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN);
 
        dev_set_drvdata(device, priv->dev);
@@ -4949,6 +6818,10 @@ int stmmac_dvr_probe(struct device *device,
        /* Verify driver arguments */
        stmmac_verify_args();
 
+       priv->af_xdp_zc_qps = bitmap_zalloc(MTL_MAX_TX_QUEUES, GFP_KERNEL);
+       if (!priv->af_xdp_zc_qps)
+               return -ENOMEM;
+
        /* Allocate workqueue */
        priv->wq = create_singlethread_workqueue("stmmac_wq");
        if (!priv->wq) {
@@ -4958,6 +6831,9 @@ int stmmac_dvr_probe(struct device *device,
 
        INIT_WORK(&priv->service_task, stmmac_service_task);
 
+       /* Initialize the link-partner FPE handshake work */
+       INIT_WORK(&priv->fpe_task, stmmac_fpe_lp_task);
+
        /* Override with kernel parameters if supplied XXX CRS XXX
         * this needs to have multiple instances
         */
@@ -4979,6 +6855,11 @@ int stmmac_dvr_probe(struct device *device,
        if (ret)
                goto error_hw_init;
 
+       /* Only DWMAC core version 5.20 onwards supports HW descriptor prefetch.
+        */
+       if (priv->synopsys_id < DWMAC_CORE_5_20)
+               priv->plat->dma_cfg->dche = false;
+
        stmmac_check_ether_addr(priv);
 
        ndev->netdev_ops = &stmmac_netdev_ops;
@@ -5001,7 +6882,8 @@ int stmmac_dvr_probe(struct device *device,
 
        if (priv->dma_cap.sphen) {
                ndev->hw_features |= NETIF_F_GRO;
-               priv->sph = true;
+               priv->sph_cap = true;
+               priv->sph = priv->sph_cap;
                dev_info(priv->device, "SPH feature enabled\n");
        }
 
@@ -5103,6 +6985,10 @@ int stmmac_dvr_probe(struct device *device,
 
        stmmac_check_pcs_mode(priv);
 
+       pm_runtime_get_noresume(device);
+       pm_runtime_set_active(device);
+       pm_runtime_enable(device);
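+       /* The device is now runtime-active for the rest of probe; the
+        * pm_runtime_put() near the end of probe releases this reference.
+        */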
+
        if (priv->hw->pcs != STMMAC_PCS_TBI &&
            priv->hw->pcs != STMMAC_PCS_RTBI) {
                /* MDIO bus Registration */
@@ -5140,6 +7026,11 @@ int stmmac_dvr_probe(struct device *device,
        stmmac_init_fs(ndev);
 #endif
 
+       /* Let pm_runtime_put() disable the clocks.
+        * If CONFIG_PM is not enabled, the clocks will stay powered.
+        */
+       pm_runtime_put(device);
+
        return ret;
 
 error_serdes_powerup:
@@ -5154,6 +7045,8 @@ error_mdio_register:
        stmmac_napi_del(ndev);
 error_hw_init:
        destroy_workqueue(priv->wq);
+       stmmac_bus_clks_config(priv, false);
+       bitmap_free(priv->af_xdp_zc_qps);
 
        return ret;
 }
@@ -5189,13 +7082,14 @@ int stmmac_dvr_remove(struct device *dev)
        phylink_destroy(priv->phylink);
        if (priv->plat->stmmac_rst)
                reset_control_assert(priv->plat->stmmac_rst);
-       clk_disable_unprepare(priv->plat->pclk);
-       clk_disable_unprepare(priv->plat->stmmac_clk);
+       pm_runtime_put(dev);
+       pm_runtime_disable(dev);
        if (priv->hw->pcs != STMMAC_PCS_TBI &&
            priv->hw->pcs != STMMAC_PCS_RTBI)
                stmmac_mdio_unregister(ndev);
        destroy_workqueue(priv->wq);
        mutex_destroy(&priv->lock);
+       bitmap_free(priv->af_xdp_zc_qps);
 
        return 0;
 }
@@ -5213,6 +7107,7 @@ int stmmac_suspend(struct device *dev)
        struct net_device *ndev = dev_get_drvdata(dev);
        struct stmmac_priv *priv = netdev_priv(ndev);
        u32 chan;
+       int ret;
 
        if (!ndev || !netif_running(ndev))
                return 0;
@@ -5256,11 +7151,24 @@ int stmmac_suspend(struct device *dev)
                pinctrl_pm_select_sleep_state(priv->device);
                /* Disable clock in case of PWM is off */
                clk_disable_unprepare(priv->plat->clk_ptp_ref);
-               clk_disable_unprepare(priv->plat->pclk);
-               clk_disable_unprepare(priv->plat->stmmac_clk);
+               ret = pm_runtime_force_suspend(dev);
+               if (ret) {
+                       mutex_unlock(&priv->lock);
+                       return ret;
+               }
        }
+
        mutex_unlock(&priv->lock);
 
+       if (priv->dma_cap.fpesel) {
+               /* Disable FPE */
+               stmmac_fpe_configure(priv, priv->ioaddr,
+                                    priv->plat->tx_queues_to_use,
+                                    priv->plat->rx_queues_to_use, false);
+
+               stmmac_fpe_handshake(priv, false);
+       }
+
        priv->speed = SPEED_UNKNOWN;
        return 0;
 }
@@ -5323,8 +7231,9 @@ int stmmac_resume(struct device *dev)
        } else {
                pinctrl_pm_select_default_state(priv->device);
                /* enable the clk previously disabled */
-               clk_prepare_enable(priv->plat->stmmac_clk);
-               clk_prepare_enable(priv->plat->pclk);
+               ret = pm_runtime_force_resume(dev);
+               if (ret)
+                       return ret;
                if (priv->plat->clk_ptp_ref)
                        clk_prepare_enable(priv->plat->clk_ptp_ref);
                /* reset the phy so that it's ready */