net: mvpp2: handle misc PPv2.1/PPv2.2 differences
[linux-2.6-microblaze.git] drivers/net/ethernet/marvell/mvpp2.c
index d00421b..bd7dc4b 100644
@@ -25,6 +25,7 @@
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/phy.h>
 #include <linux/clk.h>
 #include <linux/hrtimer.h>
 #define     MVPP2_SNOOP_PKT_SIZE_MASK          0x1ff
 #define     MVPP2_SNOOP_BUF_HDR_MASK           BIT(9)
 #define     MVPP2_RXQ_POOL_SHORT_OFFS          20
-#define     MVPP2_RXQ_POOL_SHORT_MASK          0x700000
+#define     MVPP21_RXQ_POOL_SHORT_MASK         0x700000
+#define     MVPP22_RXQ_POOL_SHORT_MASK         0xf00000
 #define     MVPP2_RXQ_POOL_LONG_OFFS           24
-#define     MVPP2_RXQ_POOL_LONG_MASK           0x7000000
+#define     MVPP21_RXQ_POOL_LONG_MASK          0x7000000
+#define     MVPP22_RXQ_POOL_LONG_MASK          0xf000000
 #define     MVPP2_RXQ_PACKET_OFFSET_OFFS       28
 #define     MVPP2_RXQ_PACKET_OFFSET_MASK       0x70000000
 #define     MVPP2_RXQ_DISABLE_MASK             BIT(31)
 /* Descriptor Manager Top Registers */
 #define MVPP2_RXQ_NUM_REG                      0x2040
 #define MVPP2_RXQ_DESC_ADDR_REG                        0x2044
+#define     MVPP22_DESC_ADDR_OFFS              8
 #define MVPP2_RXQ_DESC_SIZE_REG                        0x2048
 #define     MVPP2_RXQ_DESC_SIZE_MASK           0x3ff0
 #define MVPP2_RXQ_STATUS_UPDATE_REG(rxq)       (0x3000 + 4 * (rxq))
 #define MVPP2_TXQ_DESC_SIZE_REG                        0x2088
 #define     MVPP2_TXQ_DESC_SIZE_MASK           0x3ff0
 #define MVPP2_AGGR_TXQ_UPDATE_REG              0x2090
-#define MVPP2_TXQ_THRESH_REG                   0x2094
-#define     MVPP2_TRANSMITTED_THRESH_OFFSET    16
-#define     MVPP2_TRANSMITTED_THRESH_MASK      0x3fff0000
 #define MVPP2_TXQ_INDEX_REG                    0x2098
 #define MVPP2_TXQ_PREF_BUF_REG                 0x209c
 #define     MVPP2_PREF_BUF_PTR(desc)           ((desc) & 0xfff)
 #define MVPP2_TXQ_RSVD_CLR_REG                 0x20b8
 #define     MVPP2_TXQ_RSVD_CLR_OFFSET          16
 #define MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu)      (0x2100 + 4 * (cpu))
+#define     MVPP22_AGGR_TXQ_DESC_ADDR_OFFS     8
 #define MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu)      (0x2140 + 4 * (cpu))
 #define     MVPP2_AGGR_TXQ_DESC_SIZE_MASK      0x3ff0
 #define MVPP2_AGGR_TXQ_STATUS_REG(cpu)         (0x2180 + 4 * (cpu))
 #define MVPP2_BM_PHY_ALLOC_REG(pool)           (0x6400 + ((pool) * 4))
 #define     MVPP2_BM_PHY_ALLOC_GRNTD_MASK      BIT(0)
 #define MVPP2_BM_VIRT_ALLOC_REG                        0x6440
+#define MVPP22_BM_ADDR_HIGH_ALLOC              0x6444
+#define     MVPP22_BM_ADDR_HIGH_PHYS_MASK      0xff
+#define     MVPP22_BM_ADDR_HIGH_VIRT_MASK      0xff00
+#define     MVPP22_BM_ADDR_HIGH_VIRT_SHIFT     8
 #define MVPP2_BM_PHY_RLS_REG(pool)             (0x6480 + ((pool) * 4))
 #define     MVPP2_BM_PHY_RLS_MC_BUFF_MASK      BIT(0)
 #define     MVPP2_BM_PHY_RLS_PRIO_EN_MASK      BIT(1)
 #define     MVPP2_BM_PHY_RLS_GRNTD_MASK                BIT(2)
 #define MVPP2_BM_VIRT_RLS_REG                  0x64c0
-#define MVPP2_BM_MC_RLS_REG                    0x64c4
-#define     MVPP2_BM_MC_ID_MASK                        0xfff
-#define     MVPP2_BM_FORCE_RELEASE_MASK                BIT(12)
+#define MVPP22_BM_ADDR_HIGH_RLS_REG            0x64c4
+#define     MVPP22_BM_ADDR_HIGH_PHYS_RLS_MASK  0xff
+#define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK  0xff00
+#define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT 8
 
 /* TX Scheduler registers */
 #define MVPP2_TXP_SCHED_PORT_INDEX_REG         0x8000
 #define      MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK        0x1fc0
 #define      MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \
                                        MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK)
+#define MVPP22_GMAC_CTRL_4_REG                 0x90
+#define      MVPP22_CTRL4_EXT_PIN_GMII_SEL     BIT(0)
+#define      MVPP22_CTRL4_DP_CLK_SEL           BIT(5)
+#define      MVPP22_CTRL4_SYNC_BYPASS          BIT(6)
+#define      MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7)
+
+/* Per-port XGMAC registers. PPv2.2 only, only for GOP port 0,
+ * relative to port->base.
+ */
+#define MVPP22_XLG_CTRL3_REG                   0x11c
+#define      MVPP22_XLG_CTRL3_MACMODESELECT_MASK       (7 << 13)
+#define      MVPP22_XLG_CTRL3_MACMODESELECT_GMAC       (0 << 13)
+
+/* SMI registers. PPv2.2 only, relative to priv->iface_base. */
+#define MVPP22_SMI_MISC_CFG_REG                        0x1204
+#define      MVPP22_SMI_POLLING_EN             BIT(10)
+
+#define MVPP22_GMAC_BASE(port)         (0x7000 + (port) * 0x1000 + 0xe00)
 
 #define MVPP2_CAUSE_TXQ_SENT_DESC_ALL_MASK     0xff
 
@@ -615,6 +640,11 @@ enum mvpp2_prs_l3_cast {
  */
 #define MVPP2_BM_SHORT_PKT_SIZE                MVPP2_RX_MAX_PKT_SIZE(512)
 
+#define MVPP21_ADDR_SPACE_SZ           0
+#define MVPP22_ADDR_SPACE_SZ           SZ_64K
+
+#define MVPP2_MAX_CPUS                 4
+
 enum mvpp2_bm_type {
        MVPP2_BM_FREE,
        MVPP2_BM_SWF_LONG,
@@ -626,8 +656,14 @@ enum mvpp2_bm_type {
 /* Shared Packet Processor resources */
 struct mvpp2 {
        /* Shared registers' base addresses */
-       void __iomem *base;
        void __iomem *lms_base;
+       void __iomem *iface_base;
+
+       /* On PPv2.2, each CPU can access the base registers through
+        * its own address space, each spaced 64 KB apart.
+        */
+       void __iomem *cpu_base[MVPP2_MAX_CPUS];
 
        /* Common clocks */
        struct clk *pp_clk;
@@ -649,6 +685,9 @@ struct mvpp2 {
 
        /* Tclk value */
        u32 tclk;
+
+       /* HW version */
+       enum { MVPP21, MVPP22 } hw_version;
 };
 
 struct mvpp2_pcpu_stats {
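The cpu_base[] array introduced above is what backs the per-CPU register windows: on PPv2.1 every entry points at the same registers (MVPP21_ADDR_SPACE_SZ is 0), while on PPv2.2 consecutive entries are 64 KB apart. A minimal sketch of how probe-time code could fill it, assuming a hypothetical helper name and an already-mapped base pointer (not part of the patch):

	static void mvpp2_setup_cpu_windows(struct mvpp2 *priv, void __iomem *base)
	{
		/* 0 on PPv2.1 (all CPUs share one window), 64 KB on PPv2.2 */
		u32 sz = priv->hw_version == MVPP21 ?
			 MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ;
		int cpu;

		for (cpu = 0; cpu < MVPP2_MAX_CPUS; cpu++)
			priv->cpu_base[cpu] = base + cpu * sz;
	}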
@@ -670,6 +709,11 @@ struct mvpp2_port_pcpu {
 struct mvpp2_port {
        u8 id;
 
+       /* Index of the port from the "group of ports" complex point
+        * of view
+        */
+       int gop_id;
+
        int irq;
 
        struct mvpp2 *priv;
@@ -741,22 +785,24 @@ struct mvpp2_port {
 #define MVPP2_RXD_L3_IP6               BIT(30)
 #define MVPP2_RXD_BUF_HDR              BIT(31)
 
-struct mvpp2_tx_desc {
+/* HW TX descriptor for PPv2.1 */
+struct mvpp21_tx_desc {
        u32 command;            /* Options used by HW for packet transmitting.*/
        u8  packet_offset;      /* the offset from the buffer beginning */
        u8  phys_txq;           /* destination queue ID                 */
        u16 data_size;          /* data size of transmitted packet in bytes */
-       u32 buf_phys_addr;      /* physical addr of transmitted buffer  */
+       u32 buf_dma_addr;       /* physical addr of transmitted buffer  */
        u32 buf_cookie;         /* cookie for access to TX buffer in tx path */
        u32 reserved1[3];       /* hw_cmd (for future use, BM, PON, PNC) */
        u32 reserved2;          /* reserved (for future use)            */
 };
 
-struct mvpp2_rx_desc {
+/* HW RX descriptor for PPv2.1 */
+struct mvpp21_rx_desc {
        u32 status;             /* info about received packet           */
        u16 reserved1;          /* parser_info (for future use, PnC)    */
        u16 data_size;          /* size of received packet in bytes     */
-       u32 buf_phys_addr;      /* physical address of the buffer       */
+       u32 buf_dma_addr;       /* physical address of the buffer       */
        u32 buf_cookie;         /* cookie for access to RX buffer in rx path */
        u16 reserved2;          /* gem_port_id (for future use, PON)    */
        u16 reserved3;          /* csum_l4 (for future use, PnC)        */
@@ -767,12 +813,51 @@ struct mvpp2_rx_desc {
        u32 reserved8;
 };
 
+/* HW TX descriptor for PPv2.2 */
+struct mvpp22_tx_desc {
+       u32 command;
+       u8  packet_offset;
+       u8  phys_txq;
+       u16 data_size;
+       u64 reserved1;
+       u64 buf_dma_addr_ptp;
+       u64 buf_cookie_misc;
+};
+
+/* HW RX descriptor for PPv2.2 */
+struct mvpp22_rx_desc {
+       u32 status;
+       u16 reserved1;
+       u16 data_size;
+       u32 reserved2;
+       u32 reserved3;
+       u64 buf_dma_addr_key_hash;
+       u64 buf_cookie_misc;
+};
+
+/* Opaque type used by the driver to manipulate the HW TX and RX
+ * descriptors
+ */
+struct mvpp2_tx_desc {
+       union {
+               struct mvpp21_tx_desc pp21;
+               struct mvpp22_tx_desc pp22;
+       };
+};
+
+struct mvpp2_rx_desc {
+       union {
+               struct mvpp21_rx_desc pp21;
+               struct mvpp22_rx_desc pp22;
+       };
+};
+
 struct mvpp2_txq_pcpu_buf {
        /* Transmitted SKB */
        struct sk_buff *skb;
 
        /* Physical address of transmitted buffer */
-       dma_addr_t phys;
+       dma_addr_t dma;
 
        /* Size transmitted */
        size_t size;
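The PPv2.1 and PPv2.2 descriptor layouts are the same size (32 bytes, i.e. MVPP2_DESC_ALIGNED_SIZE), which is what lets the opaque unions above be used for ring allocation and indexing without any per-version arithmetic. A compile-time check along these lines (not part of the patch; it would sit inside any function, e.g. the probe routine) makes that assumption explicit:

	BUILD_BUG_ON(sizeof(struct mvpp21_tx_desc) != sizeof(struct mvpp22_tx_desc));
	BUILD_BUG_ON(sizeof(struct mvpp2_tx_desc) != MVPP2_DESC_ALIGNED_SIZE);
	BUILD_BUG_ON(sizeof(struct mvpp2_rx_desc) != MVPP2_DESC_ALIGNED_SIZE);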
@@ -825,7 +910,7 @@ struct mvpp2_tx_queue {
        struct mvpp2_tx_desc *descs;
 
        /* DMA address of the Tx DMA descriptors array */
-       dma_addr_t descs_phys;
+       dma_addr_t descs_dma;
 
        /* Index of the last Tx DMA descriptor */
        int last_desc;
@@ -848,7 +933,7 @@ struct mvpp2_rx_queue {
        struct mvpp2_rx_desc *descs;
 
        /* DMA address of the RX DMA descriptors array */
-       dma_addr_t descs_phys;
+       dma_addr_t descs_dma;
 
        /* Index of the last RX DMA descriptor */
        int last_desc;
@@ -912,6 +997,8 @@ struct mvpp2_bm_pool {
 
        /* Buffer Pointers Pool External (BPPE) size */
        int size;
+       /* BPPE size in bytes */
+       int size_bytes;
        /* Number of buffers for this pool */
        int buf_num;
        /* Pool buffer size */
@@ -922,29 +1009,13 @@ struct mvpp2_bm_pool {
 
        /* BPPE virtual base address */
        u32 *virt_addr;
-       /* BPPE physical base address */
-       dma_addr_t phys_addr;
+       /* BPPE DMA base address */
+       dma_addr_t dma_addr;
 
        /* Ports using BM pool */
        u32 port_map;
 };
 
-struct mvpp2_buff_hdr {
-       u32 next_buff_phys_addr;
-       u32 next_buff_virt_addr;
-       u16 byte_count;
-       u16 info;
-       u8  reserved1;          /* bm_qset (for future use, BM)         */
-};
-
-/* Buffer header info bits */
-#define MVPP2_B_HDR_INFO_MC_ID_MASK    0xfff
-#define MVPP2_B_HDR_INFO_MC_ID(info)   ((info) & MVPP2_B_HDR_INFO_MC_ID_MASK)
-#define MVPP2_B_HDR_INFO_LAST_OFFS     12
-#define MVPP2_B_HDR_INFO_LAST_MASK     BIT(12)
-#define MVPP2_B_HDR_INFO_IS_LAST(info) \
-          ((info & MVPP2_B_HDR_INFO_LAST_MASK) >> MVPP2_B_HDR_INFO_LAST_OFFS)
-
 /* Static declarations */
 
 /* Number of RXQs used by single port */
@@ -959,12 +1030,177 @@ static int txq_number = MVPP2_MAX_TXQ;
 
 static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
 {
-       writel(data, priv->base + offset);
+       writel(data, priv->cpu_base[0] + offset);
 }
 
 static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 {
-       return readl(priv->base + offset);
+       return readl(priv->cpu_base[0] + offset);
+}
+
+/* These accessors should be used to access:
+ *
+ * - per-CPU registers, where each CPU has its own copy of the
+ *   register.
+ *
+ *   MVPP2_BM_VIRT_ALLOC_REG
+ *   MVPP22_BM_ADDR_HIGH_ALLOC
+ *   MVPP22_BM_ADDR_HIGH_RLS_REG
+ *   MVPP2_BM_VIRT_RLS_REG
+ *   MVPP2_ISR_RX_TX_CAUSE_REG
+ *   MVPP2_ISR_RX_TX_MASK_REG
+ *   MVPP2_TXQ_NUM_REG
+ *   MVPP2_AGGR_TXQ_UPDATE_REG
+ *   MVPP2_TXQ_RSVD_REQ_REG
+ *   MVPP2_TXQ_RSVD_RSLT_REG
+ *   MVPP2_TXQ_SENT_REG
+ *   MVPP2_RXQ_NUM_REG
+ *
+ * - global registers that must be accessed through a specific CPU
+ *   window, because they are related to an access to a per-CPU
+ *   register
+ *
+ *   MVPP2_BM_PHY_ALLOC_REG    (related to MVPP2_BM_VIRT_ALLOC_REG)
+ *   MVPP2_BM_PHY_RLS_REG      (related to MVPP2_BM_VIRT_RLS_REG)
+ *   MVPP2_RXQ_THRESH_REG      (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_RXQ_DESC_ADDR_REG   (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_RXQ_DESC_SIZE_REG   (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_RXQ_INDEX_REG       (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_TXQ_PENDING_REG     (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_DESC_ADDR_REG   (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_DESC_SIZE_REG   (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_INDEX_REG       (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_PREF_BUF_REG    (related to MVPP2_TXQ_NUM_REG)
+ */
+static void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
+                              u32 offset, u32 data)
+{
+       writel(data, priv->cpu_base[cpu] + offset);
+}
+
+static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
+                            u32 offset)
+{
+       return readl(priv->cpu_base[cpu] + offset);
+}
+
+static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
+                                           struct mvpp2_tx_desc *tx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return tx_desc->pp21.buf_dma_addr;
+       else
+               return tx_desc->pp22.buf_dma_addr_ptp & GENMASK_ULL(40, 0);
+}
+
+static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port,
+                                     struct mvpp2_tx_desc *tx_desc,
+                                     dma_addr_t dma_addr)
+{
+       if (port->priv->hw_version == MVPP21) {
+               tx_desc->pp21.buf_dma_addr = dma_addr;
+       } else {
+               u64 val = (u64)dma_addr;
+
+               tx_desc->pp22.buf_dma_addr_ptp &= ~GENMASK_ULL(40, 0);
+               tx_desc->pp22.buf_dma_addr_ptp |= val;
+       }
+}
+
+static size_t mvpp2_txdesc_size_get(struct mvpp2_port *port,
+                                   struct mvpp2_tx_desc *tx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return tx_desc->pp21.data_size;
+       else
+               return tx_desc->pp22.data_size;
+}
+
+static void mvpp2_txdesc_size_set(struct mvpp2_port *port,
+                                 struct mvpp2_tx_desc *tx_desc,
+                                 size_t size)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.data_size = size;
+       else
+               tx_desc->pp22.data_size = size;
+}
+
+static void mvpp2_txdesc_txq_set(struct mvpp2_port *port,
+                                struct mvpp2_tx_desc *tx_desc,
+                                unsigned int txq)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.phys_txq = txq;
+       else
+               tx_desc->pp22.phys_txq = txq;
+}
+
+static void mvpp2_txdesc_cmd_set(struct mvpp2_port *port,
+                                struct mvpp2_tx_desc *tx_desc,
+                                unsigned int command)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.command = command;
+       else
+               tx_desc->pp22.command = command;
+}
+
+static void mvpp2_txdesc_offset_set(struct mvpp2_port *port,
+                                   struct mvpp2_tx_desc *tx_desc,
+                                   unsigned int offset)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.packet_offset = offset;
+       else
+               tx_desc->pp22.packet_offset = offset;
+}
+
+static unsigned int mvpp2_txdesc_offset_get(struct mvpp2_port *port,
+                                           struct mvpp2_tx_desc *tx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return tx_desc->pp21.packet_offset;
+       else
+               return tx_desc->pp22.packet_offset;
+}
+
+static dma_addr_t mvpp2_rxdesc_dma_addr_get(struct mvpp2_port *port,
+                                           struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.buf_dma_addr;
+       else
+               return rx_desc->pp22.buf_dma_addr_key_hash & GENMASK_ULL(40, 0);
+}
+
+static unsigned long mvpp2_rxdesc_cookie_get(struct mvpp2_port *port,
+                                            struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.buf_cookie;
+       else
+               return rx_desc->pp22.buf_cookie_misc & GENMASK_ULL(40, 0);
+}
+
+static size_t mvpp2_rxdesc_size_get(struct mvpp2_port *port,
+                                   struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.data_size;
+       else
+               return rx_desc->pp22.data_size;
+}
+
+static u32 mvpp2_rxdesc_status_get(struct mvpp2_port *port,
+                                  struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.status;
+       else
+               return rx_desc->pp22.status;
 }
 
 static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu)
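The mvpp2_percpu_read()/mvpp2_percpu_write() helpers above, together with the register list in the comment, implement the "indirect access" pattern: a queue number is first written to a selector register, and the related registers are then accessed through the same per-CPU window. A minimal illustration (the helper name is made up for this sketch; the caller is expected not to migrate between CPUs across the two accesses):

	static u32 mvpp2_rxq_desc_addr_peek(struct mvpp2 *priv, int rxq_id)
	{
		int cpu = smp_processor_id();

		/* Select the RX queue, then read one of its indirect
		 * registers through the same CPU's window.
		 */
		mvpp2_percpu_write(priv, cpu, MVPP2_RXQ_NUM_REG, rxq_id);
		return mvpp2_percpu_read(priv, cpu, MVPP2_RXQ_DESC_ADDR_REG);
	}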
@@ -974,15 +1210,17 @@ static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu)
                txq_pcpu->txq_get_index = 0;
 }
 
-static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu,
+static void mvpp2_txq_inc_put(struct mvpp2_port *port,
+                             struct mvpp2_txq_pcpu *txq_pcpu,
                              struct sk_buff *skb,
                              struct mvpp2_tx_desc *tx_desc)
 {
        struct mvpp2_txq_pcpu_buf *tx_buf =
                txq_pcpu->buffs + txq_pcpu->txq_put_index;
        tx_buf->skb = skb;
-       tx_buf->size = tx_desc->data_size;
-       tx_buf->phys = tx_desc->buf_phys_addr + tx_desc->packet_offset;
+       tx_buf->size = mvpp2_txdesc_size_get(port, tx_desc);
+       tx_buf->dma = mvpp2_txdesc_dma_addr_get(port, tx_desc) +
+               mvpp2_txdesc_offset_get(port, tx_desc);
        txq_pcpu->txq_put_index++;
        if (txq_pcpu->txq_put_index == txq_pcpu->size)
                txq_pcpu->txq_put_index = 0;
@@ -3378,27 +3616,39 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
                                struct mvpp2 *priv,
                                struct mvpp2_bm_pool *bm_pool, int size)
 {
-       int size_bytes;
        u32 val;
 
-       size_bytes = sizeof(u32) * size;
-       bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, size_bytes,
-                                               &bm_pool->phys_addr,
+       /* Number of buffer pointers must be a multiple of 16, as per
+        * hardware constraints
+        */
+       if (!IS_ALIGNED(size, 16))
+               return -EINVAL;
+
+       /* PPv2.1 needs 8 bytes per buffer pointer, PPv2.2 needs 16
+        * bytes per buffer pointer
+        */
+       if (priv->hw_version == MVPP21)
+               bm_pool->size_bytes = 2 * sizeof(u32) * size;
+       else
+               bm_pool->size_bytes = 2 * sizeof(u64) * size;
+
+       bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes,
+                                               &bm_pool->dma_addr,
                                                GFP_KERNEL);
        if (!bm_pool->virt_addr)
                return -ENOMEM;
 
        if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr,
                        MVPP2_BM_POOL_PTR_ALIGN)) {
-               dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr,
-                                 bm_pool->phys_addr);
+               dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+                                 bm_pool->virt_addr, bm_pool->dma_addr);
                dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
                        bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
                return -ENOMEM;
        }
 
        mvpp2_write(priv, MVPP2_BM_POOL_BASE_REG(bm_pool->id),
-                   bm_pool->phys_addr);
+                   lower_32_bits(bm_pool->dma_addr));
        mvpp2_write(priv, MVPP2_BM_POOL_SIZE_REG(bm_pool->id), size);
 
        val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id));
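As a concrete instance of the BPPE sizing rule above: a pool of 2048 buffer pointers satisfies the multiple-of-16 constraint and needs 2048 * 8 = 16 KB of coherent memory on PPv2.1 versus 2048 * 16 = 32 KB on PPv2.2, each entry doubling in size to make room for the wider buffer addresses.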
@@ -3426,6 +3676,34 @@ static void mvpp2_bm_pool_bufsize_set(struct mvpp2 *priv,
        mvpp2_write(priv, MVPP2_POOL_BUF_SIZE_REG(bm_pool->id), val);
 }
 
+static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
+                                   struct mvpp2_bm_pool *bm_pool,
+                                   dma_addr_t *dma_addr,
+                                   phys_addr_t *phys_addr)
+{
+       int cpu = smp_processor_id();
+
+       *dma_addr = mvpp2_percpu_read(priv, cpu,
+                                     MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
+       *phys_addr = mvpp2_percpu_read(priv, cpu, MVPP2_BM_VIRT_ALLOC_REG);
+
+       if (priv->hw_version == MVPP22) {
+               u32 val;
+               u32 dma_addr_highbits, phys_addr_highbits;
+
+               val = mvpp2_percpu_read(priv, cpu, MVPP22_BM_ADDR_HIGH_ALLOC);
+               dma_addr_highbits = (val & MVPP22_BM_ADDR_HIGH_PHYS_MASK);
+               phys_addr_highbits = (val & MVPP22_BM_ADDR_HIGH_VIRT_MASK) >>
+                       MVPP22_BM_ADDR_HIGH_VIRT_SHIFT;
+
+               if (sizeof(dma_addr_t) == 8)
+                       *dma_addr |= (u64)dma_addr_highbits << 32;
+
+               if (sizeof(phys_addr_t) == 8)
+                       *phys_addr |= (u64)phys_addr_highbits << 32;
+       }
+}
+
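To make the register split used by mvpp2_bm_bufs_get_addrs() above concrete: for a 40-bit buffer DMA address such as 0x12_3456_7890, the BM returns the low 32 bits (0x34567890) in MVPP2_BM_PHY_ALLOC_REG() and the high byte (0x12) in the low bits of MVPP22_BM_ADDR_HIGH_ALLOC; the helper only folds the high byte back in as bits [39:32] when dma_addr_t (or phys_addr_t for the cookie) is 64-bit, since a 32-bit type could not represent it anyway.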
 /* Free all buffers from the pool */
 static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
                               struct mvpp2_bm_pool *bm_pool)
@@ -3433,21 +3711,21 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
        int i;
 
        for (i = 0; i < bm_pool->buf_num; i++) {
-               dma_addr_t buf_phys_addr;
-               unsigned long vaddr;
+               dma_addr_t buf_dma_addr;
+               phys_addr_t buf_phys_addr;
+               void *data;
 
-               /* Get buffer virtual address (indirect access) */
-               buf_phys_addr = mvpp2_read(priv,
-                                          MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
-               vaddr = mvpp2_read(priv, MVPP2_BM_VIRT_ALLOC_REG);
+               mvpp2_bm_bufs_get_addrs(dev, priv, bm_pool,
+                                       &buf_dma_addr, &buf_phys_addr);
 
-               dma_unmap_single(dev, buf_phys_addr,
+               dma_unmap_single(dev, buf_dma_addr,
                                 bm_pool->buf_size, DMA_FROM_DEVICE);
 
-               if (!vaddr)
+               data = (void *)phys_to_virt(buf_phys_addr);
+               if (!data)
                        break;
 
-               mvpp2_frag_free(bm_pool, (void *)vaddr);
+               mvpp2_frag_free(bm_pool, data);
        }
 
        /* Update BM driver with number of buffers removed from pool */
@@ -3471,9 +3749,9 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
        val |= MVPP2_BM_STOP_MASK;
        mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
 
-       dma_free_coherent(&pdev->dev, sizeof(u32) * bm_pool->size,
+       dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
                          bm_pool->virt_addr,
-                         bm_pool->phys_addr);
+                         bm_pool->dma_addr);
        return 0;
 }
 
@@ -3529,17 +3807,20 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
 static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
                                    int lrxq, int long_pool)
 {
-       u32 val;
+       u32 val, mask;
        int prxq;
 
        /* Get queue physical ID */
        prxq = port->rxqs[lrxq]->id;
 
-       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
-       val &= ~MVPP2_RXQ_POOL_LONG_MASK;
-       val |= ((long_pool << MVPP2_RXQ_POOL_LONG_OFFS) &
-                   MVPP2_RXQ_POOL_LONG_MASK);
+       if (port->priv->hw_version == MVPP21)
+               mask = MVPP21_RXQ_POOL_LONG_MASK;
+       else
+               mask = MVPP22_RXQ_POOL_LONG_MASK;
 
+       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+       val &= ~mask;
+       val |= (long_pool << MVPP2_RXQ_POOL_LONG_OFFS) & mask;
        mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
@@ -3547,40 +3828,45 @@ static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
 static void mvpp2_rxq_short_pool_set(struct mvpp2_port *port,
                                     int lrxq, int short_pool)
 {
-       u32 val;
+       u32 val, mask;
        int prxq;
 
        /* Get queue physical ID */
        prxq = port->rxqs[lrxq]->id;
 
-       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
-       val &= ~MVPP2_RXQ_POOL_SHORT_MASK;
-       val |= ((short_pool << MVPP2_RXQ_POOL_SHORT_OFFS) &
-                   MVPP2_RXQ_POOL_SHORT_MASK);
+       if (port->priv->hw_version == MVPP21)
+               mask = MVPP21_RXQ_POOL_SHORT_MASK;
+       else
+               mask = MVPP22_RXQ_POOL_SHORT_MASK;
 
+       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+       val &= ~mask;
+       val |= (short_pool << MVPP2_RXQ_POOL_SHORT_OFFS) & mask;
        mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
 static void *mvpp2_buf_alloc(struct mvpp2_port *port,
                             struct mvpp2_bm_pool *bm_pool,
-                            dma_addr_t *buf_phys_addr,
+                            dma_addr_t *buf_dma_addr,
+                            phys_addr_t *buf_phys_addr,
                             gfp_t gfp_mask)
 {
-       dma_addr_t phys_addr;
+       dma_addr_t dma_addr;
        void *data;
 
        data = mvpp2_frag_alloc(bm_pool);
        if (!data)
                return NULL;
 
-       phys_addr = dma_map_single(port->dev->dev.parent, data,
-                                  MVPP2_RX_BUF_SIZE(bm_pool->pkt_size),
-                                   DMA_FROM_DEVICE);
-       if (unlikely(dma_mapping_error(port->dev->dev.parent, phys_addr))) {
+       dma_addr = dma_map_single(port->dev->dev.parent, data,
+                                 MVPP2_RX_BUF_SIZE(bm_pool->pkt_size),
+                                 DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(port->dev->dev.parent, dma_addr))) {
                mvpp2_frag_free(bm_pool, data);
                return NULL;
        }
-       *buf_phys_addr = phys_addr;
+       *buf_dma_addr = dma_addr;
+       *buf_phys_addr = virt_to_phys(data);
 
        return data;
 }
@@ -3604,37 +3890,46 @@ static inline int mvpp2_bm_cookie_pool_get(unsigned long cookie)
 
 /* Release buffer to BM */
 static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
-                                    dma_addr_t buf_phys_addr,
-                                    unsigned long buf_virt_addr)
+                                    dma_addr_t buf_dma_addr,
+                                    phys_addr_t buf_phys_addr)
 {
-       mvpp2_write(port->priv, MVPP2_BM_VIRT_RLS_REG, buf_virt_addr);
-       mvpp2_write(port->priv, MVPP2_BM_PHY_RLS_REG(pool), buf_phys_addr);
-}
+       int cpu = smp_processor_id();
 
-/* Release multicast buffer */
-static void mvpp2_bm_pool_mc_put(struct mvpp2_port *port, int pool,
-                                dma_addr_t buf_phys_addr,
-                                unsigned long buf_virt_addr,
-                                int mc_id)
-{
-       u32 val = 0;
+       if (port->priv->hw_version == MVPP22) {
+               u32 val = 0;
+
+               if (sizeof(dma_addr_t) == 8)
+                       val |= upper_32_bits(buf_dma_addr) &
+                               MVPP22_BM_ADDR_HIGH_PHYS_RLS_MASK;
+
+               if (sizeof(phys_addr_t) == 8)
+                       val |= (upper_32_bits(buf_phys_addr)
+                               << MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
+                               MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;
 
-       val |= (mc_id & MVPP2_BM_MC_ID_MASK);
-       mvpp2_write(port->priv, MVPP2_BM_MC_RLS_REG, val);
+               mvpp2_percpu_write(port->priv, cpu,
+                                  MVPP22_BM_ADDR_HIGH_RLS_REG, val);
+       }
 
-       mvpp2_bm_pool_put(port, pool,
-                         buf_phys_addr | MVPP2_BM_PHY_RLS_MC_BUFF_MASK,
-                         buf_virt_addr);
+       /* MVPP2_BM_VIRT_RLS_REG is not interpreted by HW, and simply
+        * returned in the "cookie" field of the RX
+        * descriptor. Instead of storing the virtual address, we
+        * store the physical address
+        */
+       mvpp2_percpu_write(port->priv, cpu,
+                          MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
+       mvpp2_percpu_write(port->priv, cpu,
+                          MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
 }
 
 /* Refill BM pool */
 static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
-                             dma_addr_t phys_addr,
-                             unsigned long cookie)
+                             dma_addr_t dma_addr,
+                             phys_addr_t phys_addr)
 {
        int pool = mvpp2_bm_cookie_pool_get(bm);
 
-       mvpp2_bm_pool_put(port, pool, phys_addr, cookie);
+       mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 }
 
 /* Allocate buffers for the pool */
@@ -3642,7 +3937,8 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
                             struct mvpp2_bm_pool *bm_pool, int buf_num)
 {
        int i, buf_size, total_size;
-       dma_addr_t phys_addr;
+       dma_addr_t dma_addr;
+       phys_addr_t phys_addr;
        void *buf;
 
        buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
@@ -3657,12 +3953,13 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
        }
 
        for (i = 0; i < buf_num; i++) {
-               buf = mvpp2_buf_alloc(port, bm_pool, &phys_addr, GFP_KERNEL);
+               buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr,
+                                     &phys_addr, GFP_KERNEL);
                if (!buf)
                        break;
 
-               mvpp2_bm_pool_put(port, bm_pool->id, phys_addr,
-                                 (unsigned long)buf);
+               mvpp2_bm_pool_put(port, bm_pool->id, dma_addr,
+                                 phys_addr);
        }
 
        /* Update BM driver with number of buffers added to pool */
@@ -3830,7 +4127,8 @@ static void mvpp2_interrupts_mask(void *arg)
 {
        struct mvpp2_port *port = arg;
 
-       mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id), 0);
+       mvpp2_percpu_write(port->priv, smp_processor_id(),
+                          MVPP2_ISR_RX_TX_MASK_REG(port->id), 0);
 }
 
 /* Unmask the current CPU's Rx/Tx interrupts */
@@ -3838,17 +4136,46 @@ static void mvpp2_interrupts_unmask(void *arg)
 {
        struct mvpp2_port *port = arg;
 
-       mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id),
-                   (MVPP2_CAUSE_MISC_SUM_MASK |
-                    MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
+       mvpp2_percpu_write(port->priv, smp_processor_id(),
+                          MVPP2_ISR_RX_TX_MASK_REG(port->id),
+                          (MVPP2_CAUSE_MISC_SUM_MASK |
+                           MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
 }
 
 /* Port configuration routines */
 
+static void mvpp22_port_mii_set(struct mvpp2_port *port)
+{
+       u32 val;
+
+       return;
+
+       /* Only GOP port 0 has an XLG MAC */
+       if (port->gop_id == 0) {
+               val = readl(port->base + MVPP22_XLG_CTRL3_REG);
+               val &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+               val |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
+               writel(val, port->base + MVPP22_XLG_CTRL3_REG);
+       }
+
+       val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
+       if (port->phy_interface == PHY_INTERFACE_MODE_RGMII)
+               val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+       else
+               val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+       val &= ~MVPP22_CTRL4_DP_CLK_SEL;
+       val |= MVPP22_CTRL4_SYNC_BYPASS;
+       val |= MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+       writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
+}
+
 static void mvpp2_port_mii_set(struct mvpp2_port *port)
 {
        u32 val;
 
+       if (port->priv->hw_version == MVPP22)
+               mvpp22_port_mii_set(port);
+
        val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 
        switch (port->phy_interface) {
@@ -3952,16 +4279,18 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
 {
        int tx_port_num, val, queue, ptxq, lrxq;
 
-       /* Configure port to loopback if needed */
-       if (port->flags & MVPP2_F_LOOPBACK)
-               mvpp2_port_loopback_set(port);
+       if (port->priv->hw_version == MVPP21) {
+               /* Configure port to loopback if needed */
+               if (port->flags & MVPP2_F_LOOPBACK)
+                       mvpp2_port_loopback_set(port);
 
-       /* Update TX FIFO MIN Threshold */
-       val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
-       val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
-       /* Min. TX threshold must be less than minimal packet length */
-       val |= MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(64 - 4 - 2);
-       writel(val, port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+               /* Update TX FIFO MIN Threshold */
+               val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+               val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
+               /* Min. TX threshold must be less than minimal packet length */
+               val |= MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(64 - 4 - 2);
+               writel(val, port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+       }
 
        /* Disable Legacy WRR, Disable EJP, Release from reset */
        tx_port_num = mvpp2_egress_port(port);
@@ -4149,11 +4478,15 @@ static void mvpp2_rxq_offset_set(struct mvpp2_port *port,
 }
 
 /* Obtain BM cookie information from descriptor */
-static u32 mvpp2_bm_cookie_build(struct mvpp2_rx_desc *rx_desc)
+static u32 mvpp2_bm_cookie_build(struct mvpp2_port *port,
+                                struct mvpp2_rx_desc *rx_desc)
 {
-       int pool = (rx_desc->status & MVPP2_RXD_BM_POOL_ID_MASK) >>
-                  MVPP2_RXD_BM_POOL_ID_OFFS;
        int cpu = smp_processor_id();
+       int pool;
+
+       pool = (mvpp2_rxdesc_status_get(port, rx_desc) &
+               MVPP2_RXD_BM_POOL_ID_MASK) >>
+               MVPP2_RXD_BM_POOL_ID_OFFS;
 
        return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
               ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
@@ -4161,18 +4494,6 @@ static u32 mvpp2_bm_cookie_build(struct mvpp2_rx_desc *rx_desc)
 
 /* Tx descriptors helper methods */
 
-/* Get number of Tx descriptors waiting to be transmitted by HW */
-static int mvpp2_txq_pend_desc_num_get(struct mvpp2_port *port,
-                                      struct mvpp2_tx_queue *txq)
-{
-       u32 val;
-
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       val = mvpp2_read(port->priv, MVPP2_TXQ_PENDING_REG);
-
-       return val & MVPP2_TXQ_PENDING_MASK;
-}
-
 /* Get pointer to next Tx descriptor to be processed (send) by HW */
 static struct mvpp2_tx_desc *
 mvpp2_txq_next_desc_get(struct mvpp2_tx_queue *txq)
@@ -4187,7 +4508,8 @@ mvpp2_txq_next_desc_get(struct mvpp2_tx_queue *txq)
 static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending)
 {
        /* aggregated access - relevant TXQ number is written in TX desc */
-       mvpp2_write(port->priv, MVPP2_AGGR_TXQ_UPDATE_REG, pending);
+       mvpp2_percpu_write(port->priv, smp_processor_id(),
+                          MVPP2_AGGR_TXQ_UPDATE_REG, pending);
 }
 
 
@@ -4216,11 +4538,12 @@ static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
                                         struct mvpp2_tx_queue *txq, int num)
 {
        u32 val;
+       int cpu = smp_processor_id();
 
        val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
-       mvpp2_write(priv, MVPP2_TXQ_RSVD_REQ_REG, val);
+       mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
 
-       val = mvpp2_read(priv, MVPP2_TXQ_RSVD_RSLT_REG);
+       val = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
 
        return val & MVPP2_TXQ_RSVD_RSLT_MASK;
 }
@@ -4321,7 +4644,8 @@ static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
        u32 val;
 
        /* Reading status reg resets transmitted descriptor counter */
-       val = mvpp2_read(port->priv, MVPP2_TXQ_SENT_REG(txq->id));
+       val = mvpp2_percpu_read(port->priv, smp_processor_id(),
+                               MVPP2_TXQ_SENT_REG(txq->id));
 
        return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
                MVPP2_TRANSMITTED_COUNT_OFFSET;
@@ -4335,7 +4659,8 @@ static void mvpp2_txq_sent_counter_clear(void *arg)
        for (queue = 0; queue < txq_number; queue++) {
                int id = port->txqs[queue]->id;
 
-               mvpp2_read(port->priv, MVPP2_TXQ_SENT_REG(id));
+               mvpp2_percpu_read(port->priv, smp_processor_id(),
+                                 MVPP2_TXQ_SENT_REG(id));
        }
 }
 
@@ -4394,12 +4719,14 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
 static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
                                   struct mvpp2_rx_queue *rxq)
 {
+       int cpu = smp_processor_id();
+
        if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
                rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
 
-       mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id);
-       mvpp2_write(port->priv, MVPP2_RXQ_THRESH_REG,
-                   rxq->pkts_coal);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
+                          rxq->pkts_coal);
 }
 
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
@@ -4449,7 +4776,7 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
                struct mvpp2_txq_pcpu_buf *tx_buf =
                        txq_pcpu->buffs + txq_pcpu->txq_get_index;
 
-               dma_unmap_single(port->dev->dev.parent, tx_buf->phys,
+               dma_unmap_single(port->dev->dev.parent, tx_buf->dma,
                                 tx_buf->size, DMA_TO_DEVICE);
                if (tx_buf->skb)
                        dev_kfree_skb_any(tx_buf->skb);
@@ -4527,10 +4854,12 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
                               int desc_num, int cpu,
                               struct mvpp2 *priv)
 {
+       u32 txq_dma;
+
        /* Allocate memory for TX descriptors */
        aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
                                desc_num * MVPP2_DESC_ALIGNED_SIZE,
-                               &aggr_txq->descs_phys, GFP_KERNEL);
+                               &aggr_txq->descs_dma, GFP_KERNEL);
        if (!aggr_txq->descs)
                return -ENOMEM;
 
@@ -4540,10 +4869,16 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
        aggr_txq->next_desc_to_proc = mvpp2_read(priv,
                                                 MVPP2_AGGR_TXQ_INDEX_REG(cpu));
 
-       /* Set Tx descriptors queue starting address */
-       /* indirect access */
-       mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu),
-                   aggr_txq->descs_phys);
+       /* Set Tx descriptors queue starting address indirect
+        * access
+        */
+       if (priv->hw_version == MVPP21)
+               txq_dma = aggr_txq->descs_dma;
+       else
+               txq_dma = aggr_txq->descs_dma >>
+                       MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
+
+       mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
        mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num);
 
        return 0;
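The shift by MVPP22_AGGR_TXQ_DESC_ADDR_OFFS above reflects that on PPv2.2 the aggregated queue address register holds the descriptor ring's DMA address with the low 8 bits dropped, i.e. bits [39:8] of a 40-bit address; for example, a ring at DMA address 0x1_2345_6700 is programmed as 0x01234567. This assumes the ring is at least 256-byte aligned, which dma_alloc_coherent() provides (its allocations are at least page aligned). The per-port RX queues use the same trick through MVPP22_DESC_ADDR_OFFS further below.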
@@ -4554,12 +4889,15 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
                          struct mvpp2_rx_queue *rxq)
 
 {
+       u32 rxq_dma;
+       int cpu;
+
        rxq->size = port->rx_ring_size;
 
        /* Allocate memory for RX descriptors */
        rxq->descs = dma_alloc_coherent(port->dev->dev.parent,
                                        rxq->size * MVPP2_DESC_ALIGNED_SIZE,
-                                       &rxq->descs_phys, GFP_KERNEL);
+                                       &rxq->descs_dma, GFP_KERNEL);
        if (!rxq->descs)
                return -ENOMEM;
 
@@ -4569,10 +4907,15 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
        mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
 
        /* Set Rx descriptors queue starting address - indirect access */
-       mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_ADDR_REG, rxq->descs_phys);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
-       mvpp2_write(port->priv, MVPP2_RXQ_INDEX_REG, 0);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+       if (port->priv->hw_version == MVPP21)
+               rxq_dma = rxq->descs_dma;
+       else
+               rxq_dma = rxq->descs_dma >> MVPP22_DESC_ADDR_OFFS;
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
 
        /* Set Offset */
        mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD);
@@ -4599,10 +4942,11 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 
        for (i = 0; i < rx_received; i++) {
                struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
-               u32 bm = mvpp2_bm_cookie_build(rx_desc);
+               u32 bm = mvpp2_bm_cookie_build(port, rx_desc);
 
-               mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr,
-                                 rx_desc->buf_cookie);
+               mvpp2_pool_refill(port, bm,
+                                 mvpp2_rxdesc_dma_addr_get(port, rx_desc),
+                                 mvpp2_rxdesc_cookie_get(port, rx_desc));
        }
        mvpp2_rxq_status_update(port, rxq->id, rx_received, rx_received);
 }
@@ -4611,26 +4955,29 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 static void mvpp2_rxq_deinit(struct mvpp2_port *port,
                             struct mvpp2_rx_queue *rxq)
 {
+       int cpu;
+
        mvpp2_rxq_drop_pkts(port, rxq);
 
        if (rxq->descs)
                dma_free_coherent(port->dev->dev.parent,
                                  rxq->size * MVPP2_DESC_ALIGNED_SIZE,
                                  rxq->descs,
-                                 rxq->descs_phys);
+                                 rxq->descs_dma);
 
        rxq->descs             = NULL;
        rxq->last_desc         = 0;
        rxq->next_desc_to_proc = 0;
-       rxq->descs_phys        = 0;
+       rxq->descs_dma         = 0;
 
        /* Clear Rx descriptors queue starting address and size;
         * free descriptor number
         */
        mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
-       mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_ADDR_REG, 0);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_SIZE_REG, 0);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
 }
 
 /* Create and initialize a Tx queue */
@@ -4646,23 +4993,25 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        /* Allocate memory for Tx descriptors */
        txq->descs = dma_alloc_coherent(port->dev->dev.parent,
                                txq->size * MVPP2_DESC_ALIGNED_SIZE,
-                               &txq->descs_phys, GFP_KERNEL);
+                               &txq->descs_dma, GFP_KERNEL);
        if (!txq->descs)
                return -ENOMEM;
 
        txq->last_desc = txq->size - 1;
 
        /* Set Tx descriptors queue starting address - indirect access */
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_ADDR_REG, txq->descs_phys);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_SIZE_REG, txq->size &
-                                            MVPP2_TXQ_DESC_SIZE_MASK);
-       mvpp2_write(port->priv, MVPP2_TXQ_INDEX_REG, 0);
-       mvpp2_write(port->priv, MVPP2_TXQ_RSVD_CLR_REG,
-                   txq->id << MVPP2_TXQ_RSVD_CLR_OFFSET);
-       val = mvpp2_read(port->priv, MVPP2_TXQ_PENDING_REG);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
+                          txq->descs_dma);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG,
+                          txq->size & MVPP2_TXQ_DESC_SIZE_MASK);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_INDEX_REG, 0);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_RSVD_CLR_REG,
+                          txq->id << MVPP2_TXQ_RSVD_CLR_OFFSET);
+       val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PENDING_REG);
        val &= ~MVPP2_TXQ_PENDING_MASK;
-       mvpp2_write(port->priv, MVPP2_TXQ_PENDING_REG, val);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PENDING_REG, val);
 
        /* Calculate base address in prefetch buffer. We reserve 16 descriptors
         * for each existing TXQ.
@@ -4673,9 +5022,9 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        desc = (port->id * MVPP2_MAX_TXQ * desc_per_txq) +
               (txq->log_id * desc_per_txq);
 
-       mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG,
-                   MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
-                   MVPP2_PREF_BUF_THRESH(desc_per_txq/2));
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
+                          MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
+                          MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
 
        /* WRR / EJP configuration - indirect access */
        tx_port_num = mvpp2_egress_port(port);
@@ -4716,7 +5065,7 @@ error:
 
        dma_free_coherent(port->dev->dev.parent,
                          txq->size * MVPP2_DESC_ALIGNED_SIZE,
-                         txq->descs, txq->descs_phys);
+                         txq->descs, txq->descs_dma);
 
        return -ENOMEM;
 }
@@ -4736,20 +5085,21 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
        if (txq->descs)
                dma_free_coherent(port->dev->dev.parent,
                                  txq->size * MVPP2_DESC_ALIGNED_SIZE,
-                                 txq->descs, txq->descs_phys);
+                                 txq->descs, txq->descs_dma);
 
        txq->descs             = NULL;
        txq->last_desc         = 0;
        txq->next_desc_to_proc = 0;
-       txq->descs_phys        = 0;
+       txq->descs_dma         = 0;
 
        /* Set minimum bandwidth for disabled TXQs */
        mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
 
        /* Set Tx descriptors queue starting address and size */
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_ADDR_REG, 0);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_SIZE_REG, 0);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
 }
 
 /* Cleanup Tx ports */
@@ -4759,10 +5109,11 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
        int delay, pending, cpu;
        u32 val;
 
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       val = mvpp2_read(port->priv, MVPP2_TXQ_PREF_BUF_REG);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+       val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
        val |= MVPP2_TXQ_DRAIN_EN_MASK;
-       mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG, val);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
 
        /* The napi queue has been stopped so wait for all packets
         * to be transmitted.
@@ -4778,11 +5129,13 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
                mdelay(1);
                delay++;
 
-               pending = mvpp2_txq_pend_desc_num_get(port, txq);
+               pending = mvpp2_percpu_read(port->priv, cpu,
+                                           MVPP2_TXQ_PENDING_REG);
+               pending &= MVPP2_TXQ_PENDING_MASK;
        } while (pending);
 
        val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
-       mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG, val);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
 
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
@@ -4991,20 +5344,21 @@ static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer)
 static void mvpp2_rx_error(struct mvpp2_port *port,
                           struct mvpp2_rx_desc *rx_desc)
 {
-       u32 status = rx_desc->status;
+       u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
+       size_t sz = mvpp2_rxdesc_size_get(port, rx_desc);
 
        switch (status & MVPP2_RXD_ERR_CODE_MASK) {
        case MVPP2_RXD_ERR_CRC:
-               netdev_err(port->dev, "bad rx status %08x (crc error), size=%d\n",
-                          status, rx_desc->data_size);
+               netdev_err(port->dev, "bad rx status %08x (crc error), size=%zu\n",
+                          status, sz);
                break;
        case MVPP2_RXD_ERR_OVERRUN:
-               netdev_err(port->dev, "bad rx status %08x (overrun error), size=%d\n",
-                          status, rx_desc->data_size);
+               netdev_err(port->dev, "bad rx status %08x (overrun error), size=%zu\n",
+                          status, sz);
                break;
        case MVPP2_RXD_ERR_RESOURCE:
-               netdev_err(port->dev, "bad rx status %08x (resource error), size=%d\n",
-                          status, rx_desc->data_size);
+               netdev_err(port->dev, "bad rx status %08x (resource error), size=%zu\n",
+                          status, sz);
                break;
        }
 }
@@ -5031,15 +5385,17 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
 static int mvpp2_rx_refill(struct mvpp2_port *port,
                           struct mvpp2_bm_pool *bm_pool, u32 bm)
 {
-       dma_addr_t phys_addr;
+       dma_addr_t dma_addr;
+       phys_addr_t phys_addr;
        void *buf;
 
        /* No recycle or too many buffers are in use, so allocate a new skb */
-       buf = mvpp2_buf_alloc(port, bm_pool, &phys_addr, GFP_ATOMIC);
+       buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr, &phys_addr,
+                             GFP_ATOMIC);
        if (!buf)
                return -ENOMEM;
 
-       mvpp2_pool_refill(port, bm, phys_addr, (unsigned long)buf);
+       mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
 
        return 0;
 }
@@ -5075,43 +5431,6 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
        return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;
 }
 
-static void mvpp2_buff_hdr_rx(struct mvpp2_port *port,
-                             struct mvpp2_rx_desc *rx_desc)
-{
-       struct mvpp2_buff_hdr *buff_hdr;
-       struct sk_buff *skb;
-       u32 rx_status = rx_desc->status;
-       dma_addr_t buff_phys_addr;
-       unsigned long buff_virt_addr;
-       dma_addr_t buff_phys_addr_next;
-       unsigned long buff_virt_addr_next;
-       int mc_id;
-       int pool_id;
-
-       pool_id = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
-                  MVPP2_RXD_BM_POOL_ID_OFFS;
-       buff_phys_addr = rx_desc->buf_phys_addr;
-       buff_virt_addr = rx_desc->buf_cookie;
-
-       do {
-               skb = (struct sk_buff *)buff_virt_addr;
-               buff_hdr = (struct mvpp2_buff_hdr *)skb->head;
-
-               mc_id = MVPP2_B_HDR_INFO_MC_ID(buff_hdr->info);
-
-               buff_phys_addr_next = buff_hdr->next_buff_phys_addr;
-               buff_virt_addr_next = buff_hdr->next_buff_virt_addr;
-
-               /* Release buffer */
-               mvpp2_bm_pool_mc_put(port, pool_id, buff_phys_addr,
-                                    buff_virt_addr, mc_id);
-
-               buff_phys_addr = buff_phys_addr_next;
-               buff_virt_addr = buff_virt_addr_next;
-
-       } while (!MVPP2_B_HDR_INFO_IS_LAST(buff_hdr->info));
-}
-
 /* Main rx processing */
 static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                    struct mvpp2_rx_queue *rxq)
@@ -5132,25 +5451,23 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                struct mvpp2_bm_pool *bm_pool;
                struct sk_buff *skb;
                unsigned int frag_size;
-               dma_addr_t phys_addr;
+               dma_addr_t dma_addr;
+               phys_addr_t phys_addr;
                u32 bm, rx_status;
                int pool, rx_bytes, err;
                void *data;
 
                rx_done++;
-               rx_status = rx_desc->status;
-               rx_bytes = rx_desc->data_size - MVPP2_MH_SIZE;
-               phys_addr = rx_desc->buf_phys_addr;
-               data = (void *)(uintptr_t)rx_desc->buf_cookie;
-
-               bm = mvpp2_bm_cookie_build(rx_desc);
+               rx_status = mvpp2_rxdesc_status_get(port, rx_desc);
+               rx_bytes = mvpp2_rxdesc_size_get(port, rx_desc);
+               rx_bytes -= MVPP2_MH_SIZE;
+               dma_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc);
+               phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
+               data = (void *)phys_to_virt(phys_addr);
+
+               bm = mvpp2_bm_cookie_build(port, rx_desc);
                pool = mvpp2_bm_cookie_pool_get(bm);
                bm_pool = &port->priv->bm_pools[pool];
-               /* Check if buffer header is used */
-               if (rx_status & MVPP2_RXD_BUF_HDR) {
-                       mvpp2_buff_hdr_rx(port, rx_desc);
-                       continue;
-               }
 
                /* In case of an error, release the requested buffer pointer
                 * to the Buffer Manager. This request process is controlled
@@ -5162,9 +5479,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                        dev->stats.rx_errors++;
                        mvpp2_rx_error(port, rx_desc);
                        /* Return the buffer to the pool */
-
-                       mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr,
-                                         rx_desc->buf_cookie);
+                       mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
                        continue;
                }
 
@@ -5185,7 +5500,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                        goto err_drop_frame;
                }
 
-               dma_unmap_single(dev->dev.parent, phys_addr,
+               dma_unmap_single(dev->dev.parent, dma_addr,
                                 bm_pool->buf_size, DMA_FROM_DEVICE);
 
                rcvd_pkts++;
@@ -5216,11 +5531,15 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 }
 
 static inline void
-tx_desc_unmap_put(struct device *dev, struct mvpp2_tx_queue *txq,
+tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
                  struct mvpp2_tx_desc *desc)
 {
-       dma_unmap_single(dev, desc->buf_phys_addr,
-                        desc->data_size, DMA_TO_DEVICE);
+       dma_addr_t buf_dma_addr =
+               mvpp2_txdesc_dma_addr_get(port, desc);
+       size_t buf_sz =
+               mvpp2_txdesc_size_get(port, desc);
+       dma_unmap_single(port->dev->dev.parent, buf_dma_addr,
+                        buf_sz, DMA_TO_DEVICE);
        mvpp2_txq_desc_put(txq);
 }
 
@@ -5232,35 +5551,38 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
        struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
        struct mvpp2_tx_desc *tx_desc;
        int i;
-       dma_addr_t buf_phys_addr;
+       dma_addr_t buf_dma_addr;
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
                void *addr = page_address(frag->page.p) + frag->page_offset;
 
                tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
-               tx_desc->phys_txq = txq->id;
-               tx_desc->data_size = frag->size;
+               mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+               mvpp2_txdesc_size_set(port, tx_desc, frag->size);
 
-               buf_phys_addr = dma_map_single(port->dev->dev.parent, addr,
-                                              tx_desc->data_size,
+               buf_dma_addr = dma_map_single(port->dev->dev.parent, addr,
+                                              frag->size,
                                               DMA_TO_DEVICE);
-               if (dma_mapping_error(port->dev->dev.parent, buf_phys_addr)) {
+               if (dma_mapping_error(port->dev->dev.parent, buf_dma_addr)) {
                        mvpp2_txq_desc_put(txq);
                        goto error;
                }
 
-               tx_desc->packet_offset = buf_phys_addr & MVPP2_TX_DESC_ALIGN;
-               tx_desc->buf_phys_addr = buf_phys_addr & (~MVPP2_TX_DESC_ALIGN);
+               mvpp2_txdesc_offset_set(port, tx_desc,
+                                       buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+               mvpp2_txdesc_dma_addr_set(port, tx_desc,
+                                         buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
 
                if (i == (skb_shinfo(skb)->nr_frags - 1)) {
                        /* Last descriptor */
-                       tx_desc->command = MVPP2_TXD_L_DESC;
-                       mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc);
+                       mvpp2_txdesc_cmd_set(port, tx_desc,
+                                            MVPP2_TXD_L_DESC);
+                       mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
                } else {
                        /* Descriptor in the middle: Not First, Not Last */
-                       tx_desc->command = 0;
-                       mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc);
+                       mvpp2_txdesc_cmd_set(port, tx_desc, 0);
+                       mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
                }
        }
 
@@ -5272,7 +5594,7 @@ error:
         */
        for (i = i - 1; i >= 0; i--) {
                tx_desc = txq->descs + i;
-               tx_desc_unmap_put(port->dev->dev.parent, txq, tx_desc);
+               tx_desc_unmap_put(port, txq, tx_desc);
        }
 
        return -ENOMEM;
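
Both mvpp2_tx_frag_process() above and mvpp2_tx() below store each mapped buffer as an aligned DMA address plus a small packet offset, masking with MVPP2_TX_DESC_ALIGN before writing the descriptor. A tiny standalone illustration of that split; the 0x1f alignment mask is an assumption for the example, not a value taken from this patch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed for illustration: 32-byte alignment, so the low 5 bits of the
 * mapped address become the in-descriptor packet offset. */
#define TX_DESC_ALIGN 0x1fULL

int main(void)
{
        uint64_t buf_dma_addr = 0x12345abcdULL;          /* arbitrary example */
        uint64_t offset = buf_dma_addr & TX_DESC_ALIGN;  /* packet offset */
        uint64_t base = buf_dma_addr & ~TX_DESC_ALIGN;   /* aligned address */

        /* The hardware recombines both fields, so no information is lost. */
        assert(base + offset == buf_dma_addr);
        printf("base=0x%llx offset=%llu\n",
               (unsigned long long)base, (unsigned long long)offset);
        return 0;
}
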
@@ -5285,7 +5607,7 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
        struct mvpp2_tx_queue *txq, *aggr_txq;
        struct mvpp2_txq_pcpu *txq_pcpu;
        struct mvpp2_tx_desc *tx_desc;
-       dma_addr_t buf_phys_addr;
+       dma_addr_t buf_dma_addr;
        int frags = 0;
        u16 txq_id;
        u32 tx_cmd;
@@ -5307,35 +5629,38 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
 
        /* Get a descriptor for the first part of the packet */
        tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
-       tx_desc->phys_txq = txq->id;
-       tx_desc->data_size = skb_headlen(skb);
+       mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+       mvpp2_txdesc_size_set(port, tx_desc, skb_headlen(skb));
 
-       buf_phys_addr = dma_map_single(dev->dev.parent, skb->data,
-                                      tx_desc->data_size, DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(dev->dev.parent, buf_phys_addr))) {
+       buf_dma_addr = dma_map_single(dev->dev.parent, skb->data,
+                                     skb_headlen(skb), DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
                mvpp2_txq_desc_put(txq);
                frags = 0;
                goto out;
        }
-       tx_desc->packet_offset = buf_phys_addr & MVPP2_TX_DESC_ALIGN;
-       tx_desc->buf_phys_addr = buf_phys_addr & ~MVPP2_TX_DESC_ALIGN;
+
+       mvpp2_txdesc_offset_set(port, tx_desc,
+                               buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+       mvpp2_txdesc_dma_addr_set(port, tx_desc,
+                                 buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
 
        tx_cmd = mvpp2_skb_tx_csum(port, skb);
 
        if (frags == 1) {
                /* First and Last descriptor */
                tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_L_DESC;
-               tx_desc->command = tx_cmd;
-               mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc);
+               mvpp2_txdesc_cmd_set(port, tx_desc, tx_cmd);
+               mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
        } else {
                /* First but not Last */
                tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_PADDING_DISABLE;
-               tx_desc->command = tx_cmd;
-               mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc);
+               mvpp2_txdesc_cmd_set(port, tx_desc, tx_cmd);
+               mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
 
                /* Continue with other skb fragments */
                if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) {
-                       tx_desc_unmap_put(port->dev->dev.parent, txq, tx_desc);
+                       tx_desc_unmap_put(port, txq, tx_desc);
                        frags = 0;
                        goto out;
                }
@@ -5396,6 +5721,7 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
        u32 cause_rx_tx, cause_rx, cause_misc;
        int rx_done = 0;
        struct mvpp2_port *port = netdev_priv(napi->dev);
+       int cpu = smp_processor_id();
 
        /* Rx/Tx cause register
         *
@@ -5407,8 +5733,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
         *
         * Each CPU has its own Rx/Tx cause register
         */
-       cause_rx_tx = mvpp2_read(port->priv,
-                                MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
+       cause_rx_tx = mvpp2_percpu_read(port->priv, cpu,
+                                       MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
        cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
        cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
 
@@ -5417,8 +5743,9 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 
                /* Clear the cause register */
                mvpp2_write(port->priv, MVPP2_ISR_MISC_CAUSE_REG, 0);
-               mvpp2_write(port->priv, MVPP2_ISR_RX_TX_CAUSE_REG(port->id),
-                           cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
+               mvpp2_percpu_write(port->priv, cpu,
+                                  MVPP2_ISR_RX_TX_CAUSE_REG(port->id),
+                                  cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
        }
 
        cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
@@ -5530,7 +5857,7 @@ static int mvpp2_check_ringparam_valid(struct net_device *dev,
        return 0;
 }
 
-static void mvpp2_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
+static void mvpp21_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
 {
        u32 mac_addr_l, mac_addr_m, mac_addr_h;
 
@@ -5975,16 +6302,6 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
        .set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
-/* Driver initialization */
-
-static void mvpp2_port_power_up(struct mvpp2_port *port)
-{
-       mvpp2_port_mii_set(port);
-       mvpp2_port_periodic_xon_disable(port);
-       mvpp2_port_fc_adv_enable(port);
-       mvpp2_port_reset(port);
-}
-
 /* Initialize port HW */
 static int mvpp2_port_init(struct mvpp2_port *port)
 {
@@ -6117,7 +6434,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        u32 id;
        int features;
        int phy_mode;
-       int priv_common_regs_num = 2;
        int err, i, cpu;
 
        dev = alloc_etherdev_mqs(sizeof(struct mvpp2_port), txq_number,
@@ -6167,12 +6483,22 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        port->phy_node = phy_node;
        port->phy_interface = phy_mode;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM,
-                                   priv_common_regs_num + id);
-       port->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(port->base)) {
-               err = PTR_ERR(port->base);
-               goto err_free_irq;
+       if (priv->hw_version == MVPP21) {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
+               port->base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(port->base)) {
+                       err = PTR_ERR(port->base);
+                       goto err_free_irq;
+               }
+       } else {
+               if (of_property_read_u32(port_node, "gop-port-id",
+                                        &port->gop_id)) {
+                       err = -EINVAL;
+                       dev_err(&pdev->dev, "missing gop-port-id value\n");
+                       goto err_free_irq;
+               }
+
+               port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
        }
 
        /* Alloc per-cpu stats */
@@ -6187,7 +6513,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                mac_from = "device tree";
                ether_addr_copy(dev->dev_addr, dt_mac_addr);
        } else {
-               mvpp2_get_mac_address(port, hw_mac_addr);
+               if (priv->hw_version == MVPP21)
+                       mvpp21_get_mac_address(port, hw_mac_addr);
                if (is_valid_ether_addr(hw_mac_addr)) {
                        mac_from = "hardware";
                        ether_addr_copy(dev->dev_addr, hw_mac_addr);
@@ -6207,7 +6534,14 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                dev_err(&pdev->dev, "failed to init port %d\n", id);
                goto err_free_stats;
        }
-       mvpp2_port_power_up(port);
+
+       mvpp2_port_mii_set(port);
+       mvpp2_port_periodic_xon_disable(port);
+
+       if (priv->hw_version == MVPP21)
+               mvpp2_port_fc_adv_enable(port);
+
+       mvpp2_port_reset(port);
 
        port->pcpu = alloc_percpu(struct mvpp2_port_pcpu);
        if (!port->pcpu) {
@@ -6350,9 +6684,15 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
                mvpp2_conf_mbus_windows(dram_target_info, priv);
 
        /* Disable HW PHY polling */
-       val = readl(priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
-       val |= MVPP2_PHY_AN_STOP_SMI0_MASK;
-       writel(val, priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
+       if (priv->hw_version == MVPP21) {
+               val = readl(priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
+               val |= MVPP2_PHY_AN_STOP_SMI0_MASK;
+               writel(val, priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
+       } else {
+               val = readl(priv->iface_base + MVPP22_SMI_MISC_CFG_REG);
+               val &= ~MVPP22_SMI_POLLING_EN;
+               writel(val, priv->iface_base + MVPP22_SMI_MISC_CFG_REG);
+       }
 
        /* Allocate and initialize aggregated TXQs */
        priv->aggr_txqs = devm_kcalloc(&pdev->dev, num_present_cpus(),
@@ -6377,8 +6717,9 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
        for (i = 0; i < MVPP2_MAX_PORTS; i++)
                mvpp2_write(priv, MVPP2_ISR_RXQ_GROUP_REG(i), rxq_number);
 
-       writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
-              priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
+       if (priv->hw_version == MVPP21)
+               writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
+                      priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
 
        /* Allow cache snoop when transmitting packets */
        mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1);
@@ -6405,22 +6746,41 @@ static int mvpp2_probe(struct platform_device *pdev)
        struct device_node *port_node;
        struct mvpp2 *priv;
        struct resource *res;
-       int port_count, first_rxq;
+       void __iomem *base;
+       int port_count, first_rxq, cpu;
        int err;
 
        priv = devm_kzalloc(&pdev->dev, sizeof(struct mvpp2), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
 
+       priv->hw_version =
+               (unsigned long)of_device_get_match_data(&pdev->dev);
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(priv->base))
-               return PTR_ERR(priv->base);
+       base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       if (priv->hw_version == MVPP21) {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+               priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(priv->lms_base))
+                       return PTR_ERR(priv->lms_base);
+       } else {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+               priv->iface_base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(priv->iface_base))
+                       return PTR_ERR(priv->iface_base);
+       }
+
+       for_each_present_cpu(cpu) {
+               u32 addr_space_sz;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(priv->lms_base))
-               return PTR_ERR(priv->lms_base);
+               addr_space_sz = (priv->hw_version == MVPP21 ?
+                                MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ);
+               priv->cpu_base[cpu] = base + cpu * addr_space_sz;
+       }
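
The per-CPU base pointers set up above back the mvpp2_percpu_read()/mvpp2_percpu_write() helpers used in mvpp2_poll(): each CPU addresses the hardware through its own register window at a fixed stride from the common base (presumably a zero stride on PPv2.1, where the register space is shared, and a private window per CPU on PPv2.2). A minimal runnable sketch of that addressing scheme, with made-up window sizes standing in for MVPP21_ADDR_SPACE_SZ/MVPP22_ADDR_SPACE_SZ:

#include <stdint.h>
#include <stdio.h>

/* Assumed sizes for illustration only. */
#define ADDR_SPACE_SZ_V21 0x0      /* shared space: every CPU sees window 0 */
#define ADDR_SPACE_SZ_V22 0x10000  /* private 64 KiB window per CPU */

/* Mirrors the probe-time loop: one base pointer per present CPU. */
static void setup_cpu_bases(uint8_t *base, uint8_t *cpu_base[], int nr_cpus,
                            unsigned long addr_space_sz)
{
        for (int cpu = 0; cpu < nr_cpus; cpu++)
                cpu_base[cpu] = base + cpu * addr_space_sz;
}

int main(void)
{
        static uint8_t regs[4 * ADDR_SPACE_SZ_V22]; /* stand-in for the mapped region */
        uint8_t *cpu_base[4];

        setup_cpu_bases(regs, cpu_base, 4, ADDR_SPACE_SZ_V22);
        /* A per-CPU register access would then target cpu_base[cpu] + offset,
         * e.g. readl(cpu_base[cpu] + offset) in the driver. */
        printf("cpu1 window starts at offset 0x%lx\n",
               (unsigned long)(cpu_base[1] - regs));
        return 0;
}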
 
        priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk");
        if (IS_ERR(priv->pp_clk))
@@ -6506,7 +6866,7 @@ static int mvpp2_remove(struct platform_device *pdev)
                dma_free_coherent(&pdev->dev,
                                  MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
                                  aggr_txq->descs,
-                                 aggr_txq->descs_phys);
+                                 aggr_txq->descs_dma);
        }
 
        clk_disable_unprepare(priv->pp_clk);
@@ -6516,7 +6876,10 @@ static int mvpp2_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id mvpp2_match[] = {
-       { .compatible = "marvell,armada-375-pp2" },
+       {
+               .compatible = "marvell,armada-375-pp2",
+               .data = (void *)MVPP21,
+       },
        { }
 };
 MODULE_DEVICE_TABLE(of, mvpp2_match);