octeontx2-pf: cn10K: Reserve LMTST lines per core

author Geetha sowjanya <gakula@marvell.com>

Wed, 1 Sep 2021 09:55:50 +0000 (15:25 +0530)

committer David S. Miller <davem@davemloft.net>

Wed, 1 Sep 2021 10:42:16 +0000 (11:42 +0100)
author Geetha sowjanya <gakula@marvell.com>
Wed, 1 Sep 2021 09:55:50 +0000 (15:25 +0530)
committer David S. Miller <davem@davemloft.net>
Wed, 1 Sep 2021 10:42:16 +0000 (11:42 +0100)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c

index 3cc76f1..95f21df 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -27,7 +27,8 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
  {
  
         struct lmtst_tbl_setup_req *req;
  {
  
         struct lmtst_tbl_setup_req *req;
-       int qcount, err;
+       struct otx2_lmt_info *lmt_info;
+       int err, cpu;
  
         if (!test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
                 pfvf->hw_ops = &otx2_hw_ops;
  
         if (!test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
                 pfvf->hw_ops = &otx2_hw_ops;
@@ -35,15 +36,9 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
         }
  
         pfvf->hw_ops = &cn10k_hw_ops;
         }
  
         pfvf->hw_ops = &cn10k_hw_ops;
-       qcount = pfvf->hw.max_queues;
-       /* LMTST lines allocation
-        * qcount = num_online_cpus();
-        * NPA = TX + RX + XDP.
-        * NIX = TX * 32 (For Burst SQE flush).
-        */
-       pfvf->tot_lmt_lines = (qcount * 3) + (qcount * 32);
-       pfvf->npa_lmt_lines = qcount * 3;
-       pfvf->nix_lmt_size =  LMT_BURST_SIZE * LMT_LINE_SIZE;
+       /* Total LMTLINES = num_online_cpus() * 32 (For Burst flush).*/
+       pfvf->tot_lmt_lines = (num_online_cpus() * LMT_BURST_SIZE);
+       pfvf->hw.lmt_info = alloc_percpu(struct otx2_lmt_info);
  
         mutex_lock(&pfvf->mbox.lock);
         req = otx2_mbox_alloc_msg_lmtst_tbl_setup(&pfvf->mbox);
  
         mutex_lock(&pfvf->mbox.lock);
         req = otx2_mbox_alloc_msg_lmtst_tbl_setup(&pfvf->mbox);
@@ -66,6 +61,13 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
         err = otx2_sync_mbox_msg(&pfvf->mbox);
         mutex_unlock(&pfvf->mbox.lock);
  
         err = otx2_sync_mbox_msg(&pfvf->mbox);
         mutex_unlock(&pfvf->mbox.lock);
  
+       for_each_possible_cpu(cpu) {
+               lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, cpu);
+               lmt_info->lmt_addr = ((u64)pfvf->hw.lmt_base +
+                                     (cpu * LMT_BURST_SIZE * LMT_LINE_SIZE));
+               lmt_info->lmt_id = cpu * LMT_BURST_SIZE;
+       }
+
         return 0;
  }
  EXPORT_SYMBOL(cn10k_lmtst_init);
         return 0;
  }
  EXPORT_SYMBOL(cn10k_lmtst_init);
@@ -74,13 +76,6 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
  {
         struct nix_cn10k_aq_enq_req *aq;
         struct otx2_nic *pfvf = dev;
  {
         struct nix_cn10k_aq_enq_req *aq;
         struct otx2_nic *pfvf = dev;
-       struct otx2_snd_queue *sq;
-
-       sq = &pfvf->qset.sq[qidx];
-       sq->lmt_addr = (u64 *)((u64)pfvf->hw.nix_lmt_base +
-                              (qidx * pfvf->nix_lmt_size));
-
-       sq->lmt_id = pfvf->npa_lmt_lines + (qidx * LMT_BURST_SIZE);
  
         /* Get memory to put this msg */
         aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
  
         /* Get memory to put this msg */
         aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
@@ -125,8 +120,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
                 if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
                         if (num_ptrs--)
                                 __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
                 if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
                         if (num_ptrs--)
                                 __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
-                                                    num_ptrs,
-                                                    cq->rbpool->lmt_addr);
+                                                    num_ptrs);
                         break;
                 }
                 cq->pool_ptrs--;
                         break;
                 }
                 cq->pool_ptrs--;
@@ -134,8 +128,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
                 num_ptrs++;
                 if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
                         __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
                 num_ptrs++;
                 if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
                         __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
-                                            num_ptrs,
-                                            cq->rbpool->lmt_addr);
+                                            num_ptrs);
                         num_ptrs = 1;
                 }
         }
                         num_ptrs = 1;
                 }
         }
@@ -143,20 +136,23 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
  
  void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
  {
  
  void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
  {
+       struct otx2_lmt_info *lmt_info;
+       struct otx2_nic *pfvf = dev;
         u64 val = 0, tar_addr = 0;
  
         u64 val = 0, tar_addr = 0;
  
+       lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id());
         /* FIXME: val[0:10] LMT_ID.
          * [12:15] no of LMTST - 1 in the burst.
          * [19:63] data size of each LMTST in the burst except first.
          */
         /* FIXME: val[0:10] LMT_ID.
          * [12:15] no of LMTST - 1 in the burst.
          * [19:63] data size of each LMTST in the burst except first.
          */
-       val = (sq->lmt_id & 0x7FF);
+       val = (lmt_info->lmt_id & 0x7FF);
         /* Target address for LMTST flush tells HW how many 128bit
          * words are present.
          * tar_addr[6:4] size of first LMTST - 1 in units of 128b.
          */
         tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4;
         dma_wmb();
         /* Target address for LMTST flush tells HW how many 128bit
          * words are present.
          * tar_addr[6:4] size of first LMTST - 1 in units of 128b.
          */
         tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4;
         dma_wmb();
-       memcpy(sq->lmt_addr, sq->sqe_base, size);
+       memcpy((u64 *)lmt_info->lmt_addr, sq->sqe_base, size);
         cn10k_lmt_flush(val, tar_addr);
  
         sq->head++;
         cn10k_lmt_flush(val, tar_addr);
  
         sq->head++;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c

index ce25c27..78df173 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1230,11 +1230,6 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
  
         pool->rbsize = buf_size;
  
  
         pool->rbsize = buf_size;
  
-       /* Set LMTST addr for NPA batch free */
-       if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag))
-               pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base +
-                                                (pool_id * LMT_LINE_SIZE));
-
         /* Initialize this pool's context via AF */
         aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
         if (!aq) {
         /* Initialize this pool's context via AF */
         aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
         if (!aq) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h

index 48227ce..a51ecd7 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -53,6 +53,10 @@ enum arua_mapped_qtypes {
  /* Send skid of 2000 packets required for CQ size of 4K CQEs. */
  #define SEND_CQ_SKID   2000
  
  /* Send skid of 2000 packets required for CQ size of 4K CQEs. */
  #define SEND_CQ_SKID   2000
  
+struct otx2_lmt_info {
+       u64 lmt_addr;
+       u16 lmt_id;
+};
  /* RSS configuration */
  struct otx2_rss_ctx {
         u8  ind_tbl[MAX_RSS_INDIR_TBL_SIZE];
  /* RSS configuration */
  struct otx2_rss_ctx {
         u8  ind_tbl[MAX_RSS_INDIR_TBL_SIZE];
@@ -224,8 +228,7 @@ struct otx2_hw {
  #define LMT_LINE_SIZE          128
  #define LMT_BURST_SIZE         32 /* 32 LMTST lines for burst SQE flush */
         u64                     *lmt_base;
  #define LMT_LINE_SIZE          128
  #define LMT_BURST_SIZE         32 /* 32 LMTST lines for burst SQE flush */
         u64                     *lmt_base;
-       u64                     *npa_lmt_base;
-       u64                     *nix_lmt_base;
+       struct otx2_lmt_info    __percpu *lmt_info;
  };
  
  enum vfperm {
  };
  
  enum vfperm {
@@ -407,17 +410,18 @@ static inline bool is_96xx_B0(struct pci_dev *pdev)
   */
  #define PCI_REVISION_ID_96XX           0x00
  #define PCI_REVISION_ID_95XX           0x10
   */
  #define PCI_REVISION_ID_96XX           0x00
  #define PCI_REVISION_ID_95XX           0x10
-#define PCI_REVISION_ID_LOKI           0x20
+#define PCI_REVISION_ID_95XXN          0x20
  #define PCI_REVISION_ID_98XX           0x30
  #define PCI_REVISION_ID_95XXMM         0x40
  #define PCI_REVISION_ID_98XX           0x30
  #define PCI_REVISION_ID_95XXMM         0x40
+#define PCI_REVISION_ID_95XXO          0xE0
  
  static inline bool is_dev_otx2(struct pci_dev *pdev)
  {
         u8 midr = pdev->revision & 0xF0;
  
         return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX ||
  
  static inline bool is_dev_otx2(struct pci_dev *pdev)
  {
         u8 midr = pdev->revision & 0xF0;
  
         return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX ||
-               midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX ||
-               midr == PCI_REVISION_ID_95XXMM);
+               midr == PCI_REVISION_ID_95XXN || midr == PCI_REVISION_ID_98XX ||
+               midr == PCI_REVISION_ID_95XXMM || midr == PCI_REVISION_ID_95XXO);
  }
  
  static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf)
  }
  
  static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf)
@@ -562,15 +566,16 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr)
  #endif
  
  static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
  #endif
  
  static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
-                                       u64 *ptrs, u64 num_ptrs,
-                                       u64 *lmt_addr)
+                                       u64 *ptrs, u64 num_ptrs)
  {
  {
+       struct otx2_lmt_info *lmt_info;
         u64 size = 0, count_eot = 0;
         u64 tar_addr, val = 0;
  
         u64 size = 0, count_eot = 0;
         u64 tar_addr, val = 0;
  
+       lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id());
         tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0);
         /* LMTID is same as AURA Id */
         tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0);
         /* LMTID is same as AURA Id */
-       val = (aura & 0x7FF) | BIT_ULL(63);
+       val = (lmt_info->lmt_id & 0x7FF) | BIT_ULL(63);
         /* Set if [127:64] of last 128bit word has a valid pointer */
         count_eot = (num_ptrs % 2) ? 0ULL : 1ULL;
         /* Set AURA ID to free pointer */
         /* Set if [127:64] of last 128bit word has a valid pointer */
         count_eot = (num_ptrs % 2) ? 0ULL : 1ULL;
         /* Set AURA ID to free pointer */
@@ -586,7 +591,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
                         size++;
                 tar_addr |=  ((size - 1) & 0x7) << 4;
         }
                         size++;
                 tar_addr |=  ((size - 1) & 0x7) << 4;
         }
-       memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+       memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
         /* Perform LMTST flush */
         cn10k_lmt_flush(val, tar_addr);
  }
         /* Perform LMTST flush */
         cn10k_lmt_flush(val, tar_addr);
  }
@@ -594,12 +599,11 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
  static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
  {
         struct otx2_nic *pfvf = dev;
  static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
  {
         struct otx2_nic *pfvf = dev;
-       struct otx2_pool *pool;
         u64 ptrs[2];
  
         u64 ptrs[2];
  
-       pool = &pfvf->qset.pool[aura];
         ptrs[1] = buf;
         ptrs[1] = buf;
-       __cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr);
+       /* Free only one buffer at a time during init and teardown */
+       __cn10k_aura_freeptr(pfvf, aura, ptrs, 2);
  }
  
  /* Alloc pointer from pool/aura */
  }
  
  /* Alloc pointer from pool/aura */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c

index 799486c..dbfa3bc 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -16,8 +16,8 @@
  #include "otx2_common.h"
  #include "otx2_ptp.h"
  
  #include "otx2_common.h"
  #include "otx2_ptp.h"
  
-#define DRV_NAME       "octeontx2-nicpf"
-#define DRV_VF_NAME    "octeontx2-nicvf"
+#define DRV_NAME       "rvu-nicpf"
+#define DRV_VF_NAME    "rvu-nicvf"
  
  struct otx2_stat {
         char name[ETH_GSTRING_LEN];
  
  struct otx2_stat {
         char name[ETH_GSTRING_LEN];
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c

index 2f2e8a3..53df7ff 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1533,14 +1533,6 @@ int otx2_open(struct net_device *netdev)
         if (!qset->rq)
                 goto err_free_mem;
  
         if (!qset->rq)
                 goto err_free_mem;
  
-       if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) {
-               /* Reserve LMT lines for NPA AURA batch free */
-               pf->hw.npa_lmt_base = pf->hw.lmt_base;
-               /* Reserve LMT lines for NIX TX */
-               pf->hw.nix_lmt_base = (u64 *)((u64)pf->hw.npa_lmt_base +
-                                     (pf->npa_lmt_lines * LMT_LINE_SIZE));
-       }
-
         err = otx2_init_hw_resources(pf);
         if (err)
                 goto err_free_mem;
         err = otx2_init_hw_resources(pf);
         if (err)
                 goto err_free_mem;
@@ -2668,6 +2660,8 @@ err_del_mcam_entries:
  err_ptp_destroy:
         otx2_ptp_destroy(pf);
  err_detach_rsrc:
  err_ptp_destroy:
         otx2_ptp_destroy(pf);
  err_detach_rsrc:
+       if (pf->hw.lmt_info)
+               free_percpu(pf->hw.lmt_info);
         if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
                 qmem_free(pf->dev, pf->dync_lmt);
         otx2_detach_resources(&pf->mbox);
         if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
                 qmem_free(pf->dev, pf->dync_lmt);
         otx2_detach_resources(&pf->mbox);
@@ -2811,6 +2805,8 @@ static void otx2_remove(struct pci_dev *pdev)
         otx2_mcam_flow_del(pf);
         otx2_shutdown_tc(pf);
         otx2_detach_resources(&pf->mbox);
         otx2_mcam_flow_del(pf);
         otx2_shutdown_tc(pf);
         otx2_detach_resources(&pf->mbox);
+       if (pf->hw.lmt_info)
+               free_percpu(pf->hw.lmt_info);
         if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
                 qmem_free(pf->dev, pf->dync_lmt);
         otx2_disable_mbox_intr(pf);
         if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
                 qmem_free(pf->dev, pf->dync_lmt);
         otx2_disable_mbox_intr(pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h

index 869de5f..3ff1ad7 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -80,7 +80,6 @@ struct otx2_snd_queue {
         u16                     num_sqbs;
         u16                     sqe_thresh;
         u8                      sqe_per_sqb;
         u16                     num_sqbs;
         u16                     sqe_thresh;
         u8                      sqe_per_sqb;
-       u32                     lmt_id;
         u64                      io_addr;
         u64                     *aura_fc_addr;
         u64                     *lmt_addr;
         u64                      io_addr;
         u64                     *aura_fc_addr;
         u64                     *lmt_addr;
@@ -111,7 +110,6 @@ struct otx2_cq_poll {
  struct otx2_pool {
         struct qmem             *stack;
         struct qmem             *fc_addr;
  struct otx2_pool {
         struct qmem             *stack;
         struct qmem             *fc_addr;
-       u64                     *lmt_addr;
         u16                     rbsize;
  };
  
         u16                     rbsize;
  };
  
diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h

index 28c04d9..fa1d6af 100644 (file)
--- a/include/linux/soc/marvell/octeontx2/asm.h
+++ b/include/linux/soc/marvell/octeontx2/asm.h
@@ -22,12 +22,17 @@
                          : [rs]"r" (ioaddr));           \
         (result);                                       \
  })
                          : [rs]"r" (ioaddr));           \
         (result);                                       \
  })
+/*
+ * STEORL stores to memory with release semantics.
+ * Using it avoids the need for a DMB barrier after each LMTST
+ * operation.
+ */
  #define cn10k_lmt_flush(val, addr)                     \
  ({                                                     \
         __asm__ volatile(".cpu  generic+lse\n"          \
  #define cn10k_lmt_flush(val, addr)                     \
  ({                                                     \
         __asm__ volatile(".cpu  generic+lse\n"          \
-                        "steor %x[rf],[%[rs]]"         \
-                        : [rf]"+r"(val)                \
-                        : [rs]"r"(addr));              \
+                        "steorl %x[rf],[%[rs]]"                \
+                        : [rf] "+r"(val)               \
+                        : [rs] "r"(addr));             \
  })
  #else
  #define otx2_lmt_flush(ioaddr)          ({ 0; })
  })
  #else
  #define otx2_lmt_flush(ioaddr)          ({ 0; })
author	Geetha sowjanya <gakula@marvell.com>
	Wed, 1 Sep 2021 09:55:50 +0000 (15:25 +0530)
committer	David S. Miller <davem@davemloft.net>
	Wed, 1 Sep 2021 10:42:16 +0000 (11:42 +0100)
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c		patch \| blob \| history
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c		patch \| blob \| history
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h		patch \| blob \| history
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c		patch \| blob \| history
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c		patch \| blob \| history
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h		patch \| blob \| history
include/linux/soc/marvell/octeontx2/asm.h		patch \| blob \| history