net/mlx5e: SHAMPO, Use KSMs instead of KLMs
author Yoray Zack <yorayz@nvidia.com>
Mon, 3 Jun 2024 21:22:17 +0000 (00:22 +0300)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 6 Jun 2024 03:20:46 +0000 (20:20 -0700)
A KSM Mkey is a KLM Mkey with a fixed buffer size. Because the entry
size is fixed, it is a faster mechanism than KLM.

The SHAMPO feature used KLM Mkeys for memory mappings of its headers buffer.
As it used KLMs with the same buffer size for each entry,
we can use KSMs instead.

This commit changes the Mkeys that map the SHAMPO headers buffer
from KLMs to KSMs.

Signed-off-by: Yoray Zack <yorayz@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240603212219.1037656-13-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
include/linux/mlx5/device.h

index ff32660..bec784d 100644 (file)
@@ -80,6 +80,7 @@ struct page_pool;
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define MLX5E_RX_MAX_HEAD (256)
+#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8)
 #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
 #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
 #define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
@@ -146,25 +147,6 @@ struct page_pool;
 #define MLX5E_TX_XSK_POLL_BUDGET       64
 #define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
-#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
-       (sizeof(struct mlx5e_umr_wqe) +\
-       (sizeof(struct mlx5_klm) * (sgl_len)))
-
-#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
-       (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
-
-#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
-       (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
-
-#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
-       (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
-
-#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
-       ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)
-
-#define MLX5E_MAX_KLM_PER_WQE(mdev) \
-       MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
-
 #define mlx5e_state_dereference(priv, p) \
        rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
 
index ec819df..6c9cccc 100644 (file)
@@ -1071,18 +1071,18 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
                                 struct mlx5e_params *params,
                                 struct mlx5e_rq_param *rq_param)
 {
-       int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+       int max_num_of_umr_per_wqe, max_hd_per_wqe, max_ksm_per_umr, rest;
        void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
        int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
        u32 wqebbs;
 
-       max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+       max_ksm_per_umr = MLX5E_MAX_KSM_PER_WQE(mdev);
        max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
-       max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
-       rest = max_hd_per_wqe % max_klm_per_umr;
-       wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+       max_num_of_umr_per_wqe = max_hd_per_wqe / max_ksm_per_umr;
+       rest = max_hd_per_wqe % max_ksm_per_umr;
+       wqebbs = MLX5E_KSM_UMR_WQEBBS(max_ksm_per_umr) * max_num_of_umr_per_wqe;
        if (rest)
-               wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+               wqebbs += MLX5E_KSM_UMR_WQEBBS(rest);
        wqebbs *= wq_size;
        return wqebbs;
 }
index 879d698..d1f0f86 100644 (file)
 
 #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
 
+#define MLX5E_KSM_UMR_WQE_SZ(sgl_len)\
+       (sizeof(struct mlx5e_umr_wqe) +\
+       (sizeof(struct mlx5_ksm) * (sgl_len)))
+
+#define MLX5E_KSM_UMR_WQEBBS(ksm_entries) \
+       (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KSM_UMR_DS_CNT(ksm_entries)\
+       (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size)\
+       (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_ksm))
+
+#define MLX5E_KSM_ENTRIES_PER_WQE(wqe_size)\
+       ALIGN_DOWN(MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT)
+
+#define MLX5E_MAX_KSM_PER_WQE(mdev) \
+       MLX5E_KSM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
+
 static inline
 ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
 {
index d21a87d..2a3e0de 100644 (file)
@@ -504,8 +504,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
        return err;
 }
 
-static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
-                                    u64 nentries,
+static int mlx5e_create_umr_ksm_mkey(struct mlx5_core_dev *mdev,
+                                    u64 nentries, u8 log_entry_size,
                                     u32 *umr_mkey)
 {
        int inlen;
@@ -525,12 +525,13 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
        MLX5_SET(mkc, mkc, umr_en, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);
-       MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+       MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KSM);
        mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
        MLX5_SET(mkc, mkc, translations_octword_size, nentries);
-       MLX5_SET(mkc, mkc, length64, 1);
+       MLX5_SET(mkc, mkc, log_page_size, log_entry_size);
+       MLX5_SET64(mkc, mkc, len, nentries << log_entry_size);
        err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
 
        kvfree(in);
@@ -565,14 +566,16 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
 static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
                                       struct mlx5e_rq *rq)
 {
-       u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+       u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
 
-       if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
-               mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
-                             max_klm_size, rq->mpwqe.shampo->hd_per_wq);
+       if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) {
+               mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
+                             max_ksm_size, rq->mpwqe.shampo->hd_per_wq);
                return -EINVAL;
        }
-       return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+
+       return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+                                        MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE,
                                         &rq->mpwqe.shampo->mkey);
 }
 
index 3af4f70..f1fbf60 100644 (file)
@@ -619,25 +619,25 @@ static int bitmap_find_window(unsigned long *bitmap, int len,
        return min(len, count);
 }
 
-static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
-                         __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
+static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
+                         __be32 key, u16 offset, u16 ksm_len)
 {
-       memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
+       memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_ksms));
        umr_wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
                             MLX5_OPCODE_UMR);
        umr_wqe->ctrl.umr_mkey = key;
        umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
-                                           | MLX5E_KLM_UMR_DS_CNT(klm_len));
+                                           | MLX5E_KSM_UMR_DS_CNT(ksm_len));
        umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
        umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
-       umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
+       umr_wqe->uctrl.xlt_octowords = cpu_to_be16(ksm_len);
        umr_wqe->uctrl.mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
 static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
                                     struct mlx5e_icosq *sq,
-                                    u16 klm_entries, u16 index)
+                                    u16 ksm_entries, u16 index)
 {
        struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
        u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
@@ -650,20 +650,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
        int headroom, i;
 
        headroom = rq->buff.headroom;
-       new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1));
-       entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);
-       wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
+       new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
+       entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
+       wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries);
        pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
        umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
-       build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+       build_ksm_umr(sq, umr_wqe, shampo->key, index, entries);
 
        frag_page = &shampo->pages[page_index];
 
        for (i = 0; i < entries; i++, index++) {
                dma_info = &shampo->info[index];
-               if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
-                                        MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT))
-                       goto update_klm;
+               if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index <
+                                        MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT))
+                       goto update_ksm;
                header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
                        MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
                if (!(header_offset & (PAGE_SIZE - 1))) {
@@ -683,12 +683,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
                        dma_info->frag_page = frag_page;
                }
 
-update_klm:
-               umr_wqe->inline_klms[i].bcount =
-                       cpu_to_be32(MLX5E_RX_MAX_HEAD);
-               umr_wqe->inline_klms[i].key    = cpu_to_be32(lkey);
-               umr_wqe->inline_klms[i].va     =
-                       cpu_to_be64(dma_info->addr + headroom);
+update_ksm:
+               umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+                       .key = cpu_to_be32(lkey),
+                       .va  = cpu_to_be64(dma_info->addr + headroom),
+               };
        }
 
        sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
@@ -720,37 +719,37 @@ err_unmap:
 static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
 {
        struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
-       u16 klm_entries, num_wqe, index, entries_before;
+       u16 ksm_entries, num_wqe, index, entries_before;
        struct mlx5e_icosq *sq = rq->icosq;
-       int i, err, max_klm_entries, len;
+       int i, err, max_ksm_entries, len;
 
-       max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
-       klm_entries = bitmap_find_window(shampo->bitmap,
+       max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev);
+       ksm_entries = bitmap_find_window(shampo->bitmap,
                                         shampo->hd_per_wqe,
                                         shampo->hd_per_wq, shampo->pi);
-       if (!klm_entries)
+       if (!ksm_entries)
                return 0;
 
-       klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1));
-       index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);
+       ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
+       index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
        entries_before = shampo->hd_per_wq - index;
 
-       if (unlikely(entries_before < klm_entries))
-               num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
-                         DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
+       if (unlikely(entries_before < ksm_entries))
+               num_wqe = DIV_ROUND_UP(entries_before, max_ksm_entries) +
+                         DIV_ROUND_UP(ksm_entries - entries_before, max_ksm_entries);
        else
-               num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
+               num_wqe = DIV_ROUND_UP(ksm_entries, max_ksm_entries);
 
        for (i = 0; i < num_wqe; i++) {
-               len = (klm_entries > max_klm_entries) ? max_klm_entries :
-                                                       klm_entries;
+               len = (ksm_entries > max_ksm_entries) ? max_ksm_entries :
+                                                       ksm_entries;
                if (unlikely(index + len > shampo->hd_per_wq))
                        len = shampo->hd_per_wq - index;
                err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
                if (unlikely(err))
                        return err;
                index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
-               klm_entries -= len;
+               ksm_entries -= len;
        }
 
        return 0;
index d7bb31d..da09bfa 100644 (file)
@@ -294,6 +294,7 @@ enum {
 #define MLX5_UMR_FLEX_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt))
 #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm))
+#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm))
 
 #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)