RDMA/mlx5: Change debug log level for remote access error syndromes
authorArumugam Kolappan <aru.kolappan@oracle.com>
Tue, 1 Nov 2022 07:27:44 +0000 (00:27 -0700)
committerLeon Romanovsky <leon@kernel.org>
Sun, 6 Nov 2022 18:31:35 +0000 (20:31 +0200)
The mlx5 driver dumps the entire CQE buffer by default for few syndromes.
Some syndromes are expected due to the application behavior [ex:
MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR, MLX5_CQE_SYNDROME_REMOTE_OP_ERR and
MLX5_CQE_SYNDROME_LOCAL_PROT_ERR]. Hence, for these syndromes, the patch
converts the log level from KERN_WARNING to KERN_DEBUG. This enables the
application to get the CQE buffer dump by changing to KERN_DEBUG level
as and when needed.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Arumugam Kolappan <aru.kolappan@oracle.com>
Link: https://lore.kernel.org/r/1667287664-19377-1-git-send-email-aru.kolappan@oracle.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/mlx5_ib.h

index be189e0..efc9e4a 100644 (file)
@@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
        wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
 }
 
-static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
+                    struct ib_wc *wc, const char *level)
 {
-       mlx5_ib_warn(dev, "dump error cqe\n");
-       mlx5_dump_err_cqe(dev->mdev, cqe);
+       mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
+                   ib_wc_status_msg(wc->status));
+       print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
+                      cqe, sizeof(*cqe), false);
 }
 
 static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
                                  struct mlx5_err_cqe *cqe,
                                  struct ib_wc *wc)
 {
-       int dump = 1;
+       const char *dump = KERN_WARNING;
 
        switch (cqe->syndrome) {
        case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
                wc->status = IB_WC_LOC_QP_OP_ERR;
                break;
        case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+               dump = KERN_DEBUG;
                wc->status = IB_WC_LOC_PROT_ERR;
                break;
        case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
-               dump = 0;
+               dump = NULL;
                wc->status = IB_WC_WR_FLUSH_ERR;
                break;
        case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
                wc->status = IB_WC_REM_INV_REQ_ERR;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+               dump = KERN_DEBUG;
                wc->status = IB_WC_REM_ACCESS_ERR;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+               dump = KERN_DEBUG;
                wc->status = IB_WC_REM_OP_ERR;
                break;
        case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+               dump = NULL;
                wc->status = IB_WC_RETRY_EXC_ERR;
-               dump = 0;
                break;
        case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+               dump = NULL;
                wc->status = IB_WC_RNR_RETRY_EXC_ERR;
-               dump = 0;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
                wc->status = IB_WC_REM_ABORT_ERR;
@@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
        }
 
        wc->vendor_err = cqe->vendor_err_synd;
-       if (dump) {
-               mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
-                            ib_wc_status_msg(wc->status));
-               dump_cqe(dev, cqe);
-       }
+       if (dump)
+               dump_cqe(dev, cqe, wc, dump);
 }
 
 static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
index 4a7f706..8b91bab 100644 (file)
        dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,     \
                 __LINE__, current->pid, ##arg)
 
+#define mlx5_ib_log(lvl, _dev, format, arg...)                                 \
+       dev_printk(lvl, &(_dev)->ib_dev.dev,  "%s:%d:(pid %d): " format,       \
+                  __func__, __LINE__, current->pid, ##arg)
+
 #define MLX5_IB_DEFAULT_UIDX 0xffffff
 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)