1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
7 #include <linux/mlx5/eswitch.h>
12 struct mlx5_ib_counter {
/*
 * Build one struct mlx5_ib_counter initializer for a queue counter: .name is
 * the stringified field name and .offset is MLX5_BYTE_OFF() of that field
 * within query_q_counter_out.
 */
17 #define INIT_Q_COUNTER(_name) \
18 { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
/*
 * Queue counters that mlx5_ib_fill_counters() adds without checking any
 * device capability.
 */
20 static const struct mlx5_ib_counter basic_q_cnts[] = {
21 INIT_Q_COUNTER(rx_write_requests),
22 INIT_Q_COUNTER(rx_read_requests),
23 INIT_Q_COUNTER(rx_atomic_requests),
24 INIT_Q_COUNTER(out_of_buffer),
/* Queue counter added only when the device has the out_of_seq_cnt capability. */
27 static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
28 INIT_Q_COUNTER(out_of_sequence),
/*
 * Queue counters added only when the device has the
 * retransmission_q_counters capability.
 */
31 static const struct mlx5_ib_counter retrans_q_cnts[] = {
32 INIT_Q_COUNTER(duplicate_request),
33 INIT_Q_COUNTER(rnr_nak_retry_err),
34 INIT_Q_COUNTER(packet_seq_err),
35 INIT_Q_COUNTER(implied_nak_seq_err),
36 INIT_Q_COUNTER(local_ack_timeout_err),
/*
 * Build one struct mlx5_ib_counter initializer for a congestion counter:
 * .name is the stringified name and .offset is MLX5_BYTE_OFF() of the
 * <name>_high field within query_cong_statistics_out.
 */
39 #define INIT_CONG_COUNTER(_name) \
40 { .name = #_name, .offset = \
41 MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
/* Congestion counters, added only when the device has cc_query_allowed. */
43 static const struct mlx5_ib_counter cong_cnts[] = {
44 INIT_CONG_COUNTER(rp_cnp_ignored),
45 INIT_CONG_COUNTER(rp_cnp_handled),
46 INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
47 INIT_CONG_COUNTER(np_cnp_sent),
/*
 * Queue counters added only when the device has the
 * enhanced_error_q_counters capability.
 */
50 static const struct mlx5_ib_counter extended_err_cnts[] = {
51 INIT_Q_COUNTER(resp_local_length_error),
52 INIT_Q_COUNTER(resp_cqe_error),
53 INIT_Q_COUNTER(req_cqe_error),
54 INIT_Q_COUNTER(req_remote_invalid_request),
55 INIT_Q_COUNTER(req_remote_access_errors),
56 INIT_Q_COUNTER(resp_remote_access_errors),
57 INIT_Q_COUNTER(resp_cqe_flush_error),
58 INIT_Q_COUNTER(req_cqe_flush_error),
/* Queue counters added only when the device has the roce_accl capability. */
61 static const struct mlx5_ib_counter roce_accl_cnts[] = {
62 INIT_Q_COUNTER(roce_adp_retrans),
63 INIT_Q_COUNTER(roce_adp_retrans_to),
64 INIT_Q_COUNTER(roce_slow_restart),
65 INIT_Q_COUNTER(roce_slow_restart_cnps),
66 INIT_Q_COUNTER(roce_slow_restart_trans),
/*
 * Build one struct mlx5_ib_counter initializer for an extended PPCNT counter:
 * .name is the stringified name and .offset is MLX5_BYTE_OFF() of
 * counter_set.eth_extended_cntrs_grp_data_layout.<name>_high within ppcnt_reg.
 */
69 #define INIT_EXT_PPCNT_COUNTER(_name) \
70 { .name = #_name, .offset = \
71 MLX5_BYTE_OFF(ppcnt_reg, \
72 counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
/*
 * Extended PPCNT counter, added only when the device reports the
 * rx_icrc_encapsulated_counter PCAM feature.
 */
74 static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
75 INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
/*
 * Read the hardware counters behind @counters into @read_attr.
 *
 * All work is done under mcntrs_mutex. cntrs_max_index is first compared
 * against read_attr->ncounters (presumably the request is rejected when the
 * caller's buffer is too small). A zeroed scratch array of counters_num u64
 * values is then allocated and filled by the object's read_counters()
 * callback. Each raw value selected by desc[i].description is added to
 * read_attr->counters_buff[desc[i].index], so several descriptions may
 * accumulate into the same output slot. The scratch array is freed before
 * the mutex is released.
 */
78 static int mlx5_ib_read_counters(struct ib_counters *counters,
79 struct ib_counters_read_attr *read_attr,
80 struct uverbs_attr_bundle *attrs)
82 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
83 struct mlx5_read_counters_attr mread_attr = {};
84 struct mlx5_ib_flow_counters_desc *desc;
87 mutex_lock(&mcounters->mcntrs_mutex);
88 if (mcounters->cntrs_max_index > read_attr->ncounters) {
93 mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
95 if (!mread_attr.out) {
100 mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
101 mread_attr.flags = read_attr->flags;
102 ret = mcounters->read_counters(counters->device, &mread_attr);
106 /* do the pass over the counters data array to assign according to the
107 * descriptions and indexing pairs
109 desc = mcounters->counters_data;
110 for (i = 0; i < mcounters->ncounters; i++)
111 read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
114 kfree(mread_attr.out);
116 mutex_unlock(&mcounters->mcntrs_mutex);
/*
 * Tear down a counters object: clear its description data via
 * mlx5_ib_counters_clear_description() and, if a hardware flow counter
 * handle is present, destroy it with mlx5_fc_destroy().
 */
120 static void mlx5_ib_destroy_counters(struct ib_counters *counters)
122 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
124 mlx5_ib_counters_clear_description(counters);
125 if (mcounters->hw_cntrs_hndl)
126 mlx5_fc_destroy(to_mdev(counters->device)->mdev,
127 mcounters->hw_cntrs_hndl);
/*
 * Set up the driver-private part of a new counters object. The visible work
 * is initialising mcntrs_mutex, the lock other functions in this file take
 * around counters_data and cntrs_max_index.
 */
130 static int mlx5_ib_create_counters(struct ib_counters *counters,
131 struct uverbs_attr_bundle *attrs)
133 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
135 mutex_init(&mcounters->mcntrs_mutex);
/*
 * True when @mdev is an eswitch manager and its eswitch reports
 * MLX5_ESWITCH_OFFLOADS mode.
 */
140 static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
142 return MLX5_ESWITCH_MANAGER(mdev) &&
143 mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
144 MLX5_ESWITCH_OFFLOADS;
/*
 * Pick the counters structure for a port: in switchdev mode every port
 * shares dev->port[0].cnts, otherwise dev->port[port_num].cnts is used.
 */
147 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
150 return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
151 &dev->port[port_num].cnts;
155 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
156 * @dev: Pointer to mlx5 IB device
157 * @port_num: Zero based port number
159 * mlx5_ib_get_counters_id() Returns counters set id to use for given
160 * device port combination in switchdev and non switchdev mode of the
163 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
/* get_counters() already folds all ports onto port 0 in switchdev mode. */
165 const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
/*
 * Allocate the rdma_hw_stats structure for a device or port.
 *
 * The guard below matches "switchdev mode with a non-zero port_num" and
 * "non-switchdev mode with port_num == 0"; presumably no stats are allocated
 * in those cases. Otherwise the structure is sized for the q, congestion and
 * extended PPCNT counters of the selected counter set, using the default
 * lifespan. port_num is converted to a zero-based index for get_counters().
 */
170 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
173 struct mlx5_ib_dev *dev = to_mdev(ibdev);
174 const struct mlx5_ib_counters *cnts;
175 bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
177 if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
180 cnts = get_counters(dev, port_num - 1);
182 return rdma_alloc_hw_stats_struct(cnts->names,
183 cnts->num_q_counters +
184 cnts->num_cong_counters +
185 cnts->num_ext_ppcnt_counters,
186 RDMA_HW_STATS_DEFAULT_LIFESPAN);
/*
 * Issue QUERY_Q_COUNTER for counter set @set_id on @mdev and copy the result
 * into @stats: for each of the cnts->num_q_counters entries, the 32-bit
 * big-endian value at byte offset cnts->offsets[i] in the reply is converted
 * to CPU order and stored, widened to u64, in stats->value[i].
 */
189 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
190 const struct mlx5_ib_counters *cnts,
191 struct rdma_hw_stats *stats,
194 u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
195 u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
199 MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
200 MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
201 ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
205 for (i = 0; i < cnts->num_q_counters; i++) {
206 val = *(__be32 *)((void *)out + cnts->offsets[i]);
207 stats->value[i] = (u64)be32_to_cpu(val);
/*
 * Read the MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP of the PPCNT register
 * (local_port set to 1) into a temporary buffer and copy the extended PPCNT
 * counters into @stats. These counters sit after the q and congestion
 * counters in both cnts->offsets[] and stats->value[], hence the shared
 * "offset" base; each value is read as a 64-bit big-endian quantity.
 */
213 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
214 const struct mlx5_ib_counters *cnts,
215 struct rdma_hw_stats *stats)
217 int offset = cnts->num_q_counters + cnts->num_cong_counters;
218 u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
219 int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
223 out = kvzalloc(sz, GFP_KERNEL);
227 MLX5_SET(ppcnt_reg, in, local_port, 1);
228 MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
229 ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
234 for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
235 stats->value[i + offset] =
236 be64_to_cpup((__be64 *)(out +
237 cnts->offsets[i + offset]));
/*
 * Fill @stats for @port_num (one-based; converted to a zero-based index for
 * get_counters()).
 *
 * Three groups are gathered in turn: the q counters, queried on dev->mdev
 * with the port's set_id; the extended PPCNT counters, only when the
 * rx_icrc_encapsulated_counter PCAM feature is present; and the congestion
 * counters, only when cc_query_allowed is set. The congestion values are
 * written into the stats slots that follow the q counters, with the native
 * port mdev taken and released around the query. num_counters is the total
 * across the three groups (presumably the value returned on success).
 */
243 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
244 struct rdma_hw_stats *stats,
245 u8 port_num, int index)
247 struct mlx5_ib_dev *dev = to_mdev(ibdev);
248 const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
249 struct mlx5_core_dev *mdev;
250 int ret, num_counters;
256 num_counters = cnts->num_q_counters +
257 cnts->num_cong_counters +
258 cnts->num_ext_ppcnt_counters;
260 /* q_counters are per IB device, query the master mdev */
261 ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
265 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
266 ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
271 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
272 mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
275 /* If port is not affiliated yet, it is in down state
276 * which doesn't have any counters yet, so it would be
277 * zero. So no need to read from the HCA.
281 ret = mlx5_lag_query_cong_counters(dev->mdev,
283 cnts->num_q_counters,
284 cnts->num_cong_counters,
286 cnts->num_q_counters);
288 mlx5_ib_put_native_port_mdev(dev, port_num);
/*
 * Allocate the rdma_hw_stats structure for an rdma_counter, sized for the q,
 * congestion and extended PPCNT counters of the counter's port
 * (counter->port is one-based) and using the default lifespan.
 */
297 static struct rdma_hw_stats *
298 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
300 struct mlx5_ib_dev *dev = to_mdev(counter->device);
301 const struct mlx5_ib_counters *cnts =
302 get_counters(dev, counter->port - 1);
304 return rdma_alloc_hw_stats_struct(cnts->names,
305 cnts->num_q_counters +
306 cnts->num_cong_counters +
307 cnts->num_ext_ppcnt_counters,
308 RDMA_HW_STATS_DEFAULT_LIFESPAN);
/*
 * Refresh counter->stats by querying the q counters of counter set
 * counter->id on dev->mdev. Only q counters are read here; the congestion
 * and extended PPCNT slots are not touched by this call.
 */
311 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
313 struct mlx5_ib_dev *dev = to_mdev(counter->device);
314 const struct mlx5_ib_counters *cnts =
315 get_counters(dev, counter->port - 1);
317 return mlx5_ib_query_q_counters(dev->mdev, cnts,
318 counter->stats, counter->id);
/*
 * Release the hardware q counter behind @counter by issuing
 * DEALLOC_Q_COUNTER for counter->id on dev->mdev; the command's result is
 * returned to the caller.
 */
321 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
323 struct mlx5_ib_dev *dev = to_mdev(counter->device);
324 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
329 MLX5_SET(dealloc_q_counter_in, in, opcode,
330 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
331 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
332 return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
/*
 * Bind a QP to @counter.
 *
 * A hardware q counter is allocated with ALLOC_Q_COUNTER using
 * MLX5_SHARED_RESOURCE_UID (presumably only when the counter has no id yet),
 * and the counter_set_id is read from the reply. The QP is then attached via
 * mlx5_ib_qp_set_counter(). If that fails, control jumps to
 * fail_set_counter, where (presumably) the counter is deallocated again.
 */
335 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
338 struct mlx5_ib_dev *dev = to_mdev(qp->device);
342 u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
343 u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
345 MLX5_SET(alloc_q_counter_in, in, opcode,
346 MLX5_CMD_OP_ALLOC_Q_COUNTER);
347 MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
348 err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
352 MLX5_GET(alloc_q_counter_out, out, counter_set_id);
355 err = mlx5_ib_qp_set_counter(qp, counter);
357 goto fail_set_counter;
362 mlx5_ib_counter_dealloc(counter);
/* Detach @qp from its counter by setting the QP's counter to NULL. */
368 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
370 return mlx5_ib_qp_set_counter(qp, NULL);
/*
 * Populate the names[] and offsets[] arrays of a counter set.
 *
 * Entries are appended in a fixed order, with j as the running output index:
 * basic, out-of-sequence, retransmission, extended-error and roce_accl q
 * counters, then congestion counters, then extended PPCNT counters. Every
 * group except the basic one is gated on a device capability. The same
 * capability checks appear in __mlx5_ib_alloc_counters(), which sizes the
 * arrays, and the query helpers rely on this q / congestion / PPCNT ordering
 * when they index offsets[].
 */
374 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
381 for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
382 names[j] = basic_q_cnts[i].name;
383 offsets[j] = basic_q_cnts[i].offset;
386 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
387 for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
388 names[j] = out_of_seq_q_cnts[i].name;
389 offsets[j] = out_of_seq_q_cnts[i].offset;
393 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
394 for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
395 names[j] = retrans_q_cnts[i].name;
396 offsets[j] = retrans_q_cnts[i].offset;
400 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
401 for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
402 names[j] = extended_err_cnts[i].name;
403 offsets[j] = extended_err_cnts[i].offset;
407 if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
408 for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
409 names[j] = roce_accl_cnts[i].name;
410 offsets[j] = roce_accl_cnts[i].offset;
414 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
415 for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
416 names[j] = cong_cnts[i].name;
417 offsets[j] = cong_cnts[i].offset;
421 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
422 for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
423 names[j] = ext_ppcnt_cnts[i].name;
424 offsets[j] = ext_ppcnt_cnts[i].offset;
/*
 * Work out how many counters the device supports and allocate the names and
 * offsets arrays of @cnts accordingly.
 *
 * cnts->num_q_counters covers the basic group plus every capability-gated q
 * counter group. Congestion and extended PPCNT counters are recorded in
 * their own fields and only added to the running total used to size the
 * arrays.
 *
 * NOTE(review): both kcalloc() calls pass sizeof() of the pointer member
 * (sizeof(cnts->names), sizeof(cnts->offsets)) rather than of an element.
 * The conventional form is sizeof(*cnts->names) / sizeof(*cnts->offsets);
 * the current form is only correct if the element size equals the pointer
 * size - confirm and consider switching.
 */
430 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
431 struct mlx5_ib_counters *cnts)
435 num_counters = ARRAY_SIZE(basic_q_cnts);
437 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
438 num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
440 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
441 num_counters += ARRAY_SIZE(retrans_q_cnts);
443 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
444 num_counters += ARRAY_SIZE(extended_err_cnts);
446 if (MLX5_CAP_GEN(dev->mdev, roce_accl))
447 num_counters += ARRAY_SIZE(roce_accl_cnts);
449 cnts->num_q_counters = num_counters;
451 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
452 cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
453 num_counters += ARRAY_SIZE(cong_cnts);
455 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
456 cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
457 num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
459 cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
463 cnts->offsets = kcalloc(num_counters,
464 sizeof(cnts->offsets), GFP_KERNEL);
/*
 * Release the per-port counter resources of @dev. One counter set is handled
 * in switchdev mode, dev->num_ports otherwise. For each set, a non-zero
 * set_id is returned to the device with DEALLOC_Q_COUNTER, and the names and
 * offsets arrays are freed.
 */
476 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
478 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
482 num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
484 MLX5_SET(dealloc_q_counter_in, in, opcode,
485 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
487 for (i = 0; i < num_cnt_ports; i++) {
488 if (dev->port[i].cnts.set_id) {
489 MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
490 dev->port[i].cnts.set_id);
491 mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
493 kfree(dev->port[i].cnts.names);
494 kfree(dev->port[i].cnts.offsets);
/*
 * Allocate and describe the q counter set of every counter port (one in
 * switchdev mode, dev->num_ports otherwise).
 *
 * For each port the names/offsets arrays are allocated and filled, then a
 * hardware q counter is requested with ALLOC_Q_COUNTER. The request uses
 * MLX5_SHARED_RESOURCE_UID when the device reports a non-zero log_max_uctx
 * and uid 0 otherwise. The returned counter_set_id is stored as the port's
 * set_id. mlx5_ib_dealloc_counters() is called at the end of the function,
 * presumably as the error-unwind path.
 */
498 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
500 u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
501 u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
507 MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
508 is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
509 num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
511 for (i = 0; i < num_cnt_ports; i++) {
512 err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
516 mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
517 dev->port[i].cnts.offsets);
519 MLX5_SET(alloc_q_counter_in, in, uid,
520 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
522 err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
525 "couldn't allocate queue counter for port %d, err %d\n",
530 dev->port[i].cnts.set_id =
531 MLX5_GET(alloc_q_counter_out, out, counter_set_id);
536 mlx5_ib_dealloc_counters(dev);
/*
 * read_counters() callback for flow counters: query the flow counter handle
 * carried in @read_attr and store the two results in
 * read_attr->out[IB_COUNTER_PACKETS] and read_attr->out[IB_COUNTER_BYTES].
 */
540 static int read_flow_counters(struct ib_device *ibdev,
541 struct mlx5_read_counters_attr *read_attr)
543 struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
544 struct mlx5_ib_dev *dev = to_mdev(ibdev);
546 return mlx5_fc_query(dev->mdev, fc,
547 &read_attr->out[IB_COUNTER_PACKETS],
548 &read_attr->out[IB_COUNTER_BYTES]);
551 /* flow counters currently expose two counters: packets and bytes */
552 #define FLOW_COUNTERS_NUM 2
/*
 * Install a set of description/index pairs on a counters object.
 *
 * Only MLX5_IB_COUNTERS_FLOW is accepted as @counters_type. The object is
 * set up to read flow counters (read_flow_counters, FLOW_COUNTERS_NUM raw
 * values). Each entry is checked so that its description does not exceed
 * IB_COUNTER_BYTES, and cntrs_max_index is computed as the highest index
 * seen plus one. @desc_data and cntrs_max_index are then published under
 * mcntrs_mutex; the object keeps the @desc_data pointer itself rather than
 * a copy.
 */
553 static int counters_set_description(
554 struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
555 struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
557 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
558 u32 cntrs_max_index = 0;
561 if (counters_type != MLX5_IB_COUNTERS_FLOW)
564 /* init the fields for the object */
565 mcounters->type = counters_type;
566 mcounters->read_counters = read_flow_counters;
567 mcounters->counters_num = FLOW_COUNTERS_NUM;
568 mcounters->ncounters = ncounters;
569 /* each counter entry has both a description and an index */
570 for (i = 0; i < ncounters; i++) {
571 if (desc_data[i].description > IB_COUNTER_BYTES)
574 if (cntrs_max_index <= desc_data[i].index)
575 cntrs_max_index = desc_data[i].index + 1;
578 mutex_lock(&mcounters->mcntrs_mutex);
579 mcounters->counters_data = desc_data;
580 mcounters->cntrs_max_index = cntrs_max_index;
581 mutex_unlock(&mcounters->mcntrs_mutex);
/*
 * Upper bound on the number of description entries accepted from user space
 * by mlx5_ib_flow_counters_set_data(): USHRT_MAX divided by the size of two
 * u32 values.
 */
586 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
/*
 * Attach user-supplied counter descriptions to @ibcounters for a flow.
 *
 * When @ucmd carries counters data, the entry count is bounded by
 * MAX_COUNTERS_NUM, a desc_data array is allocated, and the
 * description/index pairs are copied in from user space. A hardware flow
 * counter is created the first time one is needed. If the object is already
 * bound to a flow (cntrs_max_index != 0) a new description is not installed;
 * otherwise it is installed through counters_set_description(). Without
 * user data, an object that is not yet bound is treated as an error case.
 *
 * The final lines destroy the flow counter and clear the handle; presumably
 * this is the error-unwind path and applies only when the handle was created
 * by this call (tracked by hw_hndl).
 */
587 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
588 struct mlx5_ib_create_flow *ucmd)
590 struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
591 struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
592 struct mlx5_ib_flow_counters_desc *desc_data = NULL;
593 bool hw_hndl = false;
596 if (ucmd && ucmd->ncounters_data != 0) {
597 cntrs_data = ucmd->data;
598 if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
601 desc_data = kcalloc(cntrs_data->ncounters,
607 if (copy_from_user(desc_data,
608 u64_to_user_ptr(cntrs_data->counters_data),
609 sizeof(*desc_data) * cntrs_data->ncounters)) {
615 if (!mcounters->hw_cntrs_hndl) {
616 mcounters->hw_cntrs_hndl = mlx5_fc_create(
617 to_mdev(ibcounters->device)->mdev, false);
618 if (IS_ERR(mcounters->hw_cntrs_hndl)) {
619 ret = PTR_ERR(mcounters->hw_cntrs_hndl);
626 /* counters already bound to at least one flow */
627 if (mcounters->cntrs_max_index) {
632 ret = counters_set_description(ibcounters,
633 MLX5_IB_COUNTERS_FLOW,
635 cntrs_data->ncounters);
639 } else if (!mcounters->cntrs_max_index) {
640 /* counters not bound yet, must have udata passed */
649 mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
650 mcounters->hw_cntrs_hndl);
651 mcounters->hw_cntrs_hndl = NULL;
/*
 * Drop the description data of a counters object. The function checks for a
 * NULL @counters and for a usecnt other than 1 (presumably returning early
 * in both cases). Otherwise, under mcntrs_mutex, counters_data is freed and
 * reset to NULL and cntrs_max_index is reset to 0, which marks the object as
 * not bound to any flow.
 */
658 void mlx5_ib_counters_clear_description(struct ib_counters *counters)
660 struct mlx5_ib_mcounters *mcounters;
662 if (!counters || atomic_read(&counters->usecnt) != 1)
665 mcounters = to_mcounters(counters);
667 mutex_lock(&mcounters->mcntrs_mutex);
668 kfree(mcounters->counters_data);
669 mcounters->counters_data = NULL;
670 mcounters->cntrs_max_index = 0;
671 mutex_unlock(&mcounters->mcntrs_mutex);
/*
 * Device ops for hardware statistics and per-QP counter binding; registered
 * by mlx5_ib_counters_init() after its max_qp_cnt capability check.
 */
674 static const struct ib_device_ops hw_stats_ops = {
675 .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
676 .get_hw_stats = mlx5_ib_get_hw_stats,
677 .counter_bind_qp = mlx5_ib_counter_bind_qp,
678 .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
679 .counter_dealloc = mlx5_ib_counter_dealloc,
680 .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
681 .counter_update_stats = mlx5_ib_counter_update_stats,
/*
 * Device ops for ib_counters objects (create/destroy/read); registered
 * first in mlx5_ib_counters_init(), before any capability check. The
 * INIT_RDMA_OBJ_SIZE() entry names mlx5_ib_mcounters as the driver
 * structure embedding ib_counters through its ibcntrs member.
 */
684 static const struct ib_device_ops counters_ops = {
685 .create_counters = mlx5_ib_create_counters,
686 .destroy_counters = mlx5_ib_destroy_counters,
687 .read_counters = mlx5_ib_read_counters,
689 INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
/*
 * Register the counter-related device ops for @dev. counters_ops is
 * registered first. The max_qp_cnt capability is then checked: when it is
 * zero the function presumably stops there; otherwise hw_stats_ops is
 * registered as well and the result of allocating the per-port q counters
 * is returned.
 */
692 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
694 ib_set_device_ops(&dev->ib_dev, &counters_ops);
696 if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
699 ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
700 return mlx5_ib_alloc_counters(dev);
/*
 * Counterpart of mlx5_ib_counters_init(): applies the same max_qp_cnt
 * capability check and releases the per-port q counters with
 * mlx5_ib_dealloc_counters().
 */
703 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
705 if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
708 mlx5_ib_dealloc_counters(dev);