1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
28 #define VALID_FEATURES_MASK \
29 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
30 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
31 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
32 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
34 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
35 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
36 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
37 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
38 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
39 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
40 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
41 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
43 #define VALID_STATUS_MASK \
44 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
45 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
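/* Illustrative note (not from the original source): the rest of this file
 * gates feature-dependent behavior with MLX5_FEATURE(), e.g.
 *
 *	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
 *		... multiqueue-only path ...
 *
 * which simply tests the negotiated bit in actual_features.
 */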
49 #define MLX5V_UNTAGGED 0x1000
51 struct mlx5_vdpa_cq_buf {
52 struct mlx5_frag_buf_ctrl fbc;
53 struct mlx5_frag_buf frag_buf;
59 struct mlx5_core_cq mcq;
60 struct mlx5_vdpa_cq_buf buf;
65 struct mlx5_vdpa_umem {
66 struct mlx5_frag_buf_ctrl fbc;
67 struct mlx5_frag_buf frag_buf;
73 struct mlx5_core_qp mqp;
74 struct mlx5_frag_buf frag_buf;
80 struct mlx5_vq_restore_info {
92 struct mlx5_vdpa_virtqueue {
99 /* Resources for implementing the notification channel from the device
100 * to the driver. fwqp is the firmware end of an RC connection; the
101 * other end is vqqp used by the driver. cq is where completions are
104 struct mlx5_vdpa_cq cq;
105 struct mlx5_vdpa_qp fwqp;
106 struct mlx5_vdpa_qp vqqp;
108 /* umem resources are required for the virtqueue operation. Their use
109 * is internal and they must be provided by the driver.
111 struct mlx5_vdpa_umem umem1;
112 struct mlx5_vdpa_umem umem2;
113 struct mlx5_vdpa_umem umem3;
119 struct mlx5_vdpa_net *ndev;
126 struct mlx5_vdpa_mr *vq_mr;
127 struct mlx5_vdpa_mr *desc_mr;
131 /* keep last in the struct */
132 struct mlx5_vq_restore_info ri;
135 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
137 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
138 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
144 return idx <= mvdev->max_idx;
147 static void free_resources(struct mlx5_vdpa_net *ndev);
148 static void init_mvqs(struct mlx5_vdpa_net *ndev);
149 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
150 static void teardown_driver(struct mlx5_vdpa_net *ndev);
152 static bool mlx5_vdpa_debug;
154 #define MLX5_LOG_VIO_FLAG(_feature) \
156 if (features & BIT_ULL(_feature)) \
157 mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
160 #define MLX5_LOG_VIO_STAT(_status) \
162 if (status & (_status)) \
163 mlx5_vdpa_info(mvdev, "%s\n", #_status); \
166 /* TODO: cross-endian support */
167 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
169 return virtio_legacy_is_little_endian() ||
170 (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
173 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
175 return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
178 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
180 return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
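/* Endianness sketch: with VIRTIO_F_VERSION_1 negotiated the on-wire
 * format is always little-endian, so on a big-endian CPU the two
 * helpers above byte-swap; otherwise the legacy native-endian rule
 * from virtio_legacy_is_little_endian() applies (no cross-endian
 * support yet, per the TODO above).
 */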
183 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
185 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
188 return mvdev->max_vqs;
191 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
193 return idx == ctrl_vq_idx(mvdev);
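/* Index layout, assuming the usual mlx5 arrangement: data virtqueues
 * occupy indices 0..max_vqs-1 and the control VQ, when present, comes
 * right after them. Without VIRTIO_NET_F_MQ there is a single VQ pair,
 * so the layout is simply:
 *
 *	rx0 = 0, tx0 = 1, cvq = ctrl_vq_idx(mvdev)
 */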
196 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
198 if (status & ~VALID_STATUS_MASK)
199 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
200 status & ~VALID_STATUS_MASK);
202 if (!mlx5_vdpa_debug)
205 mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
206 if (set && !status) {
207 mlx5_vdpa_info(mvdev, "driver resets the device\n");
211 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
212 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
213 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
214 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
215 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
216 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
219 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
221 if (features & ~VALID_FEATURES_MASK)
222 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
223 features & ~VALID_FEATURES_MASK);
225 if (!mlx5_vdpa_debug)
228 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
230 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
254 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
255 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
256 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
257 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
258 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
259 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
260 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
261 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
262 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
263 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
264 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
265 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
268 static int create_tis(struct mlx5_vdpa_net *ndev)
270 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
271 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
275 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
276 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
277 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
279 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
284 static void destroy_tis(struct mlx5_vdpa_net *ndev)
286 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
289 #define MLX5_VDPA_CQE_SIZE 64
290 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
292 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
294 struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
295 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
296 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
299 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
300 ndev->mvdev.mdev->priv.numa_node);
304 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
306 buf->cqe_size = MLX5_VDPA_CQE_SIZE;
312 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
314 struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
316 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
317 ndev->mvdev.mdev->priv.numa_node);
320 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
322 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
325 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
327 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
330 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
332 struct mlx5_cqe64 *cqe64;
336 for (i = 0; i < buf->nent; i++) {
337 cqe = get_cqe(vcq, i);
339 cqe64->op_own = MLX5_CQE_INVALID << 4;
343 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
345 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
347 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
348 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
354 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
357 vqp->db.db[0] = cpu_to_be32(vqp->head);
360 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
361 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
363 struct mlx5_vdpa_qp *vqp;
367 vqp = fw ? &mvq->fwqp : &mvq->vqqp;
368 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
369 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
371 /* Firmware QP is allocated by the driver for the firmware's
372 * use, so we can skip some of the params as they will be chosen by the firmware
374 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
375 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
376 MLX5_SET(qpc, qpc, no_sq, 1);
380 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
381 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
382 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
383 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
384 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
385 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
386 MLX5_SET(qpc, qpc, no_sq, 1);
387 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
388 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
389 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
390 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
391 mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
394 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
396 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
397 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
398 ndev->mvdev.mdev->priv.numa_node);
401 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
403 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
406 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
407 struct mlx5_vdpa_qp *vqp)
409 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
410 int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
411 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
418 err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
422 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
425 inlen += vqp->frag_buf.npages * sizeof(__be64);
428 in = kzalloc(inlen, GFP_KERNEL);
434 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
435 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
436 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
437 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
438 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
439 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
441 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
442 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
443 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
448 vqp->mqp.uid = ndev->mvdev.res.uid;
449 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
452 rx_post(vqp, mvq->num_ent);
458 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
461 rq_buf_free(ndev, vqp);
466 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
468 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
470 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
471 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
472 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
473 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
474 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
476 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
477 rq_buf_free(ndev, vqp);
481 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
483 return get_sw_cqe(cq, cq->mcq.cons_index);
486 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
488 struct mlx5_cqe64 *cqe64;
490 cqe64 = next_cqe_sw(vcq);
494 vcq->mcq.cons_index++;
498 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
500 struct mlx5_vdpa_net *ndev = mvq->ndev;
501 struct vdpa_callback *event_cb;
503 event_cb = &ndev->event_cbs[mvq->index];
504 mlx5_cq_set_ci(&mvq->cq.mcq);
506 /* make sure the CQ consumer update is visible to the hardware before updating
507 * the RX doorbell record.
510 rx_post(&mvq->vqqp, num);
511 if (event_cb->callback)
512 event_cb->callback(event_cb->private);
515 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
517 struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
518 struct mlx5_vdpa_net *ndev = mvq->ndev;
519 void __iomem *uar_page = ndev->mvdev.res.uar->map;
522 while (!mlx5_vdpa_poll_one(&mvq->cq)) {
524 if (num > mvq->num_ent / 2) {
525 /* If completions keep coming while we poll, we want to
526 * let the hardware know that we consumed them by
527 * updating the doorbell record. We also let the vdpa core
528 * know about this so it passes it on to the virtio driver
531 mlx5_vdpa_handle_completions(mvq, num);
537 mlx5_vdpa_handle_completions(mvq, num);
539 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
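/* Completion-flow sketch: device-side completions land on the CQ
 * drained above; mlx5_vdpa_handle_completions() updates the CQ
 * consumer index, reposts receive WQEs and fires the vDPA callback,
 * and the final mlx5_cq_arm() re-arms the CQ so the next completion
 * generates another event. Without the re-arm the notification
 * channel would stall.
 */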
542 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
544 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
545 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
546 void __iomem *uar_page = ndev->mvdev.res.uar->map;
547 u32 out[MLX5_ST_SZ_DW(create_cq_out)];
548 struct mlx5_vdpa_cq *vcq = &mvq->cq;
556 err = mlx5_db_alloc(mdev, &vcq->db);
560 vcq->mcq.set_ci_db = vcq->db.db;
561 vcq->mcq.arm_db = vcq->db.db + 1;
562 vcq->mcq.cqe_sz = 64;
564 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
568 cq_frag_buf_init(vcq, &vcq->buf);
570 inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
571 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
572 in = kzalloc(inlen, GFP_KERNEL);
578 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
579 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
580 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
582 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
583 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
585 /* Use vector 0 by default. Consider adding code to choose the least used
588 err = mlx5_comp_eqn_get(mdev, 0, &eqn);
592 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
593 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
594 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
595 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
596 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
598 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
602 vcq->mcq.comp = mlx5_vdpa_cq_comp;
604 vcq->mcq.set_ci_db = vcq->db.db;
605 vcq->mcq.arm_db = vcq->db.db + 1;
606 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
613 cq_frag_buf_free(ndev, &vcq->buf);
615 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
619 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
621 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
622 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
623 struct mlx5_vdpa_cq *vcq = &mvq->cq;
625 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
626 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
629 cq_frag_buf_free(ndev, &vcq->buf);
630 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
633 static int read_umem_params(struct mlx5_vdpa_net *ndev)
635 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
636 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
637 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
643 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
644 out = kzalloc(out_size, GFP_KERNEL);
648 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
649 MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
650 err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
652 mlx5_vdpa_warn(&ndev->mvdev,
653 "Failed reading vdpa umem capabilities with err %d\n", err);
657 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
659 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
660 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
662 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
663 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
665 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
666 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
673 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
674 struct mlx5_vdpa_umem **umemp)
681 p_a = ndev->umem_1_buffer_param_a;
682 p_b = ndev->umem_1_buffer_param_b;
683 *umemp = &mvq->umem1;
686 p_a = ndev->umem_2_buffer_param_a;
687 p_b = ndev->umem_2_buffer_param_b;
688 *umemp = &mvq->umem2;
691 p_a = ndev->umem_3_buffer_param_a;
692 p_b = ndev->umem_3_buffer_param_b;
693 *umemp = &mvq->umem3;
697 (*umemp)->size = p_a * mvq->num_ent + p_b;
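/* Worked example with made-up numbers: if umem_1_buffer_param_a == 128
 * and umem_1_buffer_param_b == 4096, a 256-entry virtqueue needs
 * 128 * 256 + 4096 = 36864 bytes for umem1. The per-umem a/b pairs are
 * the QUERY_HCA_CAP values read in read_umem_params() above.
 */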
700 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
702 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
705 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
708 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
713 struct mlx5_vdpa_umem *umem;
715 set_umem_size(ndev, mvq, num, &umem);
716 err = umem_frag_buf_alloc(ndev, umem, umem->size);
720 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
722 in = kzalloc(inlen, GFP_KERNEL);
728 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
729 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
730 um = MLX5_ADDR_OF(create_umem_in, in, umem);
731 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
732 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
734 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
735 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
737 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
739 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
744 umem->id = MLX5_GET(create_umem_out, out, umem_id);
751 umem_frag_buf_free(ndev, umem);
755 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
757 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
758 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
759 struct mlx5_vdpa_umem *umem;
773 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
774 MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
775 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
778 umem_frag_buf_free(ndev, umem);
781 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
786 for (num = 1; num <= 3; num++) {
787 err = create_umem(ndev, mvq, num);
794 for (num--; num > 0; num--)
795 umem_destroy(ndev, mvq, num);
800 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
804 for (num = 3; num > 0; num--)
805 umem_destroy(ndev, mvq, num);
808 static int get_queue_type(struct mlx5_vdpa_net *ndev)
812 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
814 /* prefer split queue */
815 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
816 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
818 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
820 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
823 static bool vq_is_tx(u16 idx)
829 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
830 MLX5_VIRTIO_NET_F_HOST_ECN = 4,
831 MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
832 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
833 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
834 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
835 MLX5_VIRTIO_NET_F_CSUM = 10,
836 MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
837 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
840 static u16 get_features(u64 features)
842 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
843 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
844 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
845 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
846 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
847 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
848 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
849 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
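/* Illustrative example: a guest that negotiated VIRTIO_NET_F_CSUM and
 * VIRTIO_NET_F_HOST_TSO4 maps to
 * BIT(MLX5_VIRTIO_NET_F_CSUM) | BIT(MLX5_VIRTIO_NET_F_HOST_TSO4),
 * i.e. 0x1400 in the device's 16-bit feature encoding.
 */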
852 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
854 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
855 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
858 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
860 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
861 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
862 pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
865 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
867 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
868 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
869 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
870 struct mlx5_vdpa_mr *vq_mr;
871 struct mlx5_vdpa_mr *vq_desc_mr;
879 err = umems_create(ndev, mvq);
883 in = kzalloc(inlen, GFP_KERNEL);
889 mlx_features = get_features(ndev->mvdev.actual_features);
890 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
892 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
893 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
894 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
896 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
897 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
898 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
899 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
901 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
903 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
904 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
906 if (vq_is_tx(mvq->index))
907 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
910 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
911 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
913 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
914 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
917 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
918 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
919 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
920 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
921 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
922 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
923 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
924 vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
926 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
928 vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
929 if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
930 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
932 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
933 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
934 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
935 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
936 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
937 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
938 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
939 if (counters_supported(&ndev->mvdev))
940 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
942 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
946 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
948 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
950 mlx5_vdpa_get_mr(mvdev, vq_mr);
953 if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
954 mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
955 mvq->desc_mr = vq_desc_mr;
963 umems_destroy(ndev, mvq);
967 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
969 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
970 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
972 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
973 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
974 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
975 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
976 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
977 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
978 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
979 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
982 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
983 umems_destroy(ndev, mvq);
985 mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
988 mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
992 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
994 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
997 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
999 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1002 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1003 int *outlen, u32 qpn, u32 rqpn)
1009 case MLX5_CMD_OP_2RST_QP:
1010 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1011 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1012 *in = kzalloc(*inlen, GFP_KERNEL);
1013 *out = kzalloc(*outlen, GFP_KERNEL);
1017 MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1018 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1019 MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1021 case MLX5_CMD_OP_RST2INIT_QP:
1022 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1023 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1024 *in = kzalloc(*inlen, GFP_KERNEL);
1025 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1029 MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1030 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1031 MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1032 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1033 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1034 MLX5_SET(qpc, qpc, rwe, 1);
1035 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1036 MLX5_SET(ads, pp, vhca_port_num, 1);
1038 case MLX5_CMD_OP_INIT2RTR_QP:
1039 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1040 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1041 *in = kzalloc(*inlen, GFP_KERNEL);
1042 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1046 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1047 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1048 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1049 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1050 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1051 MLX5_SET(qpc, qpc, log_msg_max, 30);
1052 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1053 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1054 MLX5_SET(ads, pp, fl, 1);
1056 case MLX5_CMD_OP_RTR2RTS_QP:
1057 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1058 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1059 *in = kzalloc(*inlen, GFP_KERNEL);
1060 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1064 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1065 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1066 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1067 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1068 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1069 MLX5_SET(ads, pp, ack_timeout, 14);
1070 MLX5_SET(qpc, qpc, retry_count, 7);
1071 MLX5_SET(qpc, qpc, rnr_retry, 7);
1074 goto outerr_nullify;
1087 static void free_inout(void *in, void *out)
1093 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1094 * firmware. The fw argument indicates whether the QP in question is the one used
1097 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1105 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1109 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1110 free_inout(in, out);
1114 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1118 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1122 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1126 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1130 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1134 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1138 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1142 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
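/* The sequence above walks both ends of the RC connection through the
 * standard QP state ladder, firmware QP first at every step:
 *
 *	fwqp: RST -> INIT -> RTR -> RTS
 *	vqqp: RST -> INIT -> RTR
 *
 * The driver-side QP only receives, so it never needs to reach RTS.
 */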
1145 struct mlx5_virtq_attr {
1147 u16 available_index;
1151 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1152 struct mlx5_virtq_attr *attr)
1154 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1155 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1161 out = kzalloc(outlen, GFP_KERNEL);
1165 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1167 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1168 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1169 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1170 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1171 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1175 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1176 memset(attr, 0, sizeof(*attr));
1177 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1178 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1179 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1188 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1190 return ndev->mvdev.vdev.config->resume;
1193 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1196 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1197 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1198 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1199 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1200 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1201 return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1202 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1208 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1210 /* Only state is always modifiable */
1211 if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1212 return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1213 mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1218 static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
1219 struct mlx5_vdpa_virtqueue *mvq,
1222 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1223 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1224 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1225 struct mlx5_vdpa_mr *desc_mr = NULL;
1226 struct mlx5_vdpa_mr *vq_mr = NULL;
1227 bool state_change = false;
1234 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1237 if (!modifiable_virtqueue_fields(mvq))
1240 in = kzalloc(inlen, GFP_KERNEL);
1244 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1246 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1247 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1248 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1249 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1251 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1252 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1254 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
1255 if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1260 MLX5_SET(virtio_net_q_object, obj_context, state, state);
1261 state_change = true;
1264 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1265 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1266 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1267 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1270 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1271 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1273 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1274 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1276 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1277 vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1280 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1282 mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1285 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1286 desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1288 if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1289 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1291 mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1294 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1295 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1300 mvq->fw_state = state;
1302 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1303 mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1304 mlx5_vdpa_get_mr(mvdev, vq_mr);
1308 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1309 mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1310 mlx5_vdpa_get_mr(mvdev, desc_mr);
1311 mvq->desc_mr = desc_mr;
1314 mvq->modified_fields = 0;
1321 static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
1322 struct mlx5_vdpa_virtqueue *mvq,
1325 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1326 return modify_virtqueue(ndev, mvq, state);
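/* Modify protocol sketch: callers accumulate MLX5_VIRTQ_MODIFY_MASK_*
 * bits in mvq->modified_fields, a single MODIFY_GENERAL_OBJECT command
 * then applies them all through modify_field_select, and the mask is
 * cleared once the command succeeds.
 */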
1329 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1331 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1332 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1336 if (!counters_supported(&ndev->mvdev))
1339 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1341 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1342 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1343 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1345 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1349 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1354 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1356 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1357 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1359 if (!counters_supported(&ndev->mvdev))
1362 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1363 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1364 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1365 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1366 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1367 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1370 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1372 struct vdpa_callback *cb = priv;
1375 return cb->callback(cb->private);
1380 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1381 struct mlx5_vdpa_virtqueue *mvq)
1383 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1384 struct mlx5_vdpa_irq_pool_entry *ent;
1388 for (i = 0; i < irqp->num_ent; i++) {
1389 ent = &irqp->entries[i];
1391 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1392 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1393 ent->dev_id = &ndev->event_cbs[mvq->index];
1394 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1395 ent->name, ent->dev_id);
1400 mvq->map = ent->map;
1406 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1407 struct mlx5_vdpa_virtqueue *mvq)
1409 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1412 for (i = 0; i < irqp->num_ent; i++)
1413 if (mvq->map.virq == irqp->entries[i].map.virq) {
1414 free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1415 irqp->entries[i].used = false;
1420 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1422 u16 idx = mvq->index;
1428 if (mvq->initialized)
1431 err = cq_create(ndev, idx, mvq->num_ent);
1435 err = qp_create(ndev, mvq, &mvq->fwqp);
1439 err = qp_create(ndev, mvq, &mvq->vqqp);
1443 err = connect_qps(ndev, mvq);
1447 err = counter_set_alloc(ndev, mvq);
1451 alloc_vector(ndev, mvq);
1452 err = create_virtqueue(ndev, mvq);
1457 err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1459 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify vq idx %d to ready (%d)\n",
1465 mvq->initialized = true;
1469 destroy_virtqueue(ndev, mvq);
1471 dealloc_vector(ndev, mvq);
1472 counter_set_dealloc(ndev, mvq);
1474 qp_destroy(ndev, &mvq->vqqp);
1476 qp_destroy(ndev, &mvq->fwqp);
1478 cq_destroy(ndev, idx);
1482 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1484 struct mlx5_virtq_attr attr;
1486 if (!mvq->initialized)
1489 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1492 if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1493 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1495 if (query_virtqueue(ndev, mvq, &attr)) {
1496 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1499 mvq->avail_idx = attr.available_index;
1500 mvq->used_idx = attr.used_index;
1503 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1507 for (i = 0; i < ndev->mvdev.max_vqs; i++)
1508 suspend_vq(ndev, &ndev->vqs[i]);
1511 static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1513 if (!mvq->initialized || !is_resumable(ndev))
1516 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)
1519 if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY))
1520 mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index);
1523 static void resume_vqs(struct mlx5_vdpa_net *ndev)
1525 for (int i = 0; i < ndev->mvdev.max_vqs; i++)
1526 resume_vq(ndev, &ndev->vqs[i]);
1529 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1531 if (!mvq->initialized)
1534 suspend_vq(ndev, mvq);
1535 mvq->modified_fields = 0;
1536 destroy_virtqueue(ndev, mvq);
1537 dealloc_vector(ndev, mvq);
1538 counter_set_dealloc(ndev, mvq);
1539 qp_destroy(ndev, &mvq->vqqp);
1540 qp_destroy(ndev, &mvq->fwqp);
1541 cq_destroy(ndev, mvq->index);
1542 mvq->initialized = false;
1545 static int create_rqt(struct mlx5_vdpa_net *ndev)
1547 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1548 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1556 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1557 in = kzalloc(inlen, GFP_KERNEL);
1561 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1562 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1564 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1565 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1566 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1567 for (i = 0, j = 0; i < act_sz; i++, j += 2)
1568 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1570 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1571 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1579 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1581 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1583 int act_sz = roundup_pow_of_two(num / 2);
1591 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1592 in = kzalloc(inlen, GFP_KERNEL);
1596 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1597 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1598 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1599 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1601 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1602 for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1603 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1605 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1606 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1614 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1616 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1619 static int create_tir(struct mlx5_vdpa_net *ndev)
1621 #define HASH_IP_L4PORTS \
1622 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
1623 MLX5_HASH_FIELD_SEL_L4_DPORT)
1624 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1625 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1626 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1627 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1628 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
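/* This is the well-known default Toeplitz RSS key (the one recommended
 * by the Microsoft RSS specification and reused by several NIC
 * drivers), hashing IPv4/TCP flows per the field selector set below.
 */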
1635 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1639 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1640 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1641 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1643 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1644 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1645 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1646 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1648 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1649 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1650 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1651 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1653 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1654 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1656 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1661 mlx5_vdpa_add_tirn(ndev);
1665 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1667 mlx5_vdpa_remove_tirn(ndev);
1668 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1671 #define MAX_STEERING_ENT 0x8000
1672 #define MAX_STEERING_GROUPS 2
1674 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1680 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1681 struct macvlan_node *node,
1682 struct mlx5_flow_act *flow_act,
1683 struct mlx5_flow_destination *dests)
1685 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1688 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1689 if (IS_ERR(node->ucast_counter.counter))
1690 return PTR_ERR(node->ucast_counter.counter);
1692 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1693 if (IS_ERR(node->mcast_counter.counter)) {
1694 err = PTR_ERR(node->mcast_counter.counter);
1695 goto err_mcast_counter;
1698 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1699 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1703 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1710 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1711 struct macvlan_node *node)
1713 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1714 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1715 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1719 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1720 struct macvlan_node *node)
1722 struct mlx5_flow_destination dests[NUM_DESTS] = {};
1723 struct mlx5_flow_act flow_act = {};
1724 struct mlx5_flow_spec *spec;
1732 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1736 vid = key2vid(node->macvlan);
1737 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1738 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1739 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1740 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1741 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1742 eth_broadcast_addr(dmac_c);
1743 ether_addr_copy(dmac_v, mac);
1744 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1745 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1746 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1749 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1750 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1752 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1753 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1754 dests[0].tir_num = ndev->res.tirn;
1755 err = add_steering_counters(ndev, node, &flow_act, dests);
1759 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1760 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1762 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1763 if (IS_ERR(node->ucast_rule)) {
1764 err = PTR_ERR(node->ucast_rule);
1768 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1769 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1772 memset(dmac_c, 0, ETH_ALEN);
1773 memset(dmac_v, 0, ETH_ALEN);
1776 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1777 if (IS_ERR(node->mcast_rule)) {
1778 err = PTR_ERR(node->mcast_rule);
1782 mlx5_vdpa_add_rx_counters(ndev, node);
1786 mlx5_del_flow_rules(node->ucast_rule);
1788 remove_steering_counters(ndev, node);
1794 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1795 struct macvlan_node *node)
1797 mlx5_vdpa_remove_rx_counters(ndev, node);
1798 mlx5_del_flow_rules(node->ucast_rule);
1799 mlx5_del_flow_rules(node->mcast_rule);
1802 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1807 vlan = MLX5V_UNTAGGED;
1809 val = (u64)vlan << 48 |
1820 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1822 struct macvlan_node *pos;
1825 idx = hash_64(value, 8); /* TODO: revisit the 8-bit hash width */
1826 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1827 if (pos->macvlan == value)
1833 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1835 struct macvlan_node *ptr;
1840 val = search_val(mac, vid, tagged);
1841 if (mac_vlan_lookup(ndev, val))
1844 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1848 ptr->tagged = tagged;
1851 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1855 idx = hash_64(val, 8);
1856 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1864 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1866 struct macvlan_node *ptr;
1868 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1872 hlist_del(&ptr->hlist);
1873 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1874 remove_steering_counters(ndev, ptr);
1878 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1880 struct macvlan_node *pos;
1881 struct hlist_node *n;
1884 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1885 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1886 hlist_del(&pos->hlist);
1887 mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1888 remove_steering_counters(ndev, pos);
1894 static int setup_steering(struct mlx5_vdpa_net *ndev)
1896 struct mlx5_flow_table_attr ft_attr = {};
1897 struct mlx5_flow_namespace *ns;
1900 ft_attr.max_fte = MAX_STEERING_ENT;
1901 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1903 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1905 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1909 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1910 if (IS_ERR(ndev->rxft)) {
1911 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1912 return PTR_ERR(ndev->rxft);
1914 mlx5_vdpa_add_rx_flow_table(ndev);
1916 err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1923 mlx5_vdpa_remove_rx_flow_table(ndev);
1924 mlx5_destroy_flow_table(ndev->rxft);
1928 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1930 clear_mac_vlan_table(ndev);
1931 mlx5_vdpa_remove_rx_flow_table(ndev);
1932 mlx5_destroy_flow_table(ndev->rxft);
1935 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1937 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1938 struct mlx5_control_vq *cvq = &mvdev->cvq;
1939 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1940 struct mlx5_core_dev *pfmdev;
1942 u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1944 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1946 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1947 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1948 if (read != ETH_ALEN)
1951 if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1952 status = VIRTIO_NET_OK;
1956 if (is_zero_ether_addr(mac))
1959 if (!is_zero_ether_addr(ndev->config.mac)) {
1960 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1961 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1967 if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1968 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1973 /* back up the original MAC address so that, if we fail to add the forwarding rules,
1974 * we can restore it
1976 memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1978 memcpy(ndev->config.mac, mac, ETH_ALEN);
1980 /* We need to recreate the flow table entry, so that the packet can be forwarded back
1982 mac_vlan_del(ndev, mac_back, 0, false);
1984 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1985 mlx5_vdpa_warn(mvdev, "failed to insert forwarding rules, trying to restore\n");
1987 /* Although we are unlikely to get here, we still need to double-check */
1988 if (is_zero_ether_addr(mac_back)) {
1989 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1993 /* Try to restore the original MAC address to the MPFS table, and try to restore
1994 * the forward rule entry.
1996 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1997 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2001 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2002 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2006 memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2008 if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2009 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2014 status = VIRTIO_NET_OK;
2024 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2026 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2027 int cur_qps = ndev->cur_num_vqs / 2;
2031 if (cur_qps > newqps) {
2032 err = modify_rqt(ndev, 2 * newqps);
2036 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
2037 teardown_vq(ndev, &ndev->vqs[i]);
2039 ndev->cur_num_vqs = 2 * newqps;
2041 ndev->cur_num_vqs = 2 * newqps;
2042 for (i = cur_qps * 2; i < 2 * newqps; i++) {
2043 err = setup_vq(ndev, &ndev->vqs[i]);
2047 err = modify_rqt(ndev, 2 * newqps);
2054 for (--i; i >= 2 * cur_qps; --i)
2055 teardown_vq(ndev, &ndev->vqs[i]);
2057 ndev->cur_num_vqs = 2 * cur_qps;
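/* Note: cur_num_vqs counts individual data virtqueues, i.e. twice the
 * number of RX/TX queue pairs. On a failed increase, the rollback
 * above tears down any newly created VQs and restores the old count.
 */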
2062 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2064 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2065 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2066 struct mlx5_control_vq *cvq = &mvdev->cvq;
2067 struct virtio_net_ctrl_mq mq;
2072 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2073 /* This mq feature check aligns with pre-existing userspace
2076 * Without it, an untrusted driver could fake a multiqueue config
2077 * request down to a non-mq device, which may cause the kernel to
2078 * panic due to uninitialized resources for extra vqs. Even with
2079 * a well-behaved guest driver, it is not expected to allow
2080 * changing the number of vqs on a non-mq device.
2082 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2085 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2086 if (read != sizeof(mq))
2089 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2090 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2091 newqps > ndev->rqt_size)
2094 if (ndev->cur_num_vqs == 2 * newqps) {
2095 status = VIRTIO_NET_OK;
2099 if (!change_num_qps(mvdev, newqps))
2100 status = VIRTIO_NET_OK;
2110 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2112 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2113 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2114 struct mlx5_control_vq *cvq = &mvdev->cvq;
2119 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2123 case VIRTIO_NET_CTRL_VLAN_ADD:
2124 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2125 if (read != sizeof(vlan))
2128 id = mlx5vdpa16_to_cpu(mvdev, vlan);
2129 if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2132 status = VIRTIO_NET_OK;
2134 case VIRTIO_NET_CTRL_VLAN_DEL:
2135 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2136 if (read != sizeof(vlan))
2139 id = mlx5vdpa16_to_cpu(mvdev, vlan);
2140 mac_vlan_del(ndev, ndev->config.mac, id, true);
2141 status = VIRTIO_NET_OK;
2150 static void mlx5_cvq_kick_handler(struct work_struct *work)
2152 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2153 struct virtio_net_ctrl_hdr ctrl;
2154 struct mlx5_vdpa_wq_ent *wqent;
2155 struct mlx5_vdpa_dev *mvdev;
2156 struct mlx5_control_vq *cvq;
2157 struct mlx5_vdpa_net *ndev;
2161 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2162 mvdev = wqent->mvdev;
2163 ndev = to_mlx5_vdpa_ndev(mvdev);
2166 down_write(&ndev->reslock);
2168 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2171 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2178 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2183 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2184 if (read != sizeof(ctrl))
2187 cvq->received_desc++;
2188 switch (ctrl.class) {
2189 case VIRTIO_NET_CTRL_MAC:
2190 status = handle_ctrl_mac(mvdev, ctrl.cmd);
2192 case VIRTIO_NET_CTRL_MQ:
2193 status = handle_ctrl_mq(mvdev, ctrl.cmd);
2195 case VIRTIO_NET_CTRL_VLAN:
2196 status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2202 /* Make sure data is written before advancing index */
2205 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2206 vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2207 vringh_kiov_cleanup(&cvq->riov);
2208 vringh_kiov_cleanup(&cvq->wiov);
2210 if (vringh_need_notify_iotlb(&cvq->vring))
2211 vringh_notify(&cvq->vring);
2213 cvq->completed_desc++;
2214 queue_work(mvdev->wq, &wqent->work);
2219 up_write(&ndev->reslock);
2222 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2224 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2225 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2226 struct mlx5_vdpa_virtqueue *mvq;
2228 if (!is_index_valid(mvdev, idx))
2231 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2232 if (!mvdev->wq || !mvdev->cvq.ready)
2235 queue_work(mvdev->wq, &ndev->cvq_ent.work);
2239 mvq = &ndev->vqs[idx];
2240 if (unlikely(!mvq->ready))
2243 iowrite16(idx, ndev->mvdev.res.kick_addr);
2246 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2247 u64 driver_area, u64 device_area)
2249 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2250 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2251 struct mlx5_vdpa_virtqueue *mvq;
2253 if (!is_index_valid(mvdev, idx))
2256 if (is_ctrl_vq_idx(mvdev, idx)) {
2257 mvdev->cvq.desc_addr = desc_area;
2258 mvdev->cvq.device_addr = device_area;
2259 mvdev->cvq.driver_addr = driver_area;
2263 mvq = &ndev->vqs[idx];
2264 mvq->desc_addr = desc_area;
2265 mvq->device_addr = device_area;
2266 mvq->driver_addr = driver_area;
2267 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2271 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2273 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2274 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2275 struct mlx5_vdpa_virtqueue *mvq;
2277 if (!is_index_valid(mvdev, idx))
2280 if (is_ctrl_vq_idx(mvdev, idx)) {
2281 struct mlx5_control_vq *cvq = &mvdev->cvq;
2283 cvq->vring.vring.num = num;
2287 mvq = &ndev->vqs[idx];
2291 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2293 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2294 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2296 ndev->event_cbs[idx] = *cb;
2297 if (is_ctrl_vq_idx(mvdev, idx))
2298 mvdev->cvq.event_cb = *cb;
2301 static void mlx5_cvq_notify(struct vringh *vring)
2303 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2305 if (!cvq->event_cb.callback)
2308 cvq->event_cb.callback(cvq->event_cb.private);
2311 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2313 struct mlx5_control_vq *cvq = &mvdev->cvq;
2319 cvq->vring.notify = mlx5_cvq_notify;
2322 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2324 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2325 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2326 struct mlx5_vdpa_virtqueue *mvq;
2329 if (!mvdev->actual_features)
2332 if (!is_index_valid(mvdev, idx))
2335 if (is_ctrl_vq_idx(mvdev, idx)) {
2336 set_cvq_ready(mvdev, ready);
2340 mvq = &ndev->vqs[idx];
2342 suspend_vq(ndev, mvq);
2344 err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2346 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2355 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2357 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2358 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2360 if (!is_index_valid(mvdev, idx))
2363 if (is_ctrl_vq_idx(mvdev, idx))
2364 return mvdev->cvq.ready;
2366 return ndev->vqs[idx].ready;
2369 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2370 const struct vdpa_vq_state *state)
2372 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2373 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2374 struct mlx5_vdpa_virtqueue *mvq;
2376 if (!is_index_valid(mvdev, idx))
2379 if (is_ctrl_vq_idx(mvdev, idx)) {
2380 mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2384 mvq = &ndev->vqs[idx];
2385 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2386 mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2390 mvq->used_idx = state->split.avail_index;
2391 mvq->avail_idx = state->split.avail_index;
2392 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2393 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2397 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2399 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2400 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2401 struct mlx5_vdpa_virtqueue *mvq;
2402 struct mlx5_virtq_attr attr;
2405 if (!is_index_valid(mvdev, idx))
2408 if (is_ctrl_vq_idx(mvdev, idx)) {
2409 state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2413 mvq = &ndev->vqs[idx];
2414 /* If the virtq object was destroyed, use the value saved by
2415 * suspend_vq at the last moment. This caters for userspace
2416 * that cares about emulating the index after the vq is stopped.
2418 if (!mvq->initialized) {
2419 /* Firmware returns a wrong value for the available index.
2420 * Since both values should be identical, we take the value of
2421 * used_idx, which is reported correctly.
2423 state->split.avail_index = mvq->used_idx;
2427 err = query_virtqueue(ndev, mvq, &attr);
2429 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2432 state->split.avail_index = attr.used_index;
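/* Sketch of an assumed consumer (not driver code): with vhost-vdpa, this
 * op typically backs the VHOST_GET_VRING_BASE ioctl, e.g.:
 *
 *	struct vhost_vring_state s = { .index = 0 };
 *	ioctl(vhost_vdpa_fd, VHOST_GET_VRING_BASE, &s);
 *
 * after which s.num holds the available index recovered above.
 */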
2436 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2441 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2443 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2445 if (is_ctrl_vq_idx(mvdev, idx))
2446 return MLX5_VDPA_CVQ_GROUP;
2448 return MLX5_VDPA_DATAVQ_GROUP;
2451 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2453 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2455 if (is_ctrl_vq_idx(mvdev, idx))
2456 return MLX5_VDPA_CVQ_GROUP;
2458 return MLX5_VDPA_DATAVQ_DESC_GROUP;
2461 static u64 mlx_to_virtio_features(u16 dev_features)
2465 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2466 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2467 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2468 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2469 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2470 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2471 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2472 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2473 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2474 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2475 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2476 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2477 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2478 result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2479 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2480 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2481 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2482 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2487 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2489 u64 mlx_vdpa_features = 0;
2492 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2493 mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2494 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2495 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2496 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2497 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2498 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2499 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2500 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2501 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2502 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2503 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2505 return mlx_vdpa_features;
2508 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2510 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2511 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2513 print_features(mvdev, ndev->mvdev.mlx_features, false);
2514 return ndev->mvdev.mlx_features;
2517 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2519 /* Minimum features to expect */
2520 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2523 /* Double-check the feature combination sent down by the driver.
2524 * Fail invalid combinations that lack a feature's required dependency.
2526 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2527 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2528 * By failing the invalid features sent down by untrusted drivers,
2529 * we're assured the assumptions made by is_index_valid() and
2530 * is_ctrl_vq_idx() will not be compromised.
2532 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2533 BIT_ULL(VIRTIO_NET_F_MQ))
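/* Worked example of the dependency rule above: a feature word with _F_MQ
 * set but _F_CTRL_VQ clear fails this check (assumed to return -EINVAL on
 * the elided error path):
 *
 *	u64 bad = BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | BIT_ULL(VIRTIO_NET_F_MQ);
 *	verify_driver_features(mvdev, bad);
 */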
2539 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2541 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2545 for (i = 0; i < mvdev->max_vqs; i++) {
2546 err = setup_vq(ndev, &ndev->vqs[i]);
2554 for (--i; i >= 0; i--)
2555 teardown_vq(ndev, &ndev->vqs[i]);
2560 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2562 struct mlx5_vdpa_virtqueue *mvq;
2565 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2566 mvq = &ndev->vqs[i];
2567 if (!mvq->initialized)
2570 teardown_vq(ndev, mvq);
2574 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2576 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2577 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2578 /* MQ supported. CVQ index is right above the last data virtqueue's */
2579 mvdev->max_idx = mvdev->max_vqs;
2581 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
2587 /* Two data virtqueues only: one for rx and one for tx */
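/* Worked example of the index layout above, assuming max_vqs = 16:
 * with _F_MQ and _F_CTRL_VQ negotiated, data VQs occupy indices 0..15 and
 * the CVQ sits at index 16 (max_idx = max_vqs); with _F_CTRL_VQ alone,
 * data VQs are 0 and 1 and the CVQ is index 2; with neither, only
 * indices 0 and 1 exist.
 */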
2592 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2594 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2595 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2598 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2599 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2600 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2602 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2604 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2608 return MLX5_GET(query_vport_state_out, out, state);
2611 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2613 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2620 static void update_carrier(struct work_struct *work)
2622 struct mlx5_vdpa_wq_ent *wqent;
2623 struct mlx5_vdpa_dev *mvdev;
2624 struct mlx5_vdpa_net *ndev;
2626 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2627 mvdev = wqent->mvdev;
2628 ndev = to_mlx5_vdpa_ndev(mvdev);
2629 if (get_link_state(mvdev))
2630 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2632 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2634 if (ndev->config_cb.callback)
2635 ndev->config_cb.callback(ndev->config_cb.private);
2640 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2642 struct mlx5_vdpa_wq_ent *wqent;
2644 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2648 wqent->mvdev = &ndev->mvdev;
2649 INIT_WORK(&wqent->work, update_carrier);
2650 queue_work(ndev->mvdev.wq, &wqent->work);
2654 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2656 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2657 struct mlx5_eqe *eqe = param;
2658 int ret = NOTIFY_DONE;
2660 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2661 switch (eqe->sub_type) {
2662 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2663 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2664 if (queue_link_work(ndev))
2677 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2679 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2682 ndev->nb.notifier_call = event_handler;
2683 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2684 ndev->nb_registered = true;
2685 queue_link_work(ndev);
2688 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2690 if (!ndev->nb_registered)
2693 ndev->nb_registered = false;
2694 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2696 flush_workqueue(ndev->mvdev.wq);
2699 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2701 return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2704 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2706 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2707 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2710 print_features(mvdev, features, true);
2712 err = verify_driver_features(mvdev, features);
2716 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2717 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2718 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2722 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2723 * 5.1.6.5.5 "Device operation in multiqueue mode":
2725 * Multiqueue is disabled by default.
2726 * The driver enables multiqueue by sending a command using class
2727 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2728 * operation, as follows: ...
2730 ndev->cur_num_vqs = 2;
2732 update_cvq_info(mvdev);
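/* Worked example (hypothetical numbers): a device provisioned with
 * max_virtqueue_pairs = 4 that negotiates _F_MQ gets rqt_size = 4, yet
 * starts with cur_num_vqs = 2 (a single pair) until the guest enables
 * more pairs via VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET.
 */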
2736 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2738 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2739 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2741 ndev->config_cb = *cb;
2744 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2745 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2747 return MLX5_VDPA_MAX_VQ_ENTRIES;
2750 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2752 return VIRTIO_ID_NET;
2755 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2757 return PCI_VENDOR_ID_MELLANOX;
2760 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2762 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2763 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2765 print_status(mvdev, ndev->mvdev.status, false);
2766 return ndev->mvdev.status;
2769 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2771 struct mlx5_vq_restore_info *ri = &mvq->ri;
2772 struct mlx5_virtq_attr attr = {};
2775 if (mvq->initialized) {
2776 err = query_virtqueue(ndev, mvq, &attr);
2781 ri->avail_index = attr.available_index;
2782 ri->used_index = attr.used_index;
2783 ri->ready = mvq->ready;
2784 ri->num_ent = mvq->num_ent;
2785 ri->desc_addr = mvq->desc_addr;
2786 ri->device_addr = mvq->device_addr;
2787 ri->driver_addr = mvq->driver_addr;
2793 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2797 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2798 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2799 save_channel_info(ndev, &ndev->vqs[i]);
2804 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2808 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2809 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2812 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2814 struct mlx5_vdpa_virtqueue *mvq;
2815 struct mlx5_vq_restore_info *ri;
2818 mlx5_clear_vqs(ndev);
2820 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2821 mvq = &ndev->vqs[i];
2826 mvq->avail_idx = ri->avail_index;
2827 mvq->used_idx = ri->used_index;
2828 mvq->ready = ri->ready;
2829 mvq->num_ent = ri->num_ent;
2830 mvq->desc_addr = ri->desc_addr;
2831 mvq->device_addr = ri->device_addr;
2832 mvq->driver_addr = ri->driver_addr;
2837 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2838 struct mlx5_vdpa_mr *new_mr,
2841 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2842 bool teardown = !is_resumable(ndev);
2847 err = save_channels_info(ndev);
2851 teardown_driver(ndev);
2854 mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2856 for (int i = 0; i < ndev->cur_num_vqs; i++)
2857 ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
2858 MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
2860 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2864 restore_channels_info(ndev);
2865 err = setup_driver(mvdev);
2875 /* reslock must be held for this function */
2876 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2878 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2881 WARN_ON(!rwsem_is_locked(&ndev->reslock));
2884 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2888 mlx5_vdpa_add_debugfs(ndev);
2890 err = read_umem_params(ndev);
2894 err = setup_virtqueues(mvdev);
2896 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2900 err = create_rqt(ndev);
2902 mlx5_vdpa_warn(mvdev, "create_rqt\n");
2906 err = create_tir(ndev);
2908 mlx5_vdpa_warn(mvdev, "create_tir\n");
2912 err = setup_steering(ndev);
2914 mlx5_vdpa_warn(mvdev, "setup_steering\n");
2926 teardown_virtqueues(ndev);
2928 mlx5_vdpa_remove_debugfs(ndev);
2933 /* reslock must be held for this function */
2934 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2937 WARN_ON(!rwsem_is_locked(&ndev->reslock));
2942 mlx5_vdpa_remove_debugfs(ndev);
2943 teardown_steering(ndev);
2946 teardown_virtqueues(ndev);
2947 ndev->setup = false;
2950 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2954 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2955 ndev->vqs[i].ready = false;
2956 ndev->vqs[i].modified_fields = 0;
2959 ndev->mvdev.cvq.ready = false;
2962 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2964 struct mlx5_control_vq *cvq = &mvdev->cvq;
2967 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2968 u16 idx = cvq->vring.last_avail_idx;
2970 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2971 cvq->vring.vring.num, false,
2972 (struct vring_desc *)(uintptr_t)cvq->desc_addr,
2973 (struct vring_avail *)(uintptr_t)cvq->driver_addr,
2974 (struct vring_used *)(uintptr_t)cvq->device_addr);
2977 cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2982 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2984 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2985 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2988 print_status(mvdev, status, true);
2990 down_write(&ndev->reslock);
2992 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2993 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2994 err = setup_cvq_vring(mvdev);
2996 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2999 register_link_notifier(ndev);
3000 err = setup_driver(mvdev);
3002 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
3006 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3011 ndev->mvdev.status = status;
3012 up_write(&ndev->reslock);
3016 unregister_link_notifier(ndev);
3018 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3019 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3021 up_write(&ndev->reslock);
3024 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3028 /* By default, all groups are mapped to ASID 0 */
3029 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3030 mvdev->group2asid[i] = 0;
3033 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3035 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3036 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3038 print_status(mvdev, 0, true);
3039 mlx5_vdpa_info(mvdev, "performing device reset\n");
3041 down_write(&ndev->reslock);
3042 unregister_link_notifier(ndev);
3043 teardown_driver(ndev);
3044 clear_vqs_ready(ndev);
3045 if (flags & VDPA_RESET_F_CLEAN_MAP)
3046 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3047 ndev->mvdev.status = 0;
3048 ndev->mvdev.suspended = false;
3049 ndev->cur_num_vqs = 0;
3050 ndev->mvdev.cvq.received_desc = 0;
3051 ndev->mvdev.cvq.completed_desc = 0;
3052 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3053 ndev->mvdev.actual_features = 0;
3054 init_group_to_asid_map(mvdev);
3055 ++mvdev->generation;
3057 if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3058 MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3059 if (mlx5_vdpa_create_dma_mr(mvdev))
3060 mlx5_vdpa_warn(mvdev, "create MR failed\n");
3062 up_write(&ndev->reslock);
3067 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3069 return mlx5_vdpa_compat_reset(vdev, 0);
3072 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3074 return sizeof(struct virtio_net_config);
3077 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3080 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3081 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3083 if (offset + len <= sizeof(struct virtio_net_config))
3084 memcpy(buf, (u8 *)&ndev->config + offset, len);
3087 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3093 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3095 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3097 return mvdev->generation;
3100 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3103 struct mlx5_vdpa_mr *new_mr;
3106 if (asid >= MLX5_VDPA_NUM_AS)
3109 if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3110 new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3111 if (IS_ERR(new_mr)) {
3112 err = PTR_ERR(new_mr);
3113 mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
3117 /* Empty iotlbs don't have an mr but will clear the previous mr. */
3121 if (!mvdev->mr[asid]) {
3122 mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3124 err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3126 mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
3131 return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3134 mlx5_vdpa_put_mr(mvdev, new_mr);
3138 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3139 struct vhost_iotlb *iotlb)
3141 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3142 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3145 down_write(&ndev->reslock);
3146 err = set_map_data(mvdev, iotlb, asid);
3147 up_write(&ndev->reslock);
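/* Sketch of an assumed consumer (not driver code): with vhost-vdpa, these
 * mappings are installed by writing a vhost_msg_v2 to the device fd, e.g.:
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *	write(vhost_vdpa_fd, &msg, sizeof(msg));
 */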
3151 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3153 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3154 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3157 down_write(&ndev->reslock);
3158 err = mlx5_vdpa_reset_mr(mvdev, asid);
3159 up_write(&ndev->reslock);
3163 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3165 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3167 if (is_ctrl_vq_idx(mvdev, idx))
3170 return mvdev->vdev.dma_dev;
3173 static void free_irqs(struct mlx5_vdpa_net *ndev)
3175 struct mlx5_vdpa_irq_pool_entry *ent;
3178 if (!msix_mode_supported(&ndev->mvdev))
3181 if (!ndev->irqp.entries)
3184 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3185 ent = ndev->irqp.entries + i;
3187 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3189 kfree(ndev->irqp.entries);
3192 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3194 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3195 struct mlx5_core_dev *pfmdev;
3196 struct mlx5_vdpa_net *ndev;
3198 ndev = to_mlx5_vdpa_ndev(mvdev);
3200 free_resources(ndev);
3201 mlx5_vdpa_destroy_mr_resources(mvdev);
3202 if (!is_zero_ether_addr(ndev->config.mac)) {
3203 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3204 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3206 mlx5_vdpa_free_resources(&ndev->mvdev);
3208 kfree(ndev->event_cbs);
3212 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3214 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3215 struct vdpa_notification_area ret = {};
3216 struct mlx5_vdpa_net *ndev;
3219 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3222 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
3223 * notification to avoid the risk of mapping pages that contain BAR of more
3224 * than one SF
3225 */
3226 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3229 ndev = to_mlx5_vdpa_ndev(mvdev);
3230 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3232 ret.size = PAGE_SIZE;
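/* Sketch of an assumed consumer (not driver code): vhost-vdpa exposes this
 * area through mmap() on the device fd (write-only, one page per queue
 * index), letting userspace kick a queue with a plain store, e.g.:
 *
 *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			vhost_vdpa_fd, idx * page_size);
 *	*(volatile uint16_t *)db = idx;
 */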
3236 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3238 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3239 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3240 struct mlx5_vdpa_virtqueue *mvq;
3242 if (!is_index_valid(mvdev, idx))
3245 if (is_ctrl_vq_idx(mvdev, idx))
3248 mvq = &ndev->vqs[idx];
3252 return mvq->map.virq;
3255 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3257 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3259 return mvdev->actual_features;
3262 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3263 u64 *received_desc, u64 *completed_desc)
3265 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3266 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3271 if (!counters_supported(&ndev->mvdev))
3274 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3277 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3279 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3280 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3281 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3282 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3284 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3288 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3289 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3290 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3294 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3295 struct sk_buff *msg,
3296 struct netlink_ext_ack *extack)
3298 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3299 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3300 struct mlx5_vdpa_virtqueue *mvq;
3301 struct mlx5_control_vq *cvq;
3306 down_read(&ndev->reslock);
3307 if (!is_index_valid(mvdev, idx)) {
3308 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3313 if (idx == ctrl_vq_idx(mvdev)) {
3315 received_desc = cvq->received_desc;
3316 completed_desc = cvq->completed_desc;
3320 mvq = &ndev->vqs[idx];
3321 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3323 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3329 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3332 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3336 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3339 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3345 up_read(&ndev->reslock);
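/* Assumed userspace view (iproute2 vdpa; exact syntax may vary): the two
 * counters exported above surface per queue, e.g.:
 *
 *	$ vdpa dev vstats show vdpa0 qidx 1
 *
 * which reports received_desc and completed_desc for data VQ 1.
 */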
3349 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3351 struct mlx5_control_vq *cvq;
3353 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3360 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3362 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3363 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3364 struct mlx5_vdpa_virtqueue *mvq;
3367 mlx5_vdpa_info(mvdev, "suspending device\n");
3369 down_write(&ndev->reslock);
3370 unregister_link_notifier(ndev);
3371 for (i = 0; i < ndev->cur_num_vqs; i++) {
3372 mvq = &ndev->vqs[i];
3373 suspend_vq(ndev, mvq);
3375 mlx5_vdpa_cvq_suspend(mvdev);
3376 mvdev->suspended = true;
3377 up_write(&ndev->reslock);
3381 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3383 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3384 struct mlx5_vdpa_net *ndev;
3386 ndev = to_mlx5_vdpa_ndev(mvdev);
3388 mlx5_vdpa_info(mvdev, "resuming device\n");
3390 down_write(&ndev->reslock);
3391 mvdev->suspended = false;
3393 register_link_notifier(ndev);
3394 up_write(&ndev->reslock);
3398 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3401 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3404 if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3407 mvdev->group2asid[group] = asid;
3409 mutex_lock(&mvdev->mr_mtx);
3410 if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
3411 err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
3412 mutex_unlock(&mvdev->mr_mtx);
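/* Sketch of an assumed consumer (not driver code): userspace selects the
 * ASID for a group via the VHOST_VDPA_SET_GROUP_ASID ioctl, reusing
 * struct vhost_vring_state as { .index = group, .num = asid }.
 */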
3417 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3418 .set_vq_address = mlx5_vdpa_set_vq_address,
3419 .set_vq_num = mlx5_vdpa_set_vq_num,
3420 .kick_vq = mlx5_vdpa_kick_vq,
3421 .set_vq_cb = mlx5_vdpa_set_vq_cb,
3422 .set_vq_ready = mlx5_vdpa_set_vq_ready,
3423 .get_vq_ready = mlx5_vdpa_get_vq_ready,
3424 .set_vq_state = mlx5_vdpa_set_vq_state,
3425 .get_vq_state = mlx5_vdpa_get_vq_state,
3426 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3427 .get_vq_notification = mlx5_get_vq_notification,
3428 .get_vq_irq = mlx5_get_vq_irq,
3429 .get_vq_align = mlx5_vdpa_get_vq_align,
3430 .get_vq_group = mlx5_vdpa_get_vq_group,
3431 .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3432 .get_device_features = mlx5_vdpa_get_device_features,
3433 .get_backend_features = mlx5_vdpa_get_backend_features,
3434 .set_driver_features = mlx5_vdpa_set_driver_features,
3435 .get_driver_features = mlx5_vdpa_get_driver_features,
3436 .set_config_cb = mlx5_vdpa_set_config_cb,
3437 .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3438 .get_device_id = mlx5_vdpa_get_device_id,
3439 .get_vendor_id = mlx5_vdpa_get_vendor_id,
3440 .get_status = mlx5_vdpa_get_status,
3441 .set_status = mlx5_vdpa_set_status,
3442 .reset = mlx5_vdpa_reset,
3443 .compat_reset = mlx5_vdpa_compat_reset,
3444 .get_config_size = mlx5_vdpa_get_config_size,
3445 .get_config = mlx5_vdpa_get_config,
3446 .set_config = mlx5_vdpa_set_config,
3447 .get_generation = mlx5_vdpa_get_generation,
3448 .set_map = mlx5_vdpa_set_map,
3449 .reset_map = mlx5_vdpa_reset_map,
3450 .set_group_asid = mlx5_set_group_asid,
3451 .get_vq_dma_dev = mlx5_get_vq_dma_dev,
3452 .free = mlx5_vdpa_free,
3453 .suspend = mlx5_vdpa_suspend,
3454 .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
3457 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3462 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3466 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3470 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3472 struct mlx5_vdpa_net_resources *res = &ndev->res;
3476 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3480 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3484 err = create_tis(ndev);
3493 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3497 static void free_resources(struct mlx5_vdpa_net *ndev)
3499 struct mlx5_vdpa_net_resources *res = &ndev->res;
3505 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3509 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3511 struct mlx5_vdpa_virtqueue *mvq;
3514 for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3515 mvq = &ndev->vqs[i];
3516 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3519 mvq->fwqp.fw = true;
3520 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3522 for (; i < ndev->mvdev.max_vqs; i++) {
3523 mvq = &ndev->vqs[i];
3524 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3530 struct mlx5_vdpa_mgmtdev {
3531 struct vdpa_mgmt_dev mgtdev;
3532 struct mlx5_adev *madev;
3533 struct mlx5_vdpa_net *ndev;
3534 struct vdpa_config_ops vdpa_ops;
3537 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3539 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3543 in = kvzalloc(inlen, GFP_KERNEL);
3547 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3548 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3549 mtu + MLX5V_ETH_HARD_MTU);
3550 MLX5_SET(modify_nic_vport_context_in, in, opcode,
3551 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3553 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3559 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3561 struct mlx5_vdpa_irq_pool_entry *ent;
3564 if (!msix_mode_supported(&ndev->mvdev))
3567 if (!ndev->mvdev.mdev->pdev)
3570 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3571 if (!ndev->irqp.entries)
3575 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3576 ent = ndev->irqp.entries + i;
3577 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3578 dev_name(&ndev->mvdev.vdev.dev), i);
3579 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3583 ndev->irqp.num_ent++;
3587 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3588 const struct vdpa_dev_set_config *add_config)
3590 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3591 struct virtio_net_config *config;
3592 struct mlx5_core_dev *pfmdev;
3593 struct mlx5_vdpa_dev *mvdev;
3594 struct mlx5_vdpa_net *ndev;
3595 struct mlx5_core_dev *mdev;
3596 u64 device_features;
3604 mdev = mgtdev->madev->mdev;
3605 device_features = mgtdev->mgtdev.supported_features;
3606 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3607 if (add_config->device_features & ~device_features) {
3608 dev_warn(mdev->device,
3609 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3610 add_config->device_features, device_features);
3613 device_features &= add_config->device_features;
3615 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3617 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3618 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3619 dev_warn(mdev->device,
3620 "Must provision minimum features 0x%llx for this device",
3621 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3625 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3626 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3627 dev_warn(mdev->device, "missing support for split virtqueues\n");
3631 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3632 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3634 dev_warn(mdev->device,
3635 "%d virtqueues are supported. At least 2 are required\n",
3640 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3641 if (add_config->net.max_vq_pairs > max_vqs / 2)
3643 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3648 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3649 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3651 return PTR_ERR(ndev);
3653 ndev->mvdev.max_vqs = max_vqs;
3654 mvdev = &ndev->mvdev;
3657 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3658 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3659 if (!ndev->vqs || !ndev->event_cbs) {
3665 allocate_irqs(ndev);
3666 init_rwsem(&ndev->reslock);
3667 config = &ndev->config;
3669 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3670 err = config_func_mtu(mdev, add_config->net.mtu);
3675 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3676 err = query_mtu(mdev, &mtu);
3680 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3683 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3684 if (get_link_state(mvdev))
3685 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3687 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3690 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3691 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3692 /* Don't bother setting a mac address in the config if _F_MAC is not going to be provisioned */
3693 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3694 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3695 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3700 if (!is_zero_ether_addr(config->mac)) {
3701 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3702 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3705 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3707 * We used to clear the _F_MAC feature bit when seeing a
3708 * zero mac address and device features were not
3709 * specifically provisioned. Keep that behaviour
3710 * so old scripts do not break.
3712 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3713 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3714 /* Don't provision zero mac address for _F_MAC */
3715 mlx5_vdpa_warn(&ndev->mvdev,
3716 "No mac address provisioned?\n");
3721 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3722 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3724 ndev->mvdev.mlx_features = device_features;
3725 mvdev->vdev.dma_dev = &mdev->pdev->dev;
3726 err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3730 INIT_LIST_HEAD(&mvdev->mr_list_head);
3732 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3733 err = mlx5_vdpa_create_dma_mr(mvdev);
3738 err = alloc_resources(ndev);
3742 ndev->cvq_ent.mvdev = mvdev;
3743 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3744 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3750 mvdev->vdev.mdev = &mgtdev->mgtdev;
3751 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3755 mgtdev->ndev = ndev;
3759 destroy_workqueue(mvdev->wq);
3761 free_resources(ndev);
3763 mlx5_vdpa_destroy_mr_resources(mvdev);
3765 mlx5_vdpa_free_resources(&ndev->mvdev);
3767 if (!is_zero_ether_addr(config->mac))
3768 mlx5_mpfs_del_mac(pfmdev, config->mac);
3770 put_device(&mvdev->vdev.dev);
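/* Example provisioning flow (hypothetical names): the add_config knobs
 * parsed above map to vdpa(8) options, e.g.:
 *
 *	$ vdpa dev add name vdpa0 mgmtdev auxiliary/mlx5_core.sf.1 \
 *		mac 00:11:22:33:44:55 max_vqp 4 mtu 9000
 */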
3774 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3776 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3777 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3778 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3779 struct workqueue_struct *wq;
3781 unregister_link_notifier(ndev);
3782 _vdpa_unregister_device(dev);
3785 destroy_workqueue(wq);
3786 mgtdev->ndev = NULL;
3789 static const struct vdpa_mgmtdev_ops mdev_ops = {
3790 .dev_add = mlx5_vdpa_dev_add,
3791 .dev_del = mlx5_vdpa_dev_del,
3794 static struct virtio_device_id id_table[] = {
3795 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3799 static int mlx5v_probe(struct auxiliary_device *adev,
3800 const struct auxiliary_device_id *id)
3803 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3804 struct mlx5_core_dev *mdev = madev->mdev;
3805 struct mlx5_vdpa_mgmtdev *mgtdev;
3808 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3812 mgtdev->mgtdev.ops = &mdev_ops;
3813 mgtdev->mgtdev.device = mdev->device;
3814 mgtdev->mgtdev.id_table = id_table;
3815 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3816 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3817 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3818 BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3819 mgtdev->mgtdev.max_supported_vqs =
3820 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3821 mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3822 mgtdev->madev = madev;
3823 mgtdev->vdpa_ops = mlx5_vdpa_ops;
3825 if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
3826 mgtdev->vdpa_ops.get_vq_desc_group = NULL;
3828 if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
3829 mgtdev->vdpa_ops.resume = NULL;
3831 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3835 auxiliary_set_drvdata(adev, mgtdev);
3844 static void mlx5v_remove(struct auxiliary_device *adev)
3846 struct mlx5_vdpa_mgmtdev *mgtdev;
3848 mgtdev = auxiliary_get_drvdata(adev);
3849 vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3853 static const struct auxiliary_device_id mlx5v_id_table[] = {
3854 { .name = MLX5_ADEV_NAME ".vnet", },
3858 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3860 static struct auxiliary_driver mlx5v_driver = {
3862 .probe = mlx5v_probe,
3863 .remove = mlx5v_remove,
3864 .id_table = mlx5v_id_table,
3867 module_auxiliary_driver(mlx5v_driver);