// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

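/* MLX5V_UNTAGGED above stands in for the VLAN-id slot of the 64-bit mac/vlan
 * steering key (see search_val() below) when traffic is untagged. 0x1000 is
 * one past the largest valid 12-bit VLAN id, so it can never collide with a
 * real tag.
 */
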
struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;
	u64 modified_fields;
	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *desc_mr;
	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

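/* Virtqueue indices follow the virtio-net layout: data VQs come in RX/TX
 * pairs starting at index 0, and the control VQ, when present, is the last
 * index. Without VIRTIO_NET_F_MQ only one pair exists, so valid indices are
 * 0 and 1, plus index 2 when a control VQ was negotiated.
 */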
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

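/* The helpers below byte-swap 16-bit virtio fields only when the device is a
 * big-endian legacy virtio device; with VIRTIO_F_VERSION_1 everything is
 * little-endian and these collapse to plain loads and stores.
 */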
static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}

static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver for the firmware's
		 * use so we can skip part of the params as they will be chosen by firmware
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

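/* Create one end of the notification channel. The driver-owned QP gets a real
 * receive queue backed by frag_buf plus a doorbell record, and its RQ is
 * pre-posted to full depth via rx_post(). The firmware-owned QP (vqp->fw) is
 * only instantiated through the command interface and needs no buffers, which
 * is why the allocations below are skipped for it.
 */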
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

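/* CQ completion callback, invoked from the mlx5 EQ path. It drains the CQ,
 * credits the RQ back through mlx5_vdpa_handle_completions() and forwards the
 * event to the vdpa callback registered for this virtqueue; finally the CQ is
 * re-armed so the next completion raises another event.
 */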
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let vdpa core
			 * know about this so it passes it on to the virtio
			 * driver on the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
	 */
	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

static int read_umem_params(struct mlx5_vdpa_net *ndev)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int out_size;
	void *caps;
	void *out;
	int err;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev,
			       "Failed reading vdpa umem capabilities with err %d\n", err);
		goto out;
	}

	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);

	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);

	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);

out:
	kfree(out);
	return err;
}

static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	u32 p_a;
	u32 p_b;

	switch (num) {
	case 1:
		p_a = ndev->umem_1_buffer_param_a;
		p_b = ndev->umem_1_buffer_param_b;
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = ndev->umem_2_buffer_param_a;
		p_b = ndev->umem_2_buffer_param_b;
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = ndev->umem_3_buffer_param_a;
		p_b = ndev->umem_3_buffer_param_b;
		*umemp = &mvq->umem3;
		break;
	}

	(*umemp)->size = p_a * mvq->num_ent + p_b;
}

static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	int inlen;
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

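/* Per the virtio-net queue layout, even indices are RX queues and odd indices
 * are TX queues; only TX queues get attached to the TIS (see
 * create_virtqueue() below).
 */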
static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}

enum {
	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
	MLX5_VIRTIO_NET_F_CSUM = 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};

static u16 get_features(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
}

static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *vq_desc_mr;
	void *obj_context;
	u16 mlx_features;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	mlx_features = get_features(ndev->mvdev.actual_features);
	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 mlx_features >> 3);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
		 mlx_features & 7);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	if (mvq->map.virq) {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
	} else {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	}

	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
	if (vq_mr)
		MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);

	vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
		MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);

	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	mlx5_vdpa_get_mr(mvdev, vq_mr);
	mvq->vq_mr = vq_mr;

	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
		mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
		mvq->desc_mr = vq_desc_mr;
	}

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	umems_destroy(ndev, mvq);

	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
	mvq->vq_mr = NULL;

	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
	mvq->desc_mr = NULL;
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the QP to modify is the one
 * used by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

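/* Walk both QPs through the RC state machine (RST -> INIT -> RTR -> RTS),
 * each side pointing at the other through remote_qpn. Only the firmware QP is
 * taken all the way to RTS in the last step; since the driver QP has no send
 * queue (no_sq), RTR appears to be sufficient for it.
 */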
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *out;
	void *obj_context;
	void *cmd_hdr;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

static bool is_resumable(struct mlx5_vdpa_net *ndev)
{
	return ndev->mvdev.vdev.config->resume;
}

static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
{
	switch (oldstate) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
	default:
		return false;
	}
}

static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
{
	/* Only state is always modifiable */
	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;

	return true;
}

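/* Apply the accumulated mvq->modified_fields to the firmware object in a
 * single MODIFY_GENERAL_OBJECT command; modify_field_select tells firmware
 * which of the context fields written below are actually being changed.
 * Per modifiable_virtqueue_fields() above, anything other than the state can
 * only be modified while the queue is in INIT or SUSPEND.
 */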
static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
			    struct mlx5_vdpa_virtqueue *mvq,
			    int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	struct mlx5_vdpa_mr *desc_mr = NULL;
	struct mlx5_vdpa_mr *vq_mr = NULL;
	bool state_change = false;
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
		return 0;

	if (!modifiable_virtqueue_fields(mvq))
		return -EINVAL;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
		if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
			err = -EINVAL;
			goto done;
		}

		MLX5_SET(virtio_net_q_object, obj_context, state, state);
		state_change = true;
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];

		if (vq_mr)
			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
		else
			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
		desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];

		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
		else
			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
	}

	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto done;

	if (state_change)
		mvq->fw_state = state;

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
		mlx5_vdpa_get_mr(mvdev, vq_mr);
		mvq->vq_mr = vq_mr;
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
		mlx5_vdpa_get_mr(mvdev, desc_mr);
		mvq->desc_mr = desc_mr;
	}

	mvq->modified_fields = 0;

done:
	kfree(in);
	return err;
}

static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
				  struct mlx5_vdpa_virtqueue *mvq,
				  int state)
{
	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
	return modify_virtqueue(ndev, mvq, state);
}

static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
	struct vdpa_callback *cb = priv;

	if (cb->callback)
		return cb->callback(cb->private);

	return IRQ_HANDLED;
}

static void alloc_vector(struct mlx5_vdpa_net *ndev,
			 struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	struct mlx5_vdpa_irq_pool_entry *ent;
	int err;
	int i;

	for (i = 0; i < irqp->num_ent; i++) {
		ent = &irqp->entries[i];
		if (!ent->used) {
			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
			ent->dev_id = &ndev->event_cbs[mvq->index];
			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
					  ent->name, ent->dev_id);
			if (err)
				return;

			ent->used = true;
			mvq->map = ent->map;
			return;
		}
	}
}

static void dealloc_vector(struct mlx5_vdpa_net *ndev,
			   struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	int i;

	for (i = 0; i < irqp->num_ent; i++)
		if (mvq->map.virq == irqp->entries[i].map.virq) {
			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
			irqp->entries[i].used = false;
			return;
		}
}

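/* Bring up everything a single virtqueue needs, in dependency order: the CQ,
 * the two notification QPs, their RC connection, a counter set, an MSI-X
 * vector if one is free, and finally the VIRTIO_NET_Q object itself, which is
 * moved to RDY only if the queue was already marked ready.
 */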
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (!mvq->num_ent)
		return 0;

	if (mvq->initialized)
		return 0;

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = counter_set_alloc(ndev, mvq);
	if (err)
		goto err_connect;

	alloc_vector(ndev, mvq);
	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_vq;

	if (mvq->ready) {
		err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
				       idx, err);
			goto err_modify;
		}
	}

	mvq->initialized = true;
	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_vq:
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized || !is_resumable(ndev))
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)
		return;

	if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index);
}

static void resume_vqs(struct mlx5_vdpa_net *ndev)
{
	for (int i = 0; i < ndev->mvdev.max_vqs; i++)
		resume_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	mvq->modified_fields = 0;
	destroy_virtqueue(ndev, mvq);
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}

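/* The RQT lists only RX virtqueue objects, hence the stride of two when
 * walking ndev->vqs below; both the actual and the maximum table sizes are
 * rounded up to powers of two, which appears to be what the device expects
 * given the roundup_pow_of_two() calls.
 */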
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	int act_sz = roundup_pow_of_two(num / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	if (err)
		return err;

	mlx5_vdpa_add_tirn(ndev);
	return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_remove_tirn(ndev);
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	#define NUM_DESTS 2
#else
	#define NUM_DESTS 1
#endif

static int add_steering_counters(struct mlx5_vdpa_net *ndev,
				 struct macvlan_node *node,
				 struct mlx5_flow_act *flow_act,
				 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	int err;

	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->ucast_counter.counter))
		return PTR_ERR(node->ucast_counter.counter);

	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->mcast_counter.counter)) {
		err = PTR_ERR(node->mcast_counter.counter);
		goto err_mcast_counter;
	}

	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	return 0;

err_mcast_counter:
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
	return err;
#else
	return 0;
#endif
}

static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
				     struct macvlan_node *node)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
#endif
}

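/* Each macvlan node installs two rules on the RX flow table: a unicast rule
 * matching the exact DMAC (plus the VLAN id when tagged) and a multicast rule
 * matching only the multicast bit of the DMAC. Both forward to the TIR, and
 * with CONFIG_MLX5_VDPA_STEERING_DEBUG each also feeds a flow counter.
 */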
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
					struct macvlan_node *node)
{
	struct mlx5_flow_destination dests[NUM_DESTS] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	void *headers_c;
	void *headers_v;
	u8 *dmac_c;
	u8 *dmac_v;
	int err;
	u16 vid;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	vid = key2vid(node->macvlan);
	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
	eth_broadcast_addr(dmac_c);
	ether_addr_copy(dmac_v, mac);
	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
	}
	if (node->tagged) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
	}
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dests[0].tir_num = ndev->res.tirn;
	err = add_steering_counters(ndev, node, &flow_act, dests);
	if (err)
		goto out_free;

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
#endif
	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->ucast_rule)) {
		err = PTR_ERR(node->ucast_rule);
		goto err_ucast;
	}

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
#endif

	memset(dmac_c, 0, ETH_ALEN);
	memset(dmac_v, 0, ETH_ALEN);
	dmac_c[0] = 1;
	dmac_v[0] = 1;
	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->mcast_rule)) {
		err = PTR_ERR(node->mcast_rule);
		goto err_mcast;
	}
	kvfree(spec);
	mlx5_vdpa_add_rx_counters(ndev, node);
	return 0;

err_mcast:
	mlx5_del_flow_rules(node->ucast_rule);
err_ucast:
	remove_steering_counters(ndev, node);
out_free:
	kvfree(spec);
	return err;
}

static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct macvlan_node *node)
{
	mlx5_vdpa_remove_rx_counters(ndev, node);
	mlx5_del_flow_rules(node->ucast_rule);
	mlx5_del_flow_rules(node->mcast_rule);
}

static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
	u64 val;

	if (!tagged)
		vlan = MLX5V_UNTAGGED;

	val = (u64)vlan << 48 |
	      (u64)mac[0] << 40 |
	      (u64)mac[1] << 32 |
	      (u64)mac[2] << 24 |
	      (u64)mac[3] << 16 |
	      (u64)mac[4] << 8 |
	      (u64)mac[5];

	return val;
}

static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
	struct macvlan_node *pos;
	u32 idx;

	idx = hash_64(value, 8); // tbd 8
	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
		if (pos->macvlan == value)
			return pos;
	}
	return NULL;
}

static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
	struct macvlan_node *ptr;
	u64 val;
	u32 idx;
	int err;

	val = search_val(mac, vid, tagged);
	if (mac_vlan_lookup(ndev, val))
		return -EEXIST;

	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
	if (!ptr)
		return -ENOMEM;

	ptr->tagged = tagged;
	ptr->macvlan = val;
	ptr->ndev = ndev;
	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
	if (err)
		goto err_add;

	idx = hash_64(val, 8);
	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
	return 0;

err_add:
	kfree(ptr);
	return err;
}

static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
{
	struct macvlan_node *ptr;

	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
	if (!ptr)
		return;

	hlist_del(&ptr->hlist);
	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
	remove_steering_counters(ndev, ptr);
	kfree(ptr);
}

static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
{
	struct macvlan_node *pos;
	struct hlist_node *n;
	int i;

	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
			hlist_del(&pos->hlist);
			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
			remove_steering_counters(ndev, pos);
			kfree(pos);
		}
	}
}

static int setup_steering(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err;

	ft_attr.max_fte = MAX_STEERING_ENT;
	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}
	mlx5_vdpa_add_rx_flow_table(ndev);

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}

static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, 6)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that it can be restored
		 * if adding the forward rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* Recreate the flow table entry so that traffic to the new
		 * MAC address is forwarded again.
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* This path is hardly ever taken, but double check anyway */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address to the MPFS
			 * table, and try to restore the forward rule entry.
			 */
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
					       ndev->config.mac);
			}

			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
					       mac_back);
			}

			memcpy(ndev->config.mac, mac_back, ETH_ALEN);

			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");

			break;
		}

		status = VIRTIO_NET_OK;
		break;

	default:
		break;
	}

	return status;
}

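/* newqps counts virtqueue pairs while ndev->cur_num_vqs counts individual
 * virtqueues, hence the factor of two throughout. Shrinking modifies the RQT
 * first and then tears down the excess VQs; growing does it in the opposite
 * order so the RQT never references an uninitialized queue.
 */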
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int cur_qps = ndev->cur_num_vqs / 2;
	int err;
	int i;

	if (cur_qps > newqps) {
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			return err;

		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
			teardown_vq(ndev, &ndev->vqs[i]);

		ndev->cur_num_vqs = 2 * newqps;
	} else {
		ndev->cur_num_vqs = 2 * newqps;
		for (i = cur_qps * 2; i < 2 * newqps; i++) {
			err = setup_vq(ndev, &ndev->vqs[i]);
			if (err)
				goto clean_added;
		}
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			goto clean_added;
	}
	return 0;

clean_added:
	for (--i; i >= 2 * cur_qps; --i)
		teardown_vq(ndev, &ndev->vqs[i]);

	ndev->cur_num_vqs = 2 * cur_qps;

	return err;
}

static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	struct virtio_net_ctrl_mq mq;
	size_t read;
	u16 newqps;

	switch (cmd) {
	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* This mq feature check aligns with pre-existing userspace
		 * implementation.
		 *
		 * Without it, an untrusted driver could fake a multiqueue config
		 * request down to a non-mq device that may cause kernel to
		 * panic due to uninitialized resources for extra vqs. Even with
		 * a well behaving guest driver, it is not expected to allow
		 * changing the number of vqs on a non-mq device.
		 */
		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
		if (read != sizeof(mq))
			break;

		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    newqps > ndev->rqt_size)
			break;

		if (ndev->cur_num_vqs == 2 * newqps) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (!change_num_qps(mvdev, newqps))
			status = VIRTIO_NET_OK;

		break;
	default:
		break;
	}

	return status;
}

static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	__virtio16 vlan;
	size_t read;
	u16 id;

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
		return status;

	switch (cmd) {
	case VIRTIO_NET_CTRL_VLAN_ADD:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
		if (read != sizeof(vlan))
			break;

		id = mlx5vdpa16_to_cpu(mvdev, vlan);
		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
			break;

		status = VIRTIO_NET_OK;
		break;
	case VIRTIO_NET_CTRL_VLAN_DEL:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
		if (read != sizeof(vlan))
			break;

		id = mlx5vdpa16_to_cpu(mvdev, vlan);
		mac_vlan_del(ndev, ndev->config.mac, id, true);
		status = VIRTIO_NET_OK;
		break;
	default:
		break;
	}

	return status;
}

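/* Control VQ work handler. It processes a single command per invocation and
 * requeues itself while descriptors remain, so one guest cannot monopolize
 * the workqueue; reslock is taken for write because the command handlers may
 * reconfigure RX steering and the virtqueue set.
 */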
static void mlx5_cvq_kick_handler(struct work_struct *work)
{
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct virtio_net_ctrl_hdr ctrl;
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_control_vq *cvq;
	struct mlx5_vdpa_net *ndev;
	size_t read, write;
	int err;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	cvq = &mvdev->cvq;

	down_write(&ndev->reslock);

	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		goto out;

	if (!cvq->ready)
		goto out;

	while (true) {
		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
					   GFP_ATOMIC);
		if (err <= 0)
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
		if (read != sizeof(ctrl))
			break;

		cvq->received_desc++;
		switch (ctrl.class) {
		case VIRTIO_NET_CTRL_MAC:
			status = handle_ctrl_mac(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_MQ:
			status = handle_ctrl_mq(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_VLAN:
			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
			break;
		default:
			break;
		}

		/* Make sure data is written before advancing index */
		smp_wmb();

		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
		vringh_kiov_cleanup(&cvq->riov);
		vringh_kiov_cleanup(&cvq->wiov);

		if (vringh_need_notify_iotlb(&cvq->vring))
			vringh_notify(&cvq->vring);

		cvq->completed_desc++;
		queue_work(mvdev->wq, &wqent->work);
		break;
	}

out:
	up_write(&ndev->reslock);
}

static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return;

	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
		if (!mvdev->wq || !mvdev->cvq.ready)
			return;

		queue_work(mvdev->wq, &ndev->cvq_ent.work);
		return;
	}

	mvq = &ndev->vqs[idx];
	if (unlikely(!mvq->ready))
		return;

	iowrite16(idx, ndev->mvdev.res.kick_addr);
}
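
/* Note (added for clarity): data-path kicks do not bounce through
 * software. Writing the 16-bit queue index to kick_addr rings the device
 * doorbell directly, virtio-style. Only control VQ kicks are deferred to
 * the workqueue above, since the CVQ is emulated in the driver.
 */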
static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
				    u64 driver_area, u64 device_area)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		mvdev->cvq.desc_addr = desc_area;
		mvdev->cvq.device_addr = device_area;
		mvdev->cvq.driver_addr = driver_area;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	mvq->desc_addr = desc_area;
	mvq->device_addr = device_area;
	mvq->driver_addr = driver_area;
	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
	return 0;
}

static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
		return;

	mvq = &ndev->vqs[idx];
	mvq->num_ent = num;
}

static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	ndev->event_cbs[idx] = *cb;
	if (is_ctrl_vq_idx(mvdev, idx))
		mvdev->cvq.event_cb = *cb;
}

static void mlx5_cvq_notify(struct vringh *vring)
{
	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);

	if (!cvq->event_cb.callback)
		return;

	cvq->event_cb.callback(cvq->event_cb.private);
}

static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
	struct mlx5_control_vq *cvq = &mvdev->cvq;

	cvq->ready = ready;
	if (!ready)
		return;

	cvq->vring.notify = mlx5_cvq_notify;
}

static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	int err;

	if (!mvdev->actual_features)
		return;

	if (!is_index_valid(mvdev, idx))
		return;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		set_cvq_ready(mvdev, ready);
		return;
	}

	mvq = &ndev->vqs[idx];
	if (!ready) {
		suspend_vq(ndev, mvq);
	} else {
		err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
			ready = false;
		}
	}

	mvq->ready = ready;
}
static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	if (!is_index_valid(mvdev, idx))
		return false;

	if (is_ctrl_vq_idx(mvdev, idx))
		return mvdev->cvq.ready;

	return ndev->vqs[idx].ready;
}

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
				  const struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
		return -EINVAL;
	}

	mvq->used_idx = state->split.avail_index;
	mvq->avail_idx = state->split.avail_index;
	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
	return 0;
}

static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_virtq_attr attr;
	int err;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after the vq is stopped.
	 */
	if (!mvq->initialized) {
		/* Firmware returns a wrong value for the available index.
		 * Since both values should be identical, we take the value of
		 * used_idx which is reported correctly.
		 */
		state->split.avail_index = mvq->used_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	state->split.avail_index = attr.used_index;
	return 0;
}
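
/* Added note: the workaround above relies on the two indices being
 * identical once the queue has been quiesced by suspend_vq(); descriptors
 * made available before the stop have by then been consumed, so used_idx
 * is a faithful stand-in for the firmware's unreliable available index.
 */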
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}

static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	if (is_ctrl_vq_idx(mvdev, idx))
		return MLX5_VDPA_CVQ_GROUP;

	return MLX5_VDPA_DATAVQ_GROUP;
}

static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	if (is_ctrl_vq_idx(mvdev, idx))
		return MLX5_VDPA_CVQ_GROUP;

	return MLX5_VDPA_DATAVQ_DESC_GROUP;
}
static u64 mlx_to_virtio_features(u16 dev_features)
{
	u64 result = 0;

	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);

	return result;
}

static u64 get_supported_features(struct mlx5_core_dev *mdev)
{
	u64 mlx_vdpa_features = 0;
	u16 dev_features;

	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);

	return mlx_vdpa_features;
}
static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}

static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{
	/* Minimum features to expect */
	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return -EOPNOTSUPP;

	/* Double check the feature combination sent down by the driver.
	 * Fail invalid combinations where a required dependency is absent.
	 *
	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
	 * By failing the invalid features sent down by untrusted drivers,
	 * we're assured the assumptions made in is_index_valid() and
	 * is_ctrl_vq_idx() will not be compromised.
	 */
	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
	    BIT_ULL(VIRTIO_NET_F_MQ))
		return -EINVAL;

	return 0;
}
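
/* Example (illustrative only): a driver acking
 *
 *	BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
 *	BIT_ULL(VIRTIO_NET_F_MQ)
 *
 * passes both checks above, while the same set without _F_CTRL_VQ fails
 * with -EINVAL because MQ configuration depends on the control VQ for the
 * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command.
 */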
static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;
	int i;

	for (i = 0; i < mvdev->max_vqs; i++) {
		err = setup_vq(ndev, &ndev->vqs[i]);
		if (err)
			goto err_vq;
	}

	return 0;

err_vq:
	for (--i; i >= 0; i--)
		teardown_vq(ndev, &ndev->vqs[i]);

	return err;
}

static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
		mvq = &ndev->vqs[i];
		if (!mvq->initialized)
			continue;

		teardown_vq(ndev, mvq);
	}
}

static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
{
	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
			/* MQ supported. CVQ index is right above the last data virtqueue's */
			mvdev->max_idx = mvdev->max_vqs;
		} else {
			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
			 * CVQ gets index 2.
			 */
			mvdev->max_idx = 2;
		}
	} else {
		/* Two data virtqueues only: one for rx and one for tx */
		mvdev->max_idx = 1;
	}
}
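
/* Worked example (illustrative): with max_vqs = 16 and both _F_CTRL_VQ
 * and _F_MQ negotiated, data VQs occupy indices 0..15 and the CVQ takes
 * index 16, so max_idx = 16. With _F_CTRL_VQ alone the layout is rx = 0,
 * tx = 1, cvq = 2; with neither feature only indices 0 and 1 are valid.
 */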
static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(query_vport_state_in, in, other_vport, 1);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return 0;

	return MLX5_GET(query_vport_state_out, out, state);
}

static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
{
	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
	    VPORT_STATE_UP)
		return true;

	return false;
}

static void update_carrier(struct work_struct *work)
{
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	if (ndev->config_cb.callback)
		ndev->config_cb.callback(ndev->config_cb.private);

	kfree(wqent);
}

static int queue_link_work(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_wq_ent *wqent;

	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
	if (!wqent)
		return -ENOMEM;

	wqent->mvdev = &ndev->mvdev;
	INIT_WORK(&wqent->work, update_carrier);
	queue_work(ndev->mvdev.wq, &wqent->work);
	return 0;
}
static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
	struct mlx5_eqe *eqe = param;
	int ret = NOTIFY_DONE;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			if (queue_link_work(ndev))
				return NOTIFY_DONE;
			ret = NOTIFY_OK;
			break;
		default:
			break;
		}
		return ret;
	}
	return ret;
}

static void register_link_notifier(struct mlx5_vdpa_net *ndev)
{
	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
		return;

	ndev->nb.notifier_call = event_handler;
	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
	ndev->nb_registered = true;
	queue_link_work(ndev);
}

static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->nb_registered)
		return;

	ndev->nb_registered = false;
	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
	if (ndev->mvdev.wq)
		flush_workqueue(ndev->mvdev.wq);
}

static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
{
	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
}
static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_features(mvdev, features, true);

	err = verify_driver_features(mvdev, features);
	if (err)
		return err;

	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
	else
		ndev->rqt_size = 1;

	/* The device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
	 * 5.1.6.5.5 "Device operation in multiqueue mode":
	 *
	 * Multiqueue is disabled by default.
	 * The driver enables multiqueue by sending a command using class
	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
	 * operation, as follows: ...
	 */
	ndev->cur_num_vqs = 2;

	update_cvq_info(mvdev);
	return err;
}
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	ndev->config_cb = *cb;
}

#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}

static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}

static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}

static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}
static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vq_restore_info *ri = &mvq->ri;
	struct mlx5_virtq_attr attr = {};
	int err;

	if (mvq->initialized) {
		err = query_virtqueue(ndev, mvq, &attr);
		if (err)
			return err;
	}

	ri->avail_index = attr.available_index;
	ri->used_index = attr.used_index;
	ri->ready = mvq->ready;
	ri->num_ent = mvq->num_ent;
	ri->desc_addr = mvq->desc_addr;
	ri->device_addr = mvq->device_addr;
	ri->driver_addr = mvq->driver_addr;
	ri->restore = true;
	return 0;
}

static int save_channels_info(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
		save_channel_info(ndev, &ndev->vqs[i]);
	}
	return 0;
}

static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}

static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vq_restore_info *ri;
	int i;

	mlx5_clear_vqs(ndev);
	init_mvqs(ndev);
	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		ri = &mvq->ri;
		if (!ri->restore)
			continue;

		mvq->avail_idx = ri->avail_index;
		mvq->used_idx = ri->used_index;
		mvq->ready = ri->ready;
		mvq->num_ent = ri->num_ent;
		mvq->desc_addr = ri->desc_addr;
		mvq->device_addr = ri->device_addr;
		mvq->driver_addr = ri->driver_addr;
	}
}
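
/* Added overview: changing the memory map while the device is live means
 * every virtqueue object references a stale mkey. On hardware that cannot
 * resume a queue, the whole driver state is torn down and rebuilt around
 * the new mr; on resumable hardware it is enough to mark the mkey fields
 * dirty and let the next modify cycle pick them up.
 */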
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *new_mr,
				unsigned int asid)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	bool teardown = !is_resumable(ndev);
	int err;

	suspend_vqs(ndev);
	if (teardown) {
		err = save_channels_info(ndev);
		if (err)
			return err;

		teardown_driver(ndev);
	}

	mlx5_vdpa_update_mr(mvdev, new_mr, asid);

	for (int i = 0; i < ndev->cur_num_vqs; i++)
		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;

	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
		return 0;

	if (teardown) {
		restore_channels_info(ndev);
		err = setup_driver(mvdev);
		if (err)
			return err;
	}

	resume_vqs(ndev);

	return 0;
}
/* reslock must be held for this function */
static int setup_driver(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	WARN_ON(!rwsem_is_locked(&ndev->reslock));

	if (ndev->setup) {
		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	mlx5_vdpa_add_debugfs(ndev);

	err = read_umem_params(ndev);
	if (err)
		goto err_setup;

	err = setup_virtqueues(mvdev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
		goto err_setup;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_tir\n");
		goto err_tir;
	}

	err = setup_steering(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_steering\n");
		goto err_fwd;
	}
	ndev->setup = true;

	return 0;

err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
err_setup:
	mlx5_vdpa_remove_debugfs(ndev);
out:
	return err;
}

/* reslock must be held for this function */
static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
	WARN_ON(!rwsem_is_locked(&ndev->reslock));

	if (!ndev->setup)
		return;

	mlx5_vdpa_remove_debugfs(ndev);
	teardown_steering(ndev);
	destroy_tir(ndev);
	destroy_rqt(ndev);
	teardown_virtqueues(ndev);
	ndev->setup = false;
}
static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		ndev->vqs[i].ready = false;
		ndev->vqs[i].modified_fields = 0;
	}

	ndev->mvdev.cvq.ready = false;
}

static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	int err = 0;

	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
		u16 idx = cvq->vring.last_avail_idx;

		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
					MLX5_CVQ_MAX_ENT, false,
					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
					(struct vring_used *)(uintptr_t)cvq->device_addr);

		if (!err)
			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
	}
	return err;
}
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);

	down_write(&ndev->reslock);

	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_cvq_vring(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
				goto err_setup;
			}
			register_link_notifier(ndev);
			err = setup_driver(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_driver;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			goto err_clear;
		}
	}

	ndev->mvdev.status = status;
	up_write(&ndev->reslock);
	return;

err_driver:
	unregister_link_notifier(ndev);
err_setup:
	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
err_clear:
	up_write(&ndev->reslock);
}
static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
{
	int i;

	/* By default, map all virtqueue groups to asid 0 */
	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
		mvdev->group2asid[i] = 0;
}

static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, 0, true);
	mlx5_vdpa_info(mvdev, "performing device reset\n");

	down_write(&ndev->reslock);
	unregister_link_notifier(ndev);
	teardown_driver(ndev);
	clear_vqs_ready(ndev);
	if (flags & VDPA_RESET_F_CLEAN_MAP)
		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
	ndev->mvdev.status = 0;
	ndev->mvdev.suspended = false;
	ndev->cur_num_vqs = 0;
	ndev->mvdev.cvq.received_desc = 0;
	ndev->mvdev.cvq.completed_desc = 0;
	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
	ndev->mvdev.actual_features = 0;
	init_group_to_asid_map(mvdev);
	++mvdev->generation;

	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_warn(mvdev, "create MR failed\n");
	}
	up_write(&ndev->reslock);

	return 0;
}
static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{
	return mlx5_vdpa_compat_reset(vdev, 0);
}

static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
	return sizeof(struct virtio_net_config);
}

static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
				 unsigned int len)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	if (offset + len <= sizeof(struct virtio_net_config))
		memcpy(buf, (u8 *)&ndev->config + offset, len);
}

static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
	/* The config space of this device is read-only to the driver */
}

static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->generation;
}
static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			unsigned int asid)
{
	struct mlx5_vdpa_mr *new_mr;
	int err;

	if (asid >= MLX5_VDPA_NUM_AS)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
		if (IS_ERR(new_mr)) {
			err = PTR_ERR(new_mr);
			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
			return err;
		}
	} else {
		/* Empty iotlbs don't have an mr but will clear the previous mr. */
		new_mr = NULL;
	}

	if (!mvdev->mr[asid]) {
		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
	} else {
		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
		if (err) {
			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
			goto out_err;
		}
	}

	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);

out_err:
	mlx5_vdpa_put_mr(mvdev, new_mr);
	return err;
}
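
/* Added note: the first mapping for an asid can be installed directly
 * because no virtqueue object references it yet; any later remap must go
 * through mlx5_vdpa_change_map() so live queues are moved onto the new
 * mkey. An empty iotlb yields new_mr == NULL, which simply drops the
 * previous region.
 */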
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
			     struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	down_write(&ndev->reslock);
	err = set_map_data(mvdev, iotlb, asid);
	up_write(&ndev->reslock);
	return err;
}

static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	down_write(&ndev->reslock);
	err = mlx5_vdpa_reset_mr(mvdev, asid);
	up_write(&ndev->reslock);
	return err;
}

static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	if (is_ctrl_vq_idx(mvdev, idx))
		return &vdev->dev;

	return mvdev->vdev.dma_dev;
}
static void free_irqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_irq_pool_entry *ent;
	int i;

	if (!msix_mode_supported(&ndev->mvdev))
		return;

	if (!ndev->irqp.entries)
		return;

	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
		ent = ndev->irqp.entries + i;
		if (ent->map.virq)
			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
	}
	kfree(ndev->irqp.entries);
}

static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	free_resources(ndev);
	mlx5_vdpa_destroy_mr_resources(mvdev);
	if (!is_zero_ether_addr(ndev->config.mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
	}
	mlx5_vdpa_free_resources(&ndev->mvdev);
	free_irqs(ndev);
	kfree(ndev->event_cbs);
	kfree(ndev->vqs);
}
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct vdpa_notification_area ret = {};
	struct mlx5_vdpa_net *ndev;
	phys_addr_t addr;

	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
		return ret;

	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification to avoid the risk of mapping pages that contain the BAR
	 * of more than one SF.
	 */
	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
		return ret;

	ndev = to_mlx5_vdpa_ndev(mvdev);
	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
	ret.addr = addr;
	ret.size = PAGE_SIZE;

	return ret;
}

static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx))
		return -EOPNOTSUPP;

	mvq = &ndev->vqs[idx];
	if (!mvq->map.virq)
		return -EOPNOTSUPP;

	return mvq->map.virq;
}

static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->actual_features;
}
static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			     u64 *received_desc, u64 *completed_desc)
{
	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	void *ctx;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return -EOPNOTSUPP;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return -EAGAIN;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
	return 0;
}
static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
					 struct sk_buff *msg,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_control_vq *cvq;
	u64 received_desc;
	u64 completed_desc;
	int err = 0;

	down_read(&ndev->reslock);
	if (!is_index_valid(mvdev, idx)) {
		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
		err = -EINVAL;
		goto out_err;
	}

	if (idx == ctrl_vq_idx(mvdev)) {
		cvq = &mvdev->cvq;
		received_desc = cvq->received_desc;
		completed_desc = cvq->completed_desc;
		goto out;
	}

	mvq = &ndev->vqs[idx];
	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
		goto out_err;
	}

out:
	err = -EMSGSIZE;
	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
		goto out_err;

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
			      VDPA_ATTR_PAD))
		goto out_err;

	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
		goto out_err;

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
			      VDPA_ATTR_PAD))
		goto out_err;

	err = 0;
out_err:
	up_read(&ndev->reslock);
	return err;
}
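
/* Usage sketch (assuming the iproute2 vdpa tool and a device named
 * "vdpa0"; the name is an example):
 *
 *	$ vdpa dev vstats show vdpa0 qidx 1
 *
 * reaches this callback with idx == 1 and renders the received_desc and
 * completed_desc attribute pairs filled in above.
 */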
static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_control_vq *cvq;

	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return;

	cvq = &mvdev->cvq;
	cvq->ready = false;
}

static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	mlx5_vdpa_info(mvdev, "suspending device\n");

	down_write(&ndev->reslock);
	unregister_link_notifier(ndev);
	for (i = 0; i < ndev->cur_num_vqs; i++) {
		mvq = &ndev->vqs[i];
		suspend_vq(ndev, mvq);
	}
	mlx5_vdpa_cvq_suspend(mvdev);
	mvdev->suspended = true;
	up_write(&ndev->reslock);
	return 0;
}

static int mlx5_vdpa_resume(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	mlx5_vdpa_info(mvdev, "resuming device\n");

	down_write(&ndev->reslock);
	mvdev->suspended = false;
	resume_vqs(ndev);
	register_link_notifier(ndev);
	up_write(&ndev->reslock);
	return 0;
}

static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
			       unsigned int asid)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	int err = 0;

	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
		return -EINVAL;

	mvdev->group2asid[group] = asid;

	mutex_lock(&mvdev->mr_mtx);
	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
	mutex_unlock(&mvdev->mr_mtx);

	return err;
}
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_vq_group = mlx5_vdpa_get_vq_group,
	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
	.get_device_features = mlx5_vdpa_get_device_features,
	.get_backend_features = mlx5_vdpa_get_backend_features,
	.set_driver_features = mlx5_vdpa_set_driver_features,
	.get_driver_features = mlx5_vdpa_get_driver_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.reset = mlx5_vdpa_reset,
	.compat_reset = mlx5_vdpa_compat_reset,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.reset_map = mlx5_vdpa_reset_map,
	.set_group_asid = mlx5_set_group_asid,
	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
	.free = mlx5_vdpa_free,
	.suspend = mlx5_vdpa_suspend,
	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
};
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
	u16 hw_mtu;
	int err;

	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
	if (err)
		return err;

	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
	return 0;
}

static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}

static void free_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;

	if (!res->valid)
		return;

	destroy_tis(ndev);
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	res->valid = false;
}

static void init_mvqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
		mvq->fwqp.fw = true;
		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	}
	for (; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
	}
}
struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;
	struct mlx5_adev *madev;
	struct mlx5_vdpa_net *ndev;
	struct vdpa_config_ops vdpa_ops;
};

static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
	void *in;
	int err;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
		 mtu + MLX5V_ETH_HARD_MTU);
	MLX5_SET(modify_nic_vport_context_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);

	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);

	kvfree(in);
	return err;
}

static void allocate_irqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_irq_pool_entry *ent;
	int i;

	if (!msix_mode_supported(&ndev->mvdev))
		return;

	if (!ndev->mvdev.mdev->pdev)
		return;

	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
	if (!ndev->irqp.entries)
		return;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		ent = ndev->irqp.entries + i;
		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
			 dev_name(&ndev->mvdev.vdev.dev), i);
		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
		if (!ent->map.virq)
			return;

		ndev->irqp.num_ent++;
	}
}
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
			     const struct vdpa_dev_set_config *add_config)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u64 device_features;
	u32 max_vqs;
	u16 mtu;
	int err;

	if (mgtdev->ndev)
		return -ENOSPC;

	mdev = mgtdev->madev->mdev;
	device_features = mgtdev->mgtdev.supported_features;
	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
		if (add_config->device_features & ~device_features) {
			dev_warn(mdev->device,
				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
				 add_config->device_features, device_features);
			return -EINVAL;
		}
		device_features &= add_config->device_features;
	} else {
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	}
	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
		dev_warn(mdev->device,
			 "Must provision minimum features 0x%llx for this device",
			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
		return -EOPNOTSUPP;
	}

	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
		dev_warn(mdev->device, "missing support for split virtqueues\n");
		return -EOPNOTSUPP;
	}

	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
	if (max_vqs < 2) {
		dev_warn(mdev->device,
			 "%d virtqueues are supported. At least 2 are required\n",
			 max_vqs);
		return -EAGAIN;
	}

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
		if (add_config->net.max_vq_pairs > max_vqs / 2)
			return -EINVAL;
		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
	} else {
		max_vqs = 2;
	}

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;

	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
	if (!ndev->vqs || !ndev->event_cbs) {
		err = -ENOMEM;
		goto err_alloc;
	}

	init_mvqs(ndev);
	allocate_irqs(ndev);
	init_rwsem(&ndev->reslock);
	config = &ndev->config;

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
		err = config_func_mtu(mdev, add_config->net.mtu);
		if (err)
			goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
		err = query_mtu(mdev, &mtu);
		if (err)
			goto err_alloc;

		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
		if (get_link_state(mvdev))
			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
		else
			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
	}

	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	/* Don't bother setting a mac address in the config if _F_MAC won't be provisioned */
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
		if (err)
			goto err_alloc;
	}

	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_alloc;
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
		/*
		 * We used to clear the _F_MAC feature bit on seeing
		 * a zero mac address when device features were not
		 * specifically provisioned. Keep the behaviour
		 * so old scripts do not break.
		 */
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		/* Don't provision a zero mac address for _F_MAC */
		mlx5_vdpa_warn(&ndev->mvdev,
			       "No mac address provisioned?\n");
		err = -EINVAL;
		goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);

	ndev->mvdev.mlx_features = device_features;
	mvdev->vdev.dma_dev = &mdev->pdev->dev;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mpfs;

	INIT_LIST_HEAD(&mvdev->mr_list_head);

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		err = mlx5_vdpa_create_dma_mr(mvdev);
		if (err)
			goto err_res;
	}

	err = alloc_resources(ndev);
	if (err)
		goto err_mr;

	ndev->cvq_ent.mvdev = mvdev;
	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
	if (!mvdev->wq) {
		err = -ENOMEM;
		goto err_res2;
	}

	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;
	return 0;

err_reg:
	destroy_workqueue(mvdev->wq);
err_res2:
	free_resources(ndev);
err_mr:
	mlx5_vdpa_destroy_mr_resources(mvdev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
	if (!is_zero_ether_addr(config->mac))
		mlx5_mpfs_del_mac(pfmdev, config->mac);
err_alloc:
	put_device(&mvdev->vdev.dev);
	return err;
}
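
/* Usage sketch (illustrative; the management device name depends on the
 * system, e.g. a PCI VF or SF exposed by mlx5_core):
 *
 *	$ vdpa mgmtdev show
 *	$ vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 \
 *		mac 00:11:22:33:44:55 max_vqp 4
 *
 * lands here with VDPA_ATTR_DEV_NET_CFG_MACADDR and
 * VDPA_ATTR_DEV_NET_CFG_MAX_VQP set in add_config->mask.
 */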
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct workqueue_struct *wq;

	unregister_link_notifier(ndev);
	_vdpa_unregister_device(dev);
	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
	mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
	mgtdev->mgtdev.max_supported_vqs =
		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
	mgtdev->madev = madev;
	mgtdev->vdpa_ops = mlx5_vdpa_ops;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
		mgtdev->vdpa_ops.get_vq_desc_group = NULL;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
		mgtdev->vdpa_ops.resume = NULL;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	auxiliary_set_drvdata(adev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = auxiliary_get_drvdata(adev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);