// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define to_mlx5_vdpa_ndev(__mvdev) \
        container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

#define VALID_FEATURES_MASK \
        (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
         BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
         BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
         BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
         BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
         BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
         BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
         BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
         BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
         BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
         BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
         BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
         BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
        (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
         VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

struct mlx5_vdpa_net_resources {
        u32 tisn;
        u32 tdn;
        u32 tirn;
        u32 rqtn;
        bool valid;
};

struct mlx5_vdpa_cq_buf {
        struct mlx5_frag_buf_ctrl fbc;
        struct mlx5_frag_buf frag_buf;
        int cqe_size;
        int nent;
};

struct mlx5_vdpa_cq {
        struct mlx5_core_cq mcq;
        struct mlx5_vdpa_cq_buf buf;
        struct mlx5_db db;
        int cqe;
};

struct mlx5_vdpa_umem {
        struct mlx5_frag_buf_ctrl fbc;
        struct mlx5_frag_buf frag_buf;
        int size;
        u32 id;
};

struct mlx5_vdpa_qp {
        struct mlx5_core_qp mqp;
        struct mlx5_frag_buf frag_buf;
        struct mlx5_db db;
        u16 head;
        bool fw;
};

struct mlx5_vq_restore_info {
        u32 num_ent;
        u64 desc_addr;
        u64 device_addr;
        u64 driver_addr;
        u16 avail_index;
        u16 used_index;
        bool ready;
};

struct mlx5_vdpa_virtqueue {
        bool ready;
        u64 desc_addr;
        u64 device_addr;
        u64 driver_addr;
        u32 num_ent;

        /* Resources for implementing the notification channel from the device
         * to the driver. fwqp is the firmware end of an RC connection; the
         * other end is vqqp used by the driver. cq is where completions are
         * reported.
         */
        struct mlx5_vdpa_cq cq;
        struct mlx5_vdpa_qp fwqp;
        struct mlx5_vdpa_qp vqqp;

        /* umem resources are required for the virtqueue operation. Their use
         * is internal and they must be provided by the driver.
         */
        struct mlx5_vdpa_umem umem1;
        struct mlx5_vdpa_umem umem2;
        struct mlx5_vdpa_umem umem3;

        bool initialized;
        int index;
        u32 virtq_id;
        struct mlx5_vdpa_net *ndev;
        u16 avail_idx;
        u16 used_idx;
        int fw_state;

        /* keep last in the struct */
        struct mlx5_vq_restore_info ri;
};

/* We will remove this limitation once mlx5_vdpa_alloc_resources()
 * provides for driver space allocation
 */
#define MLX5_MAX_SUPPORTED_VQS 16

struct mlx5_vdpa_net {
        struct mlx5_vdpa_dev mvdev;
        struct mlx5_vdpa_net_resources res;
        struct virtio_net_config config;
        struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
        struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];

        /* Serialize vq resources creation and destruction. This is required
         * since memory map might change and we need to destroy and create
         * resources while the driver is operational.
         */
        struct mutex reslock;
        struct mlx5_flow_table *rxft;
        struct mlx5_fc *rx_counter;
        struct mlx5_flow_handle *rx_rule;
        bool setup;
        u16 mtu;
};

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature) \
        do { \
                if (features & BIT_ULL(_feature)) \
                        mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
        } while (0)

#define MLX5_LOG_VIO_STAT(_status) \
        do { \
                if (status & (_status)) \
                        mlx5_vdpa_info(mvdev, "%s\n", #_status); \
        } while (0)

static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
        return max_vqs / 2;
}

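/* Illustrative note (not part of the original source): virtqueues come in
 * RX/TX pairs, so a device exposing max_vqs = 16 data virtqueues provides
 * mlx5_vdpa_max_qps(16) = 8 queue pairs, and callers consistently iterate
 * over 2 * mlx5_vdpa_max_qps(max_vqs) virtqueues.
 */
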
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
        if (status & ~VALID_STATUS_MASK)
                mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
                               status & ~VALID_STATUS_MASK);

        if (!mlx5_vdpa_debug)
                return;

        mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
        if (set && !status) {
                mlx5_vdpa_info(mvdev, "driver resets the device\n");
                return;
        }

        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
        if (features & ~VALID_FEATURES_MASK)
                mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
                               features & ~VALID_FEATURES_MASK);

        if (!mlx5_vdpa_debug)
                return;

        mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
        if (!features)
                mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
        u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
        void *tisc;
        int err;

        tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
        MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
        err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
        if (err)
                mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

        return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
        struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
        u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
        u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
        int err;

        err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
                                       ndev->mvdev.mdev->priv.numa_node);
        if (err)
                return err;

        mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

        buf->cqe_size = MLX5_VDPA_CQE_SIZE;
        buf->nent = nent;

        return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
        struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
                                        ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
        return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
        struct mlx5_cqe64 *cqe64;
        void *cqe;
        int i;

        for (i = 0; i < buf->nent; i++) {
                cqe = get_cqe(vcq, i);
                cqe64 = cqe;
                cqe64->op_own = MLX5_CQE_INVALID << 4;
        }
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
        struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

        if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
                return cqe64;

        return NULL;
}

static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
        vqp->head += n;
        vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
                       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
        struct mlx5_vdpa_qp *vqp;
        __be64 *pas;
        void *qpc;

        vqp = fw ? &mvq->fwqp : &mvq->vqqp;
        MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        if (vqp->fw) {
                /* Firmware QP is allocated by the driver for the firmware's
                 * use so we can skip part of the params as they will be chosen by firmware
                 */
                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
                MLX5_SET(qpc, qpc, no_sq, 1);
                return;
        }

        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
        MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(qpc, qpc, no_sq, 1);
        MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
        mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
                                        num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
                                        ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
                     struct mlx5_vdpa_qp *vqp)
{
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
        void *qpc;
        void *in;
        int err;

        if (!vqp->fw) {
                vqp = &mvq->vqqp;
                err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
                if (err)
                        return err;

                err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
                if (err)
                        goto err_db;
                inlen += vqp->frag_buf.npages * sizeof(__be64);
        }

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_kzalloc;
        }

        qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        if (!vqp->fw)
                MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
        MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
        kfree(in);
        if (err)
                goto err_kzalloc;

        vqp->mqp.uid = ndev->mvdev.res.uid;
        vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

        if (!vqp->fw)
                rx_post(vqp, mvq->num_ent);

        return 0;

err_kzalloc:
        if (!vqp->fw)
                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
        if (!vqp->fw)
                rq_buf_free(ndev, vqp);

        return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
        u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

        MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
        MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
        MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
        if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
                mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
        if (!vqp->fw) {
                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
                rq_buf_free(ndev, vqp);
        }
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
        return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
        struct mlx5_cqe64 *cqe64;

        cqe64 = next_cqe_sw(vcq);
        if (!cqe64)
                return -EAGAIN;

        vcq->mcq.cons_index++;
        return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
        struct mlx5_vdpa_net *ndev = mvq->ndev;
        struct vdpa_callback *event_cb;

        event_cb = &ndev->event_cbs[mvq->index];
        mlx5_cq_set_ci(&mvq->cq.mcq);

        /* make sure CQ consumer update is visible to the hardware before updating
         * RX doorbell record.
         */
        dma_wmb();
        rx_post(&mvq->vqqp, num);
        if (event_cb->callback)
                event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
        struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
        struct mlx5_vdpa_net *ndev = mvq->ndev;
        void __iomem *uar_page = ndev->mvdev.res.uar->map;
        int num = 0;

        while (!mlx5_vdpa_poll_one(&mvq->cq)) {
                num++;
                if (num > mvq->num_ent / 2) {
                        /* If completions keep coming while we poll, we want to
                         * let the hardware know that we consumed them by
                         * updating the doorbell record. We also let vdpa core
                         * know about this so it passes it on to the virtio
                         * driver on the guest.
                         */
                        mlx5_vdpa_handle_completions(mvq, num);
                        num = 0;
                }
        }

        if (num)
                mlx5_vdpa_handle_completions(mvq, num);

        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        void __iomem *uar_page = ndev->mvdev.res.uar->map;
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        struct mlx5_vdpa_cq *vcq = &mvq->cq;
        __be64 *pas;
        int inlen;
        void *cqc;
        void *in;
        int err;
        int eqn;

        err = mlx5_db_alloc(mdev, &vcq->db);
        if (err)
                return err;

        vcq->mcq.set_ci_db = vcq->db.db;
        vcq->mcq.arm_db = vcq->db.db + 1;
        vcq->mcq.cqe_sz = 64;

        err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
        if (err)
                goto err_db;

        cq_frag_buf_init(vcq, &vcq->buf);

        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_buf;
        }

        vcq->cqe = num_ent;
        MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

        /* Use vector 0 by default. Consider adding code to choose least used
         * vector.
         */
        err = mlx5_vector2eqn(mdev, 0, &eqn);
        if (err)
                goto err_vec;

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
        if (err)
                goto err_vec;

        vcq->mcq.comp = mlx5_vdpa_cq_comp;
        vcq->cqe = num_ent;
        vcq->mcq.set_ci_db = vcq->db.db;
        vcq->mcq.arm_db = vcq->db.db + 1;
        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
        kfree(in);
        return 0;

err_vec:
        kfree(in);
err_buf:
        cq_frag_buf_free(ndev, &vcq->buf);
err_db:
        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
        return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        struct mlx5_vdpa_cq *vcq = &mvq->cq;

        if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
                mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
                return;
        }
        cq_frag_buf_free(ndev, &vcq->buf);
        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
                          struct mlx5_vdpa_umem **umemp)
{
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        int p_a;
        int p_b;

        switch (num) {
        case 1:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
                *umemp = &mvq->umem1;
                break;
        case 2:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
                *umemp = &mvq->umem2;
                break;
        case 3:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
                *umemp = &mvq->umem3;
                break;
        }
        (*umemp)->size = p_a * mvq->num_ent + p_b;
}

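/* Worked example with illustrative values, not actual device capabilities:
 * if the firmware reports umem_1_buffer_param_a = 128 and
 * umem_1_buffer_param_b = 4096, a virtqueue with num_ent = 256 needs a
 * umem 1 of 128 * 256 + 4096 = 36864 bytes. The same affine formula
 * size = p_a * num_ent + p_b applies to umem 2 and umem 3 with their
 * respective parameters.
 */
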
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
        u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
        struct mlx5_vdpa_umem *umem;
        __be64 *pas;
        void *um;
        void *in;
        int inlen;
        int err;

        set_umem_size(ndev, mvq, num, &umem);
        err = umem_frag_buf_alloc(ndev, umem, umem->size);
        if (err)
                return err;

        inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_in;
        }

        MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
        MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
        um = MLX5_ADDR_OF(create_umem_in, in, umem);
        MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

        pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
        mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        if (err) {
                mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
                goto err_cmd;
        }

        kfree(in);
        umem->id = MLX5_GET(create_umem_out, out, umem_id);

        return 0;

err_cmd:
        kfree(in);
err_in:
        umem_frag_buf_free(ndev, umem);
        return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
        u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
        struct mlx5_vdpa_umem *umem;

        switch (num) {
        case 1:
                umem = &mvq->umem1;
                break;
        case 2:
                umem = &mvq->umem2;
                break;
        case 3:
                umem = &mvq->umem3;
                break;
        }

        MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
        MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
                return;

        umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int num;
        int err;

        for (num = 1; num <= 3; num++) {
                err = create_umem(ndev, mvq, num);
                if (err)
                        goto err_umem;
        }

        return 0;

err_umem:
        for (num--; num > 0; num--)
                umem_destroy(ndev, mvq, num);

        return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int num;

        for (num = 3; num > 0; num--)
                umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
        u32 type_mask;

        type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

        /* prefer split queue */
        if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
                return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

        WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

        return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
        return idx % 2;
}

static u16 get_features_12_3(u64 features)
{
        return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}

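/* Example of the mapping above (bit positions refer to the queue feature
 * mask consumed by the device object, not to the virtio feature bits
 * themselves): negotiating VIRTIO_NET_F_HOST_TSO4 and VIRTIO_NET_F_CSUM
 * yields (1 << 9) | (1 << 7) = 0x280 in queue_feature_bit_mask_12_3.
 */
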
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
        u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *vq_ctx;
        void *in;
        int err;

        err = umems_create(ndev, mvq);
        if (err)
                return err;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_alloc;
        }

        cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
        MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
        MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
        MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
                 get_features_12_3(ndev->mvdev.actual_features));
        vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
        MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

        if (vq_is_tx(mvq->index))
                MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

        MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
        MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
        MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
        MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
        MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
                 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
        MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
        MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
        MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
        MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
        MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
        MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
        MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
        MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
        MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
        MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
        MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
        if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
                MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        if (err)
                goto err_cmd;

        kfree(in);
        mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

        return 0;

err_cmd:
        kfree(in);
err_alloc:
        umems_destroy(ndev, mvq);
        return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
                 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
                 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
                mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
                return;
        }
        umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
        return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
        return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
                        int *outlen, u32 qpn, u32 rqpn)
{
        void *qpc;
        void *pp;

        switch (cmd) {
        case MLX5_CMD_OP_2RST_QP:
                *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
                *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(*outlen, GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(qp_2rst_in, *in, opcode, cmd);
                MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(qp_2rst_in, *in, qpn, qpn);
                break;
        case MLX5_CMD_OP_RST2INIT_QP:
                *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
                MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
                MLX5_SET(qpc, qpc, rwe, 1);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, vhca_port_num, 1);
                break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
                MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
                MLX5_SET(qpc, qpc, log_msg_max, 30);
                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, fl, 1);
                break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
                MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, ack_timeout, 14);
                MLX5_SET(qpc, qpc, retry_count, 7);
                MLX5_SET(qpc, qpc, rnr_retry, 7);
                break;
        default:
                goto outerr_nullify;
        }

        return;

outerr:
        kfree(*in);
        kfree(*out);
outerr_nullify:
        *in = NULL;
        *out = NULL;
}

static void free_inout(void *in, void *out)
{
        kfree(in);
        kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one used
 * by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
        int outlen;
        int inlen;
        void *out;
        void *in;
        int err;

        alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
        if (!in || !out)
                return -ENOMEM;

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
        free_inout(in, out);
        return err;
}

static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
        if (err)
                return err;

        return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

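/* The sequence above walks both ends of the RC connection through the
 * standard mlx5 QP state ladder; a sketch of the resulting states:
 *
 *   fwqp:  RST -> INIT -> RTR -> RTS
 *   vqqp:  RST -> INIT -> RTR
 *
 * Each INIT2RTR transition programs the peer's QP number (see get_rqpn()),
 * which is what actually connects the two queue pairs. The driver-side QP
 * only receives (no_sq is set at creation), so it can stay in RTR; only the
 * firmware QP, which sends the notifications, needs to reach RTS.
 */
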
struct mlx5_virtq_attr {
        u8 state;
        u16 available_index;
        u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
                           struct mlx5_virtq_attr *attr)
{
        int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
        u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *out;
        int err;

        out = kzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
        if (err)
                goto err_cmd;

        obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
        memset(attr, 0, sizeof(*attr));
        attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
        attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
        attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
        kfree(out);
        return 0;

err_cmd:
        kfree(out);
        return err;
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
        int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
        u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *in;
        int err;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
        MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
                   MLX5_VIRTQ_MODIFY_MASK_STATE);
        MLX5_SET(virtio_net_q_object, obj_context, state, state);
        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        kfree(in);
        if (!err)
                mvq->fw_state = state;

        return err;
}

static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u16 idx = mvq->index;
        int err;

        if (!mvq->num_ent)
                return 0;

        if (mvq->initialized) {
                mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
                return -EINVAL;
        }

        err = cq_create(ndev, idx, mvq->num_ent);
        if (err)
                return err;

        err = qp_create(ndev, mvq, &mvq->fwqp);
        if (err)
                goto err_fwqp;

        err = qp_create(ndev, mvq, &mvq->vqqp);
        if (err)
                goto err_vqqp;

        err = connect_qps(ndev, mvq);
        if (err)
                goto err_connect;

        err = create_virtqueue(ndev, mvq);
        if (err)
                goto err_connect;

        if (mvq->ready) {
                err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
                if (err) {
                        mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
                                       idx, err);
                        goto err_connect;
                }
        }

        mvq->initialized = true;
        return 0;

err_connect:
        qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
        qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
        cq_destroy(ndev, idx);
        return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        struct mlx5_virtq_attr attr;

        if (!mvq->initialized)
                return;

        if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
                return;

        if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
                mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

        if (query_virtqueue(ndev, mvq, &attr)) {
                mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
                return;
        }
        mvq->avail_idx = attr.available_index;
        mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
                suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        if (!mvq->initialized)
                return;

        suspend_vq(ndev, mvq);
        destroy_virtqueue(ndev, mvq);
        qp_destroy(ndev, &mvq->vqqp);
        qp_destroy(ndev, &mvq->fwqp);
        cq_destroy(ndev, mvq->index);
        mvq->initialized = false;
}

static int create_rqt(struct mlx5_vdpa_net *ndev)
{
        __be32 *list;
        int log_max_rqt;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;

        log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
        if (log_max_rqt < 1)
                return -EOPNOTSUPP;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
        MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
        MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
        for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
                if (!ndev->vqs[j].initialized)
                        continue;

                if (!vq_is_tx(ndev->vqs[j].index)) {
                        list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
                        i++;
                }
        }

        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
        kfree(in);
        if (err)
                return err;

        return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
        (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
         MLX5_HASH_FIELD_SEL_L4_DPORT)
        static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
                                                   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
                                                   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
                                                   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
                                                   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
        void *rss_key;
        void *outer;
        void *tirc;
        void *in;
        int err;

        in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

        MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
        MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
        rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
        memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

        outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
        MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
        MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
        MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

        MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
        MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

        err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
        kfree(in);
        return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_table_attr ft_attr = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_flow_namespace *ns;
        int err;

        /* for now, one entry, match all, forward to tir */
        ft_attr.max_fte = 1;
        ft_attr.autogroup.max_num_groups = 1;

        ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
        if (!ns) {
                mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
                return -EOPNOTSUPP;
        }

        ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
        if (IS_ERR(ndev->rxft))
                return PTR_ERR(ndev->rxft);

        ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
        if (IS_ERR(ndev->rx_counter)) {
                err = PTR_ERR(ndev->rx_counter);
                goto err_fc;
        }

        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
        dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
        dest[0].tir_num = ndev->res.tirn;
        dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
        dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
        ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
        if (IS_ERR(ndev->rx_rule)) {
                err = PTR_ERR(ndev->rx_rule);
                ndev->rx_rule = NULL;
                goto err_rule;
        }

        return 0;

err_rule:
        mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
        mlx5_destroy_flow_table(ndev->rxft);
        return err;
}

static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
        if (!ndev->rx_rule)
                return;

        mlx5_del_flow_rules(ndev->rx_rule);
        mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
        mlx5_destroy_flow_table(ndev->rxft);

        ndev->rx_rule = NULL;
}

static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

        if (unlikely(!mvq->ready))
                return;

        iowrite16(idx, ndev->mvdev.res.kick_addr);
}

static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
                                    u64 driver_area, u64 device_area)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

        mvq->desc_addr = desc_area;
        mvq->device_addr = device_area;
        mvq->driver_addr = driver_area;
        return 0;
}

static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;

        mvq = &ndev->vqs[idx];
        mvq->num_ent = num;
}

static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

        ndev->event_cbs[idx] = *cb;
}

static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

        if (!ready)
                suspend_vq(ndev, mvq);

        mvq->ready = ready;
}

static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

        return mvq->ready;
}

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
                                  const struct vdpa_vq_state *state)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

        if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
                mlx5_vdpa_warn(mvdev, "can't modify available index\n");
                return -EINVAL;
        }

        mvq->used_idx = state->split.avail_index;
        mvq->avail_idx = state->split.avail_index;
        return 0;
}

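/* Note on the double assignment above: for a split ring that is not yet in
 * the RDY state, the device-visible state reduces to the available index,
 * so both the hardware available and used indices are seeded from
 * state->split.avail_index when userspace restores a virtqueue.
 */
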
static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_virtq_attr attr;
        int err;

        /* If the virtq object was destroyed, use the value saved at
         * the last minute of suspend_vq. This caters for userspace
         * that cares about emulating the index after vq is stopped.
         */
        if (!mvq->initialized) {
                /* Firmware returns a wrong value for the available index.
                 * Since both values should be identical, we take the value of
                 * used_idx which is reported correctly.
                 */
                state->split.avail_index = mvq->used_idx;
                return 0;
        }

        err = query_virtqueue(ndev, mvq, &attr);
        if (err) {
                mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
                return err;
        }
        state->split.avail_index = attr.used_index;
        return 0;
}

static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
        return PAGE_SIZE;
}

enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
        MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
        MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
        MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};

static u64 mlx_to_virtio_features(u16 dev_features)
{
        u64 result = 0;

        if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
                result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
        if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
                result |= BIT_ULL(VIRTIO_NET_F_CSUM);
        if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
                result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
        if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
                result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);

        return result;
}

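/* Example (illustrative capability value): a device_features_bits_mask of
 * MLX5_VIRTIO_NET_F_CSUM | MLX5_VIRTIO_NET_F_HOST_TSO4 (0x1400) translates
 * to BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) in the
 * virtio feature space advertised to the vdpa core.
 */
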
static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        u16 dev_features;

        dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
        ndev->mvdev.mlx_features = mlx_to_virtio_features(dev_features);
        if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
                ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
        print_features(mvdev, ndev->mvdev.mlx_features, false);
        return ndev->mvdev.mlx_features;
}

static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{
        if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
                return -EOPNOTSUPP;

        return 0;
}

static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int err;
        int i;

        for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
                err = setup_vq(ndev, &ndev->vqs[i]);
                if (err)
                        goto err_vq;
        }

        return 0;

err_vq:
        for (--i; i >= 0; i--)
                teardown_vq(ndev, &ndev->vqs[i]);

        return err;
}

static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_virtqueue *mvq;
        int i;

        for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
                mvq = &ndev->vqs[i];
                if (!mvq->initialized)
                        continue;

                teardown_vq(ndev, mvq);
        }
}

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
        return virtio_legacy_is_little_endian() ||
               (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
        return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

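/* Example: with VIRTIO_F_VERSION_1 negotiated the config space is
 * little-endian, so cpu_to_mlx5vdpa16(mvdev, 1500) stores the bytes
 * 0xdc 0x05 in memory regardless of host endianness; for a legacy
 * big-endian guest the value would be kept in native byte order instead.
 */
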
static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int err;

        print_features(mvdev, features, true);

        err = verify_min_features(mvdev, features);
        if (err)
                return err;

        ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
        ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
        ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
        return err;
}

static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
        /* not implemented */
        mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
}

#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
        return MLX5_VDPA_MAX_VQ_ENTRIES;
}

static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
        return VIRTIO_ID_NET;
}

static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
        return PCI_VENDOR_ID_MELLANOX;
}

static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

        print_status(mvdev, ndev->mvdev.status, false);
        return ndev->mvdev.status;
}

static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        struct mlx5_vq_restore_info *ri = &mvq->ri;
        struct mlx5_virtq_attr attr;
        int err;

        if (!mvq->initialized)
                return 0;

        err = query_virtqueue(ndev, mvq, &attr);
        if (err)
                return err;

        ri->avail_index = attr.available_index;
        ri->used_index = attr.used_index;
        ri->ready = mvq->ready;
        ri->num_ent = mvq->num_ent;
        ri->desc_addr = mvq->desc_addr;
        ri->device_addr = mvq->device_addr;
        ri->driver_addr = mvq->driver_addr;
        return 0;
}

static int save_channels_info(struct mlx5_vdpa_net *ndev)
{
        int i;

        for (i = 0; i < ndev->mvdev.max_vqs; i++) {
                memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
                save_channel_info(ndev, &ndev->vqs[i]);
        }
        return 0;
}

static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
        int i;

        for (i = 0; i < ndev->mvdev.max_vqs; i++)
                memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}

static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_virtqueue *mvq;
        struct mlx5_vq_restore_info *ri;
        int i;

        mlx5_clear_vqs(ndev);
        init_mvqs(ndev);
        for (i = 0; i < ndev->mvdev.max_vqs; i++) {
                mvq = &ndev->vqs[i];
                ri = &mvq->ri;
                mvq->avail_idx = ri->avail_index;
                mvq->used_idx = ri->used_index;
                mvq->ready = ri->ready;
                mvq->num_ent = ri->num_ent;
                mvq->desc_addr = ri->desc_addr;
                mvq->device_addr = ri->device_addr;
                mvq->driver_addr = ri->driver_addr;
        }
}

static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int err;

        suspend_vqs(ndev);
        err = save_channels_info(ndev);
        if (err)
                goto err_mr;

        teardown_driver(ndev);
        mlx5_vdpa_destroy_mr(mvdev);
        err = mlx5_vdpa_create_mr(mvdev, iotlb);
        if (err)
                goto err_mr;

        if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
                return 0;

        restore_channels_info(ndev);
        err = setup_driver(mvdev);
        if (err)
                goto err_setup;

        return 0;

err_setup:
        mlx5_vdpa_destroy_mr(mvdev);
err_mr:
        return err;
}

static int setup_driver(struct mlx5_vdpa_dev *mvdev)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int err;

        mutex_lock(&ndev->reslock);
        if (ndev->setup) {
                mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
                err = 0;
                goto out;
        }
        err = setup_virtqueues(mvdev);
        if (err) {
                mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
                goto out;
        }

        err = create_rqt(ndev);
        if (err) {
                mlx5_vdpa_warn(mvdev, "create_rqt\n");
                goto err_rqt;
        }

        err = create_tir(ndev);
        if (err) {
                mlx5_vdpa_warn(mvdev, "create_tir\n");
                goto err_tir;
        }

        err = add_fwd_to_tir(ndev);
        if (err) {
                mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
                goto err_fwd;
        }

        ndev->setup = true;
        mutex_unlock(&ndev->reslock);

        return 0;

err_fwd:
        destroy_tir(ndev);
err_tir:
        destroy_rqt(ndev);
err_rqt:
        teardown_virtqueues(ndev);
out:
        mutex_unlock(&ndev->reslock);
        return err;
}

static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
        mutex_lock(&ndev->reslock);
        if (!ndev->setup)
                goto out;

        remove_fwd_to_tir(ndev);
        destroy_tir(ndev);
        destroy_rqt(ndev);
        teardown_virtqueues(ndev);
        ndev->setup = false;
out:
        mutex_unlock(&ndev->reslock);
}

static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
{
        int i;

        for (i = 0; i < ndev->mvdev.max_vqs; i++)
                ndev->vqs[i].ready = false;
}

static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int err;

        print_status(mvdev, status, true);
        if (!status) {
                mlx5_vdpa_info(mvdev, "performing device reset\n");
                teardown_driver(ndev);
                clear_vqs_ready(ndev);
                mlx5_vdpa_destroy_mr(&ndev->mvdev);
                ndev->mvdev.status = 0;
                ndev->mvdev.mlx_features = 0;
                memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
                ++mvdev->generation;
                if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
                        if (mlx5_vdpa_create_mr(mvdev, NULL))
                                mlx5_vdpa_warn(mvdev, "create MR failed\n");
                }
                return;
        }

        if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
                if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
                        err = setup_driver(mvdev);
                        if (err) {
                                mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
                                goto err_setup;
                        }
                } else {
                        mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
                        return;
                }
        }

        ndev->mvdev.status = status;
        return;

err_setup:
        mlx5_vdpa_destroy_mr(&ndev->mvdev);
        ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}

static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
        return sizeof(struct virtio_net_config);
}

static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
                                 unsigned int len)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

        if (offset + len <= sizeof(struct virtio_net_config))
                memcpy(buf, (u8 *)&ndev->config + offset, len);
}

static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
                                 unsigned int len)
{
        /* not supported */
}

static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

        return mvdev->generation;
}

static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        bool change_map;
        int err;

        err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
        if (err) {
                mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
                return err;
        }

        if (change_map)
                return mlx5_vdpa_change_map(mvdev, iotlb);

        return 0;
}

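/* A sketch of the remap flow triggered above when change_map is set:
 * mlx5_vdpa_change_map() suspends the virtqueues, saves their indices via
 * save_channels_info(), tears down the steering and virtqueue objects,
 * rebuilds the MR from the new iotlb, and, if DRIVER_OK was already set,
 * restores the saved state and calls setup_driver() so traffic resumes on
 * the new mapping.
 */
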
static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_core_dev *pfmdev;
        struct mlx5_vdpa_net *ndev;

        ndev = to_mlx5_vdpa_ndev(mvdev);

        free_resources(ndev);
        mlx5_vdpa_destroy_mr(mvdev);
        if (!is_zero_ether_addr(ndev->config.mac)) {
                pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
                mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
        }
        mlx5_vdpa_free_resources(&ndev->mvdev);
        mutex_destroy(&ndev->reslock);
}

static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct vdpa_notification_area ret = {};
        struct mlx5_vdpa_net *ndev;
        phys_addr_t addr;

        /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
         * notification to avoid the risk of mapping pages that contain BAR of more
         * than one SF
         */
        if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
                return ret;

        ndev = to_mlx5_vdpa_ndev(mvdev);
        addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
        ret.addr = addr;
        ret.size = PAGE_SIZE;
        return ret;
}

static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
{
        return -EOPNOTSUPP;
}

static const struct vdpa_config_ops mlx5_vdpa_ops = {
        .set_vq_address = mlx5_vdpa_set_vq_address,
        .set_vq_num = mlx5_vdpa_set_vq_num,
        .kick_vq = mlx5_vdpa_kick_vq,
        .set_vq_cb = mlx5_vdpa_set_vq_cb,
        .set_vq_ready = mlx5_vdpa_set_vq_ready,
        .get_vq_ready = mlx5_vdpa_get_vq_ready,
        .set_vq_state = mlx5_vdpa_set_vq_state,
        .get_vq_state = mlx5_vdpa_get_vq_state,
        .get_vq_notification = mlx5_get_vq_notification,
        .get_vq_irq = mlx5_get_vq_irq,
        .get_vq_align = mlx5_vdpa_get_vq_align,
        .get_features = mlx5_vdpa_get_features,
        .set_features = mlx5_vdpa_set_features,
        .set_config_cb = mlx5_vdpa_set_config_cb,
        .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
        .get_device_id = mlx5_vdpa_get_device_id,
        .get_vendor_id = mlx5_vdpa_get_vendor_id,
        .get_status = mlx5_vdpa_get_status,
        .set_status = mlx5_vdpa_set_status,
        .get_config_size = mlx5_vdpa_get_config_size,
        .get_config = mlx5_vdpa_get_config,
        .set_config = mlx5_vdpa_set_config,
        .get_generation = mlx5_vdpa_get_generation,
        .set_map = mlx5_vdpa_set_map,
        .free = mlx5_vdpa_free,
};

static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
        u16 hw_mtu;
        int err;

        err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
        if (err)
                return err;

        *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
        return 0;
}

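/* Worked example, assuming MLX5V_ETH_HARD_MTU accounts for the Ethernet
 * header, VLAN tag and FCS (14 + 4 + 4 = 22 bytes): a hardware MTU of 1522
 * reported by mlx5_query_nic_vport_mtu() is advertised to the guest as a
 * 1500-byte virtio-net MTU.
 */
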
static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_net_resources *res = &ndev->res;
        int err;

        if (res->valid) {
                mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
                return -EEXIST;
        }

        err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
        if (err)
                return err;

        err = create_tis(ndev);
        if (err)
                goto err_tis;

        res->valid = true;

        return 0;

err_tis:
        mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
        return err;
}

static void free_resources(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_net_resources *res = &ndev->res;

        if (!res->valid)
                return;

        destroy_tis(ndev);
        mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
        res->valid = false;
}

static void init_mvqs(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_virtqueue *mvq;
        int i;

        for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
                mvq = &ndev->vqs[i];
                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
                mvq->index = i;
                mvq->ndev = ndev;
                mvq->fwqp.fw = true;
        }
        for (; i < ndev->mvdev.max_vqs; i++) {
                mvq = &ndev->vqs[i];
                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
                mvq->index = i;
                mvq->ndev = ndev;
        }
}

struct mlx5_vdpa_mgmtdev {
        struct vdpa_mgmt_dev mgtdev;
        struct mlx5_adev *madev;
        struct mlx5_vdpa_net *ndev;
};

static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
{
        struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
        struct virtio_net_config *config;
        struct mlx5_core_dev *pfmdev;
        struct mlx5_vdpa_dev *mvdev;
        struct mlx5_vdpa_net *ndev;
        struct mlx5_core_dev *mdev;
        u32 max_vqs;
        int err;

        if (mgtdev->ndev)
                return -ENOSPC;

        mdev = mgtdev->madev->mdev;
        if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
            MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
                dev_warn(mdev->device, "missing support for split virtqueues\n");
                return -EOPNOTSUPP;
        }

        /* we save one virtqueue for control virtqueue should we require it */
        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
        max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);

        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
                                 name);
        if (IS_ERR(ndev))
                return PTR_ERR(ndev);

        ndev->mvdev.max_vqs = max_vqs;
        mvdev = &ndev->mvdev;
        mvdev->mdev = mdev;
        init_mvqs(ndev);
        mutex_init(&ndev->reslock);
        config = &ndev->config;
        err = query_mtu(mdev, &ndev->mtu);
        if (err)
                goto err_mtu;

        err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
        if (err)
                goto err_mtu;

        if (!is_zero_ether_addr(config->mac)) {
                pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
                err = mlx5_mpfs_add_mac(pfmdev, config->mac);
                if (err)
                        goto err_mtu;
        }

        mvdev->vdev.dma_dev = &mdev->pdev->dev;
        err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
        if (err)
                goto err_mpfs;

        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
                err = mlx5_vdpa_create_mr(mvdev, NULL);
                if (err)
                        goto err_res;
        }

        err = alloc_resources(ndev);
        if (err)
                goto err_mr;

        mvdev->vdev.mdev = &mgtdev->mgtdev;
        err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
        if (err)
                goto err_reg;

        mgtdev->ndev = ndev;
        return 0;

err_reg:
        free_resources(ndev);
err_mr:
        mlx5_vdpa_destroy_mr(mvdev);
err_res:
        mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
        if (!is_zero_ether_addr(config->mac))
                mlx5_mpfs_del_mac(pfmdev, config->mac);
err_mtu:
        mutex_destroy(&ndev->reslock);
        put_device(&mvdev->vdev.dev);
        return err;
}

static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
        struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);

        _vdpa_unregister_device(dev);
        mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
        .dev_add = mlx5_vdpa_dev_add,
        .dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static int mlx5v_probe(struct auxiliary_device *adev,
                       const struct auxiliary_device_id *id)
{
        struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
        struct mlx5_core_dev *mdev = madev->mdev;
        struct mlx5_vdpa_mgmtdev *mgtdev;
        int err;

        mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
        if (!mgtdev)
                return -ENOMEM;

        mgtdev->mgtdev.ops = &mdev_ops;
        mgtdev->mgtdev.device = mdev->device;
        mgtdev->mgtdev.id_table = id_table;
        mgtdev->madev = madev;

        err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
        if (err)
                goto reg_err;

        dev_set_drvdata(&adev->dev, mgtdev);

        return 0;

reg_err:
        kfree(mgtdev);
        return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
        struct mlx5_vdpa_mgmtdev *mgtdev;

        mgtdev = dev_get_drvdata(&adev->dev);
        vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
        kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
        { .name = MLX5_ADEV_NAME ".vnet", },
        {},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
        .name = "vnet",
        .probe = mlx5v_probe,
        .remove = mlx5v_remove,
        .id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);