// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define to_mlx5_vdpa_ndev(__mvdev) \
	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
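
/* Note: the two masks above are only consulted by the print_features() and
 * print_status() debug helpers below to flag unexpected bits; they do not by
 * themselves restrict what the device accepts.
 */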
struct mlx5_vdpa_net_resources {

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;

struct mlx5_vq_restore_info {
	struct vdpa_callback cb;

struct mlx5_vdpa_virtqueue {
	struct vdpa_callback event_cb;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp, used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;
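
	/* Resulting event flow, as implemented below: a completion on cq is
	 * polled by mlx5_vdpa_cq_comp(), which refills the driver QP's RQ and
	 * forwards the event to the vdpa core through event_cb.
	 */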
	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	struct mlx5_vdpa_net *ndev;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;

/* We will remove this limitation once mlx5_vdpa_alloc_resources()
 * provides for driver space allocation.
 */
#define MLX5_MAX_SUPPORTED_VQS 16
struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];

	/* Serialize vq resources creation and destruction. This is required
	 * since the memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct mutex reslock;
	struct mlx5_flow_table *rxft;
	struct mlx5_fc *rx_counter;
	struct mlx5_flow_handle *rx_rule;

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_net *ndev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
	return max_vqs / 2;
}
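
/* A "qps" count here is in units of virtio queue pairs: one RX plus one TX
 * virtqueue each, so callers consistently iterate the data virtqueues as
 * 2 * mlx5_vdpa_max_qps(max_vqs).
 */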
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;
	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}
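
/* Standard mlx5 CQE ownership check: the owner bit in op_own flips on every
 * wrap of the CQ, so a CQE belongs to software only when its owner bit
 * matches the wrap parity of the consumer index computed above.
 */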
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver for the firmware's
		 * use, so we can skip part of the params as they will be
		 * chosen by firmware.
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
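
/* Only the driver-owned QP gets the full RC receive configuration (PD, UAR,
 * receive CQ and RQ geometry); for the firmware-owned QP the early return
 * above leaves most parameters for firmware to choose.
 */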
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
	if (err)
		return err;

	err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
	if (err)
		goto err_db;

	inlen += vqp->frag_buf.npages * sizeof(__be64);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	rq_buf_free(ndev, vqp);
	return err;
}
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);

	mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
	rq_buf_free(ndev, vqp);
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (mvq->event_cb.callback)
		mvq->event_cb.callback(mvq->event_cb.private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
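
/* Re-arming with MLX5_CQ_DB_REQ_NOT requests another event on the next
 * completion, so this handler runs once per completion burst rather than
 * being re-entered for every descriptor.
 */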
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	unsigned int irqn;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buf;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose the least
	 * used vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_buf:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int p_a;
	int p_b;

	switch (num) {
	case 1:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
		*umemp = &mvq->umem3;
		break;
	}
	(*umemp)->size = p_a * mvq->num_ent + p_b;
}
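
/* The umem size requirement is linear in the queue depth:
 * size = param_a * num_ent + param_b, with the coefficients advertised per
 * umem in the device's VDPA emulation capabilities read above.
 */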
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;
	__be64 *pas;
	void *um;
	void *in;
	int inlen;
	int err;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);
	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}
static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}

static u16 get_features_12_3(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}
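
/* Packs the four offload feature bits into the firmware's
 * queue_feature_bit_mask_12_3 field of the virtio_net_q object; the shift
 * positions follow the mlx5_ifc layout of that field rather than the virtio
 * feature bit numbers.
 */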
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}
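
/* For a given end of the RC connection, get_qpn() returns the QPN to modify
 * and get_rqpn() the QPN of its peer: with fw set, the firmware QP is local
 * and the driver QP is remote, and vice versa.
 */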
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}
static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one
 * used by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}
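
/* connect_qps() walks both QPs of the notification channel through the
 * standard RC state ladder (RESET -> INIT -> RTR), each end pointing at the
 * other's QPN, and finally moves only the firmware-side QP to RTS, the end
 * that transmits on this channel.
 */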
struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *out;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (mvq->initialized) {
		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
		return -EINVAL;
	}

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_connect;

	err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
			       idx, err);
		goto err_connect;
	}

	mvq->initialized = true;
	return 0;

err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}
static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	int log_max_rqt;
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
	if (log_max_rqt < 1)
		return -EOPNOTSUPP;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
		if (!ndev->vqs[j].initialized)
			continue;

		if (!vq_is_tx(ndev->vqs[j].index)) {
			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
			i++;
		}
	}

	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	return err;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
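	/* This looks like the same well-known default Toeplitz key used
	 * elsewhere in mlx5 for RSS; the selectors below hash the outer IPv4
	 * TCP 4-tuple.
	 */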
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_namespace *ns;
	int err;

	/* for now, one entry, match all, forward to tir */
	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft))
		return PTR_ERR(ndev->rxft);

	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(ndev->rx_counter)) {
		err = PTR_ERR(ndev->rx_counter);
		goto err_fc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest[0].tir_num = ndev->res.tirn;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
	if (IS_ERR(ndev->rx_rule)) {
		err = PTR_ERR(ndev->rx_rule);
		ndev->rx_rule = NULL;
		goto err_rule;
	}

	return 0;

err_rule:
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->rx_rule)
		return;

	mlx5_del_flow_rules(ndev->rx_rule);
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
	mlx5_destroy_flow_table(ndev->rxft);

	ndev->rx_rule = NULL;
}
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (unlikely(!mvq->ready))
		return;

	iowrite16(idx, ndev->mvdev.res.kick_addr);
}

static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
				    u64 driver_area, u64 device_area)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	mvq->desc_addr = desc_area;
	mvq->device_addr = device_area;
	mvq->driver_addr = driver_area;
	return 0;
}

static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	mvq = &ndev->vqs[idx];
	mvq->num_ent = num;
}

static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];

	vq->event_cb = *cb;
}

static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (!ready)
		suspend_vq(ndev, mvq);

	mvq->ready = ready;
}

static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	return mvq->ready;
}

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
				  const struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
		return -EINVAL;
	}

	mvq->used_idx = state->avail_index;
	mvq->avail_idx = state->avail_index;
	return 0;
}

static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_virtq_attr attr;
	int err;

	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after the vq is stopped.
	 */
	if (!mvq->initialized) {
		/* Firmware returns a wrong value for the available index.
		 * Since both values should be identical, we take the value of
		 * used_idx which is reported correctly.
		 */
		state->avail_index = mvq->used_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	state->avail_index = attr.used_index;
	return 0;
}
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}

enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};

static u64 mlx_to_virtio_features(u16 dev_features)
{
	u64 result = 0;

	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);

	return result;
}
static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	u16 dev_features;

	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
	ndev->mvdev.mlx_features = mlx_to_virtio_features(dev_features);
	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}

static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{
	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return -EOPNOTSUPP;

	return 0;
}

static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
{
	int err;
	int i;

	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
		err = setup_vq(ndev, &ndev->vqs[i]);
		if (err)
			goto err_vq;
	}

	return 0;

err_vq:
	for (--i; i >= 0; i--)
		teardown_vq(ndev, &ndev->vqs[i]);

	return err;
}
static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
		mvq = &ndev->vqs[i];
		if (!mvq->initialized)
			continue;

		teardown_vq(ndev, mvq);
	}
}

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
	       (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_features(mvdev, features, true);

	err = verify_min_features(mvdev, features);
	if (err)
		return err;

	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	return err;
}

static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	/* not implemented */
	mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
}
#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}

static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}

static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}

static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}
static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vq_restore_info *ri = &mvq->ri;
	struct mlx5_virtq_attr attr;
	int err;

	if (!mvq->initialized)
		return 0;

	err = query_virtqueue(ndev, mvq, &attr);
	if (err)
		return err;

	ri->avail_index = attr.available_index;
	ri->used_index = attr.used_index;
	ri->ready = mvq->ready;
	ri->num_ent = mvq->num_ent;
	ri->desc_addr = mvq->desc_addr;
	ri->device_addr = mvq->device_addr;
	ri->driver_addr = mvq->driver_addr;
	ri->cb = mvq->event_cb;

	return 0;
}

static int save_channels_info(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
		save_channel_info(ndev, &ndev->vqs[i]);
	}
	return 0;
}

static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}

static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vq_restore_info *ri;
	int i;

	mlx5_clear_vqs(ndev);
	init_mvqs(ndev);
	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		ri = &mvq->ri;

		mvq->avail_idx = ri->avail_index;
		mvq->used_idx = ri->used_index;
		mvq->ready = ri->ready;
		mvq->num_ent = ri->num_ent;
		mvq->desc_addr = ri->desc_addr;
		mvq->device_addr = ri->device_addr;
		mvq->driver_addr = ri->driver_addr;
		mvq->event_cb = ri->cb;
	}
}

static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
{
	int err;

	suspend_vqs(ndev);
	err = save_channels_info(ndev);
	if (err)
		goto err_mr;

	teardown_driver(ndev);
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
	if (err)
		goto err_mr;

	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	restore_channels_info(ndev);
	err = setup_driver(ndev);
	if (err)
		goto err_setup;

	return 0;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
err_mr:
	return err;
}
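
/* A memory map change is handled as a full cycle: save per-VQ state, tear
 * down the driver objects, swap the memory key for the new iotlb, then
 * restore state and rebuild, but only if the device was already DRIVER_OK.
 */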
static int setup_driver(struct mlx5_vdpa_net *ndev)
{
	int err;

	mutex_lock(&ndev->reslock);
	if (ndev->setup) {
		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}

	err = setup_virtqueues(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
		goto out;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
		goto err_tir;
	}

	err = add_fwd_to_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
		goto err_fwd;
	}

	ndev->setup = true;
	mutex_unlock(&ndev->reslock);

	return 0;

err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
out:
	mutex_unlock(&ndev->reslock);
	return err;
}

static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
	mutex_lock(&ndev->reslock);
	if (!ndev->setup)
		goto out;

	remove_fwd_to_tir(ndev);
	destroy_tir(ndev);
	destroy_rqt(ndev);
	teardown_virtqueues(ndev);
	ndev->setup = false;
out:
	mutex_unlock(&ndev->reslock);
}
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);
	if (!status) {
		mlx5_vdpa_info(mvdev, "performing device reset\n");
		teardown_driver(ndev);
		mlx5_vdpa_destroy_mr(&ndev->mvdev);
		ndev->mvdev.status = 0;
		ndev->mvdev.mlx_features = 0;
		++mvdev->generation;
		return;
	}

	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_driver(ndev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_setup;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			return;
		}
	}

	ndev->mvdev.status = status;
	return;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
	return sizeof(struct virtio_net_config);
}

static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
				 unsigned int len)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	if (offset + len <= sizeof(struct virtio_net_config))
		memcpy(buf, (u8 *)&ndev->config + offset, len);
}

static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
	/* not supported */
}

static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->generation;
}

static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	bool change_map;
	int err;

	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
	if (err) {
		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
		return err;
	}

	if (change_map)
		return mlx5_vdpa_change_map(ndev, iotlb);

	return 0;
}
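
/* mlx5_vdpa_handle_set_map() reports through change_map whether an existing
 * mapping was replaced; only in that case must the virtqueue objects be torn
 * down and recreated against the new memory registration.
 */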
static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	free_resources(ndev);
	if (!is_zero_ether_addr(ndev->config.mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
	}
	mlx5_vdpa_free_resources(&ndev->mvdev);
	mutex_destroy(&ndev->reslock);
}

static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct vdpa_notification_area ret = {};

	return ret;
}

static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
{
	return -EOPNOTSUPP;
}
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_features = mlx5_vdpa_get_features,
	.set_features = mlx5_vdpa_set_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.free = mlx5_vdpa_free,
};
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
	u16 hw_mtu;
	int err;

	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
	if (err)
		return err;

	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
	return 0;
}

static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;
	return 0;

err_tis:
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}

static void free_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;

	if (!res->valid)
		return;

	destroy_tis(ndev);
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	res->valid = false;
}
static void init_mvqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
		mvq->fwqp.fw = true;
	}
	for (; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
	}
}

struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;
	struct mlx5_adev *madev;
	struct mlx5_vdpa_net *ndev;
};
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u32 max_vqs;
	int err;

	mdev = mgtdev->madev->mdev;
	/* we reserve one virtqueue for the control virtqueue, should we require it */
	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
				 name);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;
	init_mvqs(ndev);
	mutex_init(&ndev->reslock);
	config = &ndev->config;
	err = query_mtu(mdev, &ndev->mtu);
	if (err)
		goto err_mtu;

	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
	if (err)
		goto err_mtu;

	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_mtu;
	}

	mvdev->vdev.dma_dev = mdev->device;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mpfs;

	err = alloc_resources(ndev);
	if (err)
		goto err_res;

	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;
	return 0;

err_reg:
	free_resources(ndev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
	if (!is_zero_ether_addr(config->mac))
		mlx5_mpfs_del_mac(pfmdev, config->mac);
err_mtu:
	mutex_destroy(&ndev->reslock);
	put_device(&mvdev->vdev.dev);
	return err;
}
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);

	_vdpa_unregister_device(dev);
	mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->madev = madev;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	dev_set_drvdata(&adev->dev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}
static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = dev_get_drvdata(&adev->dev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);