1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <linux/virtio_config.h>
10 #include <linux/auxiliary_bus.h>
11 #include <linux/mlx5/cq.h>
12 #include <linux/mlx5/qp.h>
13 #include <linux/mlx5/device.h>
14 #include <linux/mlx5/driver.h>
15 #include <linux/mlx5/vport.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/mlx5_ifc_vdpa.h>
18 #include <linux/mlx5/mpfs.h>
19 #include "mlx5_vdpa.h"
21 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
22 MODULE_DESCRIPTION("Mellanox VDPA driver");
23 MODULE_LICENSE("Dual BSD/GPL");
25 #define to_mlx5_vdpa_ndev(__mvdev) \
26 container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
27 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
29 #define VALID_FEATURES_MASK \
30 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
31 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
32 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
33 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
34 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
35 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
36 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
37 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
38 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
39 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
40 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
41 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
42 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
44 #define VALID_STATUS_MASK \
45 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
46 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
48 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
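/* Usage sketch (illustrative, not part of the driver): MLX5_FEATURE() folds a
 * negotiated feature bit into a 0/1 value, e.g.
 *
 *	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
 *		configure_multiqueue();
 *
 * expands to !!((mvdev)->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)), where
 * configure_multiqueue() is a hypothetical caller, not a function in this file.
 */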
50 struct mlx5_vdpa_net_resources {
58 struct mlx5_vdpa_cq_buf {
59 struct mlx5_frag_buf_ctrl fbc;
60 struct mlx5_frag_buf frag_buf;
66 struct mlx5_core_cq mcq;
67 struct mlx5_vdpa_cq_buf buf;
72 struct mlx5_vdpa_umem {
73 struct mlx5_frag_buf_ctrl fbc;
74 struct mlx5_frag_buf frag_buf;
80 struct mlx5_core_qp mqp;
81 struct mlx5_frag_buf frag_buf;
87 struct mlx5_vq_restore_info {
98 struct mlx5_vdpa_virtqueue {
105 /* Resources for implementing the notification channel from the device
106 * to the driver. fwqp is the firmware end of an RC connection; the
107 * other end is vqqp used by the driver. cq is where completions are reported. */
110 struct mlx5_vdpa_cq cq;
111 struct mlx5_vdpa_qp fwqp;
112 struct mlx5_vdpa_qp vqqp;
114 /* umem resources are required for the virtqueue operation. Their use
115 * is internal and they must be provided by the driver. */
117 struct mlx5_vdpa_umem umem1;
118 struct mlx5_vdpa_umem umem2;
119 struct mlx5_vdpa_umem umem3;
124 struct mlx5_vdpa_net *ndev;
129 /* keep last in the struct */
130 struct mlx5_vq_restore_info ri;
133 /* We will remove this limitation once mlx5_vdpa_alloc_resources()
134 * provides for driver space allocation. */
136 #define MLX5_MAX_SUPPORTED_VQS 16
138 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
140 if (unlikely(idx > mvdev->max_idx))
146 struct mlx5_vdpa_net {
147 struct mlx5_vdpa_dev mvdev;
148 struct mlx5_vdpa_net_resources res;
149 struct virtio_net_config config;
150 struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
151 struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
153 /* Serialize vq resources creation and destruction. This is required
154 * since memory map might change and we need to destroy and create
155 * resources while the driver is operational. */
157 struct mutex reslock;
158 struct mlx5_flow_table *rxft;
159 struct mlx5_fc *rx_counter;
160 struct mlx5_flow_handle *rx_rule;
166 static void free_resources(struct mlx5_vdpa_net *ndev);
167 static void init_mvqs(struct mlx5_vdpa_net *ndev);
168 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
169 static void teardown_driver(struct mlx5_vdpa_net *ndev);
171 static bool mlx5_vdpa_debug;
173 #define MLX5_CVQ_MAX_ENT 16
175 #define MLX5_LOG_VIO_FLAG(_feature) \
177 if (features & BIT_ULL(_feature)) \
178 mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
181 #define MLX5_LOG_VIO_STAT(_status) \
183 if (status & (_status)) \
184 mlx5_vdpa_info(mvdev, "%s\n", #_status); \
187 /* TODO: cross-endian support */
188 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
190 return virtio_legacy_is_little_endian() ||
191 (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
194 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
196 return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
199 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
201 return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
204 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
209 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
211 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
214 return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
217 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
219 return idx == ctrl_vq_idx(mvdev);
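/* Index layout sketch (derived from the helpers above): the 2N data
 * virtqueues occupy indices 0..2N-1 as RX/TX pairs; when VIRTIO_NET_F_MQ is
 * negotiated the control virtqueue sits right above them at
 * 2 * mlx5_vdpa_max_qps(max_vqs), otherwise at index 2. E.g. with
 * max_vqs = 16 and MQ negotiated, the CVQ index is 16.
 */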
222 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
224 if (status & ~VALID_STATUS_MASK)
225 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
226 status & ~VALID_STATUS_MASK);
228 if (!mlx5_vdpa_debug)
231 mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
232 if (set && !status) {
233 mlx5_vdpa_info(mvdev, "driver resets the device\n");
237 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
238 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
239 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
240 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
241 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
242 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
245 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
247 if (features & ~VALID_FEATURES_MASK)
248 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
249 features & ~VALID_FEATURES_MASK);
251 if (!mlx5_vdpa_debug)
254 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
256 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
258 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
259 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
260 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
261 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
262 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
263 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
264 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
265 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
266 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
267 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
268 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
269 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
270 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
271 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
272 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
273 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
274 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
275 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
276 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
277 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
278 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
279 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
280 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
281 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
282 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
283 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
284 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
285 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
286 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
287 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
288 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
289 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
290 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
291 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
294 static int create_tis(struct mlx5_vdpa_net *ndev)
296 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
297 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
301 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
302 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
303 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
305 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
310 static void destroy_tis(struct mlx5_vdpa_net *ndev)
312 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
315 #define MLX5_VDPA_CQE_SIZE 64
316 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
318 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
320 struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
321 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
322 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
325 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
326 ndev->mvdev.mdev->priv.numa_node);
330 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
332 buf->cqe_size = MLX5_VDPA_CQE_SIZE;
338 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
340 struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
342 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
343 ndev->mvdev.mdev->priv.numa_node);
346 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
348 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
351 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
353 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
356 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
358 struct mlx5_cqe64 *cqe64;
362 for (i = 0; i < buf->nent; i++) {
363 cqe = get_cqe(vcq, i);
365 cqe64->op_own = MLX5_CQE_INVALID << 4;
369 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
371 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
373 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
374 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
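/* Ownership convention sketch: a CQE is software-owned when its owner bit
 * matches the parity of the consumer index wrap (n & cq->cqe). get_sw_cqe()
 * returns the CQE only when it is valid and software-owned, otherwise NULL.
 */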
380 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
383 vqp->db.db[0] = cpu_to_be32(vqp->head);
386 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
387 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
389 struct mlx5_vdpa_qp *vqp;
393 vqp = fw ? &mvq->fwqp : &mvq->vqqp;
394 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
395 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
397 /* Firmware QP is allocated by the driver for the firmware's
398 * use, so we can skip part of the params; they will be chosen by the firmware. */
400 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
401 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
402 MLX5_SET(qpc, qpc, no_sq, 1);
406 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
407 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
408 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
409 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
410 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
411 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
412 MLX5_SET(qpc, qpc, no_sq, 1);
413 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
414 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
415 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
416 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
417 mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
420 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
422 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
423 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
424 ndev->mvdev.mdev->priv.numa_node);
427 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
429 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
432 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
433 struct mlx5_vdpa_qp *vqp)
435 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
436 int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
437 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
444 err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
448 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
451 inlen += vqp->frag_buf.npages * sizeof(__be64);
454 in = kzalloc(inlen, GFP_KERNEL);
460 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
461 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
462 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
463 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
464 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
465 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
467 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
468 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
469 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
474 vqp->mqp.uid = ndev->mvdev.res.uid;
475 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
478 rx_post(vqp, mvq->num_ent);
484 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
487 rq_buf_free(ndev, vqp);
492 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
494 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
496 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
497 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
498 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
499 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
500 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
502 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
503 rq_buf_free(ndev, vqp);
507 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
509 return get_sw_cqe(cq, cq->mcq.cons_index);
512 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
514 struct mlx5_cqe64 *cqe64;
516 cqe64 = next_cqe_sw(vcq);
520 vcq->mcq.cons_index++;
524 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
526 struct mlx5_vdpa_net *ndev = mvq->ndev;
527 struct vdpa_callback *event_cb;
529 event_cb = &ndev->event_cbs[mvq->index];
530 mlx5_cq_set_ci(&mvq->cq.mcq);
532 /* make sure CQ consumer update is visible to the hardware before updating
533 * RX doorbell record.
536 rx_post(&mvq->vqqp, num);
537 if (event_cb->callback)
538 event_cb->callback(event_cb->private);
541 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
543 struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
544 struct mlx5_vdpa_net *ndev = mvq->ndev;
545 void __iomem *uar_page = ndev->mvdev.res.uar->map;
548 while (!mlx5_vdpa_poll_one(&mvq->cq)) {
550 if (num > mvq->num_ent / 2) {
551 /* If completions keep coming while we poll, we want to
552 * let the hardware know that we consumed them by
553 * updating the doorbell record. We also let vdpa core
554 * know about this so it passes it on to the virtio driver
557 mlx5_vdpa_handle_completions(mvq, num);
563 mlx5_vdpa_handle_completions(mvq, num);
565 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
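/* Re-arming with MLX5_CQ_DB_REQ_NOT above asks the HCA to raise another
 * completion event when new CQEs arrive, keeping the device-to-driver
 * notification channel live between kicks.
 */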
568 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
570 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
571 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
572 void __iomem *uar_page = ndev->mvdev.res.uar->map;
573 u32 out[MLX5_ST_SZ_DW(create_cq_out)];
574 struct mlx5_vdpa_cq *vcq = &mvq->cq;
582 err = mlx5_db_alloc(mdev, &vcq->db);
586 vcq->mcq.set_ci_db = vcq->db.db;
587 vcq->mcq.arm_db = vcq->db.db + 1;
588 vcq->mcq.cqe_sz = 64;
590 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
594 cq_frag_buf_init(vcq, &vcq->buf);
596 inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
597 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
598 in = kzalloc(inlen, GFP_KERNEL);
604 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
605 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
606 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
608 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
609 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
611 /* Use vector 0 by default. Consider adding code to choose the least used vector. */
614 err = mlx5_vector2eqn(mdev, 0, &eqn);
618 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
619 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
620 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
621 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
622 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
624 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
628 vcq->mcq.comp = mlx5_vdpa_cq_comp;
630 vcq->mcq.set_ci_db = vcq->db.db;
631 vcq->mcq.arm_db = vcq->db.db + 1;
632 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
639 cq_frag_buf_free(ndev, &vcq->buf);
641 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
645 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
647 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
648 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
649 struct mlx5_vdpa_cq *vcq = &mvq->cq;
651 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
652 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
655 cq_frag_buf_free(ndev, &vcq->buf);
656 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
659 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
660 struct mlx5_vdpa_umem **umemp)
662 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
668 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
669 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
670 *umemp = &mvq->umem1;
673 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
674 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
675 *umemp = &mvq->umem2;
678 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
679 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
680 *umemp = &mvq->umem3;
683 (*umemp)->size = p_a * mvq->num_ent + p_b;
686 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
688 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
691 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
694 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
699 struct mlx5_vdpa_umem *umem;
701 set_umem_size(ndev, mvq, num, &umem);
702 err = umem_frag_buf_alloc(ndev, umem, umem->size);
706 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
708 in = kzalloc(inlen, GFP_KERNEL);
714 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
715 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
716 um = MLX5_ADDR_OF(create_umem_in, in, umem);
717 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
718 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
720 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
721 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
723 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
725 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
730 umem->id = MLX5_GET(create_umem_out, out, umem_id);
737 umem_frag_buf_free(ndev, umem);
741 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
743 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
744 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
745 struct mlx5_vdpa_umem *umem;
759 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
760 MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
761 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
764 umem_frag_buf_free(ndev, umem);
767 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
772 for (num = 1; num <= 3; num++) {
773 err = create_umem(ndev, mvq, num);
780 for (num--; num > 0; num--)
781 umem_destroy(ndev, mvq, num);
786 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
790 for (num = 3; num > 0; num--)
791 umem_destroy(ndev, mvq, num);
794 static int get_queue_type(struct mlx5_vdpa_net *ndev)
798 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
800 /* prefer split queue */
801 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
802 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
804 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
806 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
809 static bool vq_is_tx(u16 idx)
814 static u16 get_features_12_3(u64 features)
816 return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
817 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
818 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
819 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
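/* Resulting bit layout of queue_feature_bit_mask_12_3 as computed above
 * (a sketch of what this driver sets; other bits stay zero):
 *
 *	bit 9 - VIRTIO_NET_F_HOST_TSO4
 *	bit 8 - VIRTIO_NET_F_HOST_TSO6
 *	bit 7 - VIRTIO_NET_F_CSUM
 *	bit 6 - VIRTIO_NET_F_GUEST_CSUM
 */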
822 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
824 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
825 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
832 err = umems_create(ndev, mvq);
836 in = kzalloc(inlen, GFP_KERNEL);
842 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
844 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
845 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
846 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
848 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
849 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
850 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
851 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
852 get_features_12_3(ndev->mvdev.actual_features));
853 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
854 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
856 if (vq_is_tx(mvq->index))
857 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
859 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
860 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
861 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
862 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
863 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
864 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
865 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
866 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
867 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
868 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
869 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
870 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
871 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
872 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
873 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
874 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
875 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
876 if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
877 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
879 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
884 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
891 umems_destroy(ndev, mvq);
895 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
897 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
898 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
900 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
901 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
902 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
903 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
904 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
905 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
906 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
907 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
910 umems_destroy(ndev, mvq);
913 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
915 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
918 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
920 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
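/* For a given QP modify command, get_qpn() picks the QP being modified and
 * get_rqpn() its remote peer: with fw == true the firmware QP is modified and
 * pointed at the driver's vqqp, and vice versa. connect_qps() below relies on
 * this to wire the two ends of the RC connection together.
 */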
923 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
924 int *outlen, u32 qpn, u32 rqpn)
930 case MLX5_CMD_OP_2RST_QP:
931 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
932 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
933 *in = kzalloc(*inlen, GFP_KERNEL);
934 *out = kzalloc(*outlen, GFP_KERNEL);
938 MLX5_SET(qp_2rst_in, *in, opcode, cmd);
939 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
940 MLX5_SET(qp_2rst_in, *in, qpn, qpn);
942 case MLX5_CMD_OP_RST2INIT_QP:
943 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
944 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
945 *in = kzalloc(*inlen, GFP_KERNEL);
946 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
950 MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
951 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
952 MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
953 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
954 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
955 MLX5_SET(qpc, qpc, rwe, 1);
956 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
957 MLX5_SET(ads, pp, vhca_port_num, 1);
959 case MLX5_CMD_OP_INIT2RTR_QP:
960 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
961 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
962 *in = kzalloc(*inlen, GFP_KERNEL);
963 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
967 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
968 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
969 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
970 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
971 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
972 MLX5_SET(qpc, qpc, log_msg_max, 30);
973 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
974 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
975 MLX5_SET(ads, pp, fl, 1);
977 case MLX5_CMD_OP_RTR2RTS_QP:
978 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
979 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
980 *in = kzalloc(*inlen, GFP_KERNEL);
981 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
985 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
986 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
987 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
988 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
989 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
990 MLX5_SET(ads, pp, ack_timeout, 14);
991 MLX5_SET(qpc, qpc, retry_count, 7);
992 MLX5_SET(qpc, qpc, rnr_retry, 7);
1008 static void free_inout(void *in, void *out)
1014 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1015 * firmware. The fw argument indicates whether the QP in question is the one used by the firmware. */
1018 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1026 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1030 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1031 free_inout(in, out);
1035 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1039 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1043 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1047 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1051 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1055 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1059 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1063 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
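/* The sequence above walks both QPs through the usual RC state ladder,
 *
 *	RST -> INIT -> RTR -> RTS
 *
 * resetting both ends first, then advancing them in lock-step. Only the
 * firmware QP is taken all the way to RTS; the driver's vqqp only receives,
 * so RTR is sufficient for it.
 */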
1066 struct mlx5_virtq_attr {
1068 u16 available_index;
1072 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1073 struct mlx5_virtq_attr *attr)
1075 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1076 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1082 out = kzalloc(outlen, GFP_KERNEL);
1086 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1088 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1089 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1090 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1091 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1092 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1096 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1097 memset(attr, 0, sizeof(*attr));
1098 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1099 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1100 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1109 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1111 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1112 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1118 in = kzalloc(inlen, GFP_KERNEL);
1122 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1124 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1125 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1126 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1127 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1129 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1130 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1131 MLX5_VIRTQ_MODIFY_MASK_STATE);
1132 MLX5_SET(virtio_net_q_object, obj_context, state, state);
1133 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1136 mvq->fw_state = state;
1141 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1143 u16 idx = mvq->index;
1149 if (mvq->initialized)
1152 err = cq_create(ndev, idx, mvq->num_ent);
1156 err = qp_create(ndev, mvq, &mvq->fwqp);
1160 err = qp_create(ndev, mvq, &mvq->vqqp);
1164 err = connect_qps(ndev, mvq);
1168 err = create_virtqueue(ndev, mvq);
1173 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1175 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1181 mvq->initialized = true;
1185 qp_destroy(ndev, &mvq->vqqp);
1187 qp_destroy(ndev, &mvq->fwqp);
1189 cq_destroy(ndev, idx);
1193 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1195 struct mlx5_virtq_attr attr;
1197 if (!mvq->initialized)
1200 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1203 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1204 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1206 if (query_virtqueue(ndev, mvq, &attr)) {
1207 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1210 mvq->avail_idx = attr.available_index;
1211 mvq->used_idx = attr.used_index;
1214 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1218 for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1219 suspend_vq(ndev, &ndev->vqs[i]);
1222 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1224 if (!mvq->initialized)
1227 suspend_vq(ndev, mvq);
1228 destroy_virtqueue(ndev, mvq);
1229 qp_destroy(ndev, &mvq->vqqp);
1230 qp_destroy(ndev, &mvq->fwqp);
1231 cq_destroy(ndev, mvq->index);
1232 mvq->initialized = false;
1235 static int create_rqt(struct mlx5_vdpa_net *ndev)
1245 max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
1246 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1250 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1251 in = kzalloc(inlen, GFP_KERNEL);
1255 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1256 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1258 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1259 MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
1260 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1261 for (i = 0, j = 0; j < max_rqt; j++) {
1262 if (!ndev->vqs[j].initialized)
1265 if (!vq_is_tx(ndev->vqs[j].index)) {
1266 list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1270 MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
1272 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1280 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1282 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1292 max_rqt = min_t(int, ndev->cur_num_vqs / 2,
1293 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1297 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1298 in = kzalloc(inlen, GFP_KERNEL);
1302 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1303 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1304 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1305 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1307 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1308 for (i = 0, j = 0; j < num; j++) {
1309 if (!ndev->vqs[j].initialized)
1312 if (!vq_is_tx(ndev->vqs[j].index)) {
1313 list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1317 MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
1318 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1326 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1328 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1331 static int create_tir(struct mlx5_vdpa_net *ndev)
1333 #define HASH_IP_L4PORTS \
1334 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
1335 MLX5_HASH_FIELD_SEL_L4_DPORT)
1336 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1337 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1338 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1339 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1340 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1347 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1351 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1352 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1353 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1355 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1356 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1357 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1358 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1360 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1361 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1362 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1363 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1365 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1366 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1368 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1373 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1375 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1378 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1380 struct mlx5_flow_destination dest[2] = {};
1381 struct mlx5_flow_table_attr ft_attr = {};
1382 struct mlx5_flow_act flow_act = {};
1383 struct mlx5_flow_namespace *ns;
1386 /* for now, one entry, match all, forward to tir */
1387 ft_attr.max_fte = 1;
1388 ft_attr.autogroup.max_num_groups = 1;
1390 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1392 mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1396 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1397 if (IS_ERR(ndev->rxft))
1398 return PTR_ERR(ndev->rxft);
1400 ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1401 if (IS_ERR(ndev->rx_counter)) {
1402 err = PTR_ERR(ndev->rx_counter);
1406 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1407 dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1408 dest[0].tir_num = ndev->res.tirn;
1409 dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1410 dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1411 ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1412 if (IS_ERR(ndev->rx_rule)) {
1413 err = PTR_ERR(ndev->rx_rule);
1414 ndev->rx_rule = NULL;
1421 mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1423 mlx5_destroy_flow_table(ndev->rxft);
1427 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1432 mlx5_del_flow_rules(ndev->rx_rule);
1433 mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1434 mlx5_destroy_flow_table(ndev->rxft);
1436 ndev->rx_rule = NULL;
1439 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1441 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1442 struct mlx5_control_vq *cvq = &mvdev->cvq;
1443 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1444 struct mlx5_core_dev *pfmdev;
1448 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1450 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1451 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1452 if (read != ETH_ALEN)
1455 if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1456 status = VIRTIO_NET_OK;
1460 if (!is_zero_ether_addr(ndev->config.mac)) {
1461 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1462 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1468 if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1469 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1474 memcpy(ndev->config.mac, mac, ETH_ALEN);
1475 status = VIRTIO_NET_OK;
1485 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1487 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1488 int cur_qps = ndev->cur_num_vqs / 2;
1492 if (cur_qps > newqps) {
1493 err = modify_rqt(ndev, 2 * newqps);
1497 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1498 teardown_vq(ndev, &ndev->vqs[i]);
1500 ndev->cur_num_vqs = 2 * newqps;
1502 ndev->cur_num_vqs = 2 * newqps;
1503 for (i = cur_qps * 2; i < 2 * newqps; i++) {
1504 err = setup_vq(ndev, &ndev->vqs[i]);
1508 err = modify_rqt(ndev, 2 * newqps);
1515 for (--i; i >= cur_qps; --i)
1516 teardown_vq(ndev, &ndev->vqs[i]);
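/* Ordering note: when shrinking, the RQT is trimmed first so no RX queue is
 * torn down while the RQT still references it; when growing, the new
 * virtqueues are created first and the RQT is only expanded once they exist.
 */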
1521 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1523 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1524 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1525 struct mlx5_control_vq *cvq = &mvdev->cvq;
1526 struct virtio_net_ctrl_mq mq;
1531 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1532 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1533 if (read != sizeof(mq))
1536 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1537 if (ndev->cur_num_vqs == 2 * newqps) {
1538 status = VIRTIO_NET_OK;
1542 if (newqps & (newqps - 1))
1545 if (!change_num_qps(mvdev, newqps))
1546 status = VIRTIO_NET_OK;
1556 static void mlx5_cvq_kick_handler(struct work_struct *work)
1558 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1559 struct virtio_net_ctrl_hdr ctrl;
1560 struct mlx5_ctrl_wq_ent *wqent;
1561 struct mlx5_vdpa_dev *mvdev;
1562 struct mlx5_control_vq *cvq;
1563 struct mlx5_vdpa_net *ndev;
1567 wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
1568 mvdev = wqent->mvdev;
1569 ndev = to_mlx5_vdpa_ndev(mvdev);
1571 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1578 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1583 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1584 if (read != sizeof(ctrl))
1587 switch (ctrl.class) {
1588 case VIRTIO_NET_CTRL_MAC:
1589 status = handle_ctrl_mac(mvdev, ctrl.cmd);
1591 case VIRTIO_NET_CTRL_MQ:
1592 status = handle_ctrl_mq(mvdev, ctrl.cmd);
1599 /* Make sure data is written before advancing index */
1602 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1603 vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1604 vringh_kiov_cleanup(&cvq->riov);
1605 vringh_kiov_cleanup(&cvq->wiov);
1607 if (vringh_need_notify_iotlb(&cvq->vring))
1608 vringh_notify(&cvq->vring);
1614 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1616 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1617 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1618 struct mlx5_vdpa_virtqueue *mvq;
1619 struct mlx5_ctrl_wq_ent *wqent;
1621 if (!is_index_valid(mvdev, idx))
1624 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1625 if (!mvdev->cvq.ready)
1628 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
1632 wqent->mvdev = mvdev;
1633 INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
1634 queue_work(mvdev->wq, &wqent->work);
1638 mvq = &ndev->vqs[idx];
1639 if (unlikely(!mvq->ready))
1642 iowrite16(idx, ndev->mvdev.res.kick_addr);
1645 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1646 u64 driver_area, u64 device_area)
1648 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1649 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1650 struct mlx5_vdpa_virtqueue *mvq;
1652 if (!is_index_valid(mvdev, idx))
1655 if (is_ctrl_vq_idx(mvdev, idx)) {
1656 mvdev->cvq.desc_addr = desc_area;
1657 mvdev->cvq.device_addr = device_area;
1658 mvdev->cvq.driver_addr = driver_area;
1662 mvq = &ndev->vqs[idx];
1663 mvq->desc_addr = desc_area;
1664 mvq->device_addr = device_area;
1665 mvq->driver_addr = driver_area;
1669 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1671 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1672 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1673 struct mlx5_vdpa_virtqueue *mvq;
1675 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1678 mvq = &ndev->vqs[idx];
1682 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1684 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1685 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1687 ndev->event_cbs[idx] = *cb;
1690 static void mlx5_cvq_notify(struct vringh *vring)
1692 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1694 if (!cvq->event_cb.callback)
1697 cvq->event_cb.callback(cvq->event_cb.private);
1700 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
1702 struct mlx5_control_vq *cvq = &mvdev->cvq;
1708 cvq->vring.notify = mlx5_cvq_notify;
1711 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1713 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1714 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1715 struct mlx5_vdpa_virtqueue *mvq;
1717 if (!is_index_valid(mvdev, idx))
1720 if (is_ctrl_vq_idx(mvdev, idx)) {
1721 set_cvq_ready(mvdev, ready);
1725 mvq = &ndev->vqs[idx];
1727 suspend_vq(ndev, mvq);
1732 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1734 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1735 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1737 if (!is_index_valid(mvdev, idx))
1740 if (is_ctrl_vq_idx(mvdev, idx))
1741 return mvdev->cvq.ready;
1743 return ndev->vqs[idx].ready;
1746 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1747 const struct vdpa_vq_state *state)
1749 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1750 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1751 struct mlx5_vdpa_virtqueue *mvq;
1753 if (!is_index_valid(mvdev, idx))
1756 if (is_ctrl_vq_idx(mvdev, idx)) {
1757 mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
1761 mvq = &ndev->vqs[idx];
1762 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1763 mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1767 mvq->used_idx = state->split.avail_index;
1768 mvq->avail_idx = state->split.avail_index;
1772 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1774 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1775 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1776 struct mlx5_vdpa_virtqueue *mvq;
1777 struct mlx5_virtq_attr attr;
1780 if (!is_index_valid(mvdev, idx))
1783 if (is_ctrl_vq_idx(mvdev, idx)) {
1784 state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
1788 mvq = &ndev->vqs[idx];
1789 /* If the virtq object was destroyed, use the value saved at
1790 * the last minute of suspend_vq. This caters for userspace
1791 * that cares about emulating the index after vq is stopped. */
1793 if (!mvq->initialized) {
1794 /* Firmware returns a wrong value for the available index.
1795 * Since both values should be identical, we take the value of
1796 * used_idx which is reported correctly.
1798 state->split.avail_index = mvq->used_idx;
1802 err = query_virtqueue(ndev, mvq, &attr);
1804 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1807 state->split.avail_index = attr.used_index;
1811 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1816 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
1817 MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
1818 MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
1819 MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
1822 static u64 mlx_to_virtio_features(u16 dev_features)
1826 if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1827 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1828 if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1829 result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1830 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1831 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1832 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1833 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1838 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1840 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1841 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1844 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1845 ndev->mvdev.mlx_features |= mlx_to_virtio_features(dev_features);
1846 if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1847 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1848 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1849 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
1850 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
1851 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
1853 print_features(mvdev, ndev->mvdev.mlx_features, false);
1854 return ndev->mvdev.mlx_features;
1857 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1859 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1865 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
1867 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1868 struct mlx5_control_vq *cvq = &mvdev->cvq;
1872 for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
1873 err = setup_vq(ndev, &ndev->vqs[i]);
1878 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
1879 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
1880 MLX5_CVQ_MAX_ENT, false,
1881 (struct vring_desc *)(uintptr_t)cvq->desc_addr,
1882 (struct vring_avail *)(uintptr_t)cvq->driver_addr,
1883 (struct vring_used *)(uintptr_t)cvq->device_addr);
1891 for (--i; i >= 0; i--)
1892 teardown_vq(ndev, &ndev->vqs[i]);
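/* A sketch of what the vringh-backed CVQ set up above buys us: control
 * commands are not offloaded to the device but served in software, e.g.
 *
 *	read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
 *
 * as done in mlx5_cvq_kick_handler() above.
 */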
1897 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1899 struct mlx5_vdpa_virtqueue *mvq;
1902 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1903 mvq = &ndev->vqs[i];
1904 if (!mvq->initialized)
1907 teardown_vq(ndev, mvq);
1911 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
1913 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
1914 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
1915 /* MQ supported. CVQ index is right above the last data virtqueue's */
1916 mvdev->max_idx = mvdev->max_vqs;
1918 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1; the CVQ gets index 2. */
1924 /* Two data virtqueues only: one for rx and one for tx */
1929 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1931 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1932 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1935 print_features(mvdev, features, true);
1937 err = verify_min_features(mvdev, features);
1941 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1942 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1943 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1944 update_cvq_info(mvdev);
1948 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1950 /* not implemented */
1951 mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
1954 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
1955 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1957 return MLX5_VDPA_MAX_VQ_ENTRIES;
1960 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1962 return VIRTIO_ID_NET;
1965 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1967 return PCI_VENDOR_ID_MELLANOX;
1970 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1972 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1973 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1975 print_status(mvdev, ndev->mvdev.status, false);
1976 return ndev->mvdev.status;
1979 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1981 struct mlx5_vq_restore_info *ri = &mvq->ri;
1982 struct mlx5_virtq_attr attr = {};
1985 if (mvq->initialized) {
1986 err = query_virtqueue(ndev, mvq, &attr);
1991 ri->avail_index = attr.available_index;
1992 ri->used_index = attr.used_index;
1993 ri->ready = mvq->ready;
1994 ri->num_ent = mvq->num_ent;
1995 ri->desc_addr = mvq->desc_addr;
1996 ri->device_addr = mvq->device_addr;
1997 ri->driver_addr = mvq->driver_addr;
2002 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2006 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2007 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2008 save_channel_info(ndev, &ndev->vqs[i]);
2013 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2017 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2018 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2021 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2023 struct mlx5_vdpa_virtqueue *mvq;
2024 struct mlx5_vq_restore_info *ri;
2027 mlx5_clear_vqs(ndev);
2029 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2030 mvq = &ndev->vqs[i];
2035 mvq->avail_idx = ri->avail_index;
2036 mvq->used_idx = ri->used_index;
2037 mvq->ready = ri->ready;
2038 mvq->num_ent = ri->num_ent;
2039 mvq->desc_addr = ri->desc_addr;
2040 mvq->device_addr = ri->device_addr;
2041 mvq->driver_addr = ri->driver_addr;
2045 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2047 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2051 err = save_channels_info(ndev);
2055 teardown_driver(ndev);
2056 mlx5_vdpa_destroy_mr(mvdev);
2057 err = mlx5_vdpa_create_mr(mvdev, iotlb);
2061 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2064 restore_channels_info(ndev);
2065 err = setup_driver(mvdev);
2072 mlx5_vdpa_destroy_mr(mvdev);
2077 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2079 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2082 mutex_lock(&ndev->reslock);
2084 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2088 err = setup_virtqueues(mvdev);
2090 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2094 err = create_rqt(ndev);
2096 mlx5_vdpa_warn(mvdev, "create_rqt\n");
2100 err = create_tir(ndev);
2102 mlx5_vdpa_warn(mvdev, "create_tir\n");
2106 err = add_fwd_to_tir(ndev);
2108 mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
2112 mutex_unlock(&ndev->reslock);
2121 teardown_virtqueues(ndev);
2123 mutex_unlock(&ndev->reslock);
2127 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2129 mutex_lock(&ndev->reslock);
2133 remove_fwd_to_tir(ndev);
2136 teardown_virtqueues(ndev);
2137 ndev->setup = false;
2139 mutex_unlock(&ndev->reslock);
2142 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2146 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2147 ndev->vqs[i].ready = false;
2150 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2152 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2153 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2156 print_status(mvdev, status, true);
2158 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2159 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2160 err = setup_driver(mvdev);
2162 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2166 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2171 ndev->mvdev.status = status;
2175 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2176 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2179 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2181 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2182 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2184 print_status(mvdev, 0, true);
2185 mlx5_vdpa_info(mvdev, "performing device reset\n");
2186 teardown_driver(ndev);
2187 clear_vqs_ready(ndev);
2188 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2189 ndev->mvdev.status = 0;
2190 ndev->mvdev.mlx_features = 0;
2191 memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
2192 ndev->mvdev.actual_features = 0;
2193 ++mvdev->generation;
2194 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2195 if (mlx5_vdpa_create_mr(mvdev, NULL))
2196 mlx5_vdpa_warn(mvdev, "create MR failed\n");
2202 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2204 return sizeof(struct virtio_net_config);
2207 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2210 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2211 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2213 if (offset + len <= sizeof(struct virtio_net_config))
2214 memcpy(buf, (u8 *)&ndev->config + offset, len);
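/* Illustrative example: a config-space read of the MAC arrives here as
 * offset = offsetof(struct virtio_net_config, mac) and len = ETH_ALEN, and
 * is served directly from the cached ndev->config, with no device round trip.
 */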
2217 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2223 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2225 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2227 return mvdev->generation;
2230 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
2232 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2236 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2238 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2243 return mlx5_vdpa_change_map(mvdev, iotlb);
2248 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2250 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2251 struct mlx5_core_dev *pfmdev;
2252 struct mlx5_vdpa_net *ndev;
2254 ndev = to_mlx5_vdpa_ndev(mvdev);
2256 free_resources(ndev);
2257 mlx5_vdpa_destroy_mr(mvdev);
2258 if (!is_zero_ether_addr(ndev->config.mac)) {
2259 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2260 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2262 mlx5_vdpa_free_resources(&ndev->mvdev);
2263 mutex_destroy(&ndev->reslock);
2266 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2268 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2269 struct vdpa_notification_area ret = {};
2270 struct mlx5_vdpa_net *ndev;
2273 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2276 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2277 * notification to avoid the risk of mapping pages that contain the BAR of more than one SF. */
2280 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2283 ndev = to_mlx5_vdpa_ndev(mvdev);
2284 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2286 ret.size = PAGE_SIZE;
2290 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
2295 static const struct vdpa_config_ops mlx5_vdpa_ops = {
2296 .set_vq_address = mlx5_vdpa_set_vq_address,
2297 .set_vq_num = mlx5_vdpa_set_vq_num,
2298 .kick_vq = mlx5_vdpa_kick_vq,
2299 .set_vq_cb = mlx5_vdpa_set_vq_cb,
2300 .set_vq_ready = mlx5_vdpa_set_vq_ready,
2301 .get_vq_ready = mlx5_vdpa_get_vq_ready,
2302 .set_vq_state = mlx5_vdpa_set_vq_state,
2303 .get_vq_state = mlx5_vdpa_get_vq_state,
2304 .get_vq_notification = mlx5_get_vq_notification,
2305 .get_vq_irq = mlx5_get_vq_irq,
2306 .get_vq_align = mlx5_vdpa_get_vq_align,
2307 .get_features = mlx5_vdpa_get_features,
2308 .set_features = mlx5_vdpa_set_features,
2309 .set_config_cb = mlx5_vdpa_set_config_cb,
2310 .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2311 .get_device_id = mlx5_vdpa_get_device_id,
2312 .get_vendor_id = mlx5_vdpa_get_vendor_id,
2313 .get_status = mlx5_vdpa_get_status,
2314 .set_status = mlx5_vdpa_set_status,
2315 .reset = mlx5_vdpa_reset,
2316 .get_config_size = mlx5_vdpa_get_config_size,
2317 .get_config = mlx5_vdpa_get_config,
2318 .set_config = mlx5_vdpa_set_config,
2319 .get_generation = mlx5_vdpa_get_generation,
2320 .set_map = mlx5_vdpa_set_map,
2321 .free = mlx5_vdpa_free,
2324 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2329 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2333 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2337 static int alloc_resources(struct mlx5_vdpa_net *ndev)
2339 struct mlx5_vdpa_net_resources *res = &ndev->res;
2343 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2347 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2351 err = create_tis(ndev);
2360 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2364 static void free_resources(struct mlx5_vdpa_net *ndev)
2366 struct mlx5_vdpa_net_resources *res = &ndev->res;
2372 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2376 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2378 struct mlx5_vdpa_virtqueue *mvq;
2381 for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
2382 mvq = &ndev->vqs[i];
2383 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2386 mvq->fwqp.fw = true;
2388 for (; i < ndev->mvdev.max_vqs; i++) {
2389 mvq = &ndev->vqs[i];
2390 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2396 struct mlx5_vdpa_mgmtdev {
2397 struct vdpa_mgmt_dev mgtdev;
2398 struct mlx5_adev *madev;
2399 struct mlx5_vdpa_net *ndev;
2402 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
2404 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2405 struct virtio_net_config *config;
2406 struct mlx5_core_dev *pfmdev;
2407 struct mlx5_vdpa_dev *mvdev;
2408 struct mlx5_vdpa_net *ndev;
2409 struct mlx5_core_dev *mdev;
2416 mdev = mgtdev->madev->mdev;
2417 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
2418 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
2419 dev_warn(mdev->device, "missing support for split virtqueues\n");
2423 /* we save one virtqueue for the control virtqueue, should we require it */
2424 max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
2425 max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
2427 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2430 return PTR_ERR(ndev);
2432 ndev->mvdev.max_vqs = max_vqs;
2433 mvdev = &ndev->mvdev;
2436 mutex_init(&ndev->reslock);
2437 config = &ndev->config;
2438 err = query_mtu(mdev, &ndev->mtu);
2442 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
2446 if (!is_zero_ether_addr(config->mac)) {
2447 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
2448 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
2452 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2455 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
2456 mvdev->vdev.dma_dev = &mdev->pdev->dev;
2457 err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
2461 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2462 err = mlx5_vdpa_create_mr(mvdev, NULL);
2467 err = alloc_resources(ndev);
2471 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
2477 ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
2478 mvdev->vdev.mdev = &mgtdev->mgtdev;
2479 err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
2483 mgtdev->ndev = ndev;
2487 destroy_workqueue(mvdev->wq);
2489 free_resources(ndev);
2491 mlx5_vdpa_destroy_mr(mvdev);
2493 mlx5_vdpa_free_resources(&ndev->mvdev);
2495 if (!is_zero_ether_addr(config->mac))
2496 mlx5_mpfs_del_mac(pfmdev, config->mac);
2498 mutex_destroy(&ndev->reslock);
2499 put_device(&mvdev->vdev.dev);
2503 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
2505 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2506 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
2508 destroy_workqueue(mvdev->wq);
2509 _vdpa_unregister_device(dev);
2510 mgtdev->ndev = NULL;
2513 static const struct vdpa_mgmtdev_ops mdev_ops = {
2514 .dev_add = mlx5_vdpa_dev_add,
2515 .dev_del = mlx5_vdpa_dev_del,
2518 static struct virtio_device_id id_table[] = {
2519 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2523 static int mlx5v_probe(struct auxiliary_device *adev,
2524 const struct auxiliary_device_id *id)
2527 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
2528 struct mlx5_core_dev *mdev = madev->mdev;
2529 struct mlx5_vdpa_mgmtdev *mgtdev;
2532 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
2536 mgtdev->mgtdev.ops = &mdev_ops;
2537 mgtdev->mgtdev.device = mdev->device;
2538 mgtdev->mgtdev.id_table = id_table;
2539 mgtdev->madev = madev;
2541 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
2545 dev_set_drvdata(&adev->dev, mgtdev);
2554 static void mlx5v_remove(struct auxiliary_device *adev)
2556 struct mlx5_vdpa_mgmtdev *mgtdev;
2558 mgtdev = dev_get_drvdata(&adev->dev);
2559 vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
2563 static const struct auxiliary_device_id mlx5v_id_table[] = {
2564 { .name = MLX5_ADEV_NAME ".vnet", },
2568 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2570 static struct auxiliary_driver mlx5v_driver = {
2572 .probe = mlx5v_probe,
2573 .remove = mlx5v_remove,
2574 .id_table = mlx5v_id_table,
2577 module_auxiliary_driver(mlx5v_driver);