// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
#define to_mlx5_vdpa_ndev(__mvdev) \
	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
struct mlx5_vdpa_net_resources {

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;

struct mlx5_vq_restore_info {
	struct vdpa_callback cb;

struct mlx5_vdpa_virtqueue {
	struct vdpa_callback event_cb;
	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp, used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	struct mlx5_vdpa_net *ndev;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

/* We will remove this limitation once mlx5_vdpa_alloc_resources()
 * provides for driver space allocation.
 */
#define MLX5_MAX_SUPPORTED_VQS 16
struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];

	/* Serialize vq resources creation and destruction. This is required
	 * since the memory map might change and we need to destroy and
	 * re-create resources while the driver is operational.
	 */
	struct mutex reslock;
	struct mlx5_flow_table *rxft;
	struct mlx5_fc *rx_counter;
	struct mlx5_flow_handle *rx_rule;

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_net *ndev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
	/* data virtqueues come in RX/TX pairs */
	return max_vqs / 2;
}
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}
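/* A worked example of the ownership test above, assuming a hypothetical
 * 256-entry CQ (cq->cqe == 256): for consumer index n = 260 we look at
 * slot 260 & 255 = 4 and expect the CQE owner bit to equal the pass bit
 * !!(260 & 256) = 1. Hardware flips the owner bit it writes on every
 * pass through the buffer, so a stale CQE left over from the previous
 * pass fails the XOR test and is not handed to the driver. The opcode
 * lives in the high nibble of op_own, which is why cq_frag_buf_init()
 * seeds it with MLX5_CQE_INVALID << 4.
 */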
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* The firmware QP is allocated by the driver for the
		 * firmware's use, so we can skip part of the params; they
		 * will be chosen by the firmware.
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
	rq_buf_free(ndev, vqp);
}
static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (mvq->event_cb.callback)
		mvq->event_cb.callback(mvq->event_cb.private);
}
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose the least
	 * used vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int p_a;
	int p_b;

	switch (num) {
	case 1:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
		*umemp = &mvq->umem3;
		break;
	}
	(*umemp)->size = p_a * mvq->num_ent + p_b;
}
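/* Worked example with made-up capability values: if the device reports
 * umem_1_buffer_param_a = 128 and umem_1_buffer_param_b = 4096, a
 * 256-entry virtqueue needs a umem1 of 128 * 256 + 4096 = 36864 bytes.
 * The real parameters come from the capabilities queried above; the
 * numbers here are illustrative only.
 */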
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;
	void *um;
	void *in;
	int inlen;
	__be64 *pas;
	int err;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}

	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
	/* even indices are RX queues, odd indices are TX queues */
	return idx % 2;
}

static u16 get_features_12_3(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}
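/* A quick sanity sketch (not part of the driver): the firmware field
 * queue_feature_bit_mask_12_3 covers device feature bits 12 down to 3,
 * which is why the negotiated virtio bits land at shifted positions,
 * e.g.:
 *
 *	u16 m = get_features_12_3(BIT_ULL(VIRTIO_NET_F_CSUM) |
 *				  BIT_ULL(VIRTIO_NET_F_HOST_TSO4));
 *	// m == (1 << 7) | (1 << 9) == 0x280
 */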
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one
 * used by the firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}
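/* connect_qps() walks both ends of the RC connection through the usual
 * IB state ladder: RST -> INIT -> RTR for the firmware QP and the driver
 * QP alike, after which only the firmware QP is moved on to RTS. The
 * firmware QP is the sending side of the notification channel; the
 * driver QP (vqqp) only ever receives, and a QP can receive while in
 * RTR, so it needs no RTS transition.
 */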
struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *out;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (mvq->initialized) {
		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
		return -EINVAL;
	}

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_connect;

	err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
			       idx, err);
		goto err_modify;
	}

	mvq->initialized = true;
	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;
	int log_max_rqt;

	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
	if (log_max_rqt < 1)
		return -EOPNOTSUPP;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
		if (!ndev->vqs[j].initialized)
			continue;

		if (!vq_is_tx(ndev->vqs[j].index)) {
			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
			i++;
		}
	}

	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	return err;
}
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_namespace *ns;
	int err;

	/* for now, one entry, match all, forward to tir */
	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft))
		return PTR_ERR(ndev->rxft);

	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(ndev->rx_counter)) {
		err = PTR_ERR(ndev->rx_counter);
		goto err_fc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest[0].tir_num = ndev->res.tirn;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
	if (IS_ERR(ndev->rx_rule)) {
		err = PTR_ERR(ndev->rx_rule);
		ndev->rx_rule = NULL;
		goto err_rule;
	}
	return 0;

err_rule:
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->rx_rule)
		return;

	mlx5_del_flow_rules(ndev->rx_rule);
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
	mlx5_destroy_flow_table(ndev->rxft);

	ndev->rx_rule = NULL;
}
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (unlikely(!mvq->ready))
		return;

	iowrite16(idx, ndev->mvdev.res.kick_addr);
}

static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
				    u64 driver_area, u64 device_area)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	mvq->desc_addr = desc_area;
	mvq->device_addr = device_area;
	mvq->driver_addr = driver_area;
	return 0;
}

static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	mvq = &ndev->vqs[idx];
	mvq->num_ent = num;
}

static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];

	vq->event_cb = *cb;
}

static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (!ready)
		suspend_vq(ndev, mvq);

	mvq->ready = ready;
}

static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	return mvq->ready;
}

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
				  const struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
		return -EINVAL;
	}

	mvq->used_idx = state->split.avail_index;
	mvq->avail_idx = state->split.avail_index;
	return 0;
}

static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_virtq_attr attr;
	int err;

	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after the vq is stopped.
	 */
	if (!mvq->initialized) {
		/* Firmware returns a wrong value for the available index.
		 * Since both values should be identical, we take the value of
		 * used_idx which is reported correctly.
		 */
		state->split.avail_index = mvq->used_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	state->split.avail_index = attr.used_index;
	return 0;
}
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}

enum {
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};

static u64 mlx_to_virtio_features(u16 dev_features)
{
	u64 result = 0;

	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);

	return result;
}
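/* Usage sketch: a device advertising checksum offload in both directions,
 * i.e. dev_features = MLX5_VIRTIO_NET_F_CSUM | MLX5_VIRTIO_NET_F_GUEST_CSUM,
 * maps to BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
 * capability bits the device does not set are simply left clear in the
 * returned virtio feature mask.
 */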
static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	u16 dev_features;

	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
	ndev->mvdev.mlx_features = mlx_to_virtio_features(dev_features);
	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}

static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{
	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return -EOPNOTSUPP;

	return 0;
}

static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
{
	int err;
	int i;

	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
		err = setup_vq(ndev, &ndev->vqs[i]);
		if (err)
			goto err_vq;
	}

	return 0;

err_vq:
	for (--i; i >= 0; i--)
		teardown_vq(ndev, &ndev->vqs[i]);

	return err;
}

static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
		mvq = &ndev->vqs[i];
		if (!mvq->initialized)
			continue;

		teardown_vq(ndev, mvq);
	}
}

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}
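/* Example: once VIRTIO_F_VERSION_1 is negotiated, config space fields are
 * always little endian, so cpu_to_mlx5vdpa16(mvdev, 1500) lays out the
 * bytes as dc 05 regardless of host endianness. Without VERSION_1 the
 * legacy rule applies and the value stays in native byte order on
 * big-endian hosts (see the cross-endian TODO above).
 */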
static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_features(mvdev, features, true);

	err = verify_min_features(mvdev, features);
	if (err)
		return err;

	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	return err;
}

static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	/* not implemented */
	mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
}

#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}

static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}

static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}

static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}

static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vq_restore_info *ri = &mvq->ri;
	struct mlx5_virtq_attr attr;
	int err;

	if (!mvq->initialized)
		return 0;

	err = query_virtqueue(ndev, mvq, &attr);
	if (err)
		return err;

	ri->avail_index = attr.available_index;
	ri->used_index = attr.used_index;
	ri->ready = mvq->ready;
	ri->num_ent = mvq->num_ent;
	ri->desc_addr = mvq->desc_addr;
	ri->device_addr = mvq->device_addr;
	ri->driver_addr = mvq->driver_addr;
	ri->cb = mvq->event_cb;
	return 0;
}

static int save_channels_info(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
		save_channel_info(ndev, &ndev->vqs[i]);
	}
	return 0;
}

static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}

static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vq_restore_info *ri;
	int i;

	mlx5_clear_vqs(ndev);

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		ri = &mvq->ri;

		mvq->avail_idx = ri->avail_index;
		mvq->used_idx = ri->used_index;
		mvq->ready = ri->ready;
		mvq->num_ent = ri->num_ent;
		mvq->desc_addr = ri->desc_addr;
		mvq->device_addr = ri->device_addr;
		mvq->driver_addr = ri->driver_addr;
		mvq->event_cb = ri->cb;
	}
}

static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
{
	int err;

	suspend_vqs(ndev);
	err = save_channels_info(ndev);
	if (err)
		goto err_mr;

	teardown_driver(ndev);
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
	if (err)
		goto err_mr;

	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	restore_channels_info(ndev);
	err = setup_driver(ndev);
	if (err)
		goto err_setup;

	return 0;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
err_mr:
	return err;
}

static int setup_driver(struct mlx5_vdpa_net *ndev)
{
	int err;

	mutex_lock(&ndev->reslock);
	if (ndev->setup) {
		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	err = setup_virtqueues(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
		goto out;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
		goto err_tir;
	}

	err = add_fwd_to_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
		goto err_fwd;
	}
	ndev->setup = true;
	mutex_unlock(&ndev->reslock);

	return 0;

err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
out:
	mutex_unlock(&ndev->reslock);
	return err;
}

static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
	mutex_lock(&ndev->reslock);
	if (!ndev->setup)
		goto out;

	remove_fwd_to_tir(ndev);
	destroy_tir(ndev);
	destroy_rqt(ndev);
	teardown_virtqueues(ndev);
	ndev->setup = false;
out:
	mutex_unlock(&ndev->reslock);
}
static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		ndev->vqs[i].ready = false;
}

static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);
	if (!status) {
		mlx5_vdpa_info(mvdev, "performing device reset\n");
		teardown_driver(ndev);
		clear_vqs_ready(ndev);
		mlx5_vdpa_destroy_mr(&ndev->mvdev);
		ndev->mvdev.status = 0;
		ndev->mvdev.mlx_features = 0;
		++mvdev->generation;
		if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
			if (mlx5_vdpa_create_mr(mvdev, NULL))
				mlx5_vdpa_warn(mvdev, "create MR failed\n");
		}
		return;
	}

	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_driver(ndev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_setup;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			return;
		}
	}

	ndev->mvdev.status = status;
	return;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}

static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
	return sizeof(struct virtio_net_config);
}

static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
				 unsigned int len)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	if (offset + len <= sizeof(struct virtio_net_config))
		memcpy(buf, (u8 *)&ndev->config + offset, len);
}

static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}

static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->generation;
}

static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	bool change_map;
	int err;

	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
	if (err) {
		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
		return err;
	}

	if (change_map)
		return mlx5_vdpa_change_map(ndev, iotlb);

	return 0;
}

static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	free_resources(ndev);
	mlx5_vdpa_destroy_mr(mvdev);
	if (!is_zero_ether_addr(ndev->config.mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
	}
	mlx5_vdpa_free_resources(&ndev->mvdev);
	mutex_destroy(&ndev->reslock);
}

static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct vdpa_notification_area ret = {};
	struct mlx5_vdpa_net *ndev;
	phys_addr_t addr;

	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification, to avoid the risk of mapping pages that contain the
	 * BAR of more than one SF.
	 */
	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
		return ret;

	ndev = to_mlx5_vdpa_ndev(mvdev);
	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
	ret.addr = addr;
	ret.size = PAGE_SIZE;
	return ret;
}
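/* Worked example for the BAR-size check above: log_min_sf_size is the
 * log of the SF BAR slice in 4KB units, so log_min_sf_size = 0 means a
 * 4KB slice. On a host with 64KB pages (PAGE_SHIFT = 16) we get
 * 0 + 12 < 16, and direct notification is refused: mapping one 64KB
 * page into userspace would expose the registers of neighbouring SFs
 * as well.
 */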
static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
{
	return -EOPNOTSUPP;
}

static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_features = mlx5_vdpa_get_features,
	.set_features = mlx5_vdpa_set_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.free = mlx5_vdpa_free,
};

static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
	u16 hw_mtu;
	int err;

	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
	if (err)
		return err;

	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
	return 0;
}

static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}

static void free_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;

	if (!res->valid)
		return;

	destroy_tis(ndev);
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	res->valid = false;
}

static void init_mvqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
		mvq->fwqp.fw = true;
	}
	for (; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
	}
}

struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;
	struct mlx5_adev *madev;
	struct mlx5_vdpa_net *ndev;
};
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u32 max_vqs;
	int err;

	mdev = mgtdev->madev->mdev;
	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
	      MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
		dev_warn(mdev->device, "missing support for split virtqueues\n");
		return -EOPNOTSUPP;
	}

	/* we save one virtqueue for the control virtqueue, should we require it */
	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
				 name);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;
	init_mvqs(ndev);
	mutex_init(&ndev->reslock);
	config = &ndev->config;
	err = query_mtu(mdev, &ndev->mtu);
	if (err)
		goto err_mtu;

	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
	if (err)
		goto err_mtu;

	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_mtu;
	}

	mvdev->vdev.dma_dev = &mdev->pdev->dev;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mpfs;

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		err = mlx5_vdpa_create_mr(mvdev, NULL);
		if (err)
			goto err_res;
	}

	err = alloc_resources(ndev);
	if (err)
		goto err_mr;

	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;
	return 0;

err_reg:
	free_resources(ndev);
err_mr:
	mlx5_vdpa_destroy_mr(mvdev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
	if (!is_zero_ether_addr(config->mac))
		mlx5_mpfs_del_mac(pfmdev, config->mac);
err_mtu:
	mutex_destroy(&ndev->reslock);
	put_device(&mvdev->vdev.dev);
	return err;
}

static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);

	_vdpa_unregister_device(dev);
	mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->madev = madev;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	dev_set_drvdata(&adev->dev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = dev_get_drvdata(&adev->dev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);