2 * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/init.h>
34 #include <linux/slab.h>
35 #include <linux/errno.h>
37 #include <rdma/ib_user_verbs.h>
38 #include <rdma/ib_addr.h>
39 #include <rdma/uverbs_ioctl.h>
41 #include "usnic_abi.h"
43 #include "usnic_common_util.h"
44 #include "usnic_ib_qp_grp.h"
45 #include "usnic_ib_verbs.h"
46 #include "usnic_fwd.h"
47 #include "usnic_log.h"
48 #include "usnic_uiom.h"
49 #include "usnic_transport.h"
51 #define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
/*
 * Minimum vNIC resource requirements per transport type. The table has
 * USNIC_TRANSPORT_MAX entries and is indexed by the transport type (see the
 * lookup in usnic_ib_create_qp). Each visible resource list ends with a
 * USNIC_VNIC_RES_TYPE_EOL element whose cnt is 0.
 */
53 const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
/* Unknown transport: nothing but the EOL element. */
54 { /*USNIC_TRANSPORT_UNKNOWN*/
56 {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
/* Custom RoCE transport: at least one WQ, one RQ and one CQ. */
59 { /*USNIC_TRANSPORT_ROCE_CUSTOM*/
61 {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
62 {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
63 {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
64 {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
/* IPv4/UDP transport: same minimum as the custom RoCE transport. */
67 { /*USNIC_TRANSPORT_IPV4_UDP*/
69 {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
70 {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
71 {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
72 {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
/*
 * Pack the leading bytes of a firmware version string into a u64.
 *
 * @fw_ver_str: buffer holding the firmware version text; exactly
 *              sizeof(u64) bytes are read from its start.
 * @fw_ver:     receives those bytes verbatim, in memory order.
 *
 * The bytes are copied with memcpy() rather than loaded through a cast
 * pointer: the source is a char buffer with no 8-byte alignment guarantee,
 * so a direct u64 load could be a misaligned access.
 */
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
	memcpy(fw_ver, fw_ver_str, sizeof(*fw_ver));
}
/*
 * Fill in the create-QP response for @qp_grp and copy it to user space
 * through @udata.
 *
 * The response reports the VF index, the bus address and length of BAR 0,
 * the count and vNIC indices of the group's RQ, WQ and CQ resources, and
 * the transport type of the first flow on the group's flow list.
 *
 * Returns the PTR_ERR() of a failed chunk lookup on those paths. The other
 * return statements, and the declarations of pdev, i and err, are not
 * visible in this excerpt.
 */
82 static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
83 struct ib_udata *udata)
85 struct usnic_ib_dev *us_ibdev;
86 struct usnic_ib_create_qp_resp resp;
88 struct vnic_dev_bar *bar;
89 struct usnic_vnic_res_chunk *chunk;
90 struct usnic_ib_qp_grp_flow *default_flow;
/* Zero the whole response first; it is copied out to user space below. */
93 memset(&resp, 0, sizeof(resp));
/* The owning device is the PF of the VF this group lives on. */
95 us_ibdev = qp_grp->vf->pf;
96 pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
98 usnic_err("Failed to get pdev of qp_grp %d\n",
/* BAR 0 of the VF's vNIC; its bus address and length go into the response. */
103 bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
/* NOTE(review): unlike the other messages here, this one has no trailing newline. */
105 usnic_err("Failed to get bar0 of qp_grp %d vf %s",
106 qp_grp->grp_id, pci_name(pdev));
110 resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
111 resp.bar_bus_addr = bar->bus_addr;
112 resp.bar_len = bar->len;
/* Receive queues: report how many there are and the vNIC index of each. */
114 chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
116 usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
117 usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
120 return PTR_ERR(chunk);
123 WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
124 resp.rq_cnt = chunk->cnt;
/* NOTE(review): no bound check of chunk->cnt against rq_idx[] is visible here — confirm. */
125 for (i = 0; i < chunk->cnt; i++)
126 resp.rq_idx[i] = chunk->res[i]->vnic_idx;
/* Work queues: same reporting as for the receive queues. */
128 chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
130 usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
131 usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
134 return PTR_ERR(chunk);
137 WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
138 resp.wq_cnt = chunk->cnt;
139 for (i = 0; i < chunk->cnt; i++)
140 resp.wq_idx[i] = chunk->res[i]->vnic_idx;
/* Completion queues: same reporting again. */
142 chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
144 usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
145 usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
148 return PTR_ERR(chunk);
151 WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
152 resp.cq_cnt = chunk->cnt;
153 for (i = 0; i < chunk->cnt; i++)
154 resp.cq_idx[i] = chunk->res[i]->vnic_idx;
/* The first entry on flows_lst is used as the default flow; report its transport. */
156 default_flow = list_first_entry(&qp_grp->flows_lst,
157 struct usnic_ib_qp_grp_flow, link);
158 resp.transport = default_flow->trans_type;
/* Hand the completed response to user space. */
160 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
162 usnic_err("Failed to copy udata for %s",
163 dev_name(&us_ibdev->ib_dev.dev));
/*
 * Find a VF whose vNIC has room for @res_spec and create the QP group for
 * @qp on it, using @trans_spec.
 *
 * Must be called with the device's usdev_lock held (checked with BUG_ON).
 * When usnic_ib_share_vf is set, VFs whose devices are already in the PD's
 * device list are tried; VFs with no QP groups (qp_grp_ref_cnt == 0) are
 * searched as well.
 *
 * The return type line, several declarations (i, ret) and most return
 * statements are not visible in this excerpt.
 */
171 find_free_vf_and_create_qp_grp(struct ib_qp *qp,
172 struct usnic_transport_spec *trans_spec,
173 struct usnic_vnic_res_spec *res_spec)
175 struct usnic_ib_dev *us_ibdev = to_usdev(qp->device);
176 struct usnic_ib_pd *pd = to_upd(qp->pd);
177 struct usnic_ib_vf *vf;
178 struct usnic_vnic *vnic;
179 struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(qp);
180 struct device *dev, **dev_list;
/* The caller is required to hold usdev_lock. */
183 BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
185 if (list_empty(&us_ibdev->vf_dev_list)) {
186 usnic_info("No vfs to allocate\n");
190 if (usnic_ib_share_vf) {
191 /* Try to find resources on a used vf which is in pd */
192 dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
193 if (IS_ERR(dev_list))
194 return PTR_ERR(dev_list);
/* The device list is walked until its first NULL entry. */
195 for (i = 0; dev_list[i]; i++) {
197 vf = dev_get_drvdata(dev);
198 mutex_lock(&vf->lock);
/* A zero result from usnic_vnic_check_room() is treated as "the spec fits". */
200 if (!usnic_vnic_check_room(vnic, res_spec)) {
201 usnic_dbg("Found used vnic %s from %s\n",
202 dev_name(&us_ibdev->ib_dev.dev),
203 pci_name(usnic_vnic_get_pdev(
205 ret = usnic_ib_qp_grp_create(qp_grp,
/* vf->lock is released twice in this loop — presumably once per exit path; confirm against the full source. */
210 mutex_unlock(&vf->lock);
213 mutex_unlock(&vf->lock);
216 usnic_uiom_free_dev_list(dev_list);
220 /* Try to find resources on an unused vf */
221 list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
222 mutex_lock(&vf->lock);
/* Only VFs with no QP groups and enough room are candidates. */
224 if (vf->qp_grp_ref_cnt == 0 &&
225 usnic_vnic_check_room(vnic, res_spec) == 0) {
226 ret = usnic_ib_qp_grp_create(qp_grp, us_ibdev->ufdev,
230 mutex_unlock(&vf->lock);
233 mutex_unlock(&vf->lock);
236 usnic_info("No free qp grp found on %s\n",
237 dev_name(&us_ibdev->ib_dev.dev));
/* Failure path: dev_list was only obtained in share-VF mode, so it is only freed then. */
242 usnic_err("Failed to allocate qp_grp\n");
243 if (usnic_ib_share_vf)
244 usnic_uiom_free_dev_list(dev_list);
249 static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
251 struct usnic_ib_vf *vf = qp_grp->vf;
253 WARN_ON(qp_grp->state != IB_QPS_RESET);
255 mutex_lock(&vf->lock);
256 usnic_ib_qp_grp_destroy(qp_grp);
257 mutex_unlock(&vf->lock);
/*
 * Validate the user-supplied create-QP command (passed by value).
 *
 * The visible check tests whether cmd.spec.trans_type lies outside the open
 * range (USNIC_TRANSPORT_UNKNOWN, USNIC_TRANSPORT_MAX). The statements that
 * produce the return values are not visible in this excerpt; the caller
 * treats a non-zero result as a validation failure.
 */
260 static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
262 if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN ||
263 cmd.spec.trans_type >= USNIC_TRANSPORT_MAX)
269 /* Start of ib callback functions */
/*
 * Report the link layer of a port on @device. The visible body always
 * returns IB_LINK_LAYER_ETHERNET. The remainder of the parameter list is
 * not visible in this excerpt.
 */
271 enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
274 return IB_LINK_LAYER_ETHERNET;
/*
 * Fill @props with the device attributes of @ibdev.
 *
 * @uhw is checked for any input or output length before anything else (the
 * action taken on that check is not visible in this excerpt). All attribute
 * reads happen under usdev_lock. The declarations of gid and qp_per_vf and
 * the return statements are not visible here.
 */
277 int usnic_ib_query_device(struct ib_device *ibdev,
278 struct ib_device_attr *props,
279 struct ib_udata *uhw)
281 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
283 struct ethtool_drvinfo info;
287 if (uhw->inlen || uhw->outlen)
290 mutex_lock(&us_ibdev->usdev_lock);
/*
 * Driver info comes from the underlying netdev's ethtool ops.
 * NOTE(review): no NULL check of ethtool_ops/get_drvinfo and no
 * initialization of info is visible here — confirm.
 */
291 us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
292 memset(props, 0, sizeof(*props));
/* A GID is built from the MAC and IP address; its interface_id becomes sys_image_guid. */
293 usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
295 memcpy(&props->sys_image_guid, &gid.global.interface_id,
296 sizeof(gid.global.interface_id));
/* fw_ver is derived from the start of the ethtool firmware version string. */
297 usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
298 props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
299 props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
300 props->vendor_id = PCI_VENDOR_ID_CISCO;
301 props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
302 props->hw_ver = us_ibdev->pdev->subsystem_device;
/* max_qp: the larger of the per-VF WQ and RQ counts, times the current value of the vf_cnt kref. */
303 qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
304 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
305 props->max_qp = qp_per_vf *
306 kref_read(&us_ibdev->vf_cnt);
307 props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
308 IB_DEVICE_SYS_IMAGE_GUID;
309 props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
/* max_cq: the per-VF CQ count scaled by the same vf_cnt value. */
310 props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
311 kref_read(&us_ibdev->vf_cnt);
312 props->max_pd = USNIC_UIOM_MAX_PD_CNT;
313 props->max_mr = USNIC_UIOM_MAX_MR_CNT;
/* The remaining capabilities below are explicitly reported as zero or none. */
314 props->local_ca_ack_delay = 0;
315 props->max_pkeys = 0;
316 props->atomic_cap = IB_ATOMIC_NONE;
317 props->masked_atomic_cap = props->atomic_cap;
318 props->max_qp_rd_atom = 0;
319 props->max_qp_init_rd_atom = 0;
320 props->max_res_rd_atom = 0;
322 props->max_srq_wr = 0;
323 props->max_srq_sge = 0;
324 props->max_fast_reg_page_list_len = 0;
325 props->max_mcast_grp = 0;
326 props->max_mcast_qp_attach = 0;
327 props->max_total_mcast_qp_attach = 0;
328 /* Owned by Userspace
329 * max_qp_wr, max_sge, max_sge_rd, max_cqe */
330 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Fill @props with the attributes of @port on @ibdev.
 *
 * Speed and width come from ib_get_eth_speed(), called before usdev_lock is
 * taken. The port state is derived from the forwarding device's link_up and
 * inaddr fields. Return statements are not visible in this excerpt.
 */
335 int usnic_ib_query_port(struct ib_device *ibdev, u32 port,
336 struct ib_port_attr *props)
338 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
342 if (ib_get_eth_speed(ibdev, port, &props->active_speed,
343 &props->active_width))
347 * usdev_lock is acquired after (and not before) ib_get_eth_speed call
348 * because acquiring rtnl_lock in ib_get_eth_speed, while holding
349 * usdev_lock could lead to a deadlock.
351 mutex_lock(&us_ibdev->usdev_lock);
352 /* props being zeroed by the caller, avoid zeroing it here */
/* Link down: port reported as DOWN and physically disabled. */
359 if (!us_ibdev->ufdev->link_up) {
360 props->state = IB_PORT_DOWN;
361 props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
/* Link up but no IP address (inaddr is zero): port reported as INIT. */
362 } else if (!us_ibdev->ufdev->inaddr) {
363 props->state = IB_PORT_INIT;
365 IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
/* Link up with an IP address: port reported as ACTIVE. */
367 props->state = IB_PORT_ACTIVE;
368 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
371 props->port_cap_flags = 0;
372 props->gid_tbl_len = 1;
373 props->bad_pkey_cntr = 0;
374 props->qkey_viol_cntr = 0;
375 props->max_mtu = IB_MTU_4096;
/* The active MTU is derived from the forwarding device's MTU. */
376 props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
377 /* Userspace will adjust for hdrs */
378 props->max_msg_sz = us_ibdev->ufdev->mtu;
379 props->max_vl_num = 1;
380 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Report the state of @qp in @qp_attr and @qp_init_attr.
 *
 * Both output structures are zeroed first. The group's state is then copied
 * into qp_state and cur_qp_state under the PF's usdev_lock. Part of the
 * parameter list, the assignment of vf, the switch cases and the return
 * statements are not visible in this excerpt.
 */
385 int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
387 struct ib_qp_init_attr *qp_init_attr)
389 struct usnic_ib_qp_grp *qp_grp;
390 struct usnic_ib_vf *vf;
395 memset(qp_attr, 0, sizeof(*qp_attr));
396 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
398 qp_grp = to_uqp_grp(qp);
400 mutex_lock(&vf->pf->usdev_lock);
402 qp_attr->qp_state = qp_grp->state;
403 qp_attr->cur_qp_state = qp_grp->state;
/* The handling depends on the QP type; an unexpected type is logged as an error. */
405 switch (qp_grp->ibqp.qp_type) {
410 usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
/* Two unlock sites are visible — presumably one per exit path; confirm against the full source. */
415 mutex_unlock(&vf->pf->usdev_lock);
419 mutex_unlock(&vf->pf->usdev_lock);
/*
 * Produce the GID for @port / @index on @ibdev.
 *
 * Under usdev_lock the raw GID bytes are zeroed, then the GID is built from
 * the forwarding device's MAC and IP address. The rest of the parameter
 * list, any argument checks and the return statements are not visible in
 * this excerpt.
 */
423 int usnic_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
427 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
433 mutex_lock(&us_ibdev->usdev_lock);
434 memset(&(gid->raw[0]), 0, sizeof(gid->raw));
435 usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
437 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Allocate the driver-side state of a protection domain.
 *
 * Creates the uiom PD for the parent of the IB device and stores it in the
 * usnic PD. Returns the PTR_ERR() of the allocation if it failed; the
 * success return is not visible in this excerpt. @udata is unused in the
 * visible code.
 */
442 int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
444 struct usnic_ib_pd *pd = to_upd(ibpd);
446 pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent);
447 if (IS_ERR(pd->umem_pd))
448 return PTR_ERR(pd->umem_pd);
/*
 * Release the uiom PD attached to @pd. The return statement is not visible
 * in this excerpt; @udata is unused in the visible code.
 */
453 int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
455 usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
/*
 * Create a QP: read the user command, validate it, find a VF with enough
 * resources, create the QP group there and return its description to user
 * space.
 *
 * init_attr->create_flags and a qp_type other than IB_QPT_UD are checked
 * before the group is created. The return statements, the goto labels and
 * the declarations of err and cq_cnt are not visible in this excerpt.
 */
459 int usnic_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
460 struct ib_udata *udata)
463 struct usnic_ib_dev *us_ibdev;
464 struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(ibqp);
465 struct usnic_ib_ucontext *ucontext = rdma_udata_to_drv_context(
466 udata, struct usnic_ib_ucontext, ibucontext);
468 struct usnic_vnic_res_spec res_spec;
469 struct usnic_ib_create_qp_cmd cmd;
470 struct usnic_transport_spec trans_spec;
474 us_ibdev = to_usdev(ibqp->device);
476 if (init_attr->create_flags)
/* Copy the create command from user space. */
479 err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
481 usnic_err("%s: cannot copy udata for create_qp\n",
482 dev_name(&us_ibdev->ib_dev.dev));
/* The transport type is range-checked here, before it indexes min_transport_spec below. */
486 err = create_qp_validate_user_data(cmd);
488 usnic_err("%s: Failed to validate user data\n",
489 dev_name(&us_ibdev->ib_dev.dev));
493 if (init_attr->qp_type != IB_QPT_UD) {
494 usnic_err("%s asked to make a non-UD QP: %d\n",
495 dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
499 trans_spec = cmd.spec;
500 mutex_lock(&us_ibdev->usdev_lock);
/* One CQ is needed if send and receive share a CQ, otherwise two. */
501 cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
/* Start from a copy of the transport's minimum spec, then override the CQ count. */
502 res_spec = min_transport_spec[trans_spec.trans_type];
503 usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
504 err = find_free_vf_and_create_qp_grp(ibqp, &trans_spec, &res_spec);
506 goto out_release_mutex;
508 err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
511 goto out_release_qp_grp;
/* Success: attach the group to the creating user context's list. */
514 qp_grp->ctx = ucontext;
515 list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
516 usnic_ib_log_vf(qp_grp->vf);
517 mutex_unlock(&us_ibdev->usdev_lock);
/* Error unwinding: destroy the group if it was created, then drop usdev_lock. */
521 qp_grp_destroy(qp_grp);
523 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Destroy @qp: under the PF's usdev_lock, move the QP group to
 * IB_QPS_RESET, unlink it from its list and destroy it.
 *
 * A failure to move the group to RESET is logged. The assignment of vf and
 * the return statement are not visible in this excerpt.
 */
527 int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
529 struct usnic_ib_qp_grp *qp_grp;
530 struct usnic_ib_vf *vf;
534 qp_grp = to_uqp_grp(qp);
536 mutex_lock(&vf->pf->usdev_lock);
537 if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
538 usnic_err("Failed to move qp grp %u to reset\n",
/* Remove the group from the list it is linked on before tearing it down. */
542 list_del(&qp_grp->link);
543 qp_grp_destroy(qp_grp);
544 mutex_unlock(&vf->pf->usdev_lock);
/*
 * Modify @ibqp according to @attr and @attr_mask.
 *
 * Attribute bits outside IB_QP_ATTR_STANDARD_BITS are checked up front.
 * Under the PF's usdev_lock, a port number other than 1 is singled out when
 * IB_QP_PORT is set, and a state change is applied when IB_QP_STATE is set;
 * a mask without IB_QP_STATE is logged as unhandled. The declaration of
 * status and the return statements are not visible in this excerpt.
 */
549 int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
550 int attr_mask, struct ib_udata *udata)
552 struct usnic_ib_qp_grp *qp_grp;
556 if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
559 qp_grp = to_uqp_grp(ibqp);
561 mutex_lock(&qp_grp->vf->pf->usdev_lock);
562 if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
563 /* usnic devices only have one port */
567 if (attr_mask & IB_QP_STATE) {
/* Only the target state is passed on; the last argument is NULL. */
568 status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
570 usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
575 mutex_unlock(&qp_grp->vf->pf->usdev_lock);
/*
 * CQ creation callback. Only the signature is visible in this excerpt, so
 * the body's behaviour cannot be described from here.
 */
579 int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
580 struct uverbs_attr_bundle *attrs)
/*
 * CQ destruction callback. Only the signature is visible in this excerpt,
 * so the body's behaviour cannot be described from here.
 */
588 int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
/*
 * Register a user memory region on @pd.
 *
 * Allocates a zeroed usnic MR and acquires the user memory through
 * usnic_uiom_reg_get() on the PD's uiom PD. ERR_PTR(-ENOMEM) is returned on
 * one visible path, presumably when the allocation fails. The error
 * unwinding and the success return are not visible in this excerpt.
 */
593 struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
594 u64 virt_addr, int access_flags,
595 struct ib_udata *udata)
597 struct usnic_ib_mr *mr;
600 usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
603 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
605 return ERR_PTR(-ENOMEM);
607 mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
/* An ERR_PTR result yields its own error code; a NULL result is mapped to -EFAULT. */
609 if (IS_ERR_OR_NULL(mr->umem)) {
610 err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
/* Both the local and the remote key are set to 0. */
614 mr->ibmr.lkey = mr->ibmr.rkey = 0;
/*
 * Deregister @ibmr: log its address and length, then release the user
 * memory attached to it. Any further cleanup and the return statement are
 * not visible in this excerpt.
 */
622 int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
624 struct usnic_ib_mr *mr = to_umr(ibmr);
626 usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
628 usnic_uiom_reg_release(mr->umem);
/*
 * Initialise a new user context: start with an empty QP-group list and,
 * under usdev_lock, add the context to the device's context list. The
 * return statement is not visible in this excerpt.
 */
633 int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
635 struct ib_device *ibdev = uctx->device;
636 struct usnic_ib_ucontext *context = to_ucontext(uctx);
637 struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
640 INIT_LIST_HEAD(&context->qp_grp_list);
641 mutex_lock(&us_ibdev->usdev_lock);
642 list_add_tail(&context->link, &us_ibdev->ctx_list);
643 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Tear down a user context: under usdev_lock, warn once if the context
 * still has QP groups on its list, then unlink it from the device's
 * context list.
 */
648 void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
650 struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
651 struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
654 mutex_lock(&us_ibdev->usdev_lock);
/* QP groups are expected to be gone by now; leftovers only trigger a one-time warning. */
655 WARN_ON_ONCE(!list_empty(&context->qp_grp_list));
656 list_del(&context->link);
657 mutex_unlock(&us_ibdev->usdev_lock);
/*
 * Map BAR 0 of a VF into user space.
 *
 * The VF is selected by the page offset of @vma (vm_pgoff is used as the VF
 * index). Only VFs that host a QP group of this user context are
 * considered, and the requested mapping length must equal the BAR length.
 * Several declarations (vfid, bus_addr, len), the error returns and the end
 * of the function are not visible in this excerpt.
 */
660 int usnic_ib_mmap(struct ib_ucontext *context,
661 struct vm_area_struct *vma)
663 struct usnic_ib_ucontext *uctx = to_ucontext(context);
664 struct usnic_ib_dev *us_ibdev;
665 struct usnic_ib_qp_grp *qp_grp;
666 struct usnic_ib_vf *vf;
667 struct vnic_dev_bar *bar;
674 us_ibdev = to_usdev(context->device);
/* The mapping is marked as I/O memory and made non-cached. */
675 vm_flags_set(vma, VM_IO);
676 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
677 vfid = vma->vm_pgoff;
678 usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
679 vma->vm_pgoff, PAGE_SHIFT, vfid);
681 mutex_lock(&us_ibdev->usdev_lock);
/* Walk this context's QP groups looking for one whose VF index matches vfid. */
682 list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
684 if (usnic_vnic_get_index(vf->vnic) == vfid) {
685 bar = usnic_vnic_get_bar(vf->vnic, 0);
/* The request is checked against the exact size of BAR 0. */
686 if ((vma->vm_end - vma->vm_start) != bar->len) {
687 usnic_err("Bar0 Len %lu - Request map %lu\n",
689 vma->vm_end - vma->vm_start);
690 mutex_unlock(&us_ibdev->usdev_lock);
693 bus_addr = bar->bus_addr;
695 usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
696 &bus_addr, bar->vaddr, bar->len);
/* The lock is dropped before remapping; the BAR's bus address supplies the PFN. */
697 mutex_unlock(&us_ibdev->usdev_lock);
699 return remap_pfn_range(vma,
701 bus_addr >> PAGE_SHIFT,
702 len, vma->vm_page_prot);
/* No QP group of this context lives on the requested VF. */
706 mutex_unlock(&us_ibdev->usdev_lock);
707 usnic_err("No VF %u found\n", vfid);