2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
17 * The BSD 2-Clause License
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 #include <linux/inet.h>
49 #include <rdma/ib_addr.h>
50 #include <rdma/ib_smi.h>
51 #include <rdma/ib_user_verbs.h>
52 #include <rdma/vmw_pvrdma-abi.h>
57 * pvrdma_query_device - query device
58 * @ibdev: the device to query
59 * @props: the device properties
62 * @return: 0 on success, otherwise negative errno
/*
 * Report device-wide capabilities to the caller by filling @props from the
 * values held in dev->dsr->caps. Most fields are copied verbatim; the atomic
 * capability and the memory-management-extensions flag are derived from
 * capability bitmasks.
 */
64 int pvrdma_query_device(struct ib_device *ibdev,
65 struct ib_device_attr *props,
68 struct pvrdma_dev *dev = to_vdev(ibdev);
/*
 * Any non-zero vendor-specific input/output length in uhw triggers this check.
 * NOTE(review): the statement guarded by the check is not shown here;
 * presumably the request is rejected — confirm.
 */
70 if (uhw->inlen || uhw->outlen)
/* Start from an all-zero attribute structure so unset fields read as 0. */
73 memset(props, 0, sizeof(*props));
75 props->fw_ver = dev->dsr->caps.fw_ver;
76 props->sys_image_guid = dev->dsr->caps.sys_image_guid;
77 props->max_mr_size = dev->dsr->caps.max_mr_size;
78 props->page_size_cap = dev->dsr->caps.page_size_cap;
79 props->vendor_id = dev->dsr->caps.vendor_id;
/* Unlike vendor_id, the part id comes from the PCI device, not from caps. */
80 props->vendor_part_id = dev->pdev->device;
81 props->hw_ver = dev->dsr->caps.hw_ver;
82 props->max_qp = dev->dsr->caps.max_qp;
83 props->max_qp_wr = dev->dsr->caps.max_qp_wr;
84 props->device_cap_flags = dev->dsr->caps.device_cap_flags;
/* The same max_sge capability is reported for both send and receive. */
85 props->max_send_sge = dev->dsr->caps.max_sge;
86 props->max_recv_sge = dev->dsr->caps.max_sge;
/*
 * NOTE(review): PVRDMA_GET_CAP is defined elsewhere; it presumably selects
 * between max_sge and max_sge_rd depending on the device — confirm.
 */
87 props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
88 dev->dsr->caps.max_sge_rd);
89 props->max_srq = dev->dsr->caps.max_srq;
90 props->max_srq_wr = dev->dsr->caps.max_srq_wr;
91 props->max_srq_sge = dev->dsr->caps.max_srq_sge;
92 props->max_cq = dev->dsr->caps.max_cq;
93 props->max_cqe = dev->dsr->caps.max_cqe;
94 props->max_mr = dev->dsr->caps.max_mr;
95 props->max_pd = dev->dsr->caps.max_pd;
96 props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
97 props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
/*
 * Evaluates to IB_ATOMIC_HCA when the device reports compare-and-swap or
 * fetch-and-add, IB_ATOMIC_NONE otherwise.
 * NOTE(review): the assignment target is not shown here; presumably
 * props->atomic_cap, which the next statement copies — confirm.
 */
99 dev->dsr->caps.atomic_ops &
100 (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
101 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
/* Masked atomics are reported with the same level as plain atomics. */
102 props->masked_atomic_cap = props->atomic_cap;
103 props->max_ah = dev->dsr->caps.max_ah;
104 props->max_pkeys = dev->dsr->caps.max_pkeys;
105 props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
/*
 * Memory-management extensions are advertised only when local invalidate,
 * remote invalidate and fast-register work requests are all supported.
 */
106 if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
107 (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
108 (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
109 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
110 props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev,
111 PVRDMA_MAX_FAST_REG_PAGES,
112 dev->dsr->caps.max_fast_reg_page_list_len);
/* Add the port-active-event and RC RNR-NAK-generation capability flags. */
115 props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT |
116 IB_DEVICE_RC_RNR_NAK_GEN;
122 * pvrdma_query_port - query device port attributes
123 * @ibdev: the device to query
124 * @port: the port number
125 * @props: the port attributes
127 * @return: 0 on success, otherwise negative errno
/*
 * Ask the device for the attributes of @port by posting a
 * PVRDMA_CMD_QUERY_PORT command, then translate the response into @props.
 */
129 int pvrdma_query_port(struct ib_device *ibdev, u8 port,
130 struct ib_port_attr *props)
132 struct pvrdma_dev *dev = to_vdev(ibdev);
133 union pvrdma_cmd_req req;
134 union pvrdma_cmd_resp rsp;
/* cmd and resp are typed views into the request/response unions above. */
135 struct pvrdma_cmd_query_port *cmd = &req.query_port;
136 struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
/* Build the request: a zeroed command carrying the opcode and port number. */
139 memset(cmd, 0, sizeof(*cmd));
140 cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
141 cmd->port_num = port;
143 err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
/*
 * NOTE(review): the condition guarding this warning and whatever follows it
 * are not shown here — confirm the failure path returns err before resp is
 * read.
 */
145 dev_warn(&dev->pdev->dev,
146 "could not query port, error: %d\n", err);
150 /* props being zeroed by the caller, avoid zeroing it here */
/*
 * State, MTUs, capability flags, width and speed go through the
 * pvrdma_*_to_ib() helpers; the remaining fields are copied unchanged.
 */
152 props->state = pvrdma_port_state_to_ib(resp->attrs.state);
153 props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
154 props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
155 props->gid_tbl_len = resp->attrs.gid_tbl_len;
156 props->port_cap_flags =
157 pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
/* IB_PORT_CM_SUP is OR'ed in regardless of what the device reported. */
158 props->port_cap_flags |= IB_PORT_CM_SUP;
159 props->ip_gids = true;
160 props->max_msg_sz = resp->attrs.max_msg_sz;
161 props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
162 props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
163 props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
164 props->lid = resp->attrs.lid;
165 props->sm_lid = resp->attrs.sm_lid;
166 props->lmc = resp->attrs.lmc;
167 props->max_vl_num = resp->attrs.max_vl_num;
168 props->sm_sl = resp->attrs.sm_sl;
169 props->subnet_timeout = resp->attrs.subnet_timeout;
170 props->init_type_reply = resp->attrs.init_type_reply;
171 props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
172 props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
/* phys_state is taken as-is, without a conversion helper. */
173 props->phys_state = resp->attrs.phys_state;
179 * pvrdma_query_gid - query device gid
180 * @ibdev: the device to query
181 * @port: the port number
183 * @gid: the device gid value
185 * @return: 0 on success, otherwise negative errno
/*
 * Copy the GID stored at @index in the driver's sgid_tbl into @gid. No device
 * command is issued in the visible code; the value comes from the cached
 * table.
 */
187 int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index,
190 struct pvrdma_dev *dev = to_vdev(ibdev);
/*
 * Upper-bound check against the device's GID table length.
 * NOTE(review): index is a signed int; a negative value is only rejected
 * here if gid_tbl_len has an unsigned type (so the comparison converts
 * index to a large unsigned value) — confirm.
 */
192 if (index >= dev->dsr->caps.gid_tbl_len)
195 memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));
201 * pvrdma_query_pkey - query device port's P_Key table
202 * @ibdev: the device to query
203 * @port: the port number
205 * @pkey: the device P_Key value
207 * @return: 0 on success, otherwise negative errno
/*
 * Fetch a P_Key from the device by posting a PVRDMA_CMD_QUERY_PKEY command
 * and store the value from the response in *pkey.
 */
209 int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
213 union pvrdma_cmd_req req;
214 union pvrdma_cmd_resp rsp;
215 struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;
/*
 * Zero the command, then fill in opcode and port.
 * NOTE(review): no line copying @index into the command is shown here —
 * confirm it is set before the command is posted.
 */
217 memset(cmd, 0, sizeof(*cmd));
218 cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
219 cmd->port_num = port;
222 err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
223 PVRDMA_CMD_QUERY_PKEY_RESP);
/*
 * NOTE(review): the condition guarding this warning is not shown here —
 * confirm the failure path returns before rsp is read below.
 */
225 dev_warn(&to_vdev(ibdev)->pdev->dev,
226 "could not query pkey, error: %d\n", err);
230 *pkey = rsp.query_pkey_resp.pkey;
/*
 * Report the link layer of a port. The visible body returns
 * IB_LINK_LAYER_ETHERNET unconditionally, without inspecting its arguments.
 */
235 enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
238 return IB_LINK_LAYER_ETHERNET;
/*
 * Apply device-level modifications selected by @mask. Only the node
 * description and the system image GUID can be changed; any other bit in
 * @mask produces a warning.
 */
241 int pvrdma_modify_device(struct ib_device *ibdev, int mask,
242 struct ib_device_modify *props)
/* Reject masks containing bits other than the two supported ones. */
246 if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
247 IB_DEVICE_MODIFY_NODE_DESC)) {
248 dev_warn(&to_vdev(ibdev)->pdev->dev,
249 "unsupported device modify mask %#x\n", mask);
/*
 * The node description is replaced under desc_lock with interrupts saved.
 * NOTE(review): the copy length is the literal 64; presumably this equals
 * the size of ibdev->node_desc — confirm, and consider a named constant.
 */
253 if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
254 spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
255 memcpy(ibdev->node_desc, props->node_desc, 64);
256 spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
/*
 * The system image GUID is stored in big-endian form and is protected by
 * port_mutex rather than desc_lock.
 */
259 if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
260 mutex_lock(&to_vdev(ibdev)->port_mutex);
261 to_vdev(ibdev)->sys_image_guid =
262 cpu_to_be64(props->sys_image_guid);
263 mutex_unlock(&to_vdev(ibdev)->port_mutex);
270 * pvrdma_modify_port - modify device port attributes
271 * @ibdev: the device to modify
272 * @port: the port number
273 * @mask: attributes to modify
274 * @props: the port capability changes to apply
276 * @return: 0 on success, otherwise negative errno
/*
 * Update the driver's cached port capability mask and, on request, mark the
 * device inactive. IB_PORT_SHUTDOWN is the only bit accepted in @mask.
 */
278 int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
279 struct ib_port_modify *props)
281 struct ib_port_attr attr;
282 struct pvrdma_dev *vdev = to_vdev(ibdev);
/* Any mask bit other than IB_PORT_SHUTDOWN is reported as unsupported. */
285 if (mask & ~IB_PORT_SHUTDOWN) {
286 dev_warn(&vdev->pdev->dev,
287 "unsupported port modify mask %#x\n", mask);
/* Everything below runs with port_mutex held. */
291 mutex_lock(&vdev->port_mutex);
/*
 * NOTE(review): attr is filled here but never read in the visible code;
 * the call appears to serve only as a check that the port can be queried —
 * confirm.
 */
292 ret = ib_query_port(ibdev, port, &attr);
/* Set the requested capability bits first, then clear the requested ones. */
296 vdev->port_cap_mask |= props->set_port_cap_mask;
297 vdev->port_cap_mask &= ~props->clr_port_cap_mask;
/* A shutdown request only flips the driver's ib_active flag here. */
299 if (mask & IB_PORT_SHUTDOWN)
300 vdev->ib_active = false;
303 mutex_unlock(&vdev->port_mutex);
308 * pvrdma_alloc_ucontext - allocate ucontext
309 * @ibdev: the IB device
312 * @return: the ib_ucontext pointer on success, otherwise errno.
/*
 * Create a user context: allocate the driver structure and a UAR, register
 * the UAR page with the device via PVRDMA_CMD_CREATE_UC, and hand the QP
 * table size back to user space. Failures visible here are returned as
 * ERR_PTR() values, not as plain errno integers.
 */
314 struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
315 struct ib_udata *udata)
317 struct pvrdma_dev *vdev = to_vdev(ibdev);
318 struct pvrdma_ucontext *context;
319 union pvrdma_cmd_req req;
320 union pvrdma_cmd_resp rsp;
321 struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
322 struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
/* Zero-initialised so no uninitialised stack bytes are copied to user space. */
323 struct pvrdma_alloc_ucontext_resp uresp = {0};
/* Refuse new contexts while the device is marked inactive. */
327 if (!vdev->ib_active)
328 return ERR_PTR(-EAGAIN);
/* kmalloc does not zero the structure; fields must be set explicitly. */
330 context = kmalloc(sizeof(*context), GFP_KERNEL);
332 return ERR_PTR(-ENOMEM);
/*
 * NOTE(review): -ENOMEM is returned on this path whatever value
 * pvrdma_uar_alloc produced; the release of context before returning is
 * not shown here — confirm.
 */
335 ret = pvrdma_uar_alloc(vdev, &context->uar);
338 return ERR_PTR(-ENOMEM);
341 /* get ctx_handle from host */
342 memset(cmd, 0, sizeof(*cmd));
343 cmd->pfn = context->uar.pfn;
344 cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
345 ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
347 dev_warn(&vdev->pdev->dev,
348 "could not create ucontext, error: %d\n", ret);
353 context->ctx_handle = resp->ctx_handle;
355 /* copy back to user */
356 uresp.qp_tab_size = vdev->dsr->caps.max_qp;
357 ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
/*
 * Copy-to-user failure path: the device-side context already exists, so it
 * is torn down through pvrdma_dealloc_ucontext(); ibucontext.device is set
 * first because that function reads it.
 * NOTE(review): the UAR is freed here and pvrdma_dealloc_ucontext() also
 * calls pvrdma_uar_free() on the same UAR, so it looks like the UAR is
 * released twice on this path — confirm and drop one of the two calls.
 */
359 pvrdma_uar_free(vdev, &context->uar);
360 context->ibucontext.device = ibdev;
361 pvrdma_dealloc_ucontext(&context->ibucontext);
362 return ERR_PTR(-EFAULT);
365 return &context->ibucontext;
/*
 * NOTE(review): presumably the shared error exit for the device-command
 * failure above; its label and the rest of the cleanup are not shown here.
 */
368 pvrdma_uar_free(vdev, &context->uar);
374 * pvrdma_dealloc_ucontext - deallocate ucontext
375 * @ibcontext: the ucontext
377 * @return: 0 on success, otherwise errno.
/*
 * Tear down a user context: post PVRDMA_CMD_DESTROY_UC for its ctx_handle and
 * release its UAR. A failing device command is logged but does not prevent
 * the UAR from being freed.
 */
379 int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
381 struct pvrdma_ucontext *context = to_vucontext(ibcontext);
382 union pvrdma_cmd_req req;
383 struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
386 memset(cmd, 0, sizeof(*cmd));
387 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
388 cmd->ctx_handle = context->ctx_handle;
/* No response buffer is passed (NULL, 0) for the destroy command. */
390 ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
392 dev_warn(&context->dev->pdev->dev,
393 "destroy ucontext failed, error: %d\n", ret);
395 /* Free the UAR even if the device command failed */
/*
 * The device is looked up through ibcontext->device here, while the command
 * above used context->dev; callers must have both set.
 */
396 pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
403 * pvrdma_mmap - create mmap region
404 * @ibcontext: the user context
407 * @return: 0 on success, otherwise errno.
/*
 * Map the context's UAR page into the caller's VMA. Only a single-page
 * mapping is accepted.
 */
409 int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
411 struct pvrdma_ucontext *context = to_vucontext(ibcontext);
412 unsigned long start = vma->vm_start;
413 unsigned long size = vma->vm_end - vma->vm_start;
414 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
416 dev_dbg(&context->dev->pdev->dev, "create mmap region\n");
/*
 * The region must be exactly one page. The offset test checks the bits
 * below PAGE_SHIFT, which are already zero because offset is
 * vm_pgoff << PAGE_SHIFT.
 */
418 if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
419 dev_warn(&context->dev->pdev->dev,
420 "invalid params for mmap region\n");
/*
 * NOTE(review): the comment below says "kernel space", but the PFN is
 * remapped into the supplied vma; presumably "user space" was meant —
 * confirm.
 */
424 /* Map UAR to kernel space, VM_LOCKED? */
/* Not copied on fork, not expandable, mapped with non-cached protection. */
425 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
426 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
427 if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
435 * pvrdma_alloc_pd - allocate protection domain
436 * @ibdev: the IB device
437 * @context: user context
440 * @return: the ib_pd protection domain pointer on success, otherwise errno.
/*
 * Allocate a protection domain: reserve a slot in the num_pds counter, create
 * the PD on the device via PVRDMA_CMD_CREATE_PD and report the PD number to
 * user space. Failures visible here are returned as ERR_PTR() values.
 */
442 struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
443 struct ib_ucontext *context,
444 struct ib_udata *udata)
446 struct pvrdma_pd *pd;
447 struct pvrdma_dev *dev = to_vdev(ibdev);
448 union pvrdma_cmd_req req;
449 union pvrdma_cmd_resp rsp;
450 struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
451 struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
/* Zero-initialised so no uninitialised stack bytes are copied to user space. */
452 struct pvrdma_alloc_pd_resp pd_resp = {0};
456 /* Check allowed max pds */
/* Increments num_pds unless it already equals caps.max_pd. */
457 if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
458 return ERR_PTR(-ENOMEM);
/* kmalloc does not zero the structure; fields must be set explicitly. */
460 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
462 ptr = ERR_PTR(-ENOMEM);
466 memset(cmd, 0, sizeof(*cmd));
467 cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
/* A NULL context (no user context) is sent to the device as ctx_handle 0. */
468 cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
469 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
471 dev_warn(&dev->pdev->dev,
472 "failed to allocate protection domain, error: %d\n",
/* The PD is privileged exactly when there is no user context. */
478 pd->privileged = !context;
/* The device handle serves as both pd_handle and pdn. */
479 pd->pd_handle = resp->pd_handle;
480 pd->pdn = resp->pd_handle;
481 pd_resp.pdn = resp->pd_handle;
/*
 * Copy-to-user failure path: the device-side PD already exists, so it is
 * destroyed through pvrdma_dealloc_pd(), which also decrements num_pds.
 * NOTE(review): pvrdma_dealloc_pd() reads pd->device, but no visible line
 * sets pd->ibpd.device on this kmalloc'ed structure before the call —
 * confirm it is initialised, otherwise this dereferences garbage.
 */
484 if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
485 dev_warn(&dev->pdev->dev,
486 "failed to copy back protection domain\n");
487 pvrdma_dealloc_pd(&pd->ibpd);
488 return ERR_PTR(-EFAULT);
/* Error exit: give back the slot reserved by atomic_add_unless() above. */
498 atomic_dec(&dev->num_pds);
503 * pvrdma_dealloc_pd - deallocate protection domain
504 * @pd: the protection domain to be released
506 * @return: 0 on success, otherwise errno.
/*
 * Destroy a protection domain: post PVRDMA_CMD_DESTROY_PD for its handle and
 * release its slot in the num_pds counter.
 */
508 int pvrdma_dealloc_pd(struct ib_pd *pd)
/* Requires pd->device to be valid; it is the only route to the device here. */
510 struct pvrdma_dev *dev = to_vdev(pd->device);
511 union pvrdma_cmd_req req;
512 struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
515 memset(cmd, 0, sizeof(*cmd));
516 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
517 cmd->pd_handle = to_vpd(pd)->pd_handle;
/* No response buffer is passed (NULL, 0) for the destroy command. */
519 ret = pvrdma_cmd_post(dev, &req, NULL, 0);
521 dev_warn(&dev->pdev->dev,
522 "could not dealloc protection domain, error: %d\n",
/*
 * NOTE(review): whether this decrement also runs when the device command
 * failed cannot be told from the lines shown here — confirm.
 */
526 atomic_dec(&dev->num_pds);
532 * pvrdma_create_ah - create an address handle
533 * @pd: the protection domain
534 * @ah_attr: the attributes of the AH
535 * @udata: user data blob
537 * @return: the ib_ah pointer on success, otherwise errno.
/*
 * Build an address handle from @ah_attr. The handle is filled in locally;
 * no device command is posted in the visible code. Only RoCE attributes with
 * a GRH and a non-multicast destination GID are accepted.
 */
539 struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
540 struct ib_udata *udata)
542 struct pvrdma_dev *dev = to_vdev(pd->device);
543 struct pvrdma_ah *ah;
544 const struct ib_global_route *grh;
545 u8 port_num = rdma_ah_get_port_num(ah_attr);
/* A global route header is mandatory. */
547 if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
548 return ERR_PTR(-EINVAL);
550 grh = rdma_ah_read_grh(ah_attr);
/* Reject non-RoCE attribute types and multicast destination GIDs. */
551 if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
552 rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
553 return ERR_PTR(-EINVAL);
/* Increments num_ahs unless it already equals caps.max_ah. */
555 if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
556 return ERR_PTR(-ENOMEM);
/* Zeroed, non-sleeping allocation; the reserved slot is returned on failure. */
558 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
560 atomic_dec(&dev->num_ahs);
561 return ERR_PTR(-ENOMEM);
/* port_pd packs the PD handle with the port number shifted into bit 24 and up. */
564 ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24);
565 ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
/*
 * NOTE(review): bit 0x80 is always set in src_path_bits; its meaning is
 * not established by this code — confirm against the device ABI.
 */
566 ah->av.src_path_bits |= 0x80;
567 ah->av.gid_index = grh->sgid_index;
568 ah->av.hop_limit = grh->hop_limit;
/*
 * Traffic class is placed at bit 20 and up.
 * NOTE(review): the second operand of the OR is not shown here; presumably
 * the GRH flow label — confirm.
 */
569 ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
/* 16-byte destination GID and ETH_ALEN-byte destination MAC are copied raw. */
571 memcpy(ah->av.dgid, grh->dgid.raw, 16);
572 memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);
574 ah->ibah.device = pd->device;
576 ah->ibah.uobject = NULL;
582 * pvrdma_destroy_ah - destroy an address handle
583 * @ah: the address handle to be destroyed
585 * @return: 0 on success.
/*
 * Release an address handle. The visible code gives back the slot in the
 * num_ahs counter.
 * NOTE(review): freeing of the handle memory is not shown here — confirm.
 */
587 int pvrdma_destroy_ah(struct ib_ah *ah)
589 struct pvrdma_dev *dev = to_vdev(ah->device);
592 atomic_dec(&dev->num_ahs);