1 /* QLogic qedr NIC Driver
2 * Copyright (c) 2015-2016 QLogic Corporation
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and /or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
37 #include <linux/iommu.h>
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
56 #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
57 #define RDMA_MAX_SGE_PER_SRQ (4)
58 #define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1)
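/* Illustrative note: reading the two defines above, a worst-case SRQ WQE is
 * presumably one header element plus up to RDMA_MAX_SGE_PER_SRQ SGE elements,
 * i.e. 5 chain elements; this is why the kernel SRQ path below sizes its chain
 * as max_wr * RDMA_MAX_SRQ_WQE_SIZE (e.g. max_wr = 1024 -> 5120 elements).
 */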
60 #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
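/* Illustrative note: DB_ADDR_SHIFT() turns a DQ_PWM_OFFSET_* index into a byte
 * offset inside a doorbell window (a user DPI or the kernel db_addr), assuming
 * DB_PWM_ADDR_OFFSET_SHIFT encodes the doorbell stride; e.g. the CQ code below
 * uses dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT).
 */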
63 QEDR_USER_MMAP_IO_WC = 0,
64 QEDR_USER_MMAP_PHYS_PAGE,
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
70 size_t min_len = min_t(size_t, len, udata->outlen);
72 return ib_copy_to_udata(udata, src, min_len);
75 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
77 if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
80 *pkey = QEDR_ROCE_PKEY_DEFAULT;
84 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
85 int index, union ib_gid *sgid)
87 struct qedr_dev *dev = get_qedr_dev(ibdev);
89 memset(sgid->raw, 0, sizeof(sgid->raw));
90 ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
92 DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 sgid->global.interface_id, sgid->global.subnet_prefix);
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
100 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 struct qedr_device_attr *qattr = &dev->attr;
102 struct qedr_srq *srq = get_qedr_srq(ibsrq);
104 srq_attr->srq_limit = srq->srq_limit;
105 srq_attr->max_wr = qattr->max_srq_wr;
106 srq_attr->max_sge = qattr->max_sge;
111 int qedr_query_device(struct ib_device *ibdev,
112 struct ib_device_attr *attr, struct ib_udata *udata)
114 struct qedr_dev *dev = get_qedr_dev(ibdev);
115 struct qedr_device_attr *qattr = &dev->attr;
117 if (!dev->rdma_ctx) {
119 "qedr_query_device called with invalid params rdma_ctx=%p\n",
124 memset(attr, 0, sizeof(*attr));
126 attr->fw_ver = qattr->fw_ver;
127 attr->sys_image_guid = qattr->sys_image_guid;
128 attr->max_mr_size = qattr->max_mr_size;
129 attr->page_size_cap = qattr->page_size_caps;
130 attr->vendor_id = qattr->vendor_id;
131 attr->vendor_part_id = qattr->vendor_part_id;
132 attr->hw_ver = qattr->hw_ver;
133 attr->max_qp = qattr->max_qp;
134 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 IB_DEVICE_RC_RNR_NAK_GEN |
137 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
139 attr->max_send_sge = qattr->max_sge;
140 attr->max_recv_sge = qattr->max_sge;
141 attr->max_sge_rd = qattr->max_sge;
142 attr->max_cq = qattr->max_cq;
143 attr->max_cqe = qattr->max_cqe;
144 attr->max_mr = qattr->max_mr;
145 attr->max_mw = qattr->max_mw;
146 attr->max_pd = qattr->max_pd;
147 attr->atomic_cap = dev->atomic_cap;
148 attr->max_qp_init_rd_atom =
149 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
150 attr->max_qp_rd_atom =
151 min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
152 attr->max_qp_init_rd_atom);
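/* Worked example (illustrative): the fls()-based expression keeps only the
 * highest set bit, i.e. rounds down to a power of two. If
 * max_qp_req_rd_atomic_resc were 80, fls(80) == 7, so
 * max_qp_init_rd_atom = 1 << 6 = 64; max_qp_rd_atom applies the same rounding
 * to the responder resources and is further capped by max_qp_init_rd_atom.
 */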
154 attr->max_srq = qattr->max_srq;
155 attr->max_srq_sge = qattr->max_srq_sge;
156 attr->max_srq_wr = qattr->max_srq_wr;
158 attr->local_ca_ack_delay = qattr->dev_ack_delay;
159 attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
160 attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
161 attr->max_ah = qattr->max_ah;
166 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
171 *ib_speed = IB_SPEED_SDR;
172 *ib_width = IB_WIDTH_1X;
175 *ib_speed = IB_SPEED_QDR;
176 *ib_width = IB_WIDTH_1X;
180 *ib_speed = IB_SPEED_DDR;
181 *ib_width = IB_WIDTH_4X;
185 *ib_speed = IB_SPEED_EDR;
186 *ib_width = IB_WIDTH_1X;
190 *ib_speed = IB_SPEED_QDR;
191 *ib_width = IB_WIDTH_4X;
195 *ib_speed = IB_SPEED_HDR;
196 *ib_width = IB_WIDTH_1X;
200 *ib_speed = IB_SPEED_EDR;
201 *ib_width = IB_WIDTH_4X;
206 *ib_speed = IB_SPEED_SDR;
207 *ib_width = IB_WIDTH_1X;
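/* Illustrative note: each (ib_speed, ib_width) pair above is chosen so that the
 * per-lane IB rate times the link width approximates the Ethernet link speed
 * passed in, e.g. QDR (~10 Gb/s per lane) at 4X corresponds to a 40 Gb/s link;
 * unrecognized speeds presumably fall back to the final SDR/1X pair.
 */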
211 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
213 struct qedr_dev *dev;
214 struct qed_rdma_port *rdma_port;
216 dev = get_qedr_dev(ibdev);
218 if (!dev->rdma_ctx) {
219 DP_ERR(dev, "rdma_ctx is NULL\n");
223 rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
225 /* *attr is zeroed by the caller, so avoid zeroing it here */
226 if (rdma_port->port_state == QED_RDMA_PORT_UP) {
227 attr->state = IB_PORT_ACTIVE;
228 attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
230 attr->state = IB_PORT_DOWN;
231 attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
233 attr->max_mtu = IB_MTU_4096;
234 attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
239 attr->ip_gids = true;
240 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
241 attr->gid_tbl_len = 1;
243 attr->gid_tbl_len = QEDR_MAX_SGID;
244 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
246 attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
247 attr->qkey_viol_cntr = 0;
248 get_link_speed_and_width(rdma_port->link_speed,
249 &attr->active_speed, &attr->active_width);
250 attr->max_msg_sz = rdma_port->max_msg_size;
251 attr->max_vl_num = 4;
256 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
258 struct ib_device *ibdev = uctx->device;
260 struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
261 struct qedr_alloc_ucontext_resp uresp = {};
262 struct qedr_alloc_ucontext_req ureq = {};
263 struct qedr_dev *dev = get_qedr_dev(ibdev);
264 struct qed_rdma_add_user_out_params oparams;
265 struct qedr_user_mmap_entry *entry;
271 rc = ib_copy_from_udata(&ureq, udata,
272 min(sizeof(ureq), udata->inlen));
274 DP_ERR(dev, "Problem copying data from user space\n");
277 ctx->edpm_mode = !!(ureq.context_flags &
278 QEDR_ALLOC_UCTX_EDPM_MODE);
279 ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
282 rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
285 "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
290 ctx->dpi = oparams.dpi;
291 ctx->dpi_addr = oparams.dpi_addr;
292 ctx->dpi_phys_addr = oparams.dpi_phys_addr;
293 ctx->dpi_size = oparams.dpi_size;
294 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
300 entry->io_address = ctx->dpi_phys_addr;
301 entry->length = ctx->dpi_size;
302 entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
303 entry->dpi = ctx->dpi;
305 rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
311 ctx->db_mmap_entry = &entry->rdma_entry;
313 if (!dev->user_dpm_enabled)
315 else if (rdma_protocol_iwarp(&dev->ibdev, 1))
316 uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
318 uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
319 QEDR_DPM_TYPE_ROCE_LEGACY |
320 QEDR_DPM_TYPE_ROCE_EDPM_MODE;
322 if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
323 uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
324 uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
325 uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
326 uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
329 uresp.wids_enabled = 1;
330 uresp.wid_count = oparams.wid_count;
331 uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
332 uresp.db_size = ctx->dpi_size;
333 uresp.max_send_wr = dev->attr.max_sqe;
334 uresp.max_recv_wr = dev->attr.max_rqe;
335 uresp.max_srq_wr = dev->attr.max_srq_wr;
336 uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
337 uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
338 uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
339 uresp.max_cqes = QEDR_MAX_CQES;
341 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
347 DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
352 if (!ctx->db_mmap_entry)
353 dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
355 rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
360 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
362 struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
364 DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
367 rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
370 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
372 struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
373 struct qedr_dev *dev = entry->dev;
375 if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
376 free_page((unsigned long)entry->address);
377 else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
378 dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
383 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
385 struct ib_device *dev = ucontext->device;
386 size_t length = vma->vm_end - vma->vm_start;
387 struct rdma_user_mmap_entry *rdma_entry;
388 struct qedr_user_mmap_entry *entry;
393 "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
394 vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
396 rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
398 ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
402 entry = get_qedr_mmap_entry(rdma_entry);
404 "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
405 entry->io_address, length, entry->mmap_flag);
407 switch (entry->mmap_flag) {
408 case QEDR_USER_MMAP_IO_WC:
409 pfn = entry->io_address >> PAGE_SHIFT;
410 rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
411 pgprot_writecombine(vma->vm_page_prot),
414 case QEDR_USER_MMAP_PHYS_PAGE:
415 rc = vm_insert_page(vma, vma->vm_start,
416 virt_to_page(entry->address));
424 "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
425 entry->io_address, length, entry->mmap_flag, rc);
427 rdma_user_mmap_entry_put(rdma_entry);
431 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
433 struct ib_device *ibdev = ibpd->device;
434 struct qedr_dev *dev = get_qedr_dev(ibdev);
435 struct qedr_pd *pd = get_qedr_pd(ibpd);
439 DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
440 udata ? "User Lib" : "Kernel");
442 if (!dev->rdma_ctx) {
443 DP_ERR(dev, "invalid RDMA context\n");
447 rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
454 struct qedr_alloc_pd_uresp uresp = {
457 struct qedr_ucontext *context = rdma_udata_to_drv_context(
458 udata, struct qedr_ucontext, ibucontext);
460 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
462 DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
463 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
474 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
476 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
477 struct qedr_pd *pd = get_qedr_pd(ibpd);
479 DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
480 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
483 static void qedr_free_pbl(struct qedr_dev *dev,
484 struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
486 struct pci_dev *pdev = dev->pdev;
489 for (i = 0; i < pbl_info->num_pbls; i++) {
492 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
493 pbl[i].va, pbl[i].pa);
499 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
500 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
502 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
503 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
504 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
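/* Capacity arithmetic implied by the defines above (8-byte PBEs): a 4 KB PBL
 * page holds 512 PBEs and a 64 KB page holds 8192, so a two-layer table can
 * address up to 8192 * 8192 = 67,108,864 PBEs - roughly 256 GB of registered
 * memory when each PBE maps a 4 KB page (illustrative assumption).
 */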
506 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
507 struct qedr_pbl_info *pbl_info,
510 struct pci_dev *pdev = dev->pdev;
511 struct qedr_pbl *pbl_table;
512 dma_addr_t *pbl_main_tbl;
517 pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
519 return ERR_PTR(-ENOMEM);
521 for (i = 0; i < pbl_info->num_pbls; i++) {
522 va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
527 pbl_table[i].va = va;
528 pbl_table[i].pa = pa;
531 /* Two-Layer PBLs, if we have more than one pbl we need to initialize
532 * the first one with physical pointers to all of the rest
534 pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
535 for (i = 0; i < pbl_info->num_pbls - 1; i++)
536 pbl_main_tbl[i] = pbl_table[i + 1].pa;
541 for (i--; i >= 0; i--)
542 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
543 pbl_table[i].va, pbl_table[i].pa);
545 qedr_free_pbl(dev, pbl_info, pbl_table);
547 return ERR_PTR(-ENOMEM);
550 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
551 struct qedr_pbl_info *pbl_info,
552 u32 num_pbes, int two_layer_capable)
558 if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
559 if (num_pbes > MAX_PBES_TWO_LAYER) {
560 DP_ERR(dev, "prepare pbl table: too many pages %d\n",
565 /* calculate required pbl page size */
566 pbl_size = MIN_FW_PBL_PAGE_SIZE;
567 pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
568 NUM_PBES_ON_PAGE(pbl_size);
570 while (pbl_capacity < num_pbes) {
572 pbl_capacity = pbl_size / sizeof(u64);
573 pbl_capacity = pbl_capacity * pbl_capacity;
576 num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
577 num_pbls++; /* One more for layer 0 (points to the other PBLs) */
578 pbl_info->two_layered = true;
580 /* One layered PBL */
582 pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
583 roundup_pow_of_two((num_pbes * sizeof(u64))));
584 pbl_info->two_layered = false;
587 pbl_info->num_pbls = num_pbls;
588 pbl_info->pbl_size = pbl_size;
589 pbl_info->num_pbes = num_pbes;
591 DP_DEBUG(dev, QEDR_MSG_MR,
592 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
593 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
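/* Worked example (illustrative): registering 1 GB of 4 KB pages gives
 * num_pbes = 262144 > MAX_PBES_ON_PAGE, so the two-layer path is taken; a 4 KB
 * pbl_size already provides 512 * 512 = 262144 of capacity, so
 * num_pbls = DIV_ROUND_UP(262144, 512) + 1 = 513 PBL pages. A small region of,
 * say, 1000 pages stays single-layered with one
 * roundup_pow_of_two(1000 * 8) = 8 KB PBL.
 */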
598 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
599 struct qedr_pbl *pbl,
600 struct qedr_pbl_info *pbl_info, u32 pg_shift)
602 int pbe_cnt, total_num_pbes = 0;
603 u32 fw_pg_cnt, fw_pg_per_umem_pg;
604 struct qedr_pbl *pbl_tbl;
605 struct sg_dma_page_iter sg_iter;
609 if (!pbl_info->num_pbes)
612 /* If we have a two-layered PBL, the first PBL points to the rest
613 * of the PBLs, and the first data entry lies in the second PBL of the table
615 if (pbl_info->two_layered)
620 pbe = (struct regpair *)pbl_tbl->va;
622 DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
628 fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
630 for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
631 pg_addr = sg_page_iter_dma_address(&sg_iter);
632 for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
633 pbe->lo = cpu_to_le32(pg_addr);
634 pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
636 pg_addr += BIT(pg_shift);
641 if (total_num_pbes == pbl_info->num_pbes)
644 /* If the given PBL is already full of PBEs,
647 if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
649 pbe = (struct regpair *)pbl_tbl->va;
658 static int qedr_db_recovery_add(struct qedr_dev *dev,
659 void __iomem *db_addr,
661 enum qed_db_rec_width db_width,
662 enum qed_db_rec_space db_space)
665 DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
669 return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
673 static void qedr_db_recovery_del(struct qedr_dev *dev,
674 void __iomem *db_addr,
678 DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
682 /* Ignore return code as there is not much we can do about it. Error
683 * log will be printed inside.
685 dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
688 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
689 struct qedr_cq *cq, struct ib_udata *udata,
692 struct qedr_create_cq_uresp uresp;
695 memset(&uresp, 0, sizeof(uresp));
697 uresp.db_offset = db_offset;
698 uresp.icid = cq->icid;
699 if (cq->q.db_mmap_entry)
701 rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
703 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
705 DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
710 static void consume_cqe(struct qedr_cq *cq)
712 if (cq->latest_cqe == cq->toggle_cqe)
713 cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
715 cq->latest_cqe = qed_chain_consume(&cq->pbl);
718 static inline int qedr_align_cq_entries(int entries)
720 u64 size, aligned_size;
722 /* We allocate an extra entry that we don't report to the FW. */
723 size = (entries + 1) * QEDR_CQE_SIZE;
724 aligned_size = ALIGN(size, PAGE_SIZE);
726 return aligned_size / QEDR_CQE_SIZE;
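/* Worked example (illustrative, assuming a 32-byte CQE): a request for 100
 * entries becomes (100 + 1) * 32 = 3232 bytes, padded to one 4 KB page, so 128
 * CQEs are actually provisioned; the result is later clamped to QEDR_MAX_CQES
 * in qedr_create_cq().
 */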
729 static int qedr_init_user_db_rec(struct ib_udata *udata,
730 struct qedr_dev *dev, struct qedr_userq *q,
731 bool requires_db_rec)
733 struct qedr_ucontext *uctx =
734 rdma_udata_to_drv_context(udata, struct qedr_ucontext,
736 struct qedr_user_mmap_entry *entry;
739 /* Abort for a non-doorbell user queue (SRQ) or a lib without doorbell-recovery support */
740 if (requires_db_rec == 0 || !uctx->db_rec)
743 /* Allocate a page for doorbell recovery, add to mmap */
744 q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
745 if (!q->db_rec_data) {
746 DP_ERR(dev, "get_zeroed_page failed\n");
750 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
752 goto err_free_db_data;
754 entry->address = q->db_rec_data;
755 entry->length = PAGE_SIZE;
756 entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
757 rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
763 q->db_mmap_entry = &entry->rdma_entry;
771 free_page((unsigned long)q->db_rec_data);
772 q->db_rec_data = NULL;
776 static inline int qedr_init_user_queue(struct ib_udata *udata,
777 struct qedr_dev *dev,
778 struct qedr_userq *q, u64 buf_addr,
779 size_t buf_len, bool requires_db_rec,
786 q->buf_addr = buf_addr;
787 q->buf_len = buf_len;
788 q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
789 if (IS_ERR(q->umem)) {
790 DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
792 return PTR_ERR(q->umem);
795 fw_pages = ib_umem_page_count(q->umem) <<
796 (PAGE_SHIFT - FW_PAGE_SHIFT);
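/* Illustrative note: assuming FW_PAGE_SHIFT is 12 (4 KB firmware pages), a host
 * with 64 KB pages contributes 16 firmware pages per umem page, e.g. a 1 MB
 * queue is 16 umem pages but fw_pages = 16 << 4 = 256; on 4 KB-page hosts the
 * shift is zero and the two counts coincide.
 */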
798 rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
802 if (alloc_and_init) {
803 q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
804 if (IS_ERR(q->pbl_tbl)) {
805 rc = PTR_ERR(q->pbl_tbl);
808 qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
811 q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
818 /* mmap the user address used to store doorbell data for recovery */
819 return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
822 ib_umem_release(q->umem);
828 static inline void qedr_init_cq_params(struct qedr_cq *cq,
829 struct qedr_ucontext *ctx,
830 struct qedr_dev *dev, int vector,
831 int chain_entries, int page_cnt,
833 struct qed_rdma_create_cq_in_params
836 memset(params, 0, sizeof(*params));
837 params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
838 params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
839 params->cnq_id = vector;
840 params->cq_size = chain_entries - 1;
841 params->dpi = (ctx) ? ctx->dpi : dev->dpi;
842 params->pbl_num_pages = page_cnt;
843 params->pbl_ptr = pbl_ptr;
844 params->pbl_two_level = 0;
847 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
849 cq->db.data.agg_flags = flags;
850 cq->db.data.value = cpu_to_le32(cons);
851 writeq(cq->db.raw, cq->db_addr);
854 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
856 struct qedr_cq *cq = get_qedr_cq(ibcq);
857 unsigned long sflags;
858 struct qedr_dev *dev;
860 dev = get_qedr_dev(ibcq->device);
864 "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
870 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
873 spin_lock_irqsave(&cq->cq_lock, sflags);
877 if (flags & IB_CQ_SOLICITED)
878 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
880 if (flags & IB_CQ_NEXT_COMP)
881 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
883 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
885 spin_unlock_irqrestore(&cq->cq_lock, sflags);
890 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
891 struct ib_udata *udata)
893 struct ib_device *ibdev = ibcq->device;
894 struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
895 udata, struct qedr_ucontext, ibucontext);
896 struct qed_rdma_destroy_cq_out_params destroy_oparams;
897 struct qed_rdma_destroy_cq_in_params destroy_iparams;
898 struct qedr_dev *dev = get_qedr_dev(ibdev);
899 struct qed_rdma_create_cq_in_params params;
900 struct qedr_create_cq_ureq ureq = {};
901 int vector = attr->comp_vector;
902 int entries = attr->cqe;
903 struct qedr_cq *cq = get_qedr_cq(ibcq);
911 DP_DEBUG(dev, QEDR_MSG_INIT,
912 "create_cq: called from %s. entries=%d, vector=%d\n",
913 udata ? "User Lib" : "Kernel", entries, vector);
915 if (entries > QEDR_MAX_CQES) {
917 "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
918 entries, QEDR_MAX_CQES);
922 chain_entries = qedr_align_cq_entries(entries);
923 chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
925 /* Calculate the db offset: user space will add the DPI base, the kernel adds the db address */
926 db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
929 if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
932 "create cq: problem copying data from user space\n");
938 "create cq: cannot create a cq with 0 entries\n");
942 cq->cq_type = QEDR_CQ_TYPE_USER;
944 rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
945 ureq.len, true, IB_ACCESS_LOCAL_WRITE,
950 pbl_ptr = cq->q.pbl_tbl->pa;
951 page_cnt = cq->q.pbl_info.num_pbes;
953 cq->ibcq.cqe = chain_entries;
954 cq->q.db_addr = ctx->dpi_addr + db_offset;
956 cq->cq_type = QEDR_CQ_TYPE_KERNEL;
958 rc = dev->ops->common->chain_alloc(dev->cdev,
959 QED_CHAIN_USE_TO_CONSUME,
961 QED_CHAIN_CNT_TYPE_U32,
963 sizeof(union rdma_cqe),
968 page_cnt = qed_chain_get_page_cnt(&cq->pbl);
969 pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
970 cq->ibcq.cqe = cq->pbl.capacity;
973 qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
976 rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
981 cq->sig = QEDR_CQ_MAGIC_NUMBER;
982 spin_lock_init(&cq->cq_lock);
985 rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
989 rc = qedr_db_recovery_add(dev, cq->q.db_addr,
990 &cq->q.db_rec_data->db_data,
997 /* Generate doorbell address. */
998 cq->db.data.icid = cq->icid;
999 cq->db_addr = dev->db_addr + db_offset;
1000 cq->db.data.params = DB_AGG_CMD_SET <<
1001 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1003 /* point to the very last element; once we pass it, we toggle */
1004 cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1005 cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1006 cq->latest_cqe = NULL;
1008 cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1010 rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1011 DB_REC_WIDTH_64B, DB_REC_KERNEL);
1016 DP_DEBUG(dev, QEDR_MSG_CQ,
1017 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1018 cq->icid, cq, params.cq_size);
1023 destroy_iparams.icid = cq->icid;
1024 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1028 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1029 ib_umem_release(cq->q.umem);
1030 if (cq->q.db_mmap_entry)
1031 rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1033 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1039 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1041 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1042 struct qedr_cq *cq = get_qedr_cq(ibcq);
1044 DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1049 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
1050 #define QEDR_DESTROY_CQ_ITER_DURATION (10)
1052 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1054 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1055 struct qed_rdma_destroy_cq_out_params oparams;
1056 struct qed_rdma_destroy_cq_in_params iparams;
1057 struct qedr_cq *cq = get_qedr_cq(ibcq);
1060 DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1064 /* GSI CQs are handled by the driver, so they don't exist in the FW */
1065 if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1066 qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1070 iparams.icid = cq->icid;
1071 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1072 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1075 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1076 ib_umem_release(cq->q.umem);
1078 if (cq->q.db_rec_data) {
1079 qedr_db_recovery_del(dev, cq->q.db_addr,
1080 &cq->q.db_rec_data->db_data);
1081 rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1084 qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1087 /* We don't want the IRQ handler to handle a non-existing CQ so we
1088 * wait until all CNQ interrupts, if any, are received. This will always
1089 * happen and will always happen very fast. If not, then a serious error
1090 * has occurred. That is why we can use a long delay.
1091 * We spin for a short time so we don't lose time on context switching
1092 * in case all the completions are handled in that span. Otherwise
1093 * we sleep for a while and check again. Since the CNQ may be
1094 * associated with (only) the current CPU we use msleep to allow the
1095 * current CPU to be freed.
1096 * The CNQ notification is increased in qedr_irq_handler().
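 * With the constants above this polling is bounded: at most
 * QEDR_DESTROY_CQ_MAX_ITERATIONS * 10 us (~100 us) of busy waiting followed,
 * if needed, by the same number of 10 ms sleeps, i.e. roughly 100 ms in total.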
1098 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1099 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1100 udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1104 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1105 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1106 msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1110 /* Note that we don't need to have explicit code to wait for the
1111 * completion of the event handler because it is invoked from the EQ.
1112 * Since the destroy CQ ramrod has also been received on the EQ we can
1113 * be certain that there's no event handler in process.
1117 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1118 struct ib_qp_attr *attr,
1120 struct qed_rdma_modify_qp_in_params
1123 const struct ib_gid_attr *gid_attr;
1124 enum rdma_network_type nw_type;
1125 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1130 gid_attr = grh->sgid_attr;
1131 ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1135 nw_type = rdma_gid_attr_network_type(gid_attr);
1137 case RDMA_NETWORK_IPV6:
1138 memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1139 sizeof(qp_params->sgid));
1140 memcpy(&qp_params->dgid.bytes[0],
1142 sizeof(qp_params->dgid));
1143 qp_params->roce_mode = ROCE_V2_IPV6;
1144 SET_FIELD(qp_params->modify_flags,
1145 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1147 case RDMA_NETWORK_IB:
1148 memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1149 sizeof(qp_params->sgid));
1150 memcpy(&qp_params->dgid.bytes[0],
1152 sizeof(qp_params->dgid));
1153 qp_params->roce_mode = ROCE_V1;
1155 case RDMA_NETWORK_IPV4:
1156 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1157 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1158 ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1159 qp_params->sgid.ipv4_addr = ipv4_addr;
1161 qedr_get_ipv4_from_gid(grh->dgid.raw);
1162 qp_params->dgid.ipv4_addr = ipv4_addr;
1163 SET_FIELD(qp_params->modify_flags,
1164 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1165 qp_params->roce_mode = ROCE_V2_IPV4;
1169 for (i = 0; i < 4; i++) {
1170 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1171 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1174 if (qp_params->vlan_id >= VLAN_CFI_MASK)
1175 qp_params->vlan_id = 0;
1180 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1181 struct ib_qp_init_attr *attrs,
1182 struct ib_udata *udata)
1184 struct qedr_device_attr *qattr = &dev->attr;
1186 /* QP0... attrs->qp_type == IB_QPT_GSI */
1187 if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1188 DP_DEBUG(dev, QEDR_MSG_QP,
1189 "create qp: unsupported qp type=0x%x requested\n",
1194 if (attrs->cap.max_send_wr > qattr->max_sqe) {
1196 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1197 attrs->cap.max_send_wr, qattr->max_sqe);
1201 if (attrs->cap.max_inline_data > qattr->max_inline) {
1203 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1204 attrs->cap.max_inline_data, qattr->max_inline);
1208 if (attrs->cap.max_send_sge > qattr->max_sge) {
1210 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1211 attrs->cap.max_send_sge, qattr->max_sge);
1215 if (attrs->cap.max_recv_sge > qattr->max_sge) {
1217 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1218 attrs->cap.max_recv_sge, qattr->max_sge);
1222 /* Unprivileged user space cannot create special QP */
1223 if (udata && attrs->qp_type == IB_QPT_GSI) {
1225 "create qp: userspace can't create special QPs of type=0x%x\n",
1233 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1234 struct qedr_srq *srq, struct ib_udata *udata)
1236 struct qedr_create_srq_uresp uresp = {};
1239 uresp.srq_id = srq->srq_id;
1241 rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1243 DP_ERR(dev, "create srq: problem copying data to user space\n");
1248 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1249 struct qedr_create_qp_uresp *uresp,
1252 /* iWARP requires two doorbells per RQ. */
1253 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1254 uresp->rq_db_offset =
1255 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1256 uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1258 uresp->rq_db_offset =
1259 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1262 uresp->rq_icid = qp->icid;
1263 if (qp->urq.db_mmap_entry)
1264 uresp->rq_db_rec_addr =
1265 rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1268 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1269 struct qedr_create_qp_uresp *uresp,
1272 uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1274 /* iWARP uses the same cid for rq and sq */
1275 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1276 uresp->sq_icid = qp->icid;
1278 uresp->sq_icid = qp->icid + 1;
1280 if (qp->usq.db_mmap_entry)
1281 uresp->sq_db_rec_addr =
1282 rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1285 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1286 struct qedr_qp *qp, struct ib_udata *udata,
1287 struct qedr_create_qp_uresp *uresp)
1291 memset(uresp, 0, sizeof(*uresp));
1292 qedr_copy_sq_uresp(dev, uresp, qp);
1293 qedr_copy_rq_uresp(dev, uresp, qp);
1295 uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1296 uresp->qp_id = qp->qp_id;
1298 rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1301 "create qp: failed a copy to user space with qp icid=0x%x.\n",
1307 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1310 struct ib_qp_init_attr *attrs)
1312 spin_lock_init(&qp->q_lock);
1313 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1314 kref_init(&qp->refcnt);
1315 init_completion(&qp->iwarp_cm_comp);
1318 qp->qp_type = attrs->qp_type;
1319 qp->max_inline_data = attrs->cap.max_inline_data;
1320 qp->sq.max_sges = attrs->cap.max_send_sge;
1321 qp->state = QED_ROCE_QP_STATE_RESET;
1322 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1323 qp->sq_cq = get_qedr_cq(attrs->send_cq);
1327 qp->srq = get_qedr_srq(attrs->srq);
1329 qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1330 qp->rq.max_sges = attrs->cap.max_recv_sge;
1331 DP_DEBUG(dev, QEDR_MSG_QP,
1332 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1333 qp->rq.max_sges, qp->rq_cq->icid);
1336 DP_DEBUG(dev, QEDR_MSG_QP,
1337 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1338 pd->pd_id, qp->qp_type, qp->max_inline_data,
1339 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1340 DP_DEBUG(dev, QEDR_MSG_QP,
1341 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1342 qp->sq.max_sges, qp->sq_cq->icid);
1345 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1349 qp->sq.db = dev->db_addr +
1350 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1351 qp->sq.db_data.data.icid = qp->icid + 1;
1352 rc = qedr_db_recovery_add(dev, qp->sq.db,
1360 qp->rq.db = dev->db_addr +
1361 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1362 qp->rq.db_data.data.icid = qp->icid;
1364 rc = qedr_db_recovery_add(dev, qp->rq.db,
1369 qedr_db_recovery_del(dev, qp->sq.db,
1376 static int qedr_check_srq_params(struct qedr_dev *dev,
1377 struct ib_srq_init_attr *attrs,
1378 struct ib_udata *udata)
1380 struct qedr_device_attr *qattr = &dev->attr;
1382 if (attrs->attr.max_wr > qattr->max_srq_wr) {
1384 "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1385 attrs->attr.max_wr, qattr->max_srq_wr);
1389 if (attrs->attr.max_sge > qattr->max_sge) {
1391 "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1392 attrs->attr.max_sge, qattr->max_sge);
1399 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1401 qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1402 ib_umem_release(srq->usrq.umem);
1403 ib_umem_release(srq->prod_umem);
1406 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1408 struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1409 struct qedr_dev *dev = srq->dev;
1411 dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1413 dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1414 hw_srq->virt_prod_pair_addr,
1415 hw_srq->phy_prod_pair_addr);
1418 static int qedr_init_srq_user_params(struct ib_udata *udata,
1419 struct qedr_srq *srq,
1420 struct qedr_create_srq_ureq *ureq,
1423 struct scatterlist *sg;
1426 rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1427 ureq->srq_len, false, access, 1);
1431 srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1432 sizeof(struct rdma_srq_producers), access);
1433 if (IS_ERR(srq->prod_umem)) {
1434 qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1435 ib_umem_release(srq->usrq.umem);
1437 "create srq: failed ib_umem_get for producer, got %ld\n",
1438 PTR_ERR(srq->prod_umem));
1439 return PTR_ERR(srq->prod_umem);
1442 sg = srq->prod_umem->sg_head.sgl;
1443 srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1448 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1449 struct qedr_dev *dev,
1450 struct ib_srq_init_attr *init_attr)
1452 struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1453 dma_addr_t phy_prod_pair_addr;
1458 va = dma_alloc_coherent(&dev->pdev->dev,
1459 sizeof(struct rdma_srq_producers),
1460 &phy_prod_pair_addr, GFP_KERNEL);
1463 "create srq: failed to allocate dma memory for producer\n");
1467 hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1468 hw_srq->virt_prod_pair_addr = va;
1470 num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1471 rc = dev->ops->common->chain_alloc(dev->cdev,
1472 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1474 QED_CHAIN_CNT_TYPE_U32,
1476 QEDR_SRQ_WQE_ELEM_SIZE,
1477 &hw_srq->pbl, NULL);
1481 hw_srq->num_elems = num_elems;
1486 dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1487 va, phy_prod_pair_addr);
1491 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1492 struct ib_udata *udata)
1494 struct qed_rdma_destroy_srq_in_params destroy_in_params;
1495 struct qed_rdma_create_srq_in_params in_params = {};
1496 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1497 struct qed_rdma_create_srq_out_params out_params;
1498 struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1499 struct qedr_create_srq_ureq ureq = {};
1500 u64 pbl_base_addr, phy_prod_pair_addr;
1501 struct qedr_srq_hwq_info *hw_srq;
1502 u32 page_cnt, page_size;
1503 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1506 DP_DEBUG(dev, QEDR_MSG_QP,
1507 "create SRQ called from %s (pd %p)\n",
1508 (udata) ? "User lib" : "kernel", pd);
1510 rc = qedr_check_srq_params(dev, init_attr, udata);
1515 hw_srq = &srq->hw_srq;
1516 spin_lock_init(&srq->lock);
1518 hw_srq->max_wr = init_attr->attr.max_wr;
1519 hw_srq->max_sges = init_attr->attr.max_sge;
1522 if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1525 "create srq: problem copying data from user space\n");
1529 rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1533 page_cnt = srq->usrq.pbl_info.num_pbes;
1534 pbl_base_addr = srq->usrq.pbl_tbl->pa;
1535 phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1536 page_size = PAGE_SIZE;
1538 struct qed_chain *pbl;
1540 rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1545 page_cnt = qed_chain_get_page_cnt(pbl);
1546 pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1547 phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1548 page_size = QED_CHAIN_PAGE_SIZE;
1551 in_params.pd_id = pd->pd_id;
1552 in_params.pbl_base_addr = pbl_base_addr;
1553 in_params.prod_pair_addr = phy_prod_pair_addr;
1554 in_params.num_pages = page_cnt;
1555 in_params.page_size = page_size;
1557 rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1561 srq->srq_id = out_params.srq_id;
1564 rc = qedr_copy_srq_uresp(dev, srq, udata);
1569 rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1573 DP_DEBUG(dev, QEDR_MSG_SRQ,
1574 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1578 destroy_in_params.srq_id = srq->srq_id;
1580 dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1583 qedr_free_srq_user_params(srq);
1585 qedr_free_srq_kernel_params(srq);
1590 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1592 struct qed_rdma_destroy_srq_in_params in_params = {};
1593 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1594 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1596 xa_erase_irq(&dev->srqs, srq->srq_id);
1597 in_params.srq_id = srq->srq_id;
1598 dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1601 qedr_free_srq_user_params(srq);
1603 qedr_free_srq_kernel_params(srq);
1605 DP_DEBUG(dev, QEDR_MSG_SRQ,
1606 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1610 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1611 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1613 struct qed_rdma_modify_srq_in_params in_params = {};
1614 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1615 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1618 if (attr_mask & IB_SRQ_MAX_WR) {
1620 "modify srq: invalid attribute mask=0x%x specified for %p\n",
1625 if (attr_mask & IB_SRQ_LIMIT) {
1626 if (attr->srq_limit >= srq->hw_srq.max_wr) {
1628 "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1629 attr->srq_limit, srq->hw_srq.max_wr);
1633 in_params.srq_id = srq->srq_id;
1634 in_params.wqe_limit = attr->srq_limit;
1635 rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1640 srq->srq_limit = attr->srq_limit;
1642 DP_DEBUG(dev, QEDR_MSG_SRQ,
1643 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1649 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1652 struct ib_qp_init_attr *attrs,
1653 bool fmr_and_reserved_lkey,
1654 struct qed_rdma_create_qp_in_params *params)
1656 /* QP handle to be written in an async event */
1657 params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1658 params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1660 params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1661 params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1662 params->pd = pd->pd_id;
1663 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1664 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1665 params->stats_queue = 0;
1667 params->use_srq = false;
1670 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1673 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1674 params->srq_id = qp->srq->srq_id;
1675 params->use_srq = true;
1679 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1681 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1690 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1694 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1696 struct qed_rdma_create_qp_out_params *out_params)
1698 qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1699 qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1701 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1702 &qp->usq.pbl_info, FW_PAGE_SHIFT);
1704 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1705 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1708 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1709 &qp->urq.pbl_info, FW_PAGE_SHIFT);
1712 static void qedr_cleanup_user(struct qedr_dev *dev,
1713 struct qedr_ucontext *ctx,
1716 ib_umem_release(qp->usq.umem);
1717 qp->usq.umem = NULL;
1719 ib_umem_release(qp->urq.umem);
1720 qp->urq.umem = NULL;
1722 if (rdma_protocol_roce(&dev->ibdev, 1)) {
1723 qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1724 qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1726 kfree(qp->usq.pbl_tbl);
1727 kfree(qp->urq.pbl_tbl);
1730 if (qp->usq.db_rec_data) {
1731 qedr_db_recovery_del(dev, qp->usq.db_addr,
1732 &qp->usq.db_rec_data->db_data);
1733 rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1736 if (qp->urq.db_rec_data) {
1737 qedr_db_recovery_del(dev, qp->urq.db_addr,
1738 &qp->urq.db_rec_data->db_data);
1739 rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1742 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1743 qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1744 &qp->urq.db_rec_db2_data);
1747 static int qedr_create_user_qp(struct qedr_dev *dev,
1750 struct ib_udata *udata,
1751 struct ib_qp_init_attr *attrs)
1753 struct qed_rdma_create_qp_in_params in_params;
1754 struct qed_rdma_create_qp_out_params out_params;
1755 struct qedr_pd *pd = get_qedr_pd(ibpd);
1756 struct qedr_create_qp_uresp uresp;
1757 struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
1758 struct qedr_create_qp_ureq ureq;
1759 int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1762 qp->create_type = QEDR_QP_CREATE_USER;
1763 memset(&ureq, 0, sizeof(ureq));
1764 rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
1766 DP_ERR(dev, "Problem copying data from user space\n");
1770 /* SQ - read access only (0) */
1771 rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1772 ureq.sq_len, true, 0, alloc_and_init);
1777 /* RQ - read access only (0) */
1778 rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1779 ureq.rq_len, true, 0, alloc_and_init);
1784 memset(&in_params, 0, sizeof(in_params));
1785 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1786 in_params.qp_handle_lo = ureq.qp_handle_lo;
1787 in_params.qp_handle_hi = ureq.qp_handle_hi;
1788 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1789 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1791 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1792 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1796 SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1798 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1799 &in_params, &out_params);
1806 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1807 qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1809 qp->qp_id = out_params.qp_id;
1810 qp->icid = out_params.icid;
1812 rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1816 /* db offset was calculated in copy_qp_uresp, now set in the user q */
1818 qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1819 qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1821 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1822 qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1824 /* Calculate the db_rec_db2 data here since it is constant, so there is no
1825 * need to reflect it from user space
1827 qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1828 qp->urq.db_rec_db2_data.data.value =
1829 cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1832 rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1833 &qp->usq.db_rec_data->db_data,
1839 rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1840 &qp->urq.db_rec_data->db_data,
1846 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1847 rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1848 &qp->urq.db_rec_db2_data,
1854 qedr_qp_user_print(dev, qp);
1858 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1860 DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1863 qedr_cleanup_user(dev, ctx, qp);
1867 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1871 qp->sq.db = dev->db_addr +
1872 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1873 qp->sq.db_data.data.icid = qp->icid;
1875 rc = qedr_db_recovery_add(dev, qp->sq.db,
1882 qp->rq.db = dev->db_addr +
1883 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1884 qp->rq.db_data.data.icid = qp->icid;
1885 qp->rq.iwarp_db2 = dev->db_addr +
1886 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1887 qp->rq.iwarp_db2_data.data.icid = qp->icid;
1888 qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1890 rc = qedr_db_recovery_add(dev, qp->rq.db,
1897 rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
1898 &qp->rq.iwarp_db2_data,
1905 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1907 struct qed_rdma_create_qp_in_params *in_params,
1908 u32 n_sq_elems, u32 n_rq_elems)
1910 struct qed_rdma_create_qp_out_params out_params;
1913 rc = dev->ops->common->chain_alloc(dev->cdev,
1914 QED_CHAIN_USE_TO_PRODUCE,
1916 QED_CHAIN_CNT_TYPE_U32,
1918 QEDR_SQE_ELEMENT_SIZE,
1924 in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1925 in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1927 rc = dev->ops->common->chain_alloc(dev->cdev,
1928 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1930 QED_CHAIN_CNT_TYPE_U32,
1932 QEDR_RQE_ELEMENT_SIZE,
1937 in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1938 in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1940 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1941 in_params, &out_params);
1946 qp->qp_id = out_params.qp_id;
1947 qp->icid = out_params.icid;
1949 return qedr_set_roce_db_info(dev, qp);
1953 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1955 struct qed_rdma_create_qp_in_params *in_params,
1956 u32 n_sq_elems, u32 n_rq_elems)
1958 struct qed_rdma_create_qp_out_params out_params;
1959 struct qed_chain_ext_pbl ext_pbl;
1962 in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1963 QEDR_SQE_ELEMENT_SIZE,
1964 QED_CHAIN_MODE_PBL);
1965 in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1966 QEDR_RQE_ELEMENT_SIZE,
1967 QED_CHAIN_MODE_PBL);
1969 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1970 in_params, &out_params);
1975 /* Now we allocate the chain */
1976 ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1977 ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1979 rc = dev->ops->common->chain_alloc(dev->cdev,
1980 QED_CHAIN_USE_TO_PRODUCE,
1982 QED_CHAIN_CNT_TYPE_U32,
1984 QEDR_SQE_ELEMENT_SIZE,
1985 &qp->sq.pbl, &ext_pbl);
1990 ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1991 ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1993 rc = dev->ops->common->chain_alloc(dev->cdev,
1994 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1996 QED_CHAIN_CNT_TYPE_U32,
1998 QEDR_RQE_ELEMENT_SIZE,
1999 &qp->rq.pbl, &ext_pbl);
2004 qp->qp_id = out_params.qp_id;
2005 qp->icid = out_params.icid;
2007 return qedr_set_iwarp_db_info(dev, qp);
2010 dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2015 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2017 dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2018 kfree(qp->wqe_wr_id);
2020 dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2021 kfree(qp->rqe_wr_id);
2023 /* GSI qp is not registered to db mechanism so no need to delete */
2024 if (qp->qp_type == IB_QPT_GSI)
2027 qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2030 qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2032 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2033 qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2034 &qp->rq.iwarp_db2_data);
2038 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2041 struct ib_qp_init_attr *attrs)
2043 struct qed_rdma_create_qp_in_params in_params;
2044 struct qedr_pd *pd = get_qedr_pd(ibpd);
2050 memset(&in_params, 0, sizeof(in_params));
2051 qp->create_type = QEDR_QP_CREATE_KERNEL;
2053 /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2054 * the ring. The ring should allow at least a single WR, even if the
2055 * user requested none, due to allocation issues.
2056 * We should add an extra WR since the prod and cons indices of
2057 * wqe_wr_id are managed in such a way that the WQ is considered full
2058 * when (prod+1)%max_wr==cons. We currently don't do that because we
2059 * double the number of entries due to an iSER issue that pushes far more
2060 * WRs than indicated. If we decline its ib_post_send() then we get
2061 * error prints in the dmesg we'd like to avoid.
2063 qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2066 qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2068 if (!qp->wqe_wr_id) {
2069 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2073 /* QP handle to be written in CQE */
2074 in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2075 in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2077 /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2078 * the ring. The ring should allow at least a single WR, even if the
2079 * user requested none, due to allocation issues.
2081 qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2083 /* Allocate driver internal RQ array */
2084 qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2086 if (!qp->rqe_wr_id) {
2088 "create qp: failed RQ shadow memory allocation\n");
2089 kfree(qp->wqe_wr_id);
2093 qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2095 n_sq_entries = attrs->cap.max_send_wr;
2096 n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2097 n_sq_entries = max_t(u32, n_sq_entries, 1);
2098 n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2100 n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2102 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2103 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2104 n_sq_elems, n_rq_elems);
2106 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2107 n_sq_elems, n_rq_elems);
2109 qedr_cleanup_kernel(dev, qp);
2114 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2115 struct ib_qp_init_attr *attrs,
2116 struct ib_udata *udata)
2118 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2119 struct qedr_pd *pd = get_qedr_pd(ibpd);
2124 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2125 udata ? "user library" : "kernel", pd);
2127 rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2131 DP_DEBUG(dev, QEDR_MSG_QP,
2132 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2133 udata ? "user library" : "kernel", attrs->event_handler, pd,
2134 get_qedr_cq(attrs->send_cq),
2135 get_qedr_cq(attrs->send_cq)->icid,
2136 get_qedr_cq(attrs->recv_cq),
2137 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2139 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2141 DP_ERR(dev, "create qp: failed allocating memory\n");
2142 return ERR_PTR(-ENOMEM);
2145 qedr_set_common_qp_params(dev, qp, pd, attrs);
2147 if (attrs->qp_type == IB_QPT_GSI) {
2148 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2155 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2157 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2162 qp->ibqp.qp_num = qp->qp_id;
2164 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2165 rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2175 return ERR_PTR(-EFAULT);
2178 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2181 case QED_ROCE_QP_STATE_RESET:
2182 return IB_QPS_RESET;
2183 case QED_ROCE_QP_STATE_INIT:
2185 case QED_ROCE_QP_STATE_RTR:
2187 case QED_ROCE_QP_STATE_RTS:
2189 case QED_ROCE_QP_STATE_SQD:
2191 case QED_ROCE_QP_STATE_ERR:
2193 case QED_ROCE_QP_STATE_SQE:
2199 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2200 enum ib_qp_state qp_state)
2204 return QED_ROCE_QP_STATE_RESET;
2206 return QED_ROCE_QP_STATE_INIT;
2208 return QED_ROCE_QP_STATE_RTR;
2210 return QED_ROCE_QP_STATE_RTS;
2212 return QED_ROCE_QP_STATE_SQD;
2214 return QED_ROCE_QP_STATE_ERR;
2216 return QED_ROCE_QP_STATE_ERR;
2220 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2222 qed_chain_reset(&qph->pbl);
2226 qph->db_data.data.value = cpu_to_le16(0);
2229 static int qedr_update_qp_state(struct qedr_dev *dev,
2231 enum qed_roce_qp_state cur_state,
2232 enum qed_roce_qp_state new_state)
2236 if (new_state == cur_state)
2239 switch (cur_state) {
2240 case QED_ROCE_QP_STATE_RESET:
2241 switch (new_state) {
2242 case QED_ROCE_QP_STATE_INIT:
2243 qp->prev_wqe_size = 0;
2244 qedr_reset_qp_hwq_info(&qp->sq);
2245 qedr_reset_qp_hwq_info(&qp->rq);
2252 case QED_ROCE_QP_STATE_INIT:
2253 switch (new_state) {
2254 case QED_ROCE_QP_STATE_RTR:
2255 /* Update doorbell (in case post_recv was
2256 * done before move to RTR)
2259 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2260 writel(qp->rq.db_data.raw, qp->rq.db);
2263 case QED_ROCE_QP_STATE_ERR:
2266 /* Invalid state change. */
2271 case QED_ROCE_QP_STATE_RTR:
2273 switch (new_state) {
2274 case QED_ROCE_QP_STATE_RTS:
2276 case QED_ROCE_QP_STATE_ERR:
2279 /* Invalid state change. */
2284 case QED_ROCE_QP_STATE_RTS:
2286 switch (new_state) {
2287 case QED_ROCE_QP_STATE_SQD:
2289 case QED_ROCE_QP_STATE_ERR:
2292 /* Invalid state change. */
2297 case QED_ROCE_QP_STATE_SQD:
2299 switch (new_state) {
2300 case QED_ROCE_QP_STATE_RTS:
2301 case QED_ROCE_QP_STATE_ERR:
2304 /* Invalid state change. */
2309 case QED_ROCE_QP_STATE_ERR:
2311 switch (new_state) {
2312 case QED_ROCE_QP_STATE_RESET:
2313 if ((qp->rq.prod != qp->rq.cons) ||
2314 (qp->sq.prod != qp->sq.cons)) {
2316 "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2317 qp->rq.prod, qp->rq.cons, qp->sq.prod,
2335 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2336 int attr_mask, struct ib_udata *udata)
2338 struct qedr_qp *qp = get_qedr_qp(ibqp);
2339 struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2340 struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2341 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2342 enum ib_qp_state old_qp_state, new_qp_state;
2343 enum qed_roce_qp_state cur_state;
2346 DP_DEBUG(dev, QEDR_MSG_QP,
2347 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2350 old_qp_state = qedr_get_ibqp_state(qp->state);
2351 if (attr_mask & IB_QP_STATE)
2352 new_qp_state = attr->qp_state;
2354 new_qp_state = old_qp_state;
2356 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2357 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2358 ibqp->qp_type, attr_mask)) {
2360 "modify qp: invalid attribute mask=0x%x specified for\n"
2361 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2362 attr_mask, qp->qp_id, ibqp->qp_type,
2363 old_qp_state, new_qp_state);
2369 /* Translate the masks... */
2370 if (attr_mask & IB_QP_STATE) {
2371 SET_FIELD(qp_params.modify_flags,
2372 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2373 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2376 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2377 qp_params.sqd_async = true;
2379 if (attr_mask & IB_QP_PKEY_INDEX) {
2380 SET_FIELD(qp_params.modify_flags,
2381 QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2382 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2387 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2390 if (attr_mask & IB_QP_QKEY)
2391 qp->qkey = attr->qkey;
2393 if (attr_mask & IB_QP_ACCESS_FLAGS) {
2394 SET_FIELD(qp_params.modify_flags,
2395 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2396 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2397 IB_ACCESS_REMOTE_READ;
2398 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2399 IB_ACCESS_REMOTE_WRITE;
2400 qp_params.incoming_atomic_en = attr->qp_access_flags &
2401 IB_ACCESS_REMOTE_ATOMIC;
2404 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2405 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2408 if (attr_mask & IB_QP_PATH_MTU) {
2409 if (attr->path_mtu < IB_MTU_256 ||
2410 attr->path_mtu > IB_MTU_4096) {
2411 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2415 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2416 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)));
2422 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2423 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2426 SET_FIELD(qp_params.modify_flags,
2427 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2429 qp_params.traffic_class_tos = grh->traffic_class;
2430 qp_params.flow_label = grh->flow_label;
2431 qp_params.hop_limit_ttl = grh->hop_limit;
2433 qp->sgid_idx = grh->sgid_index;
2435 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2438 "modify qp: problems with GID index %d (rc=%d)\n",
2439 grh->sgid_index, rc);
2443 rc = qedr_get_dmac(dev, &attr->ah_attr,
2444 qp_params.remote_mac_addr);
2448 qp_params.use_local_mac = true;
2449 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2451 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2452 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2453 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2454 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2455 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2456 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2457 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2458 qp_params.remote_mac_addr);
2460 qp_params.mtu = qp->mtu;
2461 qp_params.lb_indication = false;
2464 if (!qp_params.mtu) {
2465 /* Stay with current MTU */
2467 qp_params.mtu = qp->mtu;
2470 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2473 if (attr_mask & IB_QP_TIMEOUT) {
2474 SET_FIELD(qp_params.modify_flags,
2475 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2477 /* The received timeout value is an exponent used like this:
2478 * "12.7.34 LOCAL ACK TIMEOUT
2479 * Value representing the transport (ACK) timeout for use by
2480 * the remote, expressed as: 4.096 * 2^timeout [usec]"
2481 * The FW expects timeout in msec so we need to divide the usec
2482 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2483 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2484 * The value of zero means infinite so we use a 'max_t' to make
2485 * sure that sub 1 msec values will be configured as 1 msec.
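 * Worked example (illustrative values): attr->timeout = 14 means
 * 4.096 * 2^14 usec ~= 67 msec, while 1 << (14 - 8) = 64 msec, which is
 * close enough given the FW's msec granularity.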
2488 qp_params.ack_timeout =
2489 1 << max_t(int, attr->timeout - 8, 0);
2491 qp_params.ack_timeout = 0;
2494 if (attr_mask & IB_QP_RETRY_CNT) {
2495 SET_FIELD(qp_params.modify_flags,
2496 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2497 qp_params.retry_cnt = attr->retry_cnt;
2500 if (attr_mask & IB_QP_RNR_RETRY) {
2501 SET_FIELD(qp_params.modify_flags,
2502 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2503 qp_params.rnr_retry_cnt = attr->rnr_retry;
2506 if (attr_mask & IB_QP_RQ_PSN) {
2507 SET_FIELD(qp_params.modify_flags,
2508 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2509 qp_params.rq_psn = attr->rq_psn;
2510 qp->rq_psn = attr->rq_psn;
2513 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2514 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2517 "unsupported max_rd_atomic=%d, supported=%d\n",
2518 attr->max_rd_atomic,
2519 dev->attr.max_qp_req_rd_atomic_resc);
2523 SET_FIELD(qp_params.modify_flags,
2524 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2525 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2528 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2529 SET_FIELD(qp_params.modify_flags,
2530 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2531 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2534 if (attr_mask & IB_QP_SQ_PSN) {
2535 SET_FIELD(qp_params.modify_flags,
2536 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2537 qp_params.sq_psn = attr->sq_psn;
2538 qp->sq_psn = attr->sq_psn;
2541 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2542 if (attr->max_dest_rd_atomic >
2543 dev->attr.max_qp_resp_rd_atomic_resc) {
2545 "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2546 attr->max_dest_rd_atomic,
2547 dev->attr.max_qp_resp_rd_atomic_resc);
2553 SET_FIELD(qp_params.modify_flags,
2554 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2555 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2558 if (attr_mask & IB_QP_DEST_QPN) {
2559 SET_FIELD(qp_params.modify_flags,
2560 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2562 qp_params.dest_qp = attr->dest_qp_num;
2563 qp->dest_qp_num = attr->dest_qp_num;
2566 cur_state = qp->state;
2568 /* Update the QP state before the actual ramrod to prevent a race with
2569 * fast path. Modifying the QP state to error will cause the device to
2570 * flush the CQEs, and while polling, the flushed CQEs would be considered
2571 * a potential issue if the QP isn't in the error state.
2573 if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2574 !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2575 qp->state = QED_ROCE_QP_STATE_ERR;
2577 if (qp->qp_type != IB_QPT_GSI)
2578 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2579 qp->qed_qp, &qp_params);
2581 if (attr_mask & IB_QP_STATE) {
2582 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2583 rc = qedr_update_qp_state(dev, qp, cur_state,
2584 qp_params.new_state);
2585 qp->state = qp_params.new_state;
2592 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2594 int ib_qp_acc_flags = 0;
2596 if (params->incoming_rdma_write_en)
2597 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2598 if (params->incoming_rdma_read_en)
2599 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2600 if (params->incoming_atomic_en)
2601 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2602 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2603 return ib_qp_acc_flags;
2606 int qedr_query_qp(struct ib_qp *ibqp,
2607 struct ib_qp_attr *qp_attr,
2608 int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2610 struct qed_rdma_query_qp_out_params params;
2611 struct qedr_qp *qp = get_qedr_qp(ibqp);
2612 struct qedr_dev *dev = qp->dev;
2615 memset(&params, 0, sizeof(params));
2617 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2621 memset(qp_attr, 0, sizeof(*qp_attr));
2622 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2624 qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2625 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2626 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2627 qp_attr->path_mig_state = IB_MIG_MIGRATED;
2628 qp_attr->rq_psn = params.rq_psn;
2629 qp_attr->sq_psn = params.sq_psn;
2630 qp_attr->dest_qp_num = params.dest_qp;
2632 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2634 qp_attr->cap.max_send_wr = qp->sq.max_wr;
2635 qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2636 qp_attr->cap.max_send_sge = qp->sq.max_sges;
2637 qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2638 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2639 qp_init_attr->cap = qp_attr->cap;
2641 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2642 rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2643 params.flow_label, qp->sgid_idx,
2644 params.hop_limit_ttl, params.traffic_class_tos);
2645 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2646 rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2647 rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2648 qp_attr->timeout = params.timeout;
2649 qp_attr->rnr_retry = params.rnr_retry;
2650 qp_attr->retry_cnt = params.retry_cnt;
2651 qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2652 qp_attr->pkey_index = params.pkey_index;
2653 qp_attr->port_num = 1;
2654 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2655 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2656 qp_attr->alt_pkey_index = 0;
2657 qp_attr->alt_port_num = 0;
2658 qp_attr->alt_timeout = 0;
2659 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2661 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2662 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2663 qp_attr->max_rd_atomic = params.max_rd_atomic;
2664 qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2666 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2667 qp_attr->cap.max_inline_data);
2673 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2674 struct ib_udata *udata)
2676 struct qedr_ucontext *ctx =
2677 rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2681 if (qp->qp_type != IB_QPT_GSI) {
2682 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2687 if (qp->create_type == QEDR_QP_CREATE_USER)
2688 qedr_cleanup_user(dev, ctx, qp);
2690 qedr_cleanup_kernel(dev, qp);
2695 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2697 struct qedr_qp *qp = get_qedr_qp(ibqp);
2698 struct qedr_dev *dev = qp->dev;
2699 struct ib_qp_attr attr;
2702 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2705 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2706 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2707 (qp->state != QED_ROCE_QP_STATE_ERR) &&
2708 (qp->state != QED_ROCE_QP_STATE_INIT)) {
2710 attr.qp_state = IB_QPS_ERR;
2711 attr_mask |= IB_QP_STATE;
2713 /* Change the QP state to ERROR */
2714 qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2717 /* If connection establishment started, the WAIT_FOR_CONNECT
2718 * bit will be on, and we need to wait for the establishment
2719 * to complete before destroying the qp.
2721 if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2722 &qp->iwarp_cm_flags))
2723 wait_for_completion(&qp->iwarp_cm_comp);
2725 /* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2726 * bit will be on, and we need to wait for the disconnect to
2727 * complete before continuing. We can use the same completion,
2728 * iwarp_cm_comp, since this is the only place that waits for
2729 * this completion and it is sequential. In addition,
2730 * disconnect can't occur before the connection is fully
2731 * established, therefore if WAIT_FOR_DISCONNECT is on it
2732 * means WAIT_FOR_CONNECT is also on and the completion for
2733 * CONNECT already occurred.
2735 if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2736 &qp->iwarp_cm_flags))
2737 wait_for_completion(&qp->iwarp_cm_comp);
2740 if (qp->qp_type == IB_QPT_GSI)
2741 qedr_destroy_gsi_qp(dev);
2743 /* We need to remove the entry from the xarray before we release the
2744 * qp_id to avoid a race of the qp_id being reallocated and failing
2747 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2748 xa_erase(&dev->qps, qp->qp_id);
2750 qedr_free_qp_resources(dev, qp, udata);
2752 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2753 qedr_iw_qp_rem_ref(&qp->ibqp);
2758 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2759 struct ib_udata *udata)
2761 struct qedr_ah *ah = get_qedr_ah(ibah);
2763 rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2768 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2770 struct qedr_ah *ah = get_qedr_ah(ibah);
2772 rdma_destroy_ah_attr(&ah->attr);
2775 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2777 struct qedr_pbl *pbl, *tmp;
2779 if (info->pbl_table)
2780 list_add_tail(&info->pbl_table->list_entry,
2781 &info->free_pbl_list);
2783 if (!list_empty(&info->inuse_pbl_list))
2784 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2786 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2787 list_del(&pbl->list_entry);
2788 qedr_free_pbl(dev, &info->pbl_info, pbl);
2792 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2793 size_t page_list_len, bool two_layered)
2795 struct qedr_pbl *tmp;
2798 INIT_LIST_HEAD(&info->free_pbl_list);
2799 INIT_LIST_HEAD(&info->inuse_pbl_list);
2801 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2802 page_list_len, two_layered);
2806 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2807 if (IS_ERR(info->pbl_table)) {
2808 rc = PTR_ERR(info->pbl_table);
2812 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2813 &info->pbl_table->pa);
2815 /* In the usual case we use two PBLs, so we add one to the free
2816 * list and allocate another one
2818 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2820 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2824 list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2826 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2830 free_mr_info(dev, info);
2835 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2836 u64 usr_addr, int acc, struct ib_udata *udata)
2838 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2843 pd = get_qedr_pd(ibpd);
2844 DP_DEBUG(dev, QEDR_MSG_MR,
2845 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2846 pd->pd_id, start, len, usr_addr, acc);
2848 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2849 return ERR_PTR(-EINVAL);
2851 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2855 mr->type = QEDR_MR_USER;
2857 mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2858 if (IS_ERR(mr->umem)) {
2863 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2867 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2868 &mr->info.pbl_info, PAGE_SHIFT);
2870 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2872 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2876 /* Index only, 18 bit long, lkey = itid << 8 | key */
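/* For example (hypothetical values): itid 0x0123 with key 0xab yields
 * lkey 0x123ab -- the itid sits in the upper bits and the key in the low byte.
 */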
2877 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2879 mr->hw_mr.pd = pd->pd_id;
2880 mr->hw_mr.local_read = 1;
2881 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2882 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2883 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2884 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2885 mr->hw_mr.mw_bind = false;
2886 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2887 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2888 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2889 mr->hw_mr.page_size_log = PAGE_SHIFT;
2890 mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2891 mr->hw_mr.length = len;
2892 mr->hw_mr.vaddr = usr_addr;
2893 mr->hw_mr.zbva = false;
2894 mr->hw_mr.phy_mr = false;
2895 mr->hw_mr.dma_mr = false;
2897 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2899 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2903 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2904 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2905 mr->hw_mr.remote_atomic)
2906 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2908 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2913 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2915 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2921 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2923 struct qedr_mr *mr = get_qedr_mr(ib_mr);
2924 struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2927 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2931 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2933 if (mr->type != QEDR_MR_DMA)
2934 free_mr_info(dev, &mr->info);
2936 /* it could be user registered memory. */
2937 ib_umem_release(mr->umem);
2944 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2945 int max_page_list_len)
2947 struct qedr_pd *pd = get_qedr_pd(ibpd);
2948 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2952 DP_DEBUG(dev, QEDR_MSG_MR,
2953 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2956 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2961 mr->type = QEDR_MR_FRMR;
2963 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2967 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2969 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2973 /* Index only, 18 bit long, lkey = itid << 8 | key */
2974 mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2976 mr->hw_mr.pd = pd->pd_id;
2977 mr->hw_mr.local_read = 1;
2978 mr->hw_mr.local_write = 0;
2979 mr->hw_mr.remote_read = 0;
2980 mr->hw_mr.remote_write = 0;
2981 mr->hw_mr.remote_atomic = 0;
2982 mr->hw_mr.mw_bind = false;
2983 mr->hw_mr.pbl_ptr = 0;
2984 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2985 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2987 mr->hw_mr.length = 0;
2988 mr->hw_mr.vaddr = 0;
2989 mr->hw_mr.zbva = false;
2990 mr->hw_mr.phy_mr = true;
2991 mr->hw_mr.dma_mr = false;
2993 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2995 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2999 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3000 mr->ibmr.rkey = mr->ibmr.lkey;
3002 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
3006 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3012 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3017 if (mr_type != IB_MR_TYPE_MEM_REG)
3018 return ERR_PTR(-EINVAL);
3020 mr = __qedr_alloc_mr(ibpd, max_num_sg);
3023 return ERR_PTR(-EINVAL);
3028 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3030 struct qedr_mr *mr = get_qedr_mr(ibmr);
3031 struct qedr_pbl *pbl_table;
3032 struct regpair *pbe;
3035 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3036 DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
3040 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3043 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3044 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3045 pbe = (struct regpair *)pbl_table->va;
3046 pbe += mr->npages % pbes_in_page;
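/* Illustration (assumed sizes): a 4 KiB pbl_size holds 512 64-bit PBEs, so
 * npages = 1000 selects pbl_table[1] at PBE offset 1000 % 512 = 488.
 */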
3047 pbe->lo = cpu_to_le32((u32)addr);
3048 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3055 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3057 int work = info->completed - info->completed_handled - 1;
3059 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3060 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3061 struct qedr_pbl *pbl;
3063 /* Free all the page lists that can be freed (all the ones that were
3064 * invalidated), under the assumption that if an FMR completed
3065 * successfully, then any invalidate operation issued before it has
3066 * completed as well
3068 pbl = list_first_entry(&info->inuse_pbl_list,
3069 struct qedr_pbl, list_entry);
3070 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3071 info->completed_handled++;
3075 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3076 int sg_nents, unsigned int *sg_offset)
3078 struct qedr_mr *mr = get_qedr_mr(ibmr);
3082 handle_completed_mrs(mr->dev, &mr->info);
3083 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3086 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3088 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3089 struct qedr_pd *pd = get_qedr_pd(ibpd);
3093 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3095 return ERR_PTR(-ENOMEM);
3097 mr->type = QEDR_MR_DMA;
3099 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3101 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3105 /* index only, 18 bit long, lkey = itid << 8 | key */
3106 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3107 mr->hw_mr.pd = pd->pd_id;
3108 mr->hw_mr.local_read = 1;
3109 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3110 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3111 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3112 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3113 mr->hw_mr.dma_mr = true;
3115 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3117 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3121 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3122 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3123 mr->hw_mr.remote_atomic)
3124 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3126 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3130 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3136 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
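/* Illustration (assumed values): with max_wr = 4, prod = 2 and cons = 3 the
 * queue is full, since (2 + 1) % 4 == 3; one slot is effectively left unused
 * so that a full queue can be distinguished from an empty one.
 */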
3138 return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3141 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3145 for (i = 0; i < num_sge; i++)
3146 len += sg_list[i].length;
3151 static void swap_wqe_data64(u64 *p)
3155 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3156 *p = cpu_to_be64(cpu_to_le64(*p));
3159 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3160 struct qedr_qp *qp, u8 *wqe_size,
3161 const struct ib_send_wr *wr,
3162 const struct ib_send_wr **bad_wr,
3165 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3166 char *seg_prt, *wqe;
3169 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3170 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3184 /* Copy data inline */
3185 for (i = 0; i < wr->num_sge; i++) {
3186 u32 len = wr->sg_list[i].length;
3187 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3192 /* New segment required */
3194 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3196 seg_siz = sizeof(struct rdma_sq_common_wqe);
3200 /* Calculate currently allowed length */
3201 cur = min_t(u32, len, seg_siz);
3202 memcpy(seg_prt, src, cur);
3204 /* Update segment variables */
3208 /* Update sge variables */
3212 /* Swap fully-completed segments */
3214 swap_wqe_data64((u64 *)wqe);
3218 /* Swap the last, partially filled segment */
3220 swap_wqe_data64((u64 *)wqe);
3225 #define RQ_SGE_SET(sge, vaddr, vlength, vflags) \
3227 DMA_REGPAIR_LE(sge->addr, vaddr); \
3228 (sge)->length = cpu_to_le32(vlength); \
3229 (sge)->flags = cpu_to_le32(vflags); \
3232 #define SRQ_HDR_SET(hdr, vwr_id, num_sge) \
3234 DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \
3235 (hdr)->num_sges = num_sge; \
3238 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \
3240 DMA_REGPAIR_LE(sge->addr, vaddr); \
3241 (sge)->length = cpu_to_le32(vlength); \
3242 (sge)->l_key = cpu_to_le32(vlkey); \
3245 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3246 const struct ib_send_wr *wr)
3251 for (i = 0; i < wr->num_sge; i++) {
3252 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3254 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3255 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3256 sge->length = cpu_to_le32(wr->sg_list[i].length);
3257 data_size += wr->sg_list[i].length;
3261 *wqe_size += wr->num_sge;
3266 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3268 struct rdma_sq_rdma_wqe_1st *rwqe,
3269 struct rdma_sq_rdma_wqe_2nd *rwqe2,
3270 const struct ib_send_wr *wr,
3271 const struct ib_send_wr **bad_wr)
3273 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3274 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3276 if (wr->send_flags & IB_SEND_INLINE &&
3277 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3278 wr->opcode == IB_WR_RDMA_WRITE)) {
3281 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3282 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3283 bad_wr, &rwqe->flags, flags);
3286 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3289 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3291 struct rdma_sq_send_wqe_1st *swqe,
3292 struct rdma_sq_send_wqe_2st *swqe2,
3293 const struct ib_send_wr *wr,
3294 const struct ib_send_wr **bad_wr)
3296 memset(swqe2, 0, sizeof(*swqe2));
3297 if (wr->send_flags & IB_SEND_INLINE) {
3300 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3301 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3302 bad_wr, &swqe->flags, flags);
3305 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3308 static int qedr_prepare_reg(struct qedr_qp *qp,
3309 struct rdma_sq_fmr_wqe_1st *fwqe1,
3310 const struct ib_reg_wr *wr)
3312 struct qedr_mr *mr = get_qedr_mr(wr->mr);
3313 struct rdma_sq_fmr_wqe_2nd *fwqe2;
3315 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3316 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3317 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3318 fwqe1->l_key = wr->key;
3320 fwqe2->access_ctrl = 0;
3322 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3323 !!(wr->access & IB_ACCESS_REMOTE_READ));
3324 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3325 !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3326 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3327 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3328 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3329 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3330 !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3331 fwqe2->fmr_ctrl = 0;
3333 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3334 ilog2(mr->ibmr.page_size) - 12);
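/* The page size is encoded relative to 4 KiB, e.g. (illustrative values)
 * a 4 KiB page gives ilog2(4096) - 12 = 0 and a 2 MiB page gives
 * 21 - 12 = 9.
 */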
3336 fwqe2->length_hi = 0;
3337 fwqe2->length_lo = mr->ibmr.length;
3338 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3339 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3341 qp->wqe_wr_id[qp->sq.prod].mr = mr;
3346 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3349 case IB_WR_RDMA_WRITE:
3350 case IB_WR_RDMA_WRITE_WITH_IMM:
3351 return IB_WC_RDMA_WRITE;
3352 case IB_WR_SEND_WITH_IMM:
3354 case IB_WR_SEND_WITH_INV:
3356 case IB_WR_RDMA_READ:
3357 case IB_WR_RDMA_READ_WITH_INV:
3358 return IB_WC_RDMA_READ;
3359 case IB_WR_ATOMIC_CMP_AND_SWP:
3360 return IB_WC_COMP_SWAP;
3361 case IB_WR_ATOMIC_FETCH_AND_ADD:
3362 return IB_WC_FETCH_ADD;
3364 return IB_WC_REG_MR;
3365 case IB_WR_LOCAL_INV:
3366 return IB_WC_LOCAL_INV;
3372 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3373 const struct ib_send_wr *wr)
3375 int wq_is_full, err_wr, pbl_is_full;
3376 struct qedr_dev *dev = qp->dev;
3378 /* prevent SQ overflow and/or processing of a bad WR */
3379 err_wr = wr->num_sge > qp->sq.max_sges;
3380 wq_is_full = qedr_wq_is_full(&qp->sq);
3381 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3382 QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3383 if (wq_is_full || err_wr || pbl_is_full) {
3384 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3386 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3388 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3391 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3393 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3395 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3399 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3401 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3403 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3410 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3411 const struct ib_send_wr **bad_wr)
3413 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3414 struct qedr_qp *qp = get_qedr_qp(ibqp);
3415 struct rdma_sq_atomic_wqe_1st *awqe1;
3416 struct rdma_sq_atomic_wqe_2nd *awqe2;
3417 struct rdma_sq_atomic_wqe_3rd *awqe3;
3418 struct rdma_sq_send_wqe_2st *swqe2;
3419 struct rdma_sq_local_inv_wqe *iwqe;
3420 struct rdma_sq_rdma_wqe_2nd *rwqe2;
3421 struct rdma_sq_send_wqe_1st *swqe;
3422 struct rdma_sq_rdma_wqe_1st *rwqe;
3423 struct rdma_sq_fmr_wqe_1st *fwqe1;
3424 struct rdma_sq_common_wqe *wqe;
3429 if (!qedr_can_post_send(qp, wr)) {
3434 wqe = qed_chain_produce(&qp->sq.pbl);
3435 qp->wqe_wr_id[qp->sq.prod].signaled =
3436 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3439 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3440 !!(wr->send_flags & IB_SEND_SOLICITED));
3441 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3442 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3443 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3444 !!(wr->send_flags & IB_SEND_FENCE));
3445 wqe->prev_wqe_size = qp->prev_wqe_size;
3447 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3449 switch (wr->opcode) {
3450 case IB_WR_SEND_WITH_IMM:
3451 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3456 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3457 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3459 swqe2 = qed_chain_produce(&qp->sq.pbl);
3461 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3462 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3464 swqe->length = cpu_to_le32(length);
3465 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3466 qp->prev_wqe_size = swqe->wqe_size;
3467 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3470 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3471 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3474 swqe2 = qed_chain_produce(&qp->sq.pbl);
3475 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3477 swqe->length = cpu_to_le32(length);
3478 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3479 qp->prev_wqe_size = swqe->wqe_size;
3480 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3482 case IB_WR_SEND_WITH_INV:
3483 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3484 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3485 swqe2 = qed_chain_produce(&qp->sq.pbl);
3487 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3488 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3490 swqe->length = cpu_to_le32(length);
3491 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3492 qp->prev_wqe_size = swqe->wqe_size;
3493 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3496 case IB_WR_RDMA_WRITE_WITH_IMM:
3497 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3502 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3503 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3506 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3507 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3508 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3510 rwqe->length = cpu_to_le32(length);
3511 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3512 qp->prev_wqe_size = rwqe->wqe_size;
3513 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3515 case IB_WR_RDMA_WRITE:
3516 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3517 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3520 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3521 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3523 rwqe->length = cpu_to_le32(length);
3524 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3525 qp->prev_wqe_size = rwqe->wqe_size;
3526 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3528 case IB_WR_RDMA_READ_WITH_INV:
3529 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3530 /* fallthrough -- handled identically to RDMA READ */
3532 case IB_WR_RDMA_READ:
3533 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3534 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3537 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3538 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3540 rwqe->length = cpu_to_le32(length);
3541 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3542 qp->prev_wqe_size = rwqe->wqe_size;
3543 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3546 case IB_WR_ATOMIC_CMP_AND_SWP:
3547 case IB_WR_ATOMIC_FETCH_AND_ADD:
3548 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3549 awqe1->wqe_size = 4;
3551 awqe2 = qed_chain_produce(&qp->sq.pbl);
3552 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3553 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3555 awqe3 = qed_chain_produce(&qp->sq.pbl);
3557 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3558 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3559 DMA_REGPAIR_LE(awqe3->swap_data,
3560 atomic_wr(wr)->compare_add);
3562 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3563 DMA_REGPAIR_LE(awqe3->swap_data,
3564 atomic_wr(wr)->swap);
3565 DMA_REGPAIR_LE(awqe3->cmp_data,
3566 atomic_wr(wr)->compare_add);
3569 qedr_prepare_sq_sges(qp, NULL, wr);
3571 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3572 qp->prev_wqe_size = awqe1->wqe_size;
3575 case IB_WR_LOCAL_INV:
3576 iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3579 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3580 iwqe->inv_l_key = wr->ex.invalidate_rkey;
3581 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3582 qp->prev_wqe_size = iwqe->wqe_size;
3585 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3586 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3587 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3588 fwqe1->wqe_size = 2;
3590 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3592 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3597 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3598 qp->prev_wqe_size = fwqe1->wqe_size;
3601 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3610 /* Restore prod to its position before
3611 * this WR was processed
3613 value = le16_to_cpu(qp->sq.db_data.data.value);
3614 qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3616 /* Restore prev_wqe_size */
3617 qp->prev_wqe_size = wqe->prev_wqe_size;
3619 DP_ERR(dev, "POST SEND FAILED\n");
3625 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3626 const struct ib_send_wr **bad_wr)
3628 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3629 struct qedr_qp *qp = get_qedr_qp(ibqp);
3630 unsigned long flags;
3635 if (qp->qp_type == IB_QPT_GSI)
3636 return qedr_gsi_post_send(ibqp, wr, bad_wr);
3638 spin_lock_irqsave(&qp->q_lock, flags);
3640 if (rdma_protocol_roce(&dev->ibdev, 1)) {
3641 if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3642 (qp->state != QED_ROCE_QP_STATE_ERR) &&
3643 (qp->state != QED_ROCE_QP_STATE_SQD)) {
3644 spin_unlock_irqrestore(&qp->q_lock, flags);
3646 DP_DEBUG(dev, QEDR_MSG_CQ,
3647 "QP in wrong state! QP icid=0x%x state %d\n",
3648 qp->icid, qp->state);
3654 rc = __qedr_post_send(ibqp, wr, bad_wr);
3658 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3660 qedr_inc_sw_prod(&qp->sq);
3662 qp->sq.db_data.data.value++;
3668 * If there was a failure in the first WR then it will be triggered in
3669 * vain. However, this is not harmful (as long as the producer value is
3670 * unchanged). For performance reasons we avoid checking for this
3671 * redundant doorbell.
3673 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3674 * soon as we give the doorbell, we could get a completion
3675 * for this wr, therefore we need to make sure that the
3676 * memory is updated before giving the doorbell.
3677 * During qedr_poll_cq, rmb is called before accessing the
3678 * cqe. This covers for the smp_rmb as well.
3681 writel(qp->sq.db_data.raw, qp->sq.db);
3683 spin_unlock_irqrestore(&qp->q_lock, flags);
3688 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3692 /* Calculate the number of elements used based on the producer
3693 * and consumer counts, and subtract it from the maximum number of
3694 * work requests supported to get the number of elements left.
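 * e.g. (illustrative numbers): max_wr = 128, wr_prod_cnt = 130 and
 * wr_cons_cnt = 126 give used = 4 and 124 elements left; the unsigned
 * subtraction stays correct even if the counters wrap around.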
3696 used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3698 return hw_srq->max_wr - used;
3701 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3702 const struct ib_recv_wr **bad_wr)
3704 struct qedr_srq *srq = get_qedr_srq(ibsrq);
3705 struct qedr_srq_hwq_info *hw_srq;
3706 struct qedr_dev *dev = srq->dev;
3707 struct qed_chain *pbl;
3708 unsigned long flags;
3712 spin_lock_irqsave(&srq->lock, flags);
3714 hw_srq = &srq->hw_srq;
3715 pbl = &srq->hw_srq.pbl;
3717 struct rdma_srq_wqe_header *hdr;
3720 if (!qedr_srq_elem_left(hw_srq) ||
3721 wr->num_sge > srq->hw_srq.max_sges) {
3722 DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
3723 hw_srq->wr_prod_cnt,
3724 atomic_read(&hw_srq->wr_cons_cnt),
3725 wr->num_sge, srq->hw_srq.max_sges);
3731 hdr = qed_chain_produce(pbl);
3732 num_sge = wr->num_sge;
3733 /* Set number of sge and work request id in header */
3734 SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3736 srq->hw_srq.wr_prod_cnt++;
3740 DP_DEBUG(dev, QEDR_MSG_SRQ,
3741 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3742 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3744 for (i = 0; i < wr->num_sge; i++) {
3745 struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3747 /* Set SGE length, lkey and address */
3748 SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3749 wr->sg_list[i].length, wr->sg_list[i].lkey);
3751 DP_DEBUG(dev, QEDR_MSG_SRQ,
3752 "[%d]: len %d key %x addr %x:%x\n",
3753 i, srq_sge->length, srq_sge->l_key,
3754 srq_sge->addr.hi, srq_sge->addr.lo);
3758 /* Update WQE and SGE information before
3759 * updating producer.
3763 /* The SRQ producer is 8 bytes. The SGE producer index is updated in the
3764 * first 4 bytes and the WQE producer in the remaining 4 bytes.
3767 srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
3768 /* Make sure sge producer is updated first */
3770 srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
3775 DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3776 qed_chain_get_elem_left(pbl));
3777 spin_unlock_irqrestore(&srq->lock, flags);
3782 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3783 const struct ib_recv_wr **bad_wr)
3785 struct qedr_qp *qp = get_qedr_qp(ibqp);
3786 struct qedr_dev *dev = qp->dev;
3787 unsigned long flags;
3790 if (qp->qp_type == IB_QPT_GSI)
3791 return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3793 spin_lock_irqsave(&qp->q_lock, flags);
3795 if (qp->state == QED_ROCE_QP_STATE_RESET) {
3796 spin_unlock_irqrestore(&qp->q_lock, flags);
3804 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3805 QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3806 wr->num_sge > qp->rq.max_sges) {
3807 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3808 qed_chain_get_elem_left_u32(&qp->rq.pbl),
3809 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3815 for (i = 0; i < wr->num_sge; i++) {
3817 struct rdma_rq_sge *rqe =
3818 qed_chain_produce(&qp->rq.pbl);
3820 /* First one must include the number
3821 * of SGE in the list
3824 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3827 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3828 wr->sg_list[i].lkey);
3830 RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3831 wr->sg_list[i].length, flags);
3834 /* Special case of no SGEs. The FW requires between 1-4 SGEs,
3835 * so in this case we need to post one SGE with length zero. This is
3836 * because an RDMA write with immediate consumes an RQ entry.
3840 struct rdma_rq_sge *rqe =
3841 qed_chain_produce(&qp->rq.pbl);
3843 /* First one must include the number
3844 * of SGE in the list
3846 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3847 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3849 RQ_SGE_SET(rqe, 0, 0, flags);
3853 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3854 qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3856 qedr_inc_sw_prod(&qp->rq);
3858 /* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3859 * soon as we give the doorbell, we could get a completion
3860 * for this wr, therefore we need to make sure that the
3861 * memory is updated before giving the doorbell.
3862 * During qedr_poll_cq, rmb is called before accessing the
3863 * cqe. This covers for the smp_rmb as well.
3867 qp->rq.db_data.data.value++;
3869 writel(qp->rq.db_data.raw, qp->rq.db);
3871 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3872 writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3878 spin_unlock_irqrestore(&qp->q_lock, flags);
3883 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3885 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3887 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3891 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3893 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3896 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3897 resp_cqe->qp_handle.lo,
3902 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3904 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3906 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3909 /* Return latest CQE (needs processing) */
3910 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3912 return cq->latest_cqe;
3915 /* For FMRs we need to increase the completed counter used by the FMR
3916 * algorithm that determines whether a PBL can be freed.
3917 * This must be done whether or not the work request was signaled. For
3918 * that reason we call this function from the condition that checks if a WR
3919 * should be skipped, to make sure we don't miss it (possibly this FMR
3920 * operation was not signaled)
3922 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3924 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3925 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3928 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3929 struct qedr_cq *cq, int num_entries,
3930 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3935 while (num_entries && qp->sq.wqe_cons != hw_cons) {
3936 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3937 qedr_chk_if_fmr(qp);
3943 wc->status = status;
3946 wc->src_qp = qp->id;
3949 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3950 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3952 switch (wc->opcode) {
3953 case IB_WC_RDMA_WRITE:
3954 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3956 case IB_WC_COMP_SWAP:
3957 case IB_WC_FETCH_ADD:
3961 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3963 case IB_WC_RDMA_READ:
3965 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3975 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3976 qed_chain_consume(&qp->sq.pbl);
3977 qedr_inc_sw_cons(&qp->sq);
3983 static int qedr_poll_cq_req(struct qedr_dev *dev,
3984 struct qedr_qp *qp, struct qedr_cq *cq,
3985 int num_entries, struct ib_wc *wc,
3986 struct rdma_cqe_requester *req)
3990 switch (req->status) {
3991 case RDMA_CQE_REQ_STS_OK:
3992 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3995 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3996 if (qp->state != QED_ROCE_QP_STATE_ERR)
3997 DP_DEBUG(dev, QEDR_MSG_CQ,
3998 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3999 cq->icid, qp->icid);
4000 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4001 IB_WC_WR_FLUSH_ERR, 1);
4004 /* process all WQEs before the consumer */
4005 qp->state = QED_ROCE_QP_STATE_ERR;
4006 cnt = process_req(dev, qp, cq, num_entries, wc,
4007 req->sq_cons - 1, IB_WC_SUCCESS, 0);
4009 /* if we have extra WC fill it with actual error info */
4010 if (cnt < num_entries) {
4011 enum ib_wc_status wc_status;
4013 switch (req->status) {
4014 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4016 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4017 cq->icid, qp->icid);
4018 wc_status = IB_WC_BAD_RESP_ERR;
4020 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4022 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4023 cq->icid, qp->icid);
4024 wc_status = IB_WC_LOC_LEN_ERR;
4026 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4028 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4029 cq->icid, qp->icid);
4030 wc_status = IB_WC_LOC_QP_OP_ERR;
4032 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4034 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4035 cq->icid, qp->icid);
4036 wc_status = IB_WC_LOC_PROT_ERR;
4038 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4040 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4041 cq->icid, qp->icid);
4042 wc_status = IB_WC_MW_BIND_ERR;
4044 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4046 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4047 cq->icid, qp->icid);
4048 wc_status = IB_WC_REM_INV_REQ_ERR;
4050 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4052 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4053 cq->icid, qp->icid);
4054 wc_status = IB_WC_REM_ACCESS_ERR;
4056 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4058 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4059 cq->icid, qp->icid);
4060 wc_status = IB_WC_REM_OP_ERR;
4062 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4064 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4065 cq->icid, qp->icid);
4066 wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4068 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4070 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4071 cq->icid, qp->icid);
4072 wc_status = IB_WC_RETRY_EXC_ERR;
4076 "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4077 cq->icid, qp->icid);
4078 wc_status = IB_WC_GENERAL_ERR;
4080 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4088 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4091 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4092 return IB_WC_LOC_ACCESS_ERR;
4093 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4094 return IB_WC_LOC_LEN_ERR;
4095 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4096 return IB_WC_LOC_QP_OP_ERR;
4097 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4098 return IB_WC_LOC_PROT_ERR;
4099 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4100 return IB_WC_MW_BIND_ERR;
4101 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4102 return IB_WC_REM_INV_RD_REQ_ERR;
4103 case RDMA_CQE_RESP_STS_OK:
4104 return IB_WC_SUCCESS;
4106 return IB_WC_GENERAL_ERR;
4110 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4113 wc->status = IB_WC_SUCCESS;
4114 wc->byte_len = le32_to_cpu(resp->length);
4116 if (resp->flags & QEDR_RESP_IMM) {
4117 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4118 wc->wc_flags |= IB_WC_WITH_IMM;
4120 if (resp->flags & QEDR_RESP_RDMA)
4121 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4123 if (resp->flags & QEDR_RESP_INV)
4126 } else if (resp->flags & QEDR_RESP_INV) {
4127 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4128 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4130 if (resp->flags & QEDR_RESP_RDMA)
4133 } else if (resp->flags & QEDR_RESP_RDMA) {
4140 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4141 struct qedr_cq *cq, struct ib_wc *wc,
4142 struct rdma_cqe_responder *resp, u64 wr_id)
4144 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4145 wc->opcode = IB_WC_RECV;
4148 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4149 if (qedr_set_ok_cqe_resp_wc(resp, wc))
4151 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4152 cq, cq->icid, resp->flags);
4155 wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4156 if (wc->status == IB_WC_GENERAL_ERR)
4158 "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4159 cq, cq->icid, resp->status);
4162 /* Fill the rest of the WC */
4164 wc->src_qp = qp->id;
4169 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4170 struct qedr_cq *cq, struct ib_wc *wc,
4171 struct rdma_cqe_responder *resp)
4173 struct qedr_srq *srq = qp->srq;
4176 wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4177 le32_to_cpu(resp->srq_wr_id.lo), u64);
4179 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4180 wc->status = IB_WC_WR_FLUSH_ERR;
4184 wc->src_qp = qp->id;
4188 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
4190 atomic_inc(&srq->hw_srq.wr_cons_cnt);
4194 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4195 struct qedr_cq *cq, struct ib_wc *wc,
4196 struct rdma_cqe_responder *resp)
4198 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4200 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
4202 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4203 qed_chain_consume(&qp->rq.pbl);
4204 qedr_inc_sw_cons(&qp->rq);
4209 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4210 int num_entries, struct ib_wc *wc, u16 hw_cons)
4214 while (num_entries && qp->rq.wqe_cons != hw_cons) {
4216 wc->status = IB_WC_WR_FLUSH_ERR;
4219 wc->src_qp = qp->id;
4221 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4226 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4227 qed_chain_consume(&qp->rq.pbl);
4228 qedr_inc_sw_cons(&qp->rq);
4234 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4235 struct rdma_cqe_responder *resp, int *update)
4237 if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4243 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4244 struct qedr_cq *cq, int num_entries,
4246 struct rdma_cqe_responder *resp)
4250 cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4256 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4257 struct qedr_cq *cq, int num_entries,
4258 struct ib_wc *wc, struct rdma_cqe_responder *resp,
4263 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4264 cnt = process_resp_flush(qp, cq, num_entries, wc,
4265 resp->rq_cons_or_srq_id);
4266 try_consume_resp_cqe(cq, qp, resp, update);
4268 cnt = process_resp_one(dev, qp, cq, wc, resp);
4276 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4277 struct rdma_cqe_requester *req, int *update)
4279 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4285 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4287 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4288 struct qedr_cq *cq = get_qedr_cq(ibcq);
4289 union rdma_cqe *cqe;
4290 u32 old_cons, new_cons;
4291 unsigned long flags;
4295 if (cq->destroyed) {
4297 "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4302 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4303 return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4305 spin_lock_irqsave(&cq->cq_lock, flags);
4306 cqe = cq->latest_cqe;
4307 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4308 while (num_entries && is_valid_cqe(cq, cqe)) {
4312 /* prevent speculative reads of any field of CQE */
4315 qp = cqe_get_qp(cqe);
4317 WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4323 switch (cqe_get_type(cqe)) {
4324 case RDMA_CQE_TYPE_REQUESTER:
4325 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4327 try_consume_req_cqe(cq, qp, &cqe->req, &update);
4329 case RDMA_CQE_TYPE_RESPONDER_RQ:
4330 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4331 &cqe->resp, &update);
4333 case RDMA_CQE_TYPE_RESPONDER_SRQ:
4334 cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4338 case RDMA_CQE_TYPE_INVALID:
4340 DP_ERR(dev, "Error: invalid CQE type = %d\n",
4349 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4351 cq->cq_cons += new_cons - old_cons;
4354 /* The doorbell notifies about the latest VALID entry,
4355 * but the chain already points to the next INVALID one
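 * (e.g., illustratively: after three CQEs have been consumed cq_cons is 3,
 * while the index of the last valid entry reported to the doorbell is 2).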
4357 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4359 spin_unlock_irqrestore(&cq->cq_lock, flags);
4363 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4364 u8 port_num, const struct ib_wc *in_wc,
4365 const struct ib_grh *in_grh, const struct ib_mad *in,
4366 struct ib_mad *out_mad, size_t *out_mad_size,
4367 u16 *out_mad_pkey_index)
4369 return IB_MAD_RESULT_SUCCESS;