1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
3 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
6 #include <linux/vmalloc.h>
8 #include <rdma/ib_addr.h>
9 #include <rdma/ib_umem.h>
10 #include <rdma/ib_user_verbs.h>
11 #include <rdma/ib_verbs.h>
12 #include <rdma/uverbs_ioctl.h>
16 #define EFA_MMAP_FLAG_SHIFT 56
17 #define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
18 #define EFA_MMAP_INVALID U64_MAX
21 EFA_MMAP_DMA_PAGE = 0,
26 #define EFA_AENQ_ENABLED_GROUPS \
27 (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
28 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
30 struct efa_mmap_entry {
38 static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
40 return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
41 ((u64)efa->mmap_page << PAGE_SHIFT);
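/*
 * Illustrative key layout: the mmap flag lives in bits 63:56 and the page
 * index, shifted up by PAGE_SHIFT, occupies the remaining bits.  E.g. with
 * 4K pages, mmap_page == 3 and a flag value of 2 would give the key
 * 0x0200000000003000.  Userspace hands the key back as the mmap() offset.
 */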
44 #define EFA_CHUNK_PAYLOAD_SHIFT 12
45 #define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
46 #define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
48 #define EFA_CHUNK_SHIFT 12
49 #define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT)
50 #define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info)
52 #define EFA_PTRS_PER_CHUNK \
53 ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
55 #define EFA_CHUNK_USED_SIZE \
56 ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
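/*
 * Sketch of one indirect PBL chunk, per the macros above: a 4KB buffer
 * holding up to EFA_PTRS_PER_CHUNK page DMA addresses, followed by one
 * struct efa_com_ctrl_buff_info describing the length and DMA address of
 * the next chunk.  See pbl_chunk_list_create().
 */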
58 #define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
66 struct pbl_chunk_list {
67 struct pbl_chunk *chunks;
77 u32 pbl_buf_size_in_pages;
78 struct scatterlist *sgl;
80 struct pbl_chunk_list chunk_list;
84 u32 pbl_buf_size_in_bytes;
85 u8 physically_continuous;
88 static inline struct efa_dev *to_edev(struct ib_device *ibdev)
90 return container_of(ibdev, struct efa_dev, ibdev);
93 static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
95 return container_of(ibucontext, struct efa_ucontext, ibucontext);
98 static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
100 return container_of(ibpd, struct efa_pd, ibpd);
103 static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
105 return container_of(ibmr, struct efa_mr, ibmr);
108 static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
110 return container_of(ibqp, struct efa_qp, ibqp);
113 static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
115 return container_of(ibcq, struct efa_cq, ibcq);
118 static inline struct efa_ah *to_eah(struct ib_ah *ibah)
120 return container_of(ibah, struct efa_ah, ibah);
123 #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
124 sizeof(((typeof(x) *)0)->fld) <= (sz))
126 #define is_reserved_cleared(reserved) \
127 !memchr_inv(reserved, 0, sizeof(reserved))
129 static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
130 size_t size, enum dma_data_direction dir)
134 addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
138 *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
139 if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
140 ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
141 free_pages_exact(addr, size);
149 * This is only called when the ucontext is destroyed and there can be no
150 * concurrent query via mmap or allocate on the xarray, thus we can be sure no
151 * other thread is using the entry pointer. We also know that all the BAR
152 pages have either been zapped or munmapped at this point. Normal pages are
153 * refcounted and will be freed at the proper time.
155 static void mmap_entries_remove_free(struct efa_dev *dev,
156 struct efa_ucontext *ucontext)
158 struct efa_mmap_entry *entry;
159 unsigned long mmap_page;
161 xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
162 xa_erase(&ucontext->mmap_xa, mmap_page);
166 "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
167 entry->obj, get_mmap_key(entry), entry->address,
169 if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
170 /* DMA mapping is already gone, now free the pages */
171 free_pages_exact(phys_to_virt(entry->address),
177 static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
178 struct efa_ucontext *ucontext,
181 struct efa_mmap_entry *entry;
184 mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
185 if (mmap_page > U32_MAX)
188 entry = xa_load(&ucontext->mmap_xa, mmap_page);
189 if (!entry || get_mmap_key(entry) != key || entry->length != len)
192 ibdev_dbg(&dev->ibdev,
193 "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
194 entry->obj, key, entry->address, entry->length);
200 * Note this locking scheme cannot support removal of entries, except during
201 * ucontext destruction when the core code guarantees no concurrency.
203 static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
204 void *obj, u64 address, u64 length, u8 mmap_flag)
206 struct efa_mmap_entry *entry;
209 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
211 return EFA_MMAP_INVALID;
214 entry->address = address;
215 entry->length = length;
216 entry->mmap_flag = mmap_flag;
218 xa_lock(&ucontext->mmap_xa);
219 entry->mmap_page = ucontext->mmap_xa_page;
220 ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE);
221 err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
223 xa_unlock(&ucontext->mmap_xa);
226 return EFA_MMAP_INVALID;
231 "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
232 entry->obj, entry->address, entry->length, get_mmap_key(entry));
234 return get_mmap_key(entry);
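/*
 * Usage note: the key returned here is reported to userspace in the
 * create_qp/create_cq responses and later comes back as the mmap()
 * offset, which efa_mmap()/mmap_entry_get() translate into this entry.
 */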
237 int efa_query_device(struct ib_device *ibdev,
238 struct ib_device_attr *props,
239 struct ib_udata *udata)
241 struct efa_com_get_device_attr_result *dev_attr;
242 struct efa_ibv_ex_query_device_resp resp = {};
243 struct efa_dev *dev = to_edev(ibdev);
246 if (udata && udata->inlen &&
247 !ib_is_udata_cleared(udata, 0, udata->inlen)) {
249 "Incompatible ABI params, udata not cleared\n");
253 dev_attr = &dev->dev_attr;
255 memset(props, 0, sizeof(*props));
256 props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
257 props->page_size_cap = dev_attr->page_size_cap;
258 props->vendor_id = dev->pdev->vendor;
259 props->vendor_part_id = dev->pdev->device;
260 props->hw_ver = dev->pdev->subsystem_device;
261 props->max_qp = dev_attr->max_qp;
262 props->max_cq = dev_attr->max_cq;
263 props->max_pd = dev_attr->max_pd;
264 props->max_mr = dev_attr->max_mr;
265 props->max_ah = dev_attr->max_ah;
266 props->max_cqe = dev_attr->max_cq_depth;
267 props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
268 dev_attr->max_rq_depth);
269 props->max_send_sge = dev_attr->max_sq_sge;
270 props->max_recv_sge = dev_attr->max_rq_sge;
272 if (udata && udata->outlen) {
273 resp.max_sq_sge = dev_attr->max_sq_sge;
274 resp.max_rq_sge = dev_attr->max_rq_sge;
275 resp.max_sq_wr = dev_attr->max_sq_depth;
276 resp.max_rq_wr = dev_attr->max_rq_depth;
278 err = ib_copy_to_udata(udata, &resp,
279 min(sizeof(resp), udata->outlen));
282 "Failed to copy udata for query_device\n");
290 int efa_query_port(struct ib_device *ibdev, u8 port,
291 struct ib_port_attr *props)
293 struct efa_dev *dev = to_edev(ibdev);
297 props->state = IB_PORT_ACTIVE;
298 props->phys_state = 5;
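/* physical port state 5 corresponds to "Link Up" in the IB specification */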
299 props->gid_tbl_len = 1;
300 props->pkey_tbl_len = 1;
301 props->active_speed = IB_SPEED_EDR;
302 props->active_width = IB_WIDTH_4X;
303 props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
304 props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
305 props->max_msg_sz = dev->mtu;
306 props->max_vl_num = 1;
311 int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
313 struct ib_qp_init_attr *qp_init_attr)
315 struct efa_dev *dev = to_edev(ibqp->device);
316 struct efa_com_query_qp_params params = {};
317 struct efa_com_query_qp_result result;
318 struct efa_qp *qp = to_eqp(ibqp);
321 #define EFA_QUERY_QP_SUPP_MASK \
322 (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
323 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
325 if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
326 ibdev_dbg(&dev->ibdev,
327 "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
328 qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
332 memset(qp_attr, 0, sizeof(*qp_attr));
333 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
335 params.qp_handle = qp->qp_handle;
336 err = efa_com_query_qp(&dev->edev, &params, &result);
340 qp_attr->qp_state = result.qp_state;
341 qp_attr->qkey = result.qkey;
342 qp_attr->sq_psn = result.sq_psn;
343 qp_attr->sq_draining = result.sq_draining;
344 qp_attr->port_num = 1;
346 qp_attr->cap.max_send_wr = qp->max_send_wr;
347 qp_attr->cap.max_recv_wr = qp->max_recv_wr;
348 qp_attr->cap.max_send_sge = qp->max_send_sge;
349 qp_attr->cap.max_recv_sge = qp->max_recv_sge;
350 qp_attr->cap.max_inline_data = qp->max_inline_data;
352 qp_init_attr->qp_type = ibqp->qp_type;
353 qp_init_attr->recv_cq = ibqp->recv_cq;
354 qp_init_attr->send_cq = ibqp->send_cq;
355 qp_init_attr->qp_context = ibqp->qp_context;
356 qp_init_attr->cap = qp_attr->cap;
361 int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
364 struct efa_dev *dev = to_edev(ibdev);
366 memcpy(gid->raw, dev->addr, sizeof(dev->addr));
371 int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
381 static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
383 struct efa_com_dealloc_pd_params params = {
387 return efa_com_dealloc_pd(&dev->edev, &params);
390 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
392 struct efa_dev *dev = to_edev(ibpd->device);
393 struct efa_ibv_alloc_pd_resp resp = {};
394 struct efa_com_alloc_pd_result result;
395 struct efa_pd *pd = to_epd(ibpd);
399 !ib_is_udata_cleared(udata, 0, udata->inlen)) {
400 ibdev_dbg(&dev->ibdev,
401 "Incompatible ABI params, udata not cleared\n");
406 err = efa_com_alloc_pd(&dev->edev, &result);
410 pd->pdn = result.pdn;
411 resp.pdn = result.pdn;
414 err = ib_copy_to_udata(udata, &resp,
415 min(sizeof(resp), udata->outlen));
417 ibdev_dbg(&dev->ibdev,
418 "Failed to copy udata for alloc_pd\n");
423 ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
428 efa_pd_dealloc(dev, result.pdn);
430 atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
434 void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
436 struct efa_dev *dev = to_edev(ibpd->device);
437 struct efa_pd *pd = to_epd(ibpd);
439 ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
440 efa_pd_dealloc(dev, pd->pdn);
443 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
445 struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
447 return efa_com_destroy_qp(&dev->edev, &params);
450 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
452 struct efa_dev *dev = to_edev(ibqp->pd->device);
453 struct efa_qp *qp = to_eqp(ibqp);
456 ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
457 err = efa_destroy_qp_handle(dev, qp->qp_handle);
461 if (qp->rq_cpu_addr) {
462 ibdev_dbg(&dev->ibdev,
463 "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
464 qp->rq_cpu_addr, qp->rq_size,
466 dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
474 static int qp_mmap_entries_setup(struct efa_qp *qp,
476 struct efa_ucontext *ucontext,
477 struct efa_com_create_qp_params *params,
478 struct efa_ibv_create_qp_resp *resp)
481 * Once an entry is inserted it might be mmapped, hence cannot be
482 * cleaned up until dealloc_ucontext.
484 resp->sq_db_mmap_key =
485 mmap_entry_insert(dev, ucontext, qp,
486 dev->db_bar_addr + resp->sq_db_offset,
487 PAGE_SIZE, EFA_MMAP_IO_NC);
488 if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
491 resp->sq_db_offset &= ~PAGE_MASK;
493 resp->llq_desc_mmap_key =
494 mmap_entry_insert(dev, ucontext, qp,
495 dev->mem_bar_addr + resp->llq_desc_offset,
496 PAGE_ALIGN(params->sq_ring_size_in_bytes +
497 (resp->llq_desc_offset & ~PAGE_MASK)),
499 if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
502 resp->llq_desc_offset &= ~PAGE_MASK;
505 resp->rq_db_mmap_key =
506 mmap_entry_insert(dev, ucontext, qp,
507 dev->db_bar_addr + resp->rq_db_offset,
508 PAGE_SIZE, EFA_MMAP_IO_NC);
509 if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
512 resp->rq_db_offset &= ~PAGE_MASK;
515 mmap_entry_insert(dev, ucontext, qp,
516 virt_to_phys(qp->rq_cpu_addr),
517 qp->rq_size, EFA_MMAP_DMA_PAGE);
518 if (resp->rq_mmap_key == EFA_MMAP_INVALID)
521 resp->rq_mmap_size = qp->rq_size;
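/*
 * Summary: a user QP can end up with four mmap entries - the SQ and RQ
 * doorbell pages (EFA_MMAP_IO_NC) and the LLQ descriptor ring in device
 * BARs, plus the DMA-coherent RQ buffer (EFA_MMAP_DMA_PAGE) when a
 * receive queue exists.  Keys map whole pages, so the sub-page offsets
 * are handed back to userspace separately in the response.
 */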
527 static int efa_qp_validate_cap(struct efa_dev *dev,
528 struct ib_qp_init_attr *init_attr)
530 if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
531 ibdev_dbg(&dev->ibdev,
532 "qp: requested send wr[%u] exceeds the max[%u]\n",
533 init_attr->cap.max_send_wr,
534 dev->dev_attr.max_sq_depth);
537 if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
538 ibdev_dbg(&dev->ibdev,
539 "qp: requested receive wr[%u] exceeds the max[%u]\n",
540 init_attr->cap.max_recv_wr,
541 dev->dev_attr.max_rq_depth);
544 if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
545 ibdev_dbg(&dev->ibdev,
546 "qp: requested sge send[%u] exceeds the max[%u]\n",
547 init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
550 if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
551 ibdev_dbg(&dev->ibdev,
552 "qp: requested sge recv[%u] exceeds the max[%u]\n",
553 init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
556 if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
557 ibdev_dbg(&dev->ibdev,
558 "qp: requested inline data[%u] exceeds the max[%u]\n",
559 init_attr->cap.max_inline_data,
560 dev->dev_attr.inline_buf_size);
567 static int efa_qp_validate_attr(struct efa_dev *dev,
568 struct ib_qp_init_attr *init_attr)
570 if (init_attr->qp_type != IB_QPT_DRIVER &&
571 init_attr->qp_type != IB_QPT_UD) {
572 ibdev_dbg(&dev->ibdev,
573 "Unsupported qp type %d\n", init_attr->qp_type);
577 if (init_attr->srq) {
578 ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
582 if (init_attr->create_flags) {
583 ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
590 struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
591 struct ib_qp_init_attr *init_attr,
592 struct ib_udata *udata)
594 struct efa_com_create_qp_params create_qp_params = {};
595 struct efa_com_create_qp_result create_qp_resp;
596 struct efa_dev *dev = to_edev(ibpd->device);
597 struct efa_ibv_create_qp_resp resp = {};
598 struct efa_ibv_create_qp cmd = {};
599 bool rq_entry_inserted = false;
600 struct efa_ucontext *ucontext;
604 ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
607 err = efa_qp_validate_cap(dev, init_attr);
611 err = efa_qp_validate_attr(dev, init_attr);
615 if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
616 ibdev_dbg(&dev->ibdev,
617 "Incompatible ABI params, no input udata\n");
622 if (udata->inlen > sizeof(cmd) &&
623 !ib_is_udata_cleared(udata, sizeof(cmd),
624 udata->inlen - sizeof(cmd))) {
625 ibdev_dbg(&dev->ibdev,
626 "Incompatible ABI params, unknown fields in udata\n");
631 err = ib_copy_from_udata(&cmd, udata,
632 min(sizeof(cmd), udata->inlen));
634 ibdev_dbg(&dev->ibdev,
635 "Cannot copy udata for create_qp\n");
640 ibdev_dbg(&dev->ibdev,
641 "Incompatible ABI params, unknown fields in udata\n");
646 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
652 create_qp_params.uarn = ucontext->uarn;
653 create_qp_params.pd = to_epd(ibpd)->pdn;
655 if (init_attr->qp_type == IB_QPT_UD) {
656 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
657 } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
658 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
660 ibdev_dbg(&dev->ibdev,
661 "Unsupported qp type %d driver qp type %d\n",
662 init_attr->qp_type, cmd.driver_qp_type);
667 ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
668 init_attr->qp_type, cmd.driver_qp_type);
669 create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
670 create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
671 create_qp_params.sq_depth = init_attr->cap.max_send_wr;
672 create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
674 create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
675 create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
676 qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
678 qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
679 qp->rq_size, DMA_TO_DEVICE);
680 if (!qp->rq_cpu_addr) {
685 ibdev_dbg(&dev->ibdev,
686 "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
687 qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
688 create_qp_params.rq_base_addr = qp->rq_dma_addr;
691 err = efa_com_create_qp(&dev->edev, &create_qp_params,
694 goto err_free_mapped;
696 resp.sq_db_offset = create_qp_resp.sq_db_offset;
697 resp.rq_db_offset = create_qp_resp.rq_db_offset;
698 resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
699 resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
700 resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
702 err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
707 rq_entry_inserted = true;
708 qp->qp_handle = create_qp_resp.qp_handle;
709 qp->ibqp.qp_num = create_qp_resp.qp_num;
710 qp->ibqp.qp_type = init_attr->qp_type;
711 qp->max_send_wr = init_attr->cap.max_send_wr;
712 qp->max_recv_wr = init_attr->cap.max_recv_wr;
713 qp->max_send_sge = init_attr->cap.max_send_sge;
714 qp->max_recv_sge = init_attr->cap.max_recv_sge;
715 qp->max_inline_data = init_attr->cap.max_inline_data;
718 err = ib_copy_to_udata(udata, &resp,
719 min(sizeof(resp), udata->outlen));
721 ibdev_dbg(&dev->ibdev,
722 "Failed to copy udata for qp[%u]\n",
723 create_qp_resp.qp_num);
728 ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
733 efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
736 dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
738 if (!rq_entry_inserted)
739 free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
744 atomic64_inc(&dev->stats.sw_stats.create_qp_err);
748 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
749 struct ib_qp_attr *qp_attr, int qp_attr_mask,
750 enum ib_qp_state cur_state,
751 enum ib_qp_state new_state)
753 #define EFA_MODIFY_QP_SUPP_MASK \
754 (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
755 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
757 if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
758 ibdev_dbg(&dev->ibdev,
759 "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
760 qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
764 if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
766 ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
770 if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
771 ibdev_dbg(&dev->ibdev, "Can't change port num\n");
775 if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
776 ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
783 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
784 int qp_attr_mask, struct ib_udata *udata)
786 struct efa_dev *dev = to_edev(ibqp->device);
787 struct efa_com_modify_qp_params params = {};
788 struct efa_qp *qp = to_eqp(ibqp);
789 enum ib_qp_state cur_state;
790 enum ib_qp_state new_state;
794 !ib_is_udata_cleared(udata, 0, udata->inlen)) {
795 ibdev_dbg(&dev->ibdev,
796 "Incompatible ABI params, udata not cleared\n");
800 cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
802 new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
804 err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
809 params.qp_handle = qp->qp_handle;
811 if (qp_attr_mask & IB_QP_STATE) {
812 params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
813 BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
814 params.cur_qp_state = qp_attr->cur_qp_state;
815 params.qp_state = qp_attr->qp_state;
818 if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
819 params.modify_mask |=
820 BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
821 params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
824 if (qp_attr_mask & IB_QP_QKEY) {
825 params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
826 params.qkey = qp_attr->qkey;
829 if (qp_attr_mask & IB_QP_SQ_PSN) {
830 params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
831 params.sq_psn = qp_attr->sq_psn;
834 err = efa_com_modify_qp(&dev->edev, &params);
838 qp->state = new_state;
843 static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
845 struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
847 return efa_com_destroy_cq(&dev->edev, &params);
850 void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
852 struct efa_dev *dev = to_edev(ibcq->device);
853 struct efa_cq *cq = to_ecq(ibcq);
855 ibdev_dbg(&dev->ibdev,
856 "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
857 cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
859 efa_destroy_cq_idx(dev, cq->cq_idx);
860 dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
864 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
865 struct efa_ibv_create_cq_resp *resp)
867 resp->q_mmap_size = cq->size;
868 resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
869 virt_to_phys(cq->cpu_addr),
870 cq->size, EFA_MMAP_DMA_PAGE);
871 if (resp->q_mmap_key == EFA_MMAP_INVALID)
877 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
878 struct ib_udata *udata)
880 struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
881 udata, struct efa_ucontext, ibucontext);
882 struct efa_ibv_create_cq_resp resp = {};
883 struct efa_com_create_cq_params params;
884 struct efa_com_create_cq_result result;
885 struct ib_device *ibdev = ibcq->device;
886 struct efa_dev *dev = to_edev(ibdev);
887 struct efa_ibv_create_cq cmd = {};
888 struct efa_cq *cq = to_ecq(ibcq);
889 bool cq_entry_inserted = false;
890 int entries = attr->cqe;
893 ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
895 if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
897 "cq: requested entries[%u] non-positive or greater than max[%u]\n",
898 entries, dev->dev_attr.max_cq_depth);
903 if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
905 "Incompatible ABI params, no input udata\n");
910 if (udata->inlen > sizeof(cmd) &&
911 !ib_is_udata_cleared(udata, sizeof(cmd),
912 udata->inlen - sizeof(cmd))) {
914 "Incompatible ABI params, unknown fields in udata\n");
919 err = ib_copy_from_udata(&cmd, udata,
920 min(sizeof(cmd), udata->inlen));
922 ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
926 if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
928 "Incompatible ABI params, unknown fields in udata\n");
933 if (!cmd.cq_entry_size) {
935 "Invalid entry size [%u]\n", cmd.cq_entry_size);
940 if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
942 "Invalid number of sub cqs[%u] expected[%u]\n",
943 cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
948 cq->ucontext = ucontext;
949 cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
950 cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
957 params.uarn = cq->ucontext->uarn;
958 params.cq_depth = entries;
959 params.dma_addr = cq->dma_addr;
960 params.entry_size_in_bytes = cmd.cq_entry_size;
961 params.num_sub_cqs = cmd.num_sub_cqs;
962 err = efa_com_create_cq(&dev->edev, &params, &result);
964 goto err_free_mapped;
966 resp.cq_idx = result.cq_idx;
967 cq->cq_idx = result.cq_idx;
968 cq->ibcq.cqe = result.actual_depth;
969 WARN_ON_ONCE(entries != result.actual_depth);
971 err = cq_mmap_entries_setup(dev, cq, &resp);
973 ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
978 cq_entry_inserted = true;
981 err = ib_copy_to_udata(udata, &resp,
982 min(sizeof(resp), udata->outlen));
985 "Failed to copy udata for create_cq\n");
990 ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
991 cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
996 efa_destroy_cq_idx(dev, cq->cq_idx);
998 dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
1000 if (!cq_entry_inserted)
1001 free_pages_exact(cq->cpu_addr, cq->size);
1003 atomic64_inc(&dev->stats.sw_stats.create_cq_err);
1007 static int umem_to_page_list(struct efa_dev *dev,
1008 struct ib_umem *umem,
1013 u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
1014 struct ib_block_iter biter;
1015 unsigned int hp_idx = 0;
1017 ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
1018 hp_cnt, pages_in_hp);
1020 rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
1022 page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
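/*
 * Illustrative example: a 4MB region backed by 2MB huge pages with 4K
 * PAGE_SIZE has hp_cnt == 2 and pages_in_hp == 512; the block iterator
 * yields one DMA address per huge page, so page_list receives two entries.
 */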
1027 static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
1029 struct scatterlist *sglist;
1033 sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
1036 sg_init_table(sglist, page_cnt);
1037 for (i = 0; i < page_cnt; i++) {
1038 pg = vmalloc_to_page(buf);
1041 sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
1042 buf += PAGE_SIZE / sizeof(*buf);
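/*
 * The PBL buffer may come from vmalloc, i.e. it is virtually but not
 * physically contiguous, so each backing page is gathered into a
 * scatterlist that can be DMA mapped page by page.
 */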
1052 * create a chunk list of physical pages dma addresses from the supplied
1053 * scatter gather list
1055 static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
1057 struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1058 int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
1059 struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
1060 unsigned int chunk_list_size, chunk_idx, payload_idx;
1061 int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
1062 struct efa_com_ctrl_buff_info *ctrl_buf;
1063 u64 *cur_chunk_buf, *prev_chunk_buf;
1064 struct ib_block_iter biter;
1065 dma_addr_t dma_addr;
1068 /* allocate a chunk list that consists of 4KB chunks */
1069 chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
1071 chunk_list->size = chunk_list_size;
1072 chunk_list->chunks = kcalloc(chunk_list_size,
1073 sizeof(*chunk_list->chunks),
1075 if (!chunk_list->chunks)
1078 ibdev_dbg(&dev->ibdev,
1079 "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
1082 /* allocate chunk buffers: */
1083 for (i = 0; i < chunk_list_size; i++) {
1084 chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
1085 if (!chunk_list->chunks[i].buf)
1086 goto chunk_list_dealloc;
1088 chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
1090 chunk_list->chunks[chunk_list_size - 1].length =
1091 ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
1094 /* fill the dma addresses of sg list pages to chunks: */
1097 cur_chunk_buf = chunk_list->chunks[0].buf;
1098 rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
1099 EFA_CHUNK_PAYLOAD_SIZE) {
1100 cur_chunk_buf[payload_idx++] =
1101 rdma_block_iter_dma_address(&biter);
1103 if (payload_idx == EFA_PTRS_PER_CHUNK) {
1105 cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
1110 /* map chunks to dma and fill chunks next ptrs */
1111 for (i = chunk_list_size - 1; i >= 0; i--) {
1112 dma_addr = dma_map_single(&dev->pdev->dev,
1113 chunk_list->chunks[i].buf,
1114 chunk_list->chunks[i].length,
1116 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1117 ibdev_err(&dev->ibdev,
1118 "chunk[%u] dma_map_failed\n", i);
1119 goto chunk_list_unmap;
1122 chunk_list->chunks[i].dma_addr = dma_addr;
1123 ibdev_dbg(&dev->ibdev,
1124 "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
1129 prev_chunk_buf = chunk_list->chunks[i - 1].buf;
1131 ctrl_buf = (struct efa_com_ctrl_buff_info *)
1132 &prev_chunk_buf[EFA_PTRS_PER_CHUNK];
1133 ctrl_buf->length = chunk_list->chunks[i].length;
1135 efa_com_set_dma_addr(dma_addr,
1136 &ctrl_buf->address.mem_addr_high,
1137 &ctrl_buf->address.mem_addr_low);
1143 for (; i < chunk_list_size; i++) {
1144 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1145 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1148 for (i = 0; i < chunk_list_size; i++)
1149 kfree(chunk_list->chunks[i].buf);
1151 kfree(chunk_list->chunks);
1155 static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1157 struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1160 for (i = 0; i < chunk_list->size; i++) {
1161 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1162 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1163 kfree(chunk_list->chunks[i].buf);
1166 kfree(chunk_list->chunks);
1169 /* initialize pbl continuous mode: map pbl buffer to a dma address. */
1170 static int pbl_continuous_initialize(struct efa_dev *dev,
1171 struct pbl_context *pbl)
1173 dma_addr_t dma_addr;
1175 dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
1176 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1177 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1178 ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
1182 pbl->phys.continuous.dma_addr = dma_addr;
1183 ibdev_dbg(&dev->ibdev,
1184 "pbl continuous - dma_addr = %pad, size[%u]\n",
1185 &dma_addr, pbl->pbl_buf_size_in_bytes);
1191 * initialize pbl indirect mode:
1192 * create a chunk list out of the dma addresses of the physical pages of
1195 static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
1197 u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
1198 struct scatterlist *sgl;
1199 int sg_dma_cnt, err;
1201 BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
1202 sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
1206 sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1212 pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
1213 pbl->phys.indirect.sgl = sgl;
1214 pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
1215 err = pbl_chunk_list_create(dev, pbl);
1217 ibdev_dbg(&dev->ibdev,
1218 "chunk_list creation failed[%d]\n", err);
1222 ibdev_dbg(&dev->ibdev,
1223 "pbl indirect - size[%u], chunks[%u]\n",
1224 pbl->pbl_buf_size_in_bytes,
1225 pbl->phys.indirect.chunk_list.size);
1230 dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1236 static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
1238 pbl_chunk_list_destroy(dev, pbl);
1239 dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
1240 pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
1241 kfree(pbl->phys.indirect.sgl);
1244 /* create a page buffer list from a mapped user memory region */
1245 static int pbl_create(struct efa_dev *dev,
1246 struct pbl_context *pbl,
1247 struct ib_umem *umem,
1253 pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
1254 pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
1258 if (is_vmalloc_addr(pbl->pbl_buf)) {
1259 pbl->physically_continuous = 0;
1260 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1265 err = pbl_indirect_initialize(dev, pbl);
1269 pbl->physically_continuous = 1;
1270 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1275 err = pbl_continuous_initialize(dev, pbl);
1280 ibdev_dbg(&dev->ibdev,
1281 "user_pbl_created: user_pages[%u], continuous[%u]\n",
1282 hp_cnt, pbl->physically_continuous);
1287 kvfree(pbl->pbl_buf);
1291 static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1293 if (pbl->physically_continuous)
1294 dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
1295 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1297 pbl_indirect_terminate(dev, pbl);
1299 kvfree(pbl->pbl_buf);
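/*
 * Note on the two PBL modes: a kmalloc-backed (physically contiguous) PBL
 * is handed to the device with a single dma_map_single(), while a
 * vmalloc-backed one goes through the indirect chunk list.  E.g. a 2GB MR
 * with 4K pages needs 512K 8-byte pointers, a 4MB PBL that kvzalloc()
 * will typically satisfy from vmalloc.
 */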
1302 static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
1303 struct efa_com_reg_mr_params *params)
1307 params->inline_pbl = 1;
1308 err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
1309 params->page_num, params->page_shift);
1313 ibdev_dbg(&dev->ibdev,
1314 "inline_pbl_array - pages[%u]\n", params->page_num);
1319 static int efa_create_pbl(struct efa_dev *dev,
1320 struct pbl_context *pbl,
1322 struct efa_com_reg_mr_params *params)
1326 err = pbl_create(dev, pbl, mr->umem, params->page_num,
1327 params->page_shift);
1329 ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
1333 params->inline_pbl = 0;
1334 params->indirect = !pbl->physically_continuous;
1335 if (pbl->physically_continuous) {
1336 params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
1338 efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
1339 &params->pbl.pbl.address.mem_addr_high,
1340 &params->pbl.pbl.address.mem_addr_low);
1342 params->pbl.pbl.length =
1343 pbl->phys.indirect.chunk_list.chunks[0].length;
1345 efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
1346 &params->pbl.pbl.address.mem_addr_high,
1347 &params->pbl.pbl.address.mem_addr_low);
1353 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
1354 u64 virt_addr, int access_flags,
1355 struct ib_udata *udata)
1357 struct efa_dev *dev = to_edev(ibpd->device);
1358 struct efa_com_reg_mr_params params = {};
1359 struct efa_com_reg_mr_result result = {};
1360 struct pbl_context pbl;
1367 !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1368 ibdev_dbg(&dev->ibdev,
1369 "Incompatible ABI params, udata not cleared\n");
1374 if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
1375 ibdev_dbg(&dev->ibdev,
1376 "Unsupported access flags[%#x], supported[%#x]\n",
1377 access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
1382 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1388 mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
1389 if (IS_ERR(mr->umem)) {
1390 err = PTR_ERR(mr->umem);
1391 ibdev_dbg(&dev->ibdev,
1392 "Failed to pin and map user space memory[%d]\n", err);
1396 params.pd = to_epd(ibpd)->pdn;
1397 params.iova = virt_addr;
1398 params.mr_length_in_bytes = length;
1399 params.permissions = access_flags & 0x1;
1401 pg_sz = ib_umem_find_best_pgsz(mr->umem,
1402 dev->dev_attr.page_size_cap,
1406 ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
1407 dev->dev_attr.page_size_cap);
1411 params.page_shift = __ffs(pg_sz);
1412 params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
1415 ibdev_dbg(&dev->ibdev,
1416 "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
1417 start, length, params.page_shift, params.page_num);
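/*
 * Worked example (illustrative): start = 0x10000800, length = 0x3000 and
 * pg_sz = 4K give page_shift = 12 and page_num =
 * DIV_ROUND_UP(0x3000 + 0x800, 0x1000) = 4, since the offset of start
 * within its first page must be covered as well.
 */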
1419 inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
1420 if (params.page_num <= inline_size) {
1421 err = efa_create_inline_pbl(dev, mr, &params);
1425 err = efa_com_register_mr(&dev->edev, &params, &result);
1429 err = efa_create_pbl(dev, &pbl, mr, &params);
1433 err = efa_com_register_mr(&dev->edev, &params, &result);
1434 pbl_destroy(dev, &pbl);
1440 mr->ibmr.lkey = result.l_key;
1441 mr->ibmr.rkey = result.r_key;
1442 mr->ibmr.length = length;
1443 ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
1448 ib_umem_release(mr->umem);
1452 atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
1453 return ERR_PTR(err);
1456 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1458 struct efa_dev *dev = to_edev(ibmr->device);
1459 struct efa_com_dereg_mr_params params;
1460 struct efa_mr *mr = to_emr(ibmr);
1463 ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
1466 params.l_key = mr->ibmr.lkey;
1467 err = efa_com_dereg_mr(&dev->edev, &params);
1471 ib_umem_release(mr->umem);
1478 int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
1479 struct ib_port_immutable *immutable)
1481 struct ib_port_attr attr;
1484 err = ib_query_port(ibdev, port_num, &attr);
1486 ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
1490 immutable->pkey_tbl_len = attr.pkey_tbl_len;
1491 immutable->gid_tbl_len = attr.gid_tbl_len;
1496 static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
1498 struct efa_com_dealloc_uar_params params = {
1502 return efa_com_dealloc_uar(&dev->edev, &params);
1505 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
1507 struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1508 struct efa_dev *dev = to_edev(ibucontext->device);
1509 struct efa_ibv_alloc_ucontext_resp resp = {};
1510 struct efa_com_alloc_uar_result result;
1514 * it's fine if the driver does not know all request fields,
1515 * we will ack input fields in our response.
1518 err = efa_com_alloc_uar(&dev->edev, &result);
1522 ucontext->uarn = result.uarn;
1523 xa_init(&ucontext->mmap_xa);
1525 resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
1526 resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
1527 resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
1528 resp.inline_buf_size = dev->dev_attr.inline_buf_size;
1529 resp.max_llq_size = dev->dev_attr.max_llq_size;
1531 if (udata && udata->outlen) {
1532 err = ib_copy_to_udata(udata, &resp,
1533 min(sizeof(resp), udata->outlen));
1535 goto err_dealloc_uar;
1541 efa_dealloc_uar(dev, result.uarn);
1543 atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
1547 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
1549 struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1550 struct efa_dev *dev = to_edev(ibucontext->device);
1552 mmap_entries_remove_free(dev, ucontext);
1553 efa_dealloc_uar(dev, ucontext->uarn);
1556 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
1557 struct vm_area_struct *vma, u64 key, u64 length)
1559 struct efa_mmap_entry *entry;
1564 entry = mmap_entry_get(dev, ucontext, key, length);
1566 ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
1571 ibdev_dbg(&dev->ibdev,
1572 "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
1573 entry->address, length, entry->mmap_flag);
1575 pfn = entry->address >> PAGE_SHIFT;
1576 switch (entry->mmap_flag) {
1577 case EFA_MMAP_IO_NC:
1578 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1579 pgprot_noncached(vma->vm_page_prot));
1581 case EFA_MMAP_IO_WC:
1582 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1583 pgprot_writecombine(vma->vm_page_prot));
1585 case EFA_MMAP_DMA_PAGE:
1586 for (va = vma->vm_start; va < vma->vm_end;
1587 va += PAGE_SIZE, pfn++) {
1588 err = vm_insert_page(vma, va, pfn_to_page(pfn));
1600 "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
1601 entry->address, length, entry->mmap_flag, err);
1606 int efa_mmap(struct ib_ucontext *ibucontext,
1607 struct vm_area_struct *vma)
1609 struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1610 struct efa_dev *dev = to_edev(ibucontext->device);
1611 u64 length = vma->vm_end - vma->vm_start;
1612 u64 key = vma->vm_pgoff << PAGE_SHIFT;
1614 ibdev_dbg(&dev->ibdev,
1615 "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
1616 vma->vm_start, vma->vm_end, length, key);
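/*
 * vm_pgoff carries the key that mmap_entry_insert() handed to userspace
 * (shifted back up by PAGE_SHIFT above), so the lookup in __efa_mmap() is
 * simply the reverse of the insertion done at create time.
 */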
1618 if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
1619 ibdev_dbg(&dev->ibdev,
1620 "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
1621 length, PAGE_SIZE, vma->vm_flags);
1625 if (vma->vm_flags & VM_EXEC) {
1626 ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
1629 vma->vm_flags &= ~VM_MAYEXEC;
1631 return __efa_mmap(dev, ucontext, vma, key, length);
1634 static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
1636 struct efa_com_destroy_ah_params params = {
1638 .pdn = to_epd(ah->ibah.pd)->pdn,
1641 return efa_com_destroy_ah(&dev->edev, &params);
1644 int efa_create_ah(struct ib_ah *ibah,
1645 struct rdma_ah_attr *ah_attr,
1647 struct ib_udata *udata)
1649 struct efa_dev *dev = to_edev(ibah->device);
1650 struct efa_com_create_ah_params params = {};
1651 struct efa_ibv_create_ah_resp resp = {};
1652 struct efa_com_create_ah_result result;
1653 struct efa_ah *ah = to_eah(ibah);
1656 if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
1657 ibdev_dbg(&dev->ibdev,
1658 "Create address handle is not supported in atomic context\n");
1664 !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1665 ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
1670 memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
1671 sizeof(params.dest_addr));
1672 params.pdn = to_epd(ibah->pd)->pdn;
1673 err = efa_com_create_ah(&dev->edev, &params, &result);
1677 memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
1680 resp.efa_address_handle = result.ah;
1682 if (udata->outlen) {
1683 err = ib_copy_to_udata(udata, &resp,
1684 min(sizeof(resp), udata->outlen));
1686 ibdev_dbg(&dev->ibdev,
1687 "Failed to copy udata for create_ah response\n");
1688 goto err_destroy_ah;
1691 ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
1696 efa_ah_destroy(dev, ah);
1698 atomic64_inc(&dev->stats.sw_stats.create_ah_err);
1702 void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
1704 struct efa_dev *dev = to_edev(ibah->pd->device);
1705 struct efa_ah *ah = to_eah(ibah);
1707 ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
1709 if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
1710 ibdev_dbg(&dev->ibdev,
1711 "Destroy address handle is not supported in atomic context\n");
1715 efa_ah_destroy(dev, ah);
1718 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
1721 return IB_LINK_LAYER_UNSPECIFIED;