// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	/* DMA MRs cover the whole address space, so there is no range
	 * to check; all other types must fall inside [iova, iova+length).
	 */
	if (mr->type == RXE_MR_TYPE_DMA)
		return 0;

	if (iova < mr->iova || length > mr->length ||
	    iova > mr->iova + mr->length - length)
		return -EFAULT;

	return 0;
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
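
/*
 * Illustrative note (not driver code): with the layout above, the pool
 * index occupies bits 31:8 of the key and the random byte bits 7:0, so
 * e.g. pool index 0x12 with random byte 0xab yields lkey 0x12ab. Passing
 * -1 as last_key works because a u8 can never compare equal to
 * 0xffffffff, so the very first call accepts any random byte.
 */
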
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}
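
/*
 * Worked example (illustrative; assumes RXE_BUF_PER_MAP == 256):
 * registering num_buf = 1000 pages gives
 *	num_map = (1000 + 255) / 256 = 4 map chunks,
 *	max_buf = 4 * 256 = 1024 buffer slots,
 * so the last chunk has 24 unused slots.
 */
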
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}
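
/*
 * Illustrative caller sketch (hedged; modeled on how rxe_reg_user_mr()
 * in rxe_verbs.c is expected to drive this, with details elided):
 *
 *	mr = rxe_alloc(&rxe->mr_pool);
 *	rxe_add_index(mr);
 *	rxe_add_ref(pd);
 *	err = rxe_mr_init_user(pd, start, length, iova, access, mr);
 *	if (err) {
 *		rxe_drop_ref(pd);
 *		rxe_drop_index(mr);
 *		rxe_drop_ref(mr);
 *	}
 */
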
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		return err;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;
}
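
/*
 * Illustrative note (hedged): a fast-register MR starts out in
 * RXE_MR_STATE_FREE with both keys equal,
 *
 *	mr->ibmr.rkey == mr->ibmr.lkey	// both are index << 8 | random byte
 *
 * and only becomes RXE_MR_STATE_VALID once the consumer posts a
 * registration work request (IB_WR_REG_MR) supplying the page list,
 * which is handled elsewhere in the driver.
 */
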
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		/* Uniform page size: pure shift/mask arithmetic. */
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		/* Variable buffer sizes: walk the buffers linearly. */
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}
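
/*
 * Worked example (illustrative; assumes 4K pages, so page_shift == 12,
 * and RXE_BUF_PER_MAP == 256, so map_shift == 8 and map_mask == 0xff):
 * for a byte offset of 0x12345678 into the region,
 *	*offset_out = 0x678			(offset within the page)
 *	page index  = 0x12345678 >> 12 = 0x12345
 *	*n_out	    = 0x12345 & 0xff   = 0x45	(buf within the map chunk)
 *	*m_out	    = 0x12345 >> 8     = 0x123	(map chunk)
 */
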
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		/* DMA MRs have no map; the iova is the kernel vaddr. */
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}
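
/*
 * Illustrative use (hedged): the responder path relies on this for
 * atomic operations, which must land inside a single buffer, e.g.:
 *
 *	u64 *vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
 *	if (!vaddr)
 *		goto atomic_error;	// hypothetical caller reaction
 */
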
/* Copy data from a range (vaddr, vaddr+length-1) to or from an MR
 * object starting at iova. Compute the incremental crc32 if crcp is
 * not NULL. The caller must hold a reference to the MR.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir, u32 *crcp)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int err, bytes, m, i;
	size_t offset;
	u8 *va;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err)
		return -EFAULT;

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		/* Advance to the next buffer, wrapping to the next map
		 * chunk when this one is exhausted.
		 */
		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;
}
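
/*
 * Illustrative call (hedged): copying 'paylen' payload bytes from a
 * received packet into the MR at the requested iova would look like
 *
 *	err = rxe_mr_copy(mr, iova, payload_addr(pkt), paylen,
 *			  RXE_TO_MR_OBJ, &crc);
 *
 * where payload_addr() is the packet helper used elsewhere in the
 * driver.
 */
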
/* Copy data in or out of a wqe, i.e. an sg list,
 * under the control of a dma descriptor.
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum rxe_mr_copy_dir dir, u32 *crcp)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	int bytes;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			/* This sge is exhausted; move to the next one. */
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}
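
/*
 * Illustrative call (hedged): the requester path gathers 'paylen' bytes
 * of send payload out of the WQE's sg list into the packet roughly as
 *
 *	err = copy_data(qp->pd, 0, &wqe->dma, payload_addr(pkt),
 *			paylen, RXE_FROM_MR_OBJ, &crc);
 *
 * Note that dma->sge_offset and dma->resid are only written back on
 * success, so a failed copy leaves the descriptor unchanged.
 */
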
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}
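
/*
 * Illustrative note (hedged): this is copy_data() without the memcpy;
 * it only advances cur_sge/sge_offset and shrinks resid, so a caller
 * can seek past bytes of the descriptor it does not need to touch,
 * for example:
 *
 *	err = advance_dma_data(&wqe->dma, bytes_already_placed);  // hypothetical
 */
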
/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}
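
/*
 * Illustrative call (hedged): the responder validates the remote key
 * carried by an incoming RDMA WRITE roughly as
 *
 *	mr = lookup_mr(qp->pd, IB_ACCESS_REMOTE_WRITE, rkey,
 *		       RXE_LOOKUP_REMOTE);
 *
 * and must rxe_drop_ref(mr) once it is done with the mapping.
 */
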
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
		       __func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}
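
/*
 * Illustrative note (hedged): this backs both local invalidate work
 * requests (IB_WR_LOCAL_INV) and incoming Send-with-Invalidate; moving
 * the MR to RXE_MR_STATE_FREE makes subsequent lookup_mr() calls fail
 * until the MR is registered again.
 */
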
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}