drivers/misc/habanalabs/common/memory.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include <uapi/misc/habanalabs.h>
9 #include "habanalabs.h"
10 #include "../include/hw_ip/mmu/mmu_general.h"
11
12 #include <linux/uaccess.h>
13 #include <linux/slab.h>
14 #include <linux/genalloc.h>
15
16 #define HL_MMU_DEBUG    0
17
18 /*
19  * The va ranges in the context object contain a list of the available chunks
20  * of device virtual memory.
21  * There is one range for host allocations and one for DRAM allocations.
22  *
23  * On initialization, each range contains one chunk covering all of its
24  * available virtual range, which is half of the total device virtual range.
25  *
26  * On each mapping of physical pages, a suitable virtual range chunk (with a
27  * minimum size) is selected from the list. If the chunk size equals the
28  * requested size, the chunk is returned. Otherwise, the chunk is split into
29  * two chunks - one to return as the result and a remainder to stay in the list.
30  *
31  * On each unmapping of a virtual address, the relevant virtual chunk is
32  * returned to the list. The chunk is added to the list and, if its edges match
33  * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
34  * the chunks are merged.
35  *
36  * On finish, the list is checked to contain only one chunk covering the entire
37  * relevant virtual range (which is half of the device's total virtual range).
38  * If not (meaning not all mappings were unmapped), a warning is printed.
39  */
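
/*
 * Illustrative sketch (editor's addition, not part of the driver): assuming a
 * range that initially holds the single chunk [0x1000000, 0x2000000), mapping
 * 0x200000 bytes splits it into the returned chunk [0x1000000, 0x1200000) and
 * the remainder [0x1200000, 0x2000000) which stays in the list; unmapping that
 * address later re-adds the chunk and, since its edges touch the remainder,
 * the two are merged back into one chunk covering the whole range.
 */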
40
41 /*
42  * alloc_device_memory - allocate device memory
43  *
44  * @ctx                 : current context
45  * @args                : host parameters containing the requested size
46  * @ret_handle          : result handle
47  *
48  * This function does the following:
49  * - Allocate the requested size rounded up to 2MB pages
50  * - Return unique handle
51  */
52 static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
53                                 u32 *ret_handle)
54 {
55         struct hl_device *hdev = ctx->hdev;
56         struct hl_vm *vm = &hdev->vm;
57         struct hl_vm_phys_pg_pack *phys_pg_pack;
58         u64 paddr = 0, total_size, num_pgs, i;
59         u32 num_curr_pgs, page_size, page_shift;
60         int handle, rc;
61         bool contiguous;
62
63         num_curr_pgs = 0;
64         page_size = hdev->asic_prop.dram_page_size;
65         page_shift = __ffs(page_size);
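        /*
         * Round the requested size up to whole DRAM pages; e.g. with a 2MB
         * DRAM page size, __ffs() yields a shift of 21, so a 3MB request
         * becomes two pages (4MB total).
         */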
66         num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
67         total_size = num_pgs << page_shift;
68
69         if (!total_size) {
70                 dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
71                 return -EINVAL;
72         }
73
74         contiguous = args->flags & HL_MEM_CONTIGUOUS;
75
76         if (contiguous) {
77                 paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
78                 if (!paddr) {
79                         dev_err(hdev->dev,
80                                 "failed to allocate %llu huge contiguous pages\n",
81                                 num_pgs);
82                         return -ENOMEM;
83                 }
84         }
85
86         phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
87         if (!phys_pg_pack) {
88                 rc = -ENOMEM;
89                 goto pages_pack_err;
90         }
91
92         phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
93         phys_pg_pack->asid = ctx->asid;
94         phys_pg_pack->npages = num_pgs;
95         phys_pg_pack->page_size = page_size;
96         phys_pg_pack->total_size = total_size;
97         phys_pg_pack->flags = args->flags;
98         phys_pg_pack->contiguous = contiguous;
99
100         phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
101         if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
102                 rc = -ENOMEM;
103                 goto pages_arr_err;
104         }
105
106         if (phys_pg_pack->contiguous) {
107                 for (i = 0 ; i < num_pgs ; i++)
108                         phys_pg_pack->pages[i] = paddr + i * page_size;
109         } else {
110                 for (i = 0 ; i < num_pgs ; i++) {
111                         phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
112                                                         vm->dram_pg_pool,
113                                                         page_size);
114                         if (!phys_pg_pack->pages[i]) {
115                                 dev_err(hdev->dev,
116                                         "Failed to allocate device memory (out of memory)\n");
117                                 rc = -ENOMEM;
118                                 goto page_err;
119                         }
120
121                         num_curr_pgs++;
122                 }
123         }
124
125         spin_lock(&vm->idr_lock);
126         handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
127                                 GFP_ATOMIC);
128         spin_unlock(&vm->idr_lock);
129
130         if (handle < 0) {
131                 dev_err(hdev->dev, "Failed to get handle for page\n");
132                 rc = -EFAULT;
133                 goto idr_err;
134         }
135
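        /*
         * Each allocated page takes a reference on the DRAM page pool, so the
         * pool (and the handles idr) cannot be destroyed while pages from it
         * are still in use; free_phys_pg_pack() drops these references.
         */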
136         for (i = 0 ; i < num_pgs ; i++)
137                 kref_get(&vm->dram_pg_pool_refcount);
138
139         phys_pg_pack->handle = handle;
140
141         atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
142         atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
143
144         *ret_handle = handle;
145
146         return 0;
147
148 idr_err:
149 page_err:
150         if (!phys_pg_pack->contiguous)
151                 for (i = 0 ; i < num_curr_pgs ; i++)
152                         gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
153                                         page_size);
154
155         kvfree(phys_pg_pack->pages);
156 pages_arr_err:
157         kfree(phys_pg_pack);
158 pages_pack_err:
159         if (contiguous)
160                 gen_pool_free(vm->dram_pg_pool, paddr, total_size);
161
162         return rc;
163 }
164
165 /*
166  * dma_map_host_va - DMA mapping of the given host virtual address.
167  * @hdev: habanalabs device structure
168  * @addr: the host virtual address of the memory area
169  * @size: the size of the memory area
170  * @p_userptr: pointer to result userptr structure
171  *
172  * This function does the following:
173  * - Allocate userptr structure
174  * - Pin the given host memory using the userptr structure
175  * - Perform DMA mapping to have the DMA addresses of the pages
176  */
177 static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
178                                 struct hl_userptr **p_userptr)
179 {
180         struct hl_userptr *userptr;
181         int rc;
182
183         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
184         if (!userptr) {
185                 rc = -ENOMEM;
186                 goto userptr_err;
187         }
188
189         rc = hl_pin_host_memory(hdev, addr, size, userptr);
190         if (rc) {
191                 dev_err(hdev->dev, "Failed to pin host memory\n");
192                 goto pin_err;
193         }
194
195         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
196                                         userptr->sgt->nents, DMA_BIDIRECTIONAL);
197         if (rc) {
198                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
199                 goto dma_map_err;
200         }
201
202         userptr->dma_mapped = true;
203         userptr->dir = DMA_BIDIRECTIONAL;
204         userptr->vm_type = VM_TYPE_USERPTR;
205
206         *p_userptr = userptr;
207
208         return 0;
209
210 dma_map_err:
211         hl_unpin_host_memory(hdev, userptr);
212 pin_err:
213         kfree(userptr);
214 userptr_err:
215
216         return rc;
217 }
218
219 /*
220  * dma_unmap_host_va - DMA unmapping of the given host virtual address.
221  * @hdev: habanalabs device structure
222  * @userptr: userptr to free
223  *
224  * This function does the following:
225  * - Unpins the physical pages
226  * - Frees the userptr structure
227  */
228 static void dma_unmap_host_va(struct hl_device *hdev,
229                                 struct hl_userptr *userptr)
230 {
231         hl_unpin_host_memory(hdev, userptr);
232         kfree(userptr);
233 }
234
235 /*
236  * dram_pg_pool_do_release - free DRAM pages pool
237  *
238  * @ref                 : pointer to reference object
239  *
240  * This function does the following:
241  * - Frees the idr structure of physical pages handles
242  * - Frees the generic pool of DRAM physical pages
243  */
244 static void dram_pg_pool_do_release(struct kref *ref)
245 {
246         struct hl_vm *vm = container_of(ref, struct hl_vm,
247                         dram_pg_pool_refcount);
248
249         /*
250          * free the idr here, as only at this point do we know for sure that
251          * there are no allocated physical pages and hence no handles in use
252          */
253         idr_destroy(&vm->phys_pg_pack_handles);
254         gen_pool_destroy(vm->dram_pg_pool);
255 }
256
257 /*
258  * free_phys_pg_pack - free physical page pack
259  * @hdev: habanalabs device structure
260  * @phys_pg_pack: physical page pack to free
261  *
262  * This function does the following:
263  * - For DRAM memory only, iterate over the pack and free each physical block
264  *   structure by returning it to the general pool
265  * - Free the hl_vm_phys_pg_pack structure
266  */
267 static void free_phys_pg_pack(struct hl_device *hdev,
268                                 struct hl_vm_phys_pg_pack *phys_pg_pack)
269 {
270         struct hl_vm *vm = &hdev->vm;
271         u64 i;
272
273         if (!phys_pg_pack->created_from_userptr) {
274                 if (phys_pg_pack->contiguous) {
275                         gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
276                                         phys_pg_pack->total_size);
277
278                         for (i = 0; i < phys_pg_pack->npages ; i++)
279                                 kref_put(&vm->dram_pg_pool_refcount,
280                                         dram_pg_pool_do_release);
281                 } else {
282                         for (i = 0 ; i < phys_pg_pack->npages ; i++) {
283                                 gen_pool_free(vm->dram_pg_pool,
284                                                 phys_pg_pack->pages[i],
285                                                 phys_pg_pack->page_size);
286                                 kref_put(&vm->dram_pg_pool_refcount,
287                                         dram_pg_pool_do_release);
288                         }
289                 }
290         }
291
292         kvfree(phys_pg_pack->pages);
293         kfree(phys_pg_pack);
294 }
295
296 /*
297  * free_device_memory - free device memory
298  *
299  * @ctx                  : current context
300  * @handle              : handle of the memory chunk to free
301  *
302  * This function does the following:
303  * - Free the device memory related to the given handle
304  */
305 static int free_device_memory(struct hl_ctx *ctx, u32 handle)
306 {
307         struct hl_device *hdev = ctx->hdev;
308         struct hl_vm *vm = &hdev->vm;
309         struct hl_vm_phys_pg_pack *phys_pg_pack;
310
311         spin_lock(&vm->idr_lock);
312         phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
313         if (phys_pg_pack) {
314                 if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
315                         dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
316                                 handle);
317                         spin_unlock(&vm->idr_lock);
318                         return -EINVAL;
319                 }
320
321                 /*
322                  * must remove the handle from the idr before freeing the
323                  * physical pages, as the pool's refcount is also what
324                  * triggers the idr destruction
325                  */
326                 idr_remove(&vm->phys_pg_pack_handles, handle);
327                 spin_unlock(&vm->idr_lock);
328
329                 atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
330                 atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
331
332                 free_phys_pg_pack(hdev, phys_pg_pack);
333         } else {
334                 spin_unlock(&vm->idr_lock);
335                 dev_err(hdev->dev,
336                         "free device memory failed, no match for handle %u\n",
337                         handle);
338                 return -EINVAL;
339         }
340
341         return 0;
342 }
343
344 /*
345  * clear_va_list_locked - free virtual addresses list
346  *
347  * @hdev                : habanalabs device structure
348  * @va_list             : list of virtual addresses to free
349  *
350  * This function does the following:
351  * - Iterate over the list and free each virtual address block
352  *
353  * This function should be called only while the va_list lock is held
354  */
355 static void clear_va_list_locked(struct hl_device *hdev,
356                 struct list_head *va_list)
357 {
358         struct hl_vm_va_block *va_block, *tmp;
359
360         list_for_each_entry_safe(va_block, tmp, va_list, node) {
361                 list_del(&va_block->node);
362                 kfree(va_block);
363         }
364 }
365
366 /*
367  * print_va_list_locked - print virtual addresses list
368  *
369  * @hdev                : habanalabs device structure
370  * @va_list             : list of virtual addresses to print
371  *
372  * This function does the following:
373  * - Iterate over the list and print each virtual address block
374  *
375  * This function should be called only while the va_list lock is held
376  */
377 static void print_va_list_locked(struct hl_device *hdev,
378                 struct list_head *va_list)
379 {
380 #if HL_MMU_DEBUG
381         struct hl_vm_va_block *va_block;
382
383         dev_dbg(hdev->dev, "print va list:\n");
384
385         list_for_each_entry(va_block, va_list, node)
386                 dev_dbg(hdev->dev,
387                         "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
388                         va_block->start, va_block->end, va_block->size);
389 #endif
390 }
391
392 /*
393  * merge_va_blocks_locked - merge a virtual block if possible
394  *
395  * @hdev                : pointer to the habanalabs device structure
396  * @va_list             : pointer to the virtual addresses block list
397  * @va_block            : virtual block to merge with adjacent blocks
398  *
399  * This function does the following:
400  * - Merge the given block with the adjacent blocks if their virtual ranges
401  *   create a contiguous virtual range
402  *
403  * This function should be called only while the va_list lock is held
404  */
405 static void merge_va_blocks_locked(struct hl_device *hdev,
406                 struct list_head *va_list, struct hl_vm_va_block *va_block)
407 {
408         struct hl_vm_va_block *prev, *next;
409
410         prev = list_prev_entry(va_block, node);
411         if (&prev->node != va_list && prev->end + 1 == va_block->start) {
412                 prev->end = va_block->end;
413                 prev->size = prev->end - prev->start;
414                 list_del(&va_block->node);
415                 kfree(va_block);
416                 va_block = prev;
417         }
418
419         next = list_next_entry(va_block, node);
420         if (&next->node != va_list && va_block->end + 1 == next->start) {
421                 next->start = va_block->start;
422                 next->size = next->end - next->start;
423                 list_del(&va_block->node);
424                 kfree(va_block);
425         }
426 }
427
428 /*
429  * add_va_block_locked - add a virtual block to the virtual addresses list
430  *
431  * @hdev                : pointer to the habanalabs device structure
432  * @va_list             : pointer to the virtual addresses block list
433  * @start               : start virtual address
434  * @end                 : end virtual address
435  *
436  * This function does the following:
437  * - Add the given block to the virtual blocks list and merge with other
438  * blocks if a contiguous virtual block can be created
439  *
440  * This function should be called only while the va_list lock is held
441  */
442 static int add_va_block_locked(struct hl_device *hdev,
443                 struct list_head *va_list, u64 start, u64 end)
444 {
445         struct hl_vm_va_block *va_block, *res = NULL;
446         u64 size = end - start;
447
448         print_va_list_locked(hdev, va_list);
449
450         list_for_each_entry(va_block, va_list, node) {
451                 /* TODO: remove once the code has matured */
452                 if (hl_mem_area_crosses_range(start, size, va_block->start,
453                                 va_block->end)) {
454                         dev_err(hdev->dev,
455                                 "block crossing ranges at start 0x%llx, end 0x%llx\n",
456                                 va_block->start, va_block->end);
457                         return -EINVAL;
458                 }
459
460                 if (va_block->end < start)
461                         res = va_block;
462         }
463
464         va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
465         if (!va_block)
466                 return -ENOMEM;
467
468         va_block->start = start;
469         va_block->end = end;
470         va_block->size = size;
471
472         if (!res)
473                 list_add(&va_block->node, va_list);
474         else
475                 list_add(&va_block->node, &res->node);
476
477         merge_va_blocks_locked(hdev, va_list, va_block);
478
479         print_va_list_locked(hdev, va_list);
480
481         return 0;
482 }
483
484 /*
485  * add_va_block - wrapper for add_va_block_locked
486  *
487  * @hdev                : pointer to the habanalabs device structure
488  * @va_range            : pointer to the virtual addresses range
489  * @start               : start virtual address
490  * @end                 : end virtual address
491  *
492  * This function does the following:
493  * - Takes the range lock and calls add_va_block_locked
494  */
495 static inline int add_va_block(struct hl_device *hdev,
496                 struct hl_va_range *va_range, u64 start, u64 end)
497 {
498         int rc;
499
500         mutex_lock(&va_range->lock);
501         rc = add_va_block_locked(hdev, &va_range->list, start, end);
502         mutex_unlock(&va_range->lock);
503
504         return rc;
505 }
506
507 /*
508  * get_va_block - get a virtual block with the requested size
509  *
510  * @hdev            : pointer to the habanalabs device structure
511  * @va_range        : pointer to the virtual addresses range
512  * @size            : requested block size
513  * @hint_addr       : hint for the requested address given by the user
514  * @is_userptr      : flag indicating whether the memory is host (userptr) or DRAM
515  *
516  * This function does the following:
517  * - Iterate over the virtual block list to find a suitable virtual block for the
518  *   requested size
519  * - Reserve the requested block and update the list
520  * - Return the start address of the virtual block
521  */
522 static u64 get_va_block(struct hl_device *hdev,
523                         struct hl_va_range *va_range, u64 size, u64 hint_addr,
524                         bool is_userptr)
525 {
526         struct hl_vm_va_block *va_block, *new_va_block = NULL;
527         u64 valid_start, valid_size, prev_start, prev_end, page_mask,
528                 res_valid_start = 0, res_valid_size = 0;
529         u32 page_size;
530         bool add_prev = false;
531
532         if (is_userptr)
533                 /*
534                  * We cannot know if the user allocated memory with huge pages
535                  * or not, hence we continue with the biggest possible
536                  * granularity.
537                  */
538                 page_size = hdev->asic_prop.pmmu_huge.page_size;
539         else
540                 page_size = hdev->asic_prop.dmmu.page_size;
541
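        /*
         * Mask used below to align candidate addresses down to the MMU page
         * size, e.g. a 2MB page size gives page_mask = ~0x1FFFFF.
         */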
542         page_mask = ~((u64)page_size - 1);
543
544         mutex_lock(&va_range->lock);
545
546         print_va_list_locked(hdev, &va_range->list);
547
548         list_for_each_entry(va_block, &va_range->list, node) {
549                 /* calc the first possible aligned addr */
550                 valid_start = va_block->start;
551
552                 if (valid_start & (page_size - 1)) {
553                         valid_start &= page_mask;
554                         valid_start += page_size;
555                         if (valid_start > va_block->end)
556                                 continue;
557                 }
558
559                 valid_size = va_block->end - valid_start;
560
561                 if (valid_size >= size &&
562                         (!new_va_block || valid_size < res_valid_size)) {
563                         new_va_block = va_block;
564                         res_valid_start = valid_start;
565                         res_valid_size = valid_size;
566                 }
567
568                 if (hint_addr && hint_addr >= valid_start &&
569                                 ((hint_addr + size) <= va_block->end)) {
570                         new_va_block = va_block;
571                         res_valid_start = hint_addr;
572                         res_valid_size = valid_size;
573                         break;
574                 }
575         }
576
577         if (!new_va_block) {
578                 dev_err(hdev->dev, "no available va block for size %llu\n",
579                                 size);
580                 goto out;
581         }
582
583         if (res_valid_start > new_va_block->start) {
584                 prev_start = new_va_block->start;
585                 prev_end = res_valid_start - 1;
586
587                 new_va_block->start = res_valid_start;
588                 new_va_block->size = res_valid_size;
589
590                 add_prev = true;
591         }
592
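        /*
         * If the chosen block is larger than needed, shrink it in place so the
         * leftover stays in the list; otherwise the block is consumed entirely
         * and removed.
         */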
593         if (new_va_block->size > size) {
594                 new_va_block->start += size;
595                 new_va_block->size = new_va_block->end - new_va_block->start;
596         } else {
597                 list_del(&new_va_block->node);
598                 kfree(new_va_block);
599         }
600
601         if (add_prev)
602                 add_va_block_locked(hdev, &va_range->list, prev_start,
603                                 prev_end);
604
605         print_va_list_locked(hdev, &va_range->list);
606 out:
607         mutex_unlock(&va_range->lock);
608
609         return res_valid_start;
610 }
611
612 /*
613  * get_sg_info - get number of pages and the DMA address from SG list
614  *
615  * @sg                 : the SG entry
616  * @dma_addr           : pointer to DMA address to return
617  *
618  * Calculate the number of consecutive pages described by the SG entry. Take
619  * the offset of the address within its first page, add the length, and round
620  * up to the number of pages needed.
621  */
622 static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
623 {
624         *dma_addr = sg_dma_address(sg);
625
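        /*
         * Number of PAGE_SIZE pages spanned by this SG entry: the offset of
         * the DMA address within its first page plus the entry length, rounded
         * up to a whole page.
         */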
626         return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
627                         (PAGE_SIZE - 1)) >> PAGE_SHIFT;
628 }
629
630 /*
631  * init_phys_pg_pack_from_userptr - initialize physical page pack from host
632  *                                  memory
633  * @ctx: current context
634  * @userptr: userptr to initialize from
635  * @pphys_pg_pack: result pointer
636  *
637  * This function does the following:
638  * - Create a physical page pack from the already-pinned physical pages that
639  *   back the given userptr
640  * - Decide whether the pack can be mapped with huge pages
641  */
642 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
643                                 struct hl_userptr *userptr,
644                                 struct hl_vm_phys_pg_pack **pphys_pg_pack)
645 {
646         struct hl_vm_phys_pg_pack *phys_pg_pack;
647         struct scatterlist *sg;
648         dma_addr_t dma_addr;
649         u64 page_mask, total_npages;
650         u32 npages, page_size = PAGE_SIZE,
651                 huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
652         bool first = true, is_huge_page_opt = true;
653         int rc, i, j;
654         u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
655
656         phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
657         if (!phys_pg_pack)
658                 return -ENOMEM;
659
660         phys_pg_pack->vm_type = userptr->vm_type;
661         phys_pg_pack->created_from_userptr = true;
662         phys_pg_pack->asid = ctx->asid;
663         atomic_set(&phys_pg_pack->mapping_cnt, 1);
664
665         /* Only if all dma_addrs are aligned to 2MB and their
666          * sizes are at least 2MB can we use huge page mapping.
667          * We limit the 2MB optimization to this condition,
668          * since later on we acquire the related VA range as one
669          * consecutive block.
670          */
671         total_npages = 0;
672         for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
673                 npages = get_sg_info(sg, &dma_addr);
674
675                 total_npages += npages;
676
677                 if ((npages % pgs_in_huge_page) ||
678                                         (dma_addr & (huge_page_size - 1)))
679                         is_huge_page_opt = false;
680         }
681
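        /*
         * When the huge page optimization applies, total_npages was counted in
         * PAGE_SIZE units and is converted here to huge-page units, e.g. 512
         * 4KB pages per 2MB huge page.
         */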
682         if (is_huge_page_opt) {
683                 page_size = huge_page_size;
684                 do_div(total_npages, pgs_in_huge_page);
685         }
686
687         page_mask = ~(((u64) page_size) - 1);
688
689         phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
690                                                 GFP_KERNEL);
691         if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
692                 rc = -ENOMEM;
693                 goto page_pack_arr_mem_err;
694         }
695
696         phys_pg_pack->npages = total_npages;
697         phys_pg_pack->page_size = page_size;
698         phys_pg_pack->total_size = total_npages * page_size;
699
700         j = 0;
701         for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
702                 npages = get_sg_info(sg, &dma_addr);
703
704                 /* align down to physical page size and save the offset */
705                 if (first) {
706                         first = false;
707                         phys_pg_pack->offset = dma_addr & (page_size - 1);
708                         dma_addr &= page_mask;
709                 }
710
711                 while (npages) {
712                         phys_pg_pack->pages[j++] = dma_addr;
713                         dma_addr += page_size;
714
715                         if (is_huge_page_opt)
716                                 npages -= pgs_in_huge_page;
717                         else
718                                 npages--;
719                 }
720         }
721
722         *pphys_pg_pack = phys_pg_pack;
723
724         return 0;
725
726 page_pack_arr_mem_err:
727         kfree(phys_pg_pack);
728
729         return rc;
730 }
731
732 /*
733  * map_phys_pg_pack - maps the physical page pack.
734  * @ctx: current context
735  * @vaddr: start address of the virtual area to map from
736  * @phys_pg_pack: the pack of physical pages to map to
737  *
738  * This function does the following:
739  * - Maps each chunk of virtual memory to its matching physical chunk
740  * - Unmaps any already-mapped pages if one of the mappings fails
741  * - Returns 0 on success, error code otherwise
742  */
743 static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
744                                 struct hl_vm_phys_pg_pack *phys_pg_pack)
745 {
746         struct hl_device *hdev = ctx->hdev;
747         u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
748         u32 page_size = phys_pg_pack->page_size;
749         int rc = 0;
750
751         for (i = 0 ; i < phys_pg_pack->npages ; i++) {
752                 paddr = phys_pg_pack->pages[i];
753
754                 rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
755                                 (i + 1) == phys_pg_pack->npages);
756                 if (rc) {
757                         dev_err(hdev->dev,
758                                 "map failed for handle %u, npages: %llu, mapped: %llu",
759                                 phys_pg_pack->handle, phys_pg_pack->npages,
760                                 mapped_pg_cnt);
761                         goto err;
762                 }
763
764                 mapped_pg_cnt++;
765                 next_vaddr += page_size;
766         }
767
768         return 0;
769
770 err:
771         next_vaddr = vaddr;
772         for (i = 0 ; i < mapped_pg_cnt ; i++) {
773                 if (hl_mmu_unmap(ctx, next_vaddr, page_size,
774                                         (i + 1) == mapped_pg_cnt))
775                         dev_warn_ratelimited(hdev->dev,
776                                 "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
777                                         phys_pg_pack->handle, next_vaddr,
778                                         phys_pg_pack->pages[i], page_size);
779
780                 next_vaddr += page_size;
781         }
782
783         return rc;
784 }
785
786 /*
787  * unmap_phys_pg_pack - unmaps the physical page pack
788  * @ctx: current context
789  * @vaddr: start address of the virtual area to unmap
790  * @phys_pg_pack: the pack of physical pages to unmap
791  */
792 static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
793                                 struct hl_vm_phys_pg_pack *phys_pg_pack)
794 {
795         struct hl_device *hdev = ctx->hdev;
796         u64 next_vaddr, i;
797         u32 page_size;
798
799         page_size = phys_pg_pack->page_size;
800         next_vaddr = vaddr;
801
802         for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
803                 if (hl_mmu_unmap(ctx, next_vaddr, page_size,
804                                        (i + 1) == phys_pg_pack->npages))
805                         dev_warn_ratelimited(hdev->dev,
806                         "unmap failed for vaddr: 0x%llx\n", next_vaddr);
807
808                 /*
809                  * unmapping on Palladium can be really long, so avoid a CPU
810                  * soft lockup bug by sleeping a little between unmapping pages
811                  */
812                 if (hdev->pldm)
813                         usleep_range(500, 1000);
814         }
815 }
816
817 static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
818                                 u64 *paddr)
819 {
820         struct hl_device *hdev = ctx->hdev;
821         struct hl_vm *vm = &hdev->vm;
822         struct hl_vm_phys_pg_pack *phys_pg_pack;
823         u32 handle;
824
825         handle = lower_32_bits(args->map_device.handle);
826         spin_lock(&vm->idr_lock);
827         phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
828         if (!phys_pg_pack) {
829                 spin_unlock(&vm->idr_lock);
830                 dev_err(hdev->dev, "no match for handle %u\n", handle);
831                 return -EINVAL;
832         }
833
834         *paddr = phys_pg_pack->pages[0];
835
836         spin_unlock(&vm->idr_lock);
837
838         return 0;
839 }
840
841 /*
842  * map_device_va - map the given memory
843  *
844  * @ctx          : current context
845  * @args         : host parameters with handle/host virtual address
846  * @device_addr  : pointer to result device virtual address
847  *
848  * This function does the following:
849  * - If given a physical device memory handle, map to a device virtual block
850  *   and return the start address of this block
851  * - If given a host virtual address and size, find the related physical pages,
852  *   map a device virtual block to these pages and return the start address of
853  *   this block
854  */
855 static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
856                 u64 *device_addr)
857 {
858         struct hl_device *hdev = ctx->hdev;
859         struct hl_vm *vm = &hdev->vm;
860         struct hl_vm_phys_pg_pack *phys_pg_pack;
861         struct hl_userptr *userptr = NULL;
862         struct hl_vm_hash_node *hnode;
863         struct hl_va_range *va_range;
864         enum vm_type_t *vm_type;
865         u64 ret_vaddr, hint_addr;
866         u32 handle = 0;
867         int rc;
868         bool is_userptr = args->flags & HL_MEM_USERPTR;
869
870         /* Assume failure */
871         *device_addr = 0;
872
873         if (is_userptr) {
874                 u64 addr = args->map_host.host_virt_addr,
875                         size = args->map_host.mem_size;
876
877                 rc = dma_map_host_va(hdev, addr, size, &userptr);
878                 if (rc) {
879                         dev_err(hdev->dev, "failed to get userptr from va\n");
880                         return rc;
881                 }
882
883                 rc = init_phys_pg_pack_from_userptr(ctx, userptr,
884                                 &phys_pg_pack);
885                 if (rc) {
886                         dev_err(hdev->dev,
887                                 "unable to init page pack for vaddr 0x%llx\n",
888                                 addr);
889                         goto init_page_pack_err;
890                 }
891
892                 vm_type = (enum vm_type_t *) userptr;
893                 hint_addr = args->map_host.hint_addr;
894                 handle = phys_pg_pack->handle;
895         } else {
896                 handle = lower_32_bits(args->map_device.handle);
897
898                 spin_lock(&vm->idr_lock);
899                 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
900                 if (!phys_pg_pack) {
901                         spin_unlock(&vm->idr_lock);
902                         dev_err(hdev->dev,
903                                 "no match for handle %u\n", handle);
904                         return -EINVAL;
905                 }
906
907                 /* increment now to avoid freeing device memory while mapping */
908                 atomic_inc(&phys_pg_pack->mapping_cnt);
909
910                 spin_unlock(&vm->idr_lock);
911
912                 vm_type = (enum vm_type_t *) phys_pg_pack;
913
914                 hint_addr = args->map_device.hint_addr;
915         }
916
917         /*
918          * relevant for mapping device physical memory only, as host memory is
919          * implicitly shared
920          */
921         if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
922                         phys_pg_pack->asid != ctx->asid) {
923                 dev_err(hdev->dev,
924                         "Failed to map memory, handle %u is not shared\n",
925                         handle);
926                 rc = -EPERM;
927                 goto shared_err;
928         }
929
930         hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
931         if (!hnode) {
932                 rc = -ENOMEM;
933                 goto hnode_err;
934         }
935
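        /*
         * Choose the VA range to allocate from: host mappings use the regular
         * or huge-page host range depending on the pack's page size, while
         * device memory handles always use the DRAM range.
         */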
936         if (is_userptr)
937                 if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
938                         va_range = ctx->host_va_range;
939                 else
940                         va_range = ctx->host_huge_va_range;
941         else
942                 va_range = ctx->dram_va_range;
943
944         ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
945                                         hint_addr, is_userptr);
946         if (!ret_vaddr) {
947                 dev_err(hdev->dev, "no available va block for handle %u\n",
948                                 handle);
949                 rc = -ENOMEM;
950                 goto va_block_err;
951         }
952
953         mutex_lock(&ctx->mmu_lock);
954
955         rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
956         if (rc) {
957                 mutex_unlock(&ctx->mmu_lock);
958                 dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
959                                 handle);
960                 goto map_err;
961         }
962
963         rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
964
965         mutex_unlock(&ctx->mmu_lock);
966
967         if (rc) {
968                 dev_err(hdev->dev,
969                         "mapping handle %u failed due to MMU cache invalidation\n",
970                         handle);
971                 goto map_err;
972         }
973
974         ret_vaddr += phys_pg_pack->offset;
975
976         hnode->ptr = vm_type;
977         hnode->vaddr = ret_vaddr;
978
979         mutex_lock(&ctx->mem_hash_lock);
980         hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
981         mutex_unlock(&ctx->mem_hash_lock);
982
983         *device_addr = ret_vaddr;
984
985         if (is_userptr)
986                 free_phys_pg_pack(hdev, phys_pg_pack);
987
988         return 0;
989
990 map_err:
991         if (add_va_block(hdev, va_range, ret_vaddr,
992                                 ret_vaddr + phys_pg_pack->total_size - 1))
993                 dev_warn(hdev->dev,
994                         "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
995                                 handle, ret_vaddr);
996
997 va_block_err:
998         kfree(hnode);
999 hnode_err:
1000 shared_err:
1001         atomic_dec(&phys_pg_pack->mapping_cnt);
1002         if (is_userptr)
1003                 free_phys_pg_pack(hdev, phys_pg_pack);
1004 init_page_pack_err:
1005         if (is_userptr)
1006                 dma_unmap_host_va(hdev, userptr);
1007
1008         return rc;
1009 }
1010
1011 /*
1012  * unmap_device_va      - unmap the given device virtual address
1013  *
1014  * @ctx                 : current context
1015  * @vaddr               : device virtual address to unmap
1016  * @ctx_free            : true if in context free flow, false otherwise.
1017  *
1018  * This function does the following:
1019  * - Unmap the physical pages related to the given virtual address
1020  * - Return the device virtual block to the virtual block list
1021  */
1022 static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
1023 {
1024         struct hl_device *hdev = ctx->hdev;
1025         struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
1026         struct hl_vm_hash_node *hnode = NULL;
1027         struct hl_userptr *userptr = NULL;
1028         struct hl_va_range *va_range;
1029         enum vm_type_t *vm_type;
1030         bool is_userptr;
1031         int rc = 0;
1032
1033         /* protect against double entry */
1034         mutex_lock(&ctx->mem_hash_lock);
1035         hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
1036                 if (vaddr == hnode->vaddr)
1037                         break;
1038
1039         if (!hnode) {
1040                 mutex_unlock(&ctx->mem_hash_lock);
1041                 dev_err(hdev->dev,
1042                         "unmap failed, no mem hnode for vaddr 0x%llx\n",
1043                         vaddr);
1044                 return -EINVAL;
1045         }
1046
1047         hash_del(&hnode->node);
1048         mutex_unlock(&ctx->mem_hash_lock);
1049
1050         vm_type = hnode->ptr;
1051
1052         if (*vm_type == VM_TYPE_USERPTR) {
1053                 is_userptr = true;
1054                 userptr = hnode->ptr;
1055                 rc = init_phys_pg_pack_from_userptr(ctx, userptr,
1056                                                         &phys_pg_pack);
1057                 if (rc) {
1058                         dev_err(hdev->dev,
1059                                 "unable to init page pack for vaddr 0x%llx\n",
1060                                 vaddr);
1061                         goto vm_type_err;
1062                 }
1063
1064                 if (phys_pg_pack->page_size ==
1065                                         hdev->asic_prop.pmmu.page_size)
1066                         va_range = ctx->host_va_range;
1067                 else
1068                         va_range = ctx->host_huge_va_range;
1069         } else if (*vm_type == VM_TYPE_PHYS_PACK) {
1070                 is_userptr = false;
1071                 va_range = ctx->dram_va_range;
1072                 phys_pg_pack = hnode->ptr;
1073         } else {
1074                 dev_warn(hdev->dev,
1075                         "unmap failed, unknown vm desc for vaddr 0x%llx\n",
1076                                 vaddr);
1077                 rc = -EFAULT;
1078                 goto vm_type_err;
1079         }
1080
1081         if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
1082                 dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
1083                 rc = -EINVAL;
1084                 goto mapping_cnt_err;
1085         }
1086
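        /*
         * map_device_va() returns the block VA plus the in-page offset; align
         * the address back down to the pack's page size before unmapping.
         */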
1087         vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
1088
1089         mutex_lock(&ctx->mmu_lock);
1090
1091         unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
1092
1093         /*
1094          * During context free this function is called in a loop to clean all
1095          * the context mappings. Hence the cache invalidation can be called once
1096          * at the loop end rather than for each iteration
1097          */
1098         if (!ctx_free)
1099                 rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
1100                                                                 *vm_type);
1101
1102         mutex_unlock(&ctx->mmu_lock);
1103
1104         /*
1105          * If the context is closing we don't need to check for the MMU cache
1106          * invalidation return code and update the VA free list as in this flow
1107          * we invalidate the MMU cache outside of this unmap function and the VA
1108          * free list will be freed anyway.
1109          */
1110         if (!ctx_free) {
1111                 int tmp_rc;
1112
1113                 if (rc)
1114                         dev_err(hdev->dev,
1115                                 "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
1116                                 vaddr);
1117
1118                 tmp_rc = add_va_block(hdev, va_range, vaddr,
1119                                         vaddr + phys_pg_pack->total_size - 1);
1120                 if (tmp_rc) {
1121                         dev_warn(hdev->dev,
1122                                         "add va block failed for vaddr: 0x%llx\n",
1123                                         vaddr);
1124                         if (!rc)
1125                                 rc = tmp_rc;
1126                 }
1127         }
1128
1129         atomic_dec(&phys_pg_pack->mapping_cnt);
1130         kfree(hnode);
1131
1132         if (is_userptr) {
1133                 free_phys_pg_pack(hdev, phys_pg_pack);
1134                 dma_unmap_host_va(hdev, userptr);
1135         }
1136
1137         return rc;
1138
1139 mapping_cnt_err:
1140         if (is_userptr)
1141                 free_phys_pg_pack(hdev, phys_pg_pack);
1142 vm_type_err:
1143         mutex_lock(&ctx->mem_hash_lock);
1144         hash_add(ctx->mem_hash, &hnode->node, vaddr);
1145         mutex_unlock(&ctx->mem_hash_lock);
1146
1147         return rc;
1148 }
1149
1150 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
1151 {
1152         struct hl_device *hdev = hpriv->hdev;
1153         struct hl_ctx *ctx = hpriv->ctx;
1154         u64 device_addr = 0;
1155         u32 handle = 0;
1156         int rc;
1157
1158         switch (args->in.op) {
1159         case HL_MEM_OP_ALLOC:
1160                 if (args->in.alloc.mem_size == 0) {
1161                         dev_err(hdev->dev,
1162                                 "alloc size must be larger than 0\n");
1163                         rc = -EINVAL;
1164                         goto out;
1165                 }
1166
1167                 /* Force contiguous as there are no real MMU
1168                  * translations to overcome physical memory gaps
1169                  */
1170                 args->in.flags |= HL_MEM_CONTIGUOUS;
1171                 rc = alloc_device_memory(ctx, &args->in, &handle);
1172
1173                 memset(args, 0, sizeof(*args));
1174                 args->out.handle = (__u64) handle;
1175                 break;
1176
1177         case HL_MEM_OP_FREE:
1178                 rc = free_device_memory(ctx, args->in.free.handle);
1179                 break;
1180
1181         case HL_MEM_OP_MAP:
1182                 if (args->in.flags & HL_MEM_USERPTR) {
1183                         device_addr = args->in.map_host.host_virt_addr;
1184                         rc = 0;
1185                 } else {
1186                         rc = get_paddr_from_handle(ctx, &args->in,
1187                                         &device_addr);
1188                 }
1189
1190                 memset(args, 0, sizeof(*args));
1191                 args->out.device_virt_addr = device_addr;
1192                 break;
1193
1194         case HL_MEM_OP_UNMAP:
1195                 rc = 0;
1196                 break;
1197
1198         default:
1199                 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
1200                 rc = -ENOTTY;
1201                 break;
1202         }
1203
1204 out:
1205         return rc;
1206 }
1207
1208 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
1209 {
1210         union hl_mem_args *args = data;
1211         struct hl_device *hdev = hpriv->hdev;
1212         struct hl_ctx *ctx = hpriv->ctx;
1213         u64 device_addr = 0;
1214         u32 handle = 0;
1215         int rc;
1216
1217         if (hl_device_disabled_or_in_reset(hdev)) {
1218                 dev_warn_ratelimited(hdev->dev,
1219                         "Device is %s. Can't execute MEMORY IOCTL\n",
1220                         atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
1221                 return -EBUSY;
1222         }
1223
1224         if (!hdev->mmu_enable)
1225                 return mem_ioctl_no_mmu(hpriv, args);
1226
1227         switch (args->in.op) {
1228         case HL_MEM_OP_ALLOC:
1229                 if (!hdev->dram_supports_virtual_memory) {
1230                         dev_err(hdev->dev, "DRAM alloc is not supported\n");
1231                         rc = -EINVAL;
1232                         goto out;
1233                 }
1234
1235                 if (args->in.alloc.mem_size == 0) {
1236                         dev_err(hdev->dev,
1237                                 "alloc size must be larger than 0\n");
1238                         rc = -EINVAL;
1239                         goto out;
1240                 }
1241                 rc = alloc_device_memory(ctx, &args->in, &handle);
1242
1243                 memset(args, 0, sizeof(*args));
1244                 args->out.handle = (__u64) handle;
1245                 break;
1246
1247         case HL_MEM_OP_FREE:
1248                 rc = free_device_memory(ctx, args->in.free.handle);
1249                 break;
1250
1251         case HL_MEM_OP_MAP:
1252                 rc = map_device_va(ctx, &args->in, &device_addr);
1253
1254                 memset(args, 0, sizeof(*args));
1255                 args->out.device_virt_addr = device_addr;
1256                 break;
1257
1258         case HL_MEM_OP_UNMAP:
1259                 rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
1260                                         false);
1261                 break;
1262
1263         default:
1264                 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
1265                 rc = -ENOTTY;
1266                 break;
1267         }
1268
1269 out:
1270         return rc;
1271 }
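
/*
 * Hypothetical user-space usage sketch (editor's addition, not part of this
 * file): allocate device memory and then map it through the memory IOCTL.
 * The ioctl request name HL_IOCTL_MEMORY and the /dev/hlX file descriptor fd
 * are assumed from the habanalabs uapi; the fields mirror those handled above.
 *
 *	union hl_mem_args args = {0};
 *
 *	args.in.op = HL_MEM_OP_ALLOC;
 *	args.in.alloc.mem_size = 2 * 1024 * 1024;
 *	if (ioctl(fd, HL_IOCTL_MEMORY, &args))
 *		return -1;
 *	handle = args.out.handle;
 *
 *	memset(&args, 0, sizeof(args));
 *	args.in.op = HL_MEM_OP_MAP;
 *	args.in.map_device.handle = handle;
 *	if (ioctl(fd, HL_IOCTL_MEMORY, &args))
 *		return -1;
 *	device_va = args.out.device_virt_addr;
 */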
1272
1273 static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
1274                                 u32 npages, u64 start, u32 offset,
1275                                 struct hl_userptr *userptr)
1276 {
1277         int rc;
1278
1279         if (!access_ok((void __user *) (uintptr_t) addr, size)) {
1280                 dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
1281                 return -EFAULT;
1282         }
1283
1284         userptr->vec = frame_vector_create(npages);
1285         if (!userptr->vec) {
1286                 dev_err(hdev->dev, "Failed to create frame vector\n");
1287                 return -ENOMEM;
1288         }
1289
1290         rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
1291                                 userptr->vec);
1292
1293         if (rc != npages) {
1294                 dev_err(hdev->dev,
1295                         "Failed to map host memory, user ptr probably wrong\n");
1296                 if (rc < 0)
1297                         goto destroy_framevec;
1298                 rc = -EFAULT;
1299                 goto put_framevec;
1300         }
1301
1302         if (frame_vector_to_pages(userptr->vec) < 0) {
1303                 dev_err(hdev->dev,
1304                         "Failed to translate frame vector to pages\n");
1305                 rc = -EFAULT;
1306                 goto put_framevec;
1307         }
1308
1309         rc = sg_alloc_table_from_pages(userptr->sgt,
1310                                         frame_vector_pages(userptr->vec),
1311                                         npages, offset, size, GFP_ATOMIC);
1312         if (rc < 0) {
1313                 dev_err(hdev->dev, "failed to create SG table from pages\n");
1314                 goto put_framevec;
1315         }
1316
1317         return 0;
1318
1319 put_framevec:
1320         put_vaddr_frames(userptr->vec);
1321 destroy_framevec:
1322         frame_vector_destroy(userptr->vec);
1323         return rc;
1324 }
1325
1326 /*
1327  * hl_pin_host_memory - pins a chunk of host memory.
1328  * @hdev: pointer to the habanalabs device structure
1329  * @addr: the host virtual address of the memory area
1330  * @size: the size of the memory area
1331  * @userptr: pointer to hl_userptr structure
1332  *
1333  * This function does the following:
1334  * - Pins the physical pages
1335  * - Create an SG list from those pages
1336  */
1337 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
1338                                         struct hl_userptr *userptr)
1339 {
1340         u64 start, end;
1341         u32 npages, offset;
1342         int rc;
1343
1344         if (!size) {
1345                 dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
1346                 return -EINVAL;
1347         }
1348
1349         /*
1350          * If the combination of the address and size requested for this memory
1351          * region causes an integer overflow, return error.
1352          */
1353         if (((addr + size) < addr) ||
1354                         PAGE_ALIGN(addr + size) < (addr + size)) {
1355                 dev_err(hdev->dev,
1356                         "user pointer 0x%llx + %llu causes integer overflow\n",
1357                         addr, size);
1358                 return -EINVAL;
1359         }
1360
1361         /*
1362          * This function can also be called from the data path, hence always
1363          * use GFP_ATOMIC, as it is not a big allocation anyway.
1364          */
1365         userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
1366         if (!userptr->sgt)
1367                 return -ENOMEM;
1368
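        /*
         * Split the user range into a page-aligned start address, the offset
         * within the first page and the number of pages to pin.
         */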
1369         start = addr & PAGE_MASK;
1370         offset = addr & ~PAGE_MASK;
1371         end = PAGE_ALIGN(addr + size);
1372         npages = (end - start) >> PAGE_SHIFT;
1373
1374         userptr->size = size;
1375         userptr->addr = addr;
1376         userptr->dma_mapped = false;
1377         INIT_LIST_HEAD(&userptr->job_node);
1378
1379         rc = get_user_memory(hdev, addr, size, npages, start, offset,
1380                                 userptr);
1381         if (rc) {
1382                 dev_err(hdev->dev,
1383                         "failed to get user memory for address 0x%llx\n",
1384                         addr);
1385                 goto free_sgt;
1386         }
1387
1388         hl_debugfs_add_userptr(hdev, userptr);
1389
1390         return 0;
1391
1392 free_sgt:
1393         kfree(userptr->sgt);
1394         return rc;
1395 }
1396
1397 /*
1398  * hl_unpin_host_memory - unpins a chunk of host memory.
1399  * @hdev: pointer to the habanalabs device structure
1400  * @userptr: pointer to hl_userptr structure
1401  *
1402  * This function does the following:
1403  * - Unpins the physical pages related to the host memory
1404  * - Free the SG list
1405  */
1406 void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
1407 {
1408         struct page **pages;
1409
1410         hl_debugfs_remove_userptr(hdev, userptr);
1411
1412         if (userptr->dma_mapped)
1413                 hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
1414                                                         userptr->sgt->nents,
1415                                                         userptr->dir);
1416
1417         pages = frame_vector_pages(userptr->vec);
1418         if (!IS_ERR(pages)) {
1419                 int i;
1420
1421                 for (i = 0; i < frame_vector_count(userptr->vec); i++)
1422                         set_page_dirty_lock(pages[i]);
1423         }
1424         put_vaddr_frames(userptr->vec);
1425         frame_vector_destroy(userptr->vec);
1426
1427         list_del(&userptr->job_node);
1428
1429         sg_free_table(userptr->sgt);
1430         kfree(userptr->sgt);
1431 }
1432
1433 /*
1434  * hl_userptr_delete_list - clear userptr list
1435  *
1436  * @hdev                : pointer to the habanalabs device structure
1437  * @userptr_list        : pointer to the list to clear
1438  *
1439  * This function does the following:
1440  * - Iterates over the list and unpins the host memory and frees the userptr
1441  *   structure.
1442  */
1443 void hl_userptr_delete_list(struct hl_device *hdev,
1444                                 struct list_head *userptr_list)
1445 {
1446         struct hl_userptr *userptr, *tmp;
1447
1448         list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
1449                 hl_unpin_host_memory(hdev, userptr);
1450                 kfree(userptr);
1451         }
1452
1453         INIT_LIST_HEAD(userptr_list);
1454 }
1455
1456 /*
1457  * hl_userptr_is_pinned - returns whether the given userptr is pinned
1458  *
1459  * @hdev                : pointer to the habanalabs device structure
1460  * @userptr_list        : pointer to the list to search in
1461  * @userptr             : pointer to userptr to check
1462  *
1463  * This function does the following:
1464  * - Iterates over the list and checks if a userptr with the given address and
1465  *   size is in it, meaning it is pinned. If so, returns true, otherwise false.
1466  */
1467 bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
1468                                 u32 size, struct list_head *userptr_list,
1469                                 struct hl_userptr **userptr)
1470 {
1471         list_for_each_entry((*userptr), userptr_list, job_node) {
1472                 if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
1473                         return true;
1474         }
1475
1476         return false;
1477 }
1478
1479 /*
1480  * va_range_init - initialize virtual addresses range
1481  * @hdev: pointer to the habanalabs device structure
1482  * @va_range: pointer to the range to initialize
1483  * @start: range start address
1484  * @end: range end address
1485  *
1486  * This function does the following:
1487  * - Initializes the virtual addresses list of the given range with the given
1488  *   addresses.
1489  */
1490 static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
1491                                 u64 start, u64 end)
1492 {
1493         int rc;
1494
1495         INIT_LIST_HEAD(&va_range->list);
1496
1497         /* Align the range inward to PAGE_SIZE boundaries */
1498
1499         if (start & (PAGE_SIZE - 1)) {
1500                 start &= PAGE_MASK;
1501                 start += PAGE_SIZE;
1502         }
1503
1504         if (end & (PAGE_SIZE - 1))
1505                 end &= PAGE_MASK;
1506
1507         if (start >= end) {
1508                 dev_err(hdev->dev, "too small vm range for va list\n");
1509                 return -EFAULT;
1510         }
1511
1512         rc = add_va_block(hdev, va_range, start, end);
1513
1514         if (rc) {
1515                 dev_err(hdev->dev, "Failed to init va list\n");
1516                 return rc;
1517         }
1518
1519         va_range->start_addr = start;
1520         va_range->end_addr = end;
1521
1522         return 0;
1523 }
1524
1525 /*
1526  * va_range_fini() - clear a virtual addresses range
1527  * @hdev: pointer to the habanalabs device structure
1528  * @va_range: pointer to virtual addresses range
1529  *
1530  * This function does the following:
1531  * - Frees the virtual addresses block list and its lock
1532  */
1533 static void va_range_fini(struct hl_device *hdev,
1534                 struct hl_va_range *va_range)
1535 {
1536         mutex_lock(&va_range->lock);
1537         clear_va_list_locked(hdev, &va_range->list);
1538         mutex_unlock(&va_range->lock);
1539
1540         mutex_destroy(&va_range->lock);
1541         kfree(va_range);
1542 }
1543
1544 /*
1545  * vm_ctx_init_with_ranges() - initialize virtual memory for context
1546  * @ctx: pointer to the habanalabs context structure
1547  * @host_range_start: host virtual addresses range start.
1548  * @host_range_end: host virtual addresses range end.
1549  * @host_huge_range_start: host virtual addresses range start for memory
1550  *                          allocated with huge pages.
1551  * @host_huge_range_end: host virtual addresses range end for memory allocated
1552  *                        with huge pages.
1553  * @dram_range_start: dram virtual addresses range start.
1554  * @dram_range_end: dram virtual addresses range end.
1555  *
1556  * This function initializes the following:
1557  * - MMU for context
1558  * - Virtual address to area descriptor hashtable
1559  * - Virtual block list of available virtual memory
1560  */
1561 static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
1562                                         u64 host_range_start,
1563                                         u64 host_range_end,
1564                                         u64 host_huge_range_start,
1565                                         u64 host_huge_range_end,
1566                                         u64 dram_range_start,
1567                                         u64 dram_range_end)
1568 {
1569         struct hl_device *hdev = ctx->hdev;
1570         int rc;
1571
1572         ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
1573         if (!ctx->host_va_range)
1574                 return -ENOMEM;
1575
1576         ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
1577                                                 GFP_KERNEL);
1578         if (!ctx->host_huge_va_range) {
1579                 rc = -ENOMEM;
1580                 goto host_huge_va_range_err;
1581         }
1582
1583         ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
1584         if (!ctx->dram_va_range) {
1585                 rc = -ENOMEM;
1586                 goto dram_va_range_err;
1587         }
1588
1589         rc = hl_mmu_ctx_init(ctx);
1590         if (rc) {
1591                 dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
1592                 goto mmu_ctx_err;
1593         }
1594
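        /*
         * mem_hash maps a device virtual address to the descriptor of the
         * mapping behind it (host userptr or DRAM physical pages pack) and is
         * protected by mem_hash_lock.
         */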
1595         mutex_init(&ctx->mem_hash_lock);
1596         hash_init(ctx->mem_hash);
1597
1598         mutex_init(&ctx->host_va_range->lock);
1599
1600         rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
1601                                 host_range_end);
1602         if (rc) {
1603                 dev_err(hdev->dev, "failed to init host vm range\n");
1604                 goto host_page_range_err;
1605         }
1606
1607         if (hdev->pmmu_huge_range) {
1608                 mutex_init(&ctx->host_huge_va_range->lock);
1609
1610                 rc = va_range_init(hdev, ctx->host_huge_va_range,
1611                                         host_huge_range_start,
1612                                         host_huge_range_end);
1613                 if (rc) {
1614                         dev_err(hdev->dev,
1615                                 "failed to init host huge vm range\n");
1616                         goto host_hpage_range_err;
1617                 }
1618         } else {
                /*
                 * No dedicated huge-page host range on this ASIC - free the
                 * unused allocation and alias the huge range to the regular
                 * host range.
                 */
                kfree(ctx->host_huge_va_range);
1619                 ctx->host_huge_va_range = ctx->host_va_range;
1620         }
1621
1622         mutex_init(&ctx->dram_va_range->lock);
1623
1624         rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
1625                         dram_range_end);
1626         if (rc) {
1627                 dev_err(hdev->dev, "failed to init dram vm range\n");
1628                 goto dram_vm_err;
1629         }
1630
1631         hl_debugfs_add_ctx_mem_hash(hdev, ctx);
1632
1633         return 0;
1634
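        /*
         * Error path: undo the initialization steps above in reverse order.
         * The huge-page range is torn down separately only when it is a
         * distinct range (hdev->pmmu_huge_range).
         */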
1635 dram_vm_err:
1636         mutex_destroy(&ctx->dram_va_range->lock);
1637
1638         if (hdev->pmmu_huge_range) {
1639                 mutex_lock(&ctx->host_huge_va_range->lock);
1640                 clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
1641                 mutex_unlock(&ctx->host_huge_va_range->lock);
1642         }
1643 host_hpage_range_err:
1644         if (hdev->pmmu_huge_range)
1645                 mutex_destroy(&ctx->host_huge_va_range->lock);
1646         mutex_lock(&ctx->host_va_range->lock);
1647         clear_va_list_locked(hdev, &ctx->host_va_range->list);
1648         mutex_unlock(&ctx->host_va_range->lock);
1649 host_page_range_err:
1650         mutex_destroy(&ctx->host_va_range->lock);
1651         mutex_destroy(&ctx->mem_hash_lock);
1652         hl_mmu_ctx_fini(ctx);
1653 mmu_ctx_err:
1654         kfree(ctx->dram_va_range);
1655 dram_va_range_err:
1656         kfree(ctx->host_huge_va_range);
1657 host_huge_va_range_err:
1658         kfree(ctx->host_va_range);
1659
1660         return rc;
1661 }
1662
1663 int hl_vm_ctx_init(struct hl_ctx *ctx)
1664 {
1665         struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
1666         u64 host_range_start, host_range_end, host_huge_range_start,
1667                 host_huge_range_end, dram_range_start, dram_range_end;
1668
1669         atomic64_set(&ctx->dram_phys_mem, 0);
1670
1671         /*
1672          * - If MMU is enabled, init the ranges as usual.
1673          * - If MMU is disabled, in case of host mapping, the returned address
1674          *   is the given one.
1675          *   In case of DRAM mapping, the returned address is the physical
1676          *   address of the memory related to the given handle.
1677          */
1678         if (ctx->hdev->mmu_enable) {
1679                 dram_range_start = prop->dmmu.start_addr;
1680                 dram_range_end = prop->dmmu.end_addr;
1681                 host_range_start = prop->pmmu.start_addr;
1682                 host_range_end = prop->pmmu.end_addr;
1683                 host_huge_range_start = prop->pmmu_huge.start_addr;
1684                 host_huge_range_end = prop->pmmu_huge.end_addr;
1685         } else {
1686                 dram_range_start = prop->dram_user_base_address;
1687                 dram_range_end = prop->dram_end_address;
1688                 host_range_start = prop->dram_user_base_address;
1689                 host_range_end = prop->dram_end_address;
1690                 host_huge_range_start = prop->dram_user_base_address;
1691                 host_huge_range_end = prop->dram_end_address;
1692         }
1693
1694         return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
1695                                         host_huge_range_start,
1696                                         host_huge_range_end,
1697                                         dram_range_start,
1698                                         dram_range_end);
1699 }
1700
1701 /*
1702  * hl_vm_ctx_fini       - virtual memory teardown of context
1703  *
1704  * @ctx                 : pointer to the habanalabs context structure
1705  *
1706  * This function performs teardown of the following:
1707  * - Virtual block list of available virtual memory
1708  * - Virtual address to area descriptor hashtable
1709  * - MMU for context
1710  *
1711  * In addition this function does the following:
1712  * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
1713  *   hashtable should be empty as no valid mappings should exist at this
1714  *   point.
1715  * - Frees any existing physical page list from the idr which relates to the
1716  *   current context asid.
1717  * - This function checks the virtual block list for correctness. At this point
1718  *   the list should contain one element which describes the whole virtual
1719  *   memory range of the context. Otherwise, a warning is printed.
1720  */
1721 void hl_vm_ctx_fini(struct hl_ctx *ctx)
1722 {
1723         struct hl_device *hdev = ctx->hdev;
1724         struct hl_vm *vm = &hdev->vm;
1725         struct hl_vm_phys_pg_pack *phys_pg_list;
1726         struct hl_vm_hash_node *hnode;
1727         struct hlist_node *tmp_node;
1728         int i;
1729
1730         hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
1731
1732         /*
1733          * If a hard reset is pending, something clearly went wrong already, so
1734          * there is no point in printing another, side-effect error
1735          */
1736         if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
1737                 dev_notice(hdev->dev,
1738                         "user released device without removing its memory mappings\n");
1739
1740         hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
1741                 dev_dbg(hdev->dev,
1742                         "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
1743                         hnode->vaddr, ctx->asid);
1744                 unmap_device_va(ctx, hnode->vaddr, true);
1745         }
1746
1747         /* invalidate the cache once after the unmapping loop */
1748         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
1749         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
1750
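        /*
         * Release any device memory allocations of this context that the user
         * did not free explicitly, and return their size to the DRAM usage
         * accounting.
         */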
1751         spin_lock(&vm->idr_lock);
1752         idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
1753                 if (phys_pg_list->asid == ctx->asid) {
1754                         dev_dbg(hdev->dev,
1755                                 "page list 0x%px of asid %d is still alive\n",
1756                                 phys_pg_list, ctx->asid);
1757                         atomic64_sub(phys_pg_list->total_size,
1758                                         &hdev->dram_used_mem);
1759                         free_phys_pg_pack(hdev, phys_pg_list);
1760                         idr_remove(&vm->phys_pg_pack_handles, i);
1761                 }
1762         spin_unlock(&vm->idr_lock);
1763
1764         va_range_fini(hdev, ctx->dram_va_range);
1765         if (hdev->pmmu_huge_range)
1766                 va_range_fini(hdev, ctx->host_huge_va_range);
1767         va_range_fini(hdev, ctx->host_va_range);
1768
1769         mutex_destroy(&ctx->mem_hash_lock);
1770         hl_mmu_ctx_fini(ctx);
1771 }
1772
1773 /*
1774  * hl_vm_init           - initialize virtual memory module
1775  *
1776  * @hdev                : pointer to the habanalabs device structure
1777  *
1778  * This function initializes the following:
1779  * - MMU module
1780  * - DRAM physical pages pool of 2MB
1781  * - Idr for device memory allocation handles
1782  */
1783 int hl_vm_init(struct hl_device *hdev)
1784 {
1785         struct asic_fixed_properties *prop = &hdev->asic_prop;
1786         struct hl_vm *vm = &hdev->vm;
1787         int rc;
1788
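        /*
         * The DRAM pool hands out device memory in dram_page_size granules;
         * gen_pool_create() takes the allocation order, hence __ffs() of the
         * (power-of-2) page size.
         */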
1789         vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
1790         if (!vm->dram_pg_pool) {
1791                 dev_err(hdev->dev, "Failed to create dram page pool\n");
1792                 return -ENOMEM;
1793         }
1794
1795         kref_init(&vm->dram_pg_pool_refcount);
1796
1797         rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
1798                         prop->dram_end_address - prop->dram_user_base_address,
1799                         -1);
1800
1801         if (rc) {
1802                 dev_err(hdev->dev,
1803                         "Failed to add memory to dram page pool %d\n", rc);
1804                 goto pool_add_err;
1805         }
1806
1807         spin_lock_init(&vm->idr_lock);
1808         idr_init(&vm->phys_pg_pack_handles);
1809
1810         atomic64_set(&hdev->dram_used_mem, 0);
1811
1812         vm->init_done = true;
1813
1814         return 0;
1815
1816 pool_add_err:
1817         gen_pool_destroy(vm->dram_pg_pool);
1818
1819         return rc;
1820 }
1821
1822 /*
1823  * hl_vm_fini           - virtual memory module teardown
1824  *
1825  * @hdev                : pointer to the habanalabs device structure
1826  *
1827  * This function performs teardown of the following:
1828  * - Idr for device memory allocation handles
1829  * - DRAM physical pages pool of 2MB
1830  * - MMU module
1831  */
1832 void hl_vm_fini(struct hl_device *hdev)
1833 {
1834         struct hl_vm *vm = &hdev->vm;
1835
1836         if (!vm->init_done)
1837                 return;
1838
1839         /*
1840          * At this point all the contexts should be freed and hence no DRAM
1841          * memory should be in use, so the DRAM pool should be freed here.
1842          */
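        /*
         * kref_put() returns 1 only if this was the last reference and the
         * release callback destroyed the pool; otherwise something is still
         * holding a reference and the pool is leaked.
         */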
1843         if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
1844                 dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
1845                                 __func__);
1846
1847         vm->init_done = false;
1848 }