drivers/misc/mic/scif/scif_rma.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Intel SCIF driver.
 */
#include <linux/intel-iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

bool scif_ulimit_check = true;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
        struct scif_endpt_rma_info *rma = &ep->rma_info;

        mutex_init(&rma->rma_lock);
        init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
        spin_lock_init(&rma->tc_lock);
        mutex_init(&rma->mmn_lock);
        INIT_LIST_HEAD(&rma->reg_list);
        INIT_LIST_HEAD(&rma->remote_reg_list);
        atomic_set(&rma->tw_refcount, 0);
        atomic_set(&rma->tcw_refcount, 0);
        atomic_set(&rma->tcw_total_pages, 0);
        atomic_set(&rma->fence_refcount, 0);

        rma->async_list_del = 0;
        rma->dma_chan = NULL;
        INIT_LIST_HEAD(&rma->mmn_list);
        INIT_LIST_HEAD(&rma->vma_list);
        init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
        int ret = 0;

        mutex_lock(&ep->rma_info.rma_lock);
        /* Destroy RMA Info only if all lists are empty and no references remain */
        if (list_empty(&ep->rma_info.reg_list) &&
            list_empty(&ep->rma_info.remote_reg_list) &&
            list_empty(&ep->rma_info.mmn_list) &&
            !atomic_read(&ep->rma_info.tw_refcount) &&
            !atomic_read(&ep->rma_info.tcw_refcount) &&
            !atomic_read(&ep->rma_info.fence_refcount))
                ret = 1;
        mutex_unlock(&ep->rma_info.rma_lock);
        return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
        struct scif_pinned_pages *pin;

        might_sleep();
        pin = scif_zalloc(sizeof(*pin));
        if (!pin)
                goto error;

        pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
        if (!pin->pages)
                goto error_free_pinned_pages;

        pin->prot = prot;
        pin->magic = SCIFEP_MAGIC;
        return pin;

error_free_pinned_pages:
        scif_free(pin, sizeof(*pin));
error:
        return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
        int writeable = pin->prot & SCIF_PROT_WRITE;
        int kernel = SCIF_MAP_KERNEL & pin->map_flags;

        /*
         * Pages mapped with SCIF_MAP_KERNEL were obtained via
         * virt_to_page()/vmalloc_to_page() and were never pinned, so
         * only user pages need to be dirtied and unpinned here.
         */
        if (!kernel)
                unpin_user_pages_dirty_lock(pin->pages, pin->nr_pages,
                                            writeable);
        scif_free(pin->pages,
                  pin->nr_pages * sizeof(*pin->pages));
        scif_free(pin, sizeof(*pin));
        return 0;
}

/**
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
                                       s64 offset, bool temp)
{
        struct scif_window *window;

        might_sleep();
        window = scif_zalloc(sizeof(*window));
        if (!window)
                goto error;

        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
        if (!window->dma_addr)
                goto error_free_window;

        window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
        if (!window->num_pages)
                goto error_free_window;

        window->offset = offset;
        window->ep = (u64)ep;
        window->magic = SCIFEP_MAGIC;
        window->reg_state = OP_IDLE;
        init_waitqueue_head(&window->regwq);
        window->unreg_state = OP_IDLE;
        init_waitqueue_head(&window->unregwq);
        INIT_LIST_HEAD(&window->list);
        window->type = SCIF_WINDOW_SELF;
        window->temp = temp;
        return window;

error_free_window:
        scif_free(window->dma_addr,
                  nr_pages * sizeof(*window->dma_addr));
        scif_free(window, sizeof(*window));
error:
        return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
                                           struct scif_window *window)
{
        int err;
        int nr_pages = window->nr_pages;
        struct scif_allocmsg *alloc = &window->alloc_handle;
        struct scifmsg msg;

retry:
        /* Wait for a SCIF_ALLOC_GNT/REJ message */
        err = wait_event_timeout(alloc->allocwq,
                                 alloc->state != OP_IN_PROGRESS,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        if (!err && scifdev_alive(ep))
                goto retry;

        mutex_lock(&ep->rma_info.rma_lock);
        if (alloc->state == OP_COMPLETED) {
                msg.uop = SCIF_FREE_VIRT;
                msg.src = ep->port;
                msg.payload[0] = ep->remote_ep;
                msg.payload[1] = window->alloc_handle.vaddr;
                msg.payload[2] = (u64)window;
                msg.payload[3] = SCIF_REGISTER;
                _scif_nodeqp_send(ep->remote_dev, &msg);
        }
        mutex_unlock(&ep->rma_info.rma_lock);

        scif_free_window_offset(ep, window, window->offset);
        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
        scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
        int j;

        if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
                if (window->st) {
                        dma_unmap_sg(&remote_dev->sdev->dev,
                                     window->st->sgl, window->st->nents,
                                     DMA_BIDIRECTIONAL);
                        sg_free_table(window->st);
                        kfree(window->st);
                        window->st = NULL;
                }
        } else {
                for (j = 0; j < window->nr_contig_chunks; j++) {
                        if (window->dma_addr[j]) {
                                scif_unmap_single(window->dma_addr[j],
                                                  remote_dev,
                                                  window->num_pages[j] <<
                                                  PAGE_SHIFT);
                                window->dma_addr[j] = 0x0;
                        }
                }
        }
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
        if (scif_ulimit_check)
                return get_task_mm(current);
        return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
        if (mm)
                mmput(mm);
}

static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
                          int nr_pages)
{
        if (!mm || !nr_pages || !scif_ulimit_check)
                return 0;

        atomic64_sub(nr_pages, &mm->pinned_vm);
        return 0;
}

static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
                                             int nr_pages)
{
        unsigned long locked, lock_limit;

        if (!mm || !nr_pages || !scif_ulimit_check)
                return 0;

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        locked = atomic64_add_return(nr_pages, &mm->pinned_vm);

        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                atomic64_sub(nr_pages, &mm->pinned_vm);
                dev_err(scif_info.mdev.this_device,
                        "locked(%lu) > lock_limit(%lu)\n",
                        locked, lock_limit);
                return -ENOMEM;
        }
        return 0;
}
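
/*
 * Worked example for the check above (illustrative numbers only): with
 * 4 KiB pages and RLIMIT_MEMLOCK = 64 KiB, lock_limit is 16 pages; a
 * request that pushes mm->pinned_vm to 17 pages fails with -ENOMEM
 * unless the caller has CAP_IPC_LOCK.
 */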

/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
        int j;
        struct scif_pinned_pages *pinned_pages = window->pinned_pages;
        int nr_pages = window->nr_pages;

        might_sleep();
        if (!window->temp && window->mm) {
                __scif_dec_pinned_vm_lock(window->mm, window->nr_pages);
                __scif_release_mm(window->mm);
                window->mm = NULL;
        }

        scif_free_window_offset(ep, window, window->offset);
        scif_unmap_window(ep->remote_dev, window);
        /*
         * Decrement references for this set of pinned pages from
         * this window.
         */
        j = atomic_sub_return(1, &pinned_pages->ref_count);
        if (j < 0)
                dev_err(scif_info.mdev.this_device,
                        "%s %d incorrect ref count %d\n",
                        __func__, __LINE__, j);
        /*
         * If the ref count for pinned_pages is zero then someone
         * has already called scif_unpin_pages() for it and we should
         * destroy the page cache.
         */
        if (!j)
                scif_destroy_pinned_pages(window->pinned_pages);
        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
        window->magic = 0;
        scif_free(window, sizeof(*window));
        return 0;
}

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
                                     struct scif_window *window)
{
        int i, j, err = 0;
        int nr_pages = window->nr_pages;
        bool vmalloc_dma_phys, vmalloc_num_pages;

        might_sleep();
        /* Map window */
        err = scif_map_single(&window->mapped_offset,
                              window, remote_dev, sizeof(*window));
        if (err)
                goto error_window;

        /* Compute the number of lookup entries. 21 == 2MB Shift */
        window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
                                  (2 * 1024 * 1024)) >> 21;
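        /*
         * Example with 4 KiB pages: nr_pages = 1000 spans 4,096,000
         * bytes, which rounds up to two 2 MiB chunks, so nr_lookup = 2;
         * each lookup entry covers one 2 MiB worth of addresses.
         */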

        window->dma_addr_lookup.lookup =
                scif_alloc_coherent(&window->dma_addr_lookup.offset,
                                    remote_dev, window->nr_lookup *
                                    sizeof(*window->dma_addr_lookup.lookup),
                                    GFP_KERNEL | __GFP_ZERO);
        if (!window->dma_addr_lookup.lookup) {
                err = -ENOMEM;
                goto error_window;
        }

        window->num_pages_lookup.lookup =
                scif_alloc_coherent(&window->num_pages_lookup.offset,
                                    remote_dev, window->nr_lookup *
                                    sizeof(*window->num_pages_lookup.lookup),
                                    GFP_KERNEL | __GFP_ZERO);
        if (!window->num_pages_lookup.lookup) {
                err = -ENOMEM;
                goto error_window;
        }

        vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
        vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);

        /* Now map each of the pages containing physical addresses */
        for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
                err = scif_map_page(&window->dma_addr_lookup.lookup[j],
                                    vmalloc_dma_phys ?
                                    vmalloc_to_page(&window->dma_addr[i]) :
                                    virt_to_page(&window->dma_addr[i]),
                                    remote_dev);
                if (err)
                        goto error_window;
                err = scif_map_page(&window->num_pages_lookup.lookup[j],
                                    vmalloc_num_pages ?
                                    vmalloc_to_page(&window->num_pages[i]) :
                                    virt_to_page(&window->num_pages[i]),
                                    remote_dev);
                if (err)
                        goto error_window;
        }
        return 0;
error_window:
        return err;
}

/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
                                       struct scif_window *window)
{
        int i, j;

        if (window->nr_lookup) {
                struct scif_rma_lookup *lup = &window->dma_addr_lookup;
                struct scif_rma_lookup *npup = &window->num_pages_lookup;

                for (i = 0, j = 0; i < window->nr_pages;
                        i += SCIF_NR_ADDR_IN_PAGE, j++) {
                        if (lup->lookup && lup->lookup[j])
                                scif_unmap_single(lup->lookup[j],
                                                  remote_dev,
                                                  PAGE_SIZE);
                        if (npup->lookup && npup->lookup[j])
                                scif_unmap_single(npup->lookup[j],
                                                  remote_dev,
                                                  PAGE_SIZE);
                }
                if (lup->lookup)
                        scif_free_coherent(lup->lookup, lup->offset,
                                           remote_dev, window->nr_lookup *
                                           sizeof(*lup->lookup));
                if (npup->lookup)
                        scif_free_coherent(npup->lookup, npup->offset,
                                           remote_dev, window->nr_lookup *
                                           sizeof(*npup->lookup));
                if (window->mapped_offset)
                        scif_unmap_single(window->mapped_offset,
                                          remote_dev, sizeof(*window));
                window->nr_lookup = 0;
        }
}

/**
 * scif_create_remote_window:
 * @scifdev:  SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
        struct scif_window *window;

        might_sleep();
        window = scif_zalloc(sizeof(*window));
        if (!window)
                goto error_ret;

        window->magic = SCIFEP_MAGIC;
        window->nr_pages = nr_pages;

        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
        if (!window->dma_addr)
                goto error_window;

        window->num_pages = scif_zalloc(nr_pages *
                                        sizeof(*window->num_pages));
        if (!window->num_pages)
                goto error_window;

        if (scif_create_remote_lookup(scifdev, window))
                goto error_window;

        window->type = SCIF_WINDOW_PEER;
        window->unreg_state = OP_IDLE;
        INIT_LIST_HEAD(&window->list);
        return window;
error_window:
        scif_destroy_remote_window(window);
error_ret:
        return NULL;
}

/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
        scif_free(window->dma_addr, window->nr_pages *
                  sizeof(*window->dma_addr));
        scif_free(window->num_pages, window->nr_pages *
                  sizeof(*window->num_pages));
        window->magic = 0;
        scif_free(window, sizeof(*window));
}

/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg(..) and then detect the number
 * of contiguous DMA mappings allocated
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
                          struct scif_window *window)
{
        struct scatterlist *sg;
        int i, err;
        scif_pinned_pages_t pin = window->pinned_pages;

        window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
        if (!window->st)
                return -ENOMEM;

        err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
        if (err)
                return err;

        for_each_sg(window->st->sgl, sg, window->st->nents, i)
                sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

        err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
                         window->st->nents, DMA_BIDIRECTIONAL);
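        /* dma_map_sg() returns the number of mapped entries; 0 means failure */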
        if (!err)
                return -ENOMEM;
        /* Detect contiguous ranges of DMA mappings */
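        /*
         * e.g. entries mapped at DMA addresses 0xA000, 0xB000 and 0xD000
         * (4 KiB each) coalesce into two chunks: {0xA000, 2 pages} and
         * {0xD000, 1 page}.
         */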
        sg = window->st->sgl;
        for (i = 0; sg; i++) {
                dma_addr_t last_da;

                window->dma_addr[i] = sg_dma_address(sg);
                window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
                last_da = sg_dma_address(sg) + sg_dma_len(sg);
                while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
                        window->num_pages[i] +=
                                (sg_dma_len(sg) >> PAGE_SHIFT);
                        last_da = sg_dma_address(sg) + sg_dma_len(sg);
                }
                window->nr_contig_chunks++;
        }
        return 0;
}

/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
        int i, j, k, err = 0, nr_contig_pages;
        scif_pinned_pages_t pin;
        phys_addr_t phys_prev, phys_curr;

        might_sleep();

        pin = window->pinned_pages;

        if (intel_iommu_enabled && !scifdev_self(remote_dev))
                return scif_iommu_map(remote_dev, window);

        for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
                phys_prev = page_to_phys(pin->pages[i]);
                nr_contig_pages = 1;

                /* Detect physically contiguous chunks */
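                /*
                 * e.g. pinned pages at PFNs 100, 101, 102 and 200 produce
                 * two chunks: num_pages = {3, 1}.
                 */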
                for (k = i + 1; k < window->nr_pages; k++) {
                        phys_curr = page_to_phys(pin->pages[k]);
                        if (phys_curr != (phys_prev + PAGE_SIZE))
                                break;
                        phys_prev = phys_curr;
                        nr_contig_pages++;
                }
                window->num_pages[j] = nr_contig_pages;
                window->nr_contig_chunks++;
                if (scif_is_mgmt_node()) {
                        /*
                         * Management node has to deal with SMPT on X100 and
                         * hence the DMA mapping is required
                         */
                        err = scif_map_single(&window->dma_addr[j],
                                              phys_to_virt(page_to_phys(
                                                           pin->pages[i])),
                                              remote_dev,
                                              nr_contig_pages << PAGE_SHIFT);
                        if (err)
                                return err;
                } else {
                        window->dma_addr[j] = page_to_phys(pin->pages[i]);
                }
        }
        return err;
}

/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
                                     struct scif_window *window)
{
        struct scifmsg msg;

        msg.uop = SCIF_UNREGISTER;
        msg.src = ep->port;
        msg.payload[0] = window->alloc_handle.vaddr;
        msg.payload[1] = (u64)window;
        return scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 */
int scif_unregister_window(struct scif_window *window)
{
        int err = 0;
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
        bool send_msg = false;

        might_sleep();
        switch (window->unreg_state) {
        case OP_IDLE:
        {
                window->unreg_state = OP_IN_PROGRESS;
                send_msg = true;
        }
                /* fall through */
        case OP_IN_PROGRESS:
        {
                scif_get_window(window, 1);
                mutex_unlock(&ep->rma_info.rma_lock);
                if (send_msg) {
                        err = scif_send_scif_unregister(ep, window);
                        if (err) {
                                window->unreg_state = OP_COMPLETED;
                                goto done;
                        }
                } else {
                        /* Return ENXIO since unregistration is in progress */
                        mutex_lock(&ep->rma_info.rma_lock);
                        return -ENXIO;
                }
retry:
                /* Wait for a SCIF_UNREGISTER_(N)ACK message */
                err = wait_event_timeout(window->unregwq,
                                         window->unreg_state != OP_IN_PROGRESS,
                                         SCIF_NODE_ALIVE_TIMEOUT);
                if (!err && scifdev_alive(ep))
                        goto retry;
                if (!err) {
                        err = -ENODEV;
                        window->unreg_state = OP_COMPLETED;
                        dev_err(scif_info.mdev.this_device,
                                "%s %d err %d\n", __func__, __LINE__, err);
                }
                if (err > 0)
                        err = 0;
done:
                mutex_lock(&ep->rma_info.rma_lock);
                scif_put_window(window, 1);
                break;
        }
        case OP_FAILED:
        {
                if (!scifdev_alive(ep)) {
                        err = -ENODEV;
                        window->unreg_state = OP_COMPLETED;
                }
                break;
        }
        case OP_COMPLETED:
                break;
        default:
                err = -ENODEV;
        }

        if (window->unreg_state == OP_COMPLETED && window->ref_count)
                scif_put_window(window, window->nr_pages);

        if (!window->ref_count) {
                atomic_inc(&ep->rma_info.tw_refcount);
                list_del_init(&window->list);
                scif_free_window_offset(ep, window, window->offset);
                mutex_unlock(&ep->rma_info.rma_lock);
                if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
                    scifdev_alive(ep)) {
                        scif_drain_dma_intr(ep->remote_dev->sdev,
                                            ep->rma_info.dma_chan);
                } else {
                        if (!__scif_dec_pinned_vm_lock(window->mm,
                                                       window->nr_pages)) {
                                __scif_release_mm(window->mm);
                                window->mm = NULL;
                        }
                }
                scif_queue_for_cleanup(window, &scif_info.rma);
                mutex_lock(&ep->rma_info.rma_lock);
        }
        return err;
}

/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        struct scifmsg msg;
        struct scif_allocmsg *alloc = &window->alloc_handle;

        /* Set up the Alloc Handle */
        alloc->state = OP_IN_PROGRESS;
        init_waitqueue_head(&alloc->allocwq);

        /* Send out an allocation request */
        msg.uop = SCIF_ALLOC_REQ;
        msg.payload[1] = window->nr_pages;
        msg.payload[2] = (u64)&window->alloc_handle;
        return _scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        struct scifmsg msg;
        struct scif_window *remote_window;
        struct scif_allocmsg *alloc = &window->alloc_handle;
        dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
        int i = 0, j = 0;
        int nr_contig_chunks, loop_nr_contig_chunks;
        int remaining_nr_contig_chunks, nr_lookup;
        int err, map_err;

        map_err = scif_map_window(ep->remote_dev, window);
        if (map_err)
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d map_err %d\n", __func__, __LINE__, map_err);
        remaining_nr_contig_chunks = window->nr_contig_chunks;
        nr_contig_chunks = window->nr_contig_chunks;
retry:
        /* Wait for a SCIF_ALLOC_GNT/REJ message */
        err = wait_event_timeout(alloc->allocwq,
                                 alloc->state != OP_IN_PROGRESS,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        mutex_lock(&ep->rma_info.rma_lock);
        /* Synchronize with the thread waking up allocwq */
        mutex_unlock(&ep->rma_info.rma_lock);
        if (!err && scifdev_alive(ep))
                goto retry;

        if (!err)
                err = -ENODEV;

        if (err > 0)
                err = 0;
        else
                return err;

        /* Bail out. The remote end rejected this request */
        if (alloc->state == OP_FAILED)
                return -ENOMEM;

        if (map_err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d err %d\n", __func__, __LINE__, map_err);
                msg.uop = SCIF_FREE_VIRT;
                msg.src = ep->port;
                msg.payload[0] = ep->remote_ep;
                msg.payload[1] = window->alloc_handle.vaddr;
                msg.payload[2] = (u64)window;
                msg.payload[3] = SCIF_REGISTER;
                spin_lock(&ep->lock);
                if (ep->state == SCIFEP_CONNECTED)
                        err = _scif_nodeqp_send(ep->remote_dev, &msg);
                else
                        err = -ENOTCONN;
                spin_unlock(&ep->lock);
                return err;
        }

        remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
                                     ep->remote_dev);

        /*
         * Compute the number of lookup entries; each lookup page holds
         * SCIF_NR_ADDR_IN_PAGE addresses.
         */
        nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
                          >> ilog2(SCIF_NR_ADDR_IN_PAGE);
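        /*
         * e.g. assuming SCIF_NR_ADDR_IN_PAGE is 512, nr_contig_chunks = 513
         * rounds up to 1024 and nr_lookup = 1024 >> 9 = 2.
         */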

        dma_phys_lookup =
                scif_ioremap(remote_window->dma_addr_lookup.offset,
                             nr_lookup *
                             sizeof(*remote_window->dma_addr_lookup.lookup),
                             ep->remote_dev);
        num_pages_lookup =
                scif_ioremap(remote_window->num_pages_lookup.offset,
                             nr_lookup *
                             sizeof(*remote_window->num_pages_lookup.lookup),
                             ep->remote_dev);

        while (remaining_nr_contig_chunks) {
                loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
                                              (int)SCIF_NR_ADDR_IN_PAGE);
                /* #1/2 - Copy physical addresses over to the remote side */

                /* #2/2 - Copy DMA addresses (addresses that are fed into
                 * the DMA engine). We transfer bus addresses, which are
                 * then converted into a MIC physical address on the remote
                 * side if it is a MIC; if the remote node is a mgmt node we
                 * transfer the MIC physical address.
                 */
                tmp = scif_ioremap(dma_phys_lookup[j],
                                   loop_nr_contig_chunks *
                                   sizeof(*window->dma_addr),
                                   ep->remote_dev);
                tmp1 = scif_ioremap(num_pages_lookup[j],
                                    loop_nr_contig_chunks *
                                    sizeof(*window->num_pages),
                                    ep->remote_dev);
                if (scif_is_mgmt_node()) {
                        memcpy_toio((void __force __iomem *)tmp,
                                    &window->dma_addr[i], loop_nr_contig_chunks
                                    * sizeof(*window->dma_addr));
                        memcpy_toio((void __force __iomem *)tmp1,
                                    &window->num_pages[i], loop_nr_contig_chunks
                                    * sizeof(*window->num_pages));
                } else {
                        if (scifdev_is_p2p(ep->remote_dev)) {
                                /*
                                 * add remote node's base address for this node
                                 * to convert it into a MIC address
                                 */
                                int m;
                                dma_addr_t dma_addr;

                                for (m = 0; m < loop_nr_contig_chunks; m++) {
                                        dma_addr = window->dma_addr[i + m] +
                                                ep->remote_dev->base_addr;
                                        writeq(dma_addr,
                                               (void __force __iomem *)&tmp[m]);
                                }
                                memcpy_toio((void __force __iomem *)tmp1,
                                            &window->num_pages[i],
                                            loop_nr_contig_chunks
                                            * sizeof(*window->num_pages));
                        } else {
                                /* Mgmt node or loopback - transfer DMA
                                 * addresses as is, this is the same as a
                                 * MIC physical address (we use the dma_addr
                                 * and not the phys_addr array since the
                                 * phys_addr is only setup if there is a mmap()
                                 * request from the mgmt node)
                                 */
                                memcpy_toio((void __force __iomem *)tmp,
                                            &window->dma_addr[i],
                                            loop_nr_contig_chunks *
                                            sizeof(*window->dma_addr));
                                memcpy_toio((void __force __iomem *)tmp1,
                                            &window->num_pages[i],
                                            loop_nr_contig_chunks *
                                            sizeof(*window->num_pages));
                        }
                }
                remaining_nr_contig_chunks -= loop_nr_contig_chunks;
                i += loop_nr_contig_chunks;
                j++;
                scif_iounmap(tmp, loop_nr_contig_chunks *
                             sizeof(*window->dma_addr), ep->remote_dev);
                scif_iounmap(tmp1, loop_nr_contig_chunks *
                             sizeof(*window->num_pages), ep->remote_dev);
        }

        /* Prepare the remote window for the peer */
        remote_window->peer_window = (u64)window;
        remote_window->offset = window->offset;
        remote_window->prot = window->prot;
        remote_window->nr_contig_chunks = nr_contig_chunks;
        remote_window->ep = ep->remote_ep;
        scif_iounmap(num_pages_lookup,
                     nr_lookup *
                     sizeof(*remote_window->num_pages_lookup.lookup),
                     ep->remote_dev);
        scif_iounmap(dma_phys_lookup,
                     nr_lookup *
                     sizeof(*remote_window->dma_addr_lookup.lookup),
                     ep->remote_dev);
        scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
        window->peer_window = alloc->vaddr;
        return err;
}

/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if the EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message; otherwise send a SCIF_FREE_VIRT
 * message so that the peer can free its remote window allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        int err = 0;
        struct scifmsg msg;

        msg.src = ep->port;
        msg.payload[0] = ep->remote_ep;
        msg.payload[1] = window->alloc_handle.vaddr;
        msg.payload[2] = (u64)window;
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED) {
                msg.uop = SCIF_REGISTER;
                window->reg_state = OP_IN_PROGRESS;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                spin_unlock(&ep->lock);
                if (!err) {
retry:
                        /* Wait for a SCIF_REGISTER_(N)ACK message */
                        err = wait_event_timeout(window->regwq,
                                                 window->reg_state !=
                                                 OP_IN_PROGRESS,
                                                 SCIF_NODE_ALIVE_TIMEOUT);
                        if (!err && scifdev_alive(ep))
                                goto retry;
                        err = !err ? -ENODEV : 0;
                        if (window->reg_state == OP_FAILED)
                                err = -ENOTCONN;
                }
        } else {
                msg.uop = SCIF_FREE_VIRT;
                msg.payload[3] = SCIF_REGISTER;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                spin_unlock(&ep->lock);
                if (!err)
                        err = -ENOTCONN;
        }
        return err;
}

/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
                           int num_pages, s64 *out_offset)
{
        s64 page_index;
        struct iova *iova_ptr;
        int err = 0;

        if (flags & SCIF_MAP_FIXED) {
                page_index = SCIF_IOVA_PFN(offset);
                iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
                                        page_index + num_pages - 1);
                if (!iova_ptr)
                        err = -EADDRINUSE;
        } else {
                iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
                                      SCIF_DMA_63BIT_PFN - 1, 0);
                if (!iova_ptr)
                        err = -ENOMEM;
        }
        if (!err)
                *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
        return err;
}
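
/*
 * For example (4 KiB pages assumed), a SCIF_MAP_FIXED request at offset
 * 0x200000 for 16 pages reserves IOVA PFNs 0x200 through 0x20f and fails
 * with -EADDRINUSE if any of them is already reserved; without
 * SCIF_MAP_FIXED the allocator picks any free range below
 * SCIF_DMA_63BIT_PFN.
 */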

/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: Offset to be freed
 *
 * Free offset for this EP. The caller is expected to hold
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
                             struct scif_window *window, s64 offset)
{
        if ((window && !window->offset_freed) || !window) {
                free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
                if (window)
                        window->offset_freed = true;
        }
}

/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
        int err;
        struct scif_window *window = NULL;
        int nr_pages = msg->payload[1];

        window = scif_create_remote_window(scifdev, nr_pages);
        if (!window) {
                err = -ENOMEM;
                goto error;
        }

        /* The peer's allocation request is granted */
        msg->uop = SCIF_ALLOC_GNT;
        msg->payload[0] = (u64)window;
        msg->payload[1] = window->mapped_offset;
        err = scif_nodeqp_send(scifdev, msg);
        if (err)
                scif_destroy_remote_window(window);
        return;
error:
        /* The peer's allocation request is rejected */
        dev_err(&scifdev->sdev->dev,
                "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
                __func__, __LINE__, err, window, nr_pages);
        msg->uop = SCIF_ALLOC_REJ;
        scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
        struct scif_window *window = container_of(handle, struct scif_window,
                                                  alloc_handle);
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        handle->vaddr = msg->payload[0];
        handle->phys_addr = msg->payload[1];
        if (msg->uop == SCIF_ALLOC_GNT)
                handle->state = OP_COMPLETED;
        else
                handle->state = OP_FAILED;
        wake_up(&handle->allocwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Free up the remote window allocated earlier.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window = (struct scif_window *)msg->payload[1];

        scif_destroy_remote_window(window);
}

static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
        int j;
        struct scif_hw_dev *sdev = dev->sdev;
        phys_addr_t apt_base = 0;

        /*
         * Add the aperture base if the DMA address is not card relative
         * since the DMA addresses need to be an offset into the bar
         */
        if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
            sdev->aper && !sdev->card_rel_da)
                apt_base = sdev->aper->pa;
        else
                return;

        for (j = 0; j < window->nr_contig_chunks; j++) {
                if (window->num_pages[j])
                        window->dma_addr[j] += apt_base;
                else
                        break;
        }
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];

        mutex_lock(&ep->rma_info.rma_lock);
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED) {
                msg->uop = SCIF_REGISTER_ACK;
                scif_nodeqp_send(ep->remote_dev, msg);
                scif_fixup_aper_base(ep->remote_dev, window);
                /* No further failures expected. Insert new window */
                scif_insert_window(window, &ep->rma_info.remote_reg_list);
        } else {
                msg->uop = SCIF_REGISTER_NACK;
                scif_nodeqp_send(ep->remote_dev, msg);
        }
        spin_unlock(&ep->lock);
        mutex_unlock(&ep->rma_info.rma_lock);
        /* free up any lookup resources now that page lists are transferred */
        scif_destroy_remote_lookup(ep->remote_dev, window);
        /*
         * If the window could not be inserted (NACK), it still
         * needs to be destroyed here.
         */
        if (msg->uop == SCIF_REGISTER_NACK)
                scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_rma_req req;
        struct scif_window *window = NULL;
        struct scif_window *recv_window =
                (struct scif_window *)msg->payload[0];
        struct scif_endpt *ep;
        int del_window = 0;

        ep = (struct scif_endpt *)recv_window->ep;
        req.out_window = &window;
        req.offset = recv_window->offset;
        req.prot = 0;
        req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
        req.type = SCIF_WINDOW_FULL;
        req.head = &ep->rma_info.remote_reg_list;
        msg->payload[0] = ep->remote_ep;

        mutex_lock(&ep->rma_info.rma_lock);
        /* Does a valid window exist? */
        if (scif_query_window(&req)) {
                dev_err(&scifdev->sdev->dev,
                        "%s %d -ENXIO\n", __func__, __LINE__);
                msg->uop = SCIF_UNREGISTER_ACK;
                goto error;
        }
        if (window) {
                if (window->ref_count)
                        scif_put_window(window, window->nr_pages);
                else
                        dev_err(&scifdev->sdev->dev,
                                "%s %d ref count should be +ve\n",
                                __func__, __LINE__);
                window->unreg_state = OP_COMPLETED;
                if (!window->ref_count) {
                        msg->uop = SCIF_UNREGISTER_ACK;
                        atomic_inc(&ep->rma_info.tw_refcount);
                        ep->rma_info.async_list_del = 1;
                        list_del_init(&window->list);
                        del_window = 1;
                } else {
                        /* NACK! There are valid references to this window */
                        msg->uop = SCIF_UNREGISTER_NACK;
                }
        } else {
                /* The window did not make its way to the list at all. ACK */
                msg->uop = SCIF_UNREGISTER_ACK;
                scif_destroy_remote_window(recv_window);
        }
error:
        mutex_unlock(&ep->rma_info.rma_lock);
        if (del_window)
                scif_drain_dma_intr(ep->remote_dev->sdev,
                                    ep->rma_info.dma_chan);
        scif_nodeqp_send(ep->remote_dev, msg);
        if (del_window)
                scif_queue_for_cleanup(window, &scif_info.rma);
}

/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[2];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->reg_state = OP_COMPLETED;
        wake_up(&window->regwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the thread waiting on the window and inform it that
 * registration cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[2];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->reg_state = OP_FAILED;
        wake_up(&window->regwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->unreg_state = OP_COMPLETED;
        wake_up(&window->unregwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the thread waiting on the window and inform it that
 * unregistration cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->unreg_state = OP_FAILED;
        wake_up(&window->unregwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

int __scif_pin_pages(void *addr, size_t len, int *out_prot,
                     int map_flags, scif_pinned_pages_t *pages)
{
        struct scif_pinned_pages *pinned_pages;
        int nr_pages, err = 0, i;
        bool vmalloc_addr = false;
        bool try_upgrade = false;
        int prot = *out_prot;
        int ulimit = 0;
        struct mm_struct *mm = NULL;

        /* Unsupported flags */
        if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
                return -EINVAL;
        ulimit = !!(map_flags & SCIF_MAP_ULIMIT);

        /* Unsupported protection requested */
        if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
                return -EINVAL;

        /* addr/len must be page aligned; len must be non-zero */
1339         if (!len ||
1340             (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1341             (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1342                 return -EINVAL;
1343
1344         might_sleep();
1345
1346         nr_pages = len >> PAGE_SHIFT;
1347
1348         /* Allocate a set of pinned pages */
1349         pinned_pages = scif_create_pinned_pages(nr_pages, prot);
1350         if (!pinned_pages)
1351                 return -ENOMEM;
1352
1353         if (map_flags & SCIF_MAP_KERNEL) {
1354                 if (is_vmalloc_addr(addr))
1355                         vmalloc_addr = true;
1356
1357                 for (i = 0; i < nr_pages; i++) {
1358                         if (vmalloc_addr)
1359                                 pinned_pages->pages[i] =
1360                                         vmalloc_to_page(addr + (i * PAGE_SIZE));
1361                         else
1362                                 pinned_pages->pages[i] =
1363                                         virt_to_page(addr + (i * PAGE_SIZE));
1364                 }
1365                 pinned_pages->nr_pages = nr_pages;
1366                 pinned_pages->map_flags = SCIF_MAP_KERNEL;
1367         } else {
1368                 /*
1369                  * SCIF supports registration caching. If a registration has
1370                  * been requested with read only permissions, then we try
1371                  * to pin the pages with RW permissions so that a subsequent
1372                  * transfer with RW permission can hit the cache instead of
1373                  * invalidating it. If the upgrade fails with RW then we
1374                  * revert back to R permission and retry
1375                  */
1376                 if (prot == SCIF_PROT_READ)
1377                         try_upgrade = true;
1378                 prot |= SCIF_PROT_WRITE;
1379 retry:
1380                 mm = current->mm;
1381                 if (ulimit) {
1382                         err = __scif_check_inc_pinned_vm(mm, nr_pages);
1383                         if (err) {
1384                                 pinned_pages->nr_pages = 0;
1385                                 goto error_unmap;
1386                         }
1387                 }
1388
1389                 pinned_pages->nr_pages = pin_user_pages_fast(
1390                                 (u64)addr,
1391                                 nr_pages,
1392                                 (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
1393                                 pinned_pages->pages);
1394                 if (nr_pages != pinned_pages->nr_pages) {
1395                         if (try_upgrade) {
1396                                 if (ulimit)
1397                                         __scif_dec_pinned_vm_lock(mm, nr_pages);
1398                                 /* Roll back any pinned pages */
1399                                 unpin_user_pages(pinned_pages->pages,
1400                                                  pinned_pages->nr_pages);
1401                                 prot &= ~SCIF_PROT_WRITE;
1402                                 try_upgrade = false;
1403                                 goto retry;
1404                         }
1405                 }
1406                 pinned_pages->map_flags = 0;
1407         }
1408
1409         if (pinned_pages->nr_pages < nr_pages) {
1410                 err = -EFAULT;
1411                 pinned_pages->nr_pages = nr_pages;
1412                 goto dec_pinned;
1413         }
1414
1415         *out_prot = prot;
1416         atomic_set(&pinned_pages->ref_count, 1);
1417         *pages = pinned_pages;
1418         return err;
1419 dec_pinned:
1420         if (ulimit)
1421                 __scif_dec_pinned_vm_lock(mm, nr_pages);
1422         /* Something went wrong! Roll back */
1423 error_unmap:
1424         pinned_pages->nr_pages = nr_pages;
1425         scif_destroy_pinned_pages(pinned_pages);
1426         *pages = NULL;
1427         dev_dbg(scif_info.mdev.this_device,
1428                 "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
1429         return err;
1430 }
1431
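     /**
      * scif_pin_pages:
      * @addr: virtual address of the start of the range to pin
      * @len: length of the range in bytes; must be a non-zero multiple of PAGE_SIZE
      * @prot: read/write protection (SCIF_PROT_READ and/or SCIF_PROT_WRITE)
      * @map_flags: SCIF_MAP_KERNEL if addr is a kernel virtual address
      * @pages: on success, set to an opaque handle for the pinned pages
      *
      * Pin the physical pages backing [addr, addr + len) so that they can
      * later be registered with scif_register_pinned_pages(). Returns 0 on
      * success or a negative errno on failure.
      */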
1432 int scif_pin_pages(void *addr, size_t len, int prot,
1433                    int map_flags, scif_pinned_pages_t *pages)
1434 {
1435         return __scif_pin_pages(addr, len, &prot, map_flags, pages);
1436 }
1437 EXPORT_SYMBOL_GPL(scif_pin_pages);
1438
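     /**
      * scif_unpin_pages:
      * @pinned_pages: opaque handle to the set of pinned pages
      *
      * Drop the caller's reference on a set of pinned pages; the pages are
      * released only when the last reference goes away, i.e. once every
      * window registered against them has been unregistered. Returns 0 on
      * success or a negative errno on failure.
      */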
1439 int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
1440 {
1441         int err = 0, ret;
1442
1443         if (!pinned_pages || pinned_pages->magic != SCIFEP_MAGIC)
1444                 return -EINVAL;
1445
1446         ret = atomic_dec_return(&pinned_pages->ref_count);
1447         if (ret < 0) {
1448                 dev_err(scif_info.mdev.this_device,
1449                         "%s %d scif_unpin_pages called without pinning? rc %d\n",
1450                         __func__, __LINE__, ret);
1451                 return -EINVAL;
1452         }
1453         /*
1454          * Destroy the set of pinned pages when its ref count drops to
1455          * zero. If the count is still positive then a valid registered
1456          * window is backed by these pages and they will be destroyed
1457          * once all such windows are unregistered.
1458          */
1459         if (!ret)
1460                 err = scif_destroy_pinned_pages(pinned_pages);
1461
1462         return err;
1463 }
1464 EXPORT_SYMBOL_GPL(scif_unpin_pages);
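
     /*
      * Illustrative pin/register/unpin example (not part of the driver).
      * It assumes "epd" is an endpoint already connected via scif_connect();
      * error handling and the eventual scif_unregister() call are elided.
      *
      *	scif_pinned_pages_t pin;
      *	void *buf = vmalloc(4 * PAGE_SIZE);
      *	off_t off;
      *	int err;
      *
      *	err = scif_pin_pages(buf, 4 * PAGE_SIZE,
      *			     SCIF_PROT_READ | SCIF_PROT_WRITE,
      *			     SCIF_MAP_KERNEL, &pin);
      *	if (!err) {
      *		off = scif_register_pinned_pages(epd, pin, 0, 0);
      *		... RMA transfers against the registered window ...
      *		scif_unpin_pages(pin);
      *	}
      */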
1465
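     /**
      * scif_insert_local_window:
      * @window: self registration window
      * @ep: end point
      *
      * Insert a fully prepared window into the endpoint's list of self
      * registered windows, under the endpoint's RMA lock.
      */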
1466 static inline void
1467 scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
1468 {
1469         mutex_lock(&ep->rma_info.rma_lock);
1470         scif_insert_window(window, &ep->rma_info.reg_list);
1471         mutex_unlock(&ep->rma_info.rma_lock);
1472 }
1473
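     /**
      * scif_register_pinned_pages:
      * @epd: end point descriptor
      * @pinned_pages: opaque handle to a set of previously pinned pages
      * @offset: registered address space offset; used verbatim only when
      *          SCIF_MAP_FIXED is passed in map_flags
      * @map_flags: SCIF_MAP_FIXED is the only flag supported here
      *
      * Create a self registration window backed by an existing set of
      * pinned pages and advertise it to the peer. Returns the offset at
      * which the window was registered on success or a negative errno on
      * failure.
      */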
1474 off_t scif_register_pinned_pages(scif_epd_t epd,
1475                                  scif_pinned_pages_t pinned_pages,
1476                                  off_t offset, int map_flags)
1477 {
1478         struct scif_endpt *ep = (struct scif_endpt *)epd;
1479         s64 computed_offset;
1480         struct scif_window *window;
1481         int err;
1482         size_t len;
1483         struct device *spdev;
1484
1485         /* Unsupported flags */
1486         if (map_flags & ~SCIF_MAP_FIXED)
1487                 return -EINVAL;
1488
1489         len = pinned_pages->nr_pages << PAGE_SHIFT;
1490
1491         /*
1492          * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1493          * is negative, or if offset + len wraps around.
1494          */
1495         if ((map_flags & SCIF_MAP_FIXED) &&
1496             ((ALIGN(offset, PAGE_SIZE) != offset) ||
1497             (offset < 0) ||
1498             (len > LONG_MAX - offset)))
1499                 return -EINVAL;
1500
1501         might_sleep();
1502
1503         err = scif_verify_epd(ep);
1504         if (err)
1505                 return err;
1506         /*
1507          * It is an error to pass pinned_pages to scif_register_pinned_pages()
1508          * after calling scif_unpin_pages().
1509          */
1510         if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
1511                 return -EINVAL;
1512
1513         /* Compute the offset for this registration */
1514         err = scif_get_window_offset(ep, map_flags, offset,
1515                                      len, &computed_offset);
1516         if (err) {
1517                 atomic_dec(&pinned_pages->ref_count);
1518                 return err;
1519         }
1520
1521         /* Allocate and prepare self registration window */
1522         window = scif_create_window(ep, pinned_pages->nr_pages,
1523                                     computed_offset, false);
1524         if (!window) {
1525                 atomic_dec(&pinned_pages->ref_count);
1526                 scif_free_window_offset(ep, NULL, computed_offset);
1527                 return -ENOMEM;
1528         }
1529
1530         window->pinned_pages = pinned_pages;
1531         window->nr_pages = pinned_pages->nr_pages;
1532         window->prot = pinned_pages->prot;
1533
1534         spdev = scif_get_peer_dev(ep->remote_dev);
1535         if (IS_ERR(spdev)) {
1536                 err = PTR_ERR(spdev);
1537                 scif_destroy_window(ep, window);
1538                 return err;
1539         }
1540         err = scif_send_alloc_request(ep, window);
1541         if (err) {
1542                 dev_err(&ep->remote_dev->sdev->dev,
1543                         "%s %d err %d\n", __func__, __LINE__, err);
1544                 goto error_unmap;
1545         }
1546
1547         /* Prepare the remote registration window */
1548         err = scif_prep_remote_window(ep, window);
1549         if (err) {
1550                 dev_err(&ep->remote_dev->sdev->dev,
1551                         "%s %d err %d\n", __func__, __LINE__, err);
1552                 goto error_unmap;
1553         }
1554
1555         /* Tell the peer about the new window */
1556         err = scif_send_scif_register(ep, window);
1557         if (err) {
1558                 dev_err(&ep->remote_dev->sdev->dev,
1559                         "%s %d err %d\n", __func__, __LINE__, err);
1560                 goto error_unmap;
1561         }
1562
1563         scif_put_peer_dev(spdev);
1564         /* No further failures expected. Insert new window */
1565         scif_insert_local_window(window, ep);
1566         return computed_offset;
1567 error_unmap:
1568         scif_destroy_window(ep, window);
1569         scif_put_peer_dev(spdev);
1570         dev_err(&ep->remote_dev->sdev->dev,
1571                 "%s %d err %d\n", __func__, __LINE__, err);
1572         return err;
1573 }
1574 EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
1575
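     /**
      * scif_register:
      * @epd: end point descriptor
      * @addr: page aligned virtual address of the start of the range
      * @len: length of the range in bytes; must be a non-zero multiple of
      *       PAGE_SIZE
      * @offset: registered address space offset; used verbatim only when
      *          SCIF_MAP_FIXED is set
      * @prot: read/write protection (SCIF_PROT_READ and/or SCIF_PROT_WRITE)
      * @map_flags: SCIF_MAP_FIXED and/or SCIF_MAP_KERNEL
      *
      * Pin the pages backing [addr, addr + len), create a self registration
      * window for them and advertise the window to the peer. Returns the
      * offset at which the window was registered on success or a negative
      * errno on failure.
      */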
1576 off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
1577                     int prot, int map_flags)
1578 {
1579         scif_pinned_pages_t pinned_pages;
1580         off_t err;
1581         struct scif_endpt *ep = (struct scif_endpt *)epd;
1582         s64 computed_offset;
1583         struct scif_window *window;
1584         struct mm_struct *mm = NULL;
1585         struct device *spdev;
1586
1587         dev_dbg(scif_info.mdev.this_device,
1588                 "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
1589                 epd, addr, len, offset, prot, map_flags);
1590         /* Unsupported flags */
1591         if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
1592                 return -EINVAL;
1593
1594         /*
1595          * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1596          * is negative, or if offset + len wraps around.
1597          */
1598         if ((map_flags & SCIF_MAP_FIXED) &&
1599             ((ALIGN(offset, PAGE_SIZE) != offset) ||
1600             (offset < 0) ||
1601             (len > LONG_MAX - offset)))
1602                 return -EINVAL;
1603
1604         /* Unsupported protection requested */
1605         if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1606                 return -EINVAL;
1607
1608         /* addr and len must be page aligned, and len must be non-zero */
1609         if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1610             (ALIGN(len, PAGE_SIZE) != len))
1611                 return -EINVAL;
1612
1613         might_sleep();
1614
1615         err = scif_verify_epd(ep);
1616         if (err)
1617                 return err;
1618
1619         /* Compute the offset for this registration */
1620         err = scif_get_window_offset(ep, map_flags, offset,
1621                                      len >> PAGE_SHIFT, &computed_offset);
1622         if (err)
1623                 return err;
1624
1625         spdev = scif_get_peer_dev(ep->remote_dev);
1626         if (IS_ERR(spdev)) {
1627                 err = PTR_ERR(spdev);
1628                 scif_free_window_offset(ep, NULL, computed_offset);
1629                 return err;
1630         }
1631         /* Allocate and prepare self registration window */
1632         window = scif_create_window(ep, len >> PAGE_SHIFT,
1633                                     computed_offset, false);
1634         if (!window) {
1635                 scif_free_window_offset(ep, NULL, computed_offset);
1636                 scif_put_peer_dev(spdev);
1637                 return -ENOMEM;
1638         }
1639
1640         window->nr_pages = len >> PAGE_SHIFT;
1641
1642         err = scif_send_alloc_request(ep, window);
1643         if (err) {
1644                 scif_destroy_incomplete_window(ep, window);
1645                 scif_put_peer_dev(spdev);
1646                 return err;
1647         }
1648
1649         if (!(map_flags & SCIF_MAP_KERNEL)) {
1650                 mm = __scif_acquire_mm();
1651                 map_flags |= SCIF_MAP_ULIMIT;
1652         }
1653         /* Pin down the pages */
1654         err = __scif_pin_pages(addr, len, &prot,
1655                                map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
1656                                &pinned_pages);
1657         if (err) {
1658                 scif_destroy_incomplete_window(ep, window);
1659                 __scif_release_mm(mm);
1660                 goto error;
1661         }
1662
1663         window->pinned_pages = pinned_pages;
1664         window->prot = pinned_pages->prot;
1665         window->mm = mm;
1666
1667         /* Prepare the remote registration window */
1668         err = scif_prep_remote_window(ep, window);
1669         if (err) {
1670                 dev_err(&ep->remote_dev->sdev->dev,
1671                         "%s %d err %ld\n", __func__, __LINE__, err);
1672                 goto error_unmap;
1673         }
1674
1675         /* Tell the peer about the new window */
1676         err = scif_send_scif_register(ep, window);
1677         if (err) {
1678                 dev_err(&ep->remote_dev->sdev->dev,
1679                         "%s %d err %ld\n", __func__, __LINE__, err);
1680                 goto error_unmap;
1681         }
1682
1683         scif_put_peer_dev(spdev);
1684         /* No further failures expected. Insert new window */
1685         scif_insert_local_window(window, ep);
1686         dev_dbg(&ep->remote_dev->sdev->dev,
1687                 "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
1688                 epd, addr, len, computed_offset);
1689         return computed_offset;
1690 error_unmap:
1691         scif_destroy_window(ep, window);
1692 error:
1693         scif_put_peer_dev(spdev);
1694         dev_err(&ep->remote_dev->sdev->dev,
1695                 "%s %d err %ld\n", __func__, __LINE__, err);
1696         return err;
1697 }
1698 EXPORT_SYMBOL_GPL(scif_register);
1699
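     /**
      * scif_unregister:
      * @epd: end point descriptor
      * @offset: start of the registered address space range
      * @len: length of the range in bytes; must be a non-zero multiple of
      *       PAGE_SIZE
      *
      * Unregister the self registration windows covering the range
      * [offset, offset + len). Returns 0 on success or a negative errno on
      * failure.
      */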
1700 int
1701 scif_unregister(scif_epd_t epd, off_t offset, size_t len)
1702 {
1703         struct scif_endpt *ep = (struct scif_endpt *)epd;
1704         struct scif_window *window = NULL;
1705         struct scif_rma_req req;
1706         int nr_pages, err;
1707         struct device *spdev;
1708
1709         dev_dbg(scif_info.mdev.this_device,
1710                 "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
1711                 ep, offset, len);
1712         /* len must be page aligned and non-zero */
1713         if (!len ||
1714             (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1715                 return -EINVAL;
1716
1717         /* Fail if offset is not page aligned, negative, or offset + len wraps */
1718         if ((ALIGN(offset, PAGE_SIZE) != offset) ||
1719             (offset < 0) ||
1720             (len > LONG_MAX - offset))
1721                 return -EINVAL;
1722
1723         err = scif_verify_epd(ep);
1724         if (err)
1725                 return err;
1726
1727         might_sleep();
1728         nr_pages = len >> PAGE_SHIFT;
1729
1730         req.out_window = &window;
1731         req.offset = offset;
1732         req.prot = 0;
1733         req.nr_bytes = len;
1734         req.type = SCIF_WINDOW_FULL;
1735         req.head = &ep->rma_info.reg_list;
1736
1737         spdev = scif_get_peer_dev(ep->remote_dev);
1738         if (IS_ERR(spdev)) {
1739                 err = PTR_ERR(spdev);
1740                 return err;
1741         }
1742         mutex_lock(&ep->rma_info.rma_lock);
1743         /* Does a valid window exist? */
1744         err = scif_query_window(&req);
1745         if (err) {
1746                 dev_err(&ep->remote_dev->sdev->dev,
1747                         "%s %d err %d\n", __func__, __LINE__, err);
1748                 goto error;
1749         }
1750         /* Unregister all the windows in this range */
1751         err = scif_rma_list_unregister(window, offset, nr_pages);
1752         if (err)
1753                 dev_err(&ep->remote_dev->sdev->dev,
1754                         "%s %d err %d\n", __func__, __LINE__, err);
1755 error:
1756         mutex_unlock(&ep->rma_info.rma_lock);
1757         scif_put_peer_dev(spdev);
1758         return err;
1759 }
1760 EXPORT_SYMBOL_GPL(scif_unregister);
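
     /*
      * Illustrative register/unregister round trip (not part of the
      * driver). It assumes "epd" is a connected endpoint and "buf" is a
      * page aligned buffer of "len" bytes, with len a non-zero multiple of
      * PAGE_SIZE; the fixed offset 0x100000 is arbitrary but page aligned.
      *
      *	off_t off = scif_register(epd, buf, len, 0x100000,
      *				  SCIF_PROT_READ | SCIF_PROT_WRITE,
      *				  SCIF_MAP_FIXED);
      *	if (off >= 0) {
      *		... RMA transfers against [off, off + len) ...
      *		scif_unregister(epd, off, len);
      *	}
      */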