iommu/iova: Free global iova rcache on iova alloc failure
drivers/iommu/iova.c [linux-2.6-microblaze.git]
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR     ~0UL

static bool iova_rcache_insert(struct iova_domain *iovad,
                               unsigned long pfn,
                               unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(struct timer_list *t);
static void free_global_cached_iovas(struct iova_domain *iovad);

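/*
 * init_iova_domain - initialise a domain for IOVA allocations starting at
 * @start_pfn, with @granule as the allocation granularity. The granule is
 * normally the smallest IOMMU page size and must be a power of two no
 * larger than PAGE_SIZE.
 */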
void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
        unsigned long start_pfn)
{
        /*
         * IOVA granularity will normally be equal to the smallest
         * supported IOMMU page size; both *must* be capable of
         * representing individual CPU pages exactly.
         */
        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

        spin_lock_init(&iovad->iova_rbtree_lock);
        iovad->rbroot = RB_ROOT;
        iovad->cached_node = &iovad->anchor.node;
        iovad->cached32_node = &iovad->anchor.node;
        iovad->granule = granule;
        iovad->start_pfn = start_pfn;
        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
        iovad->max32_alloc_size = iovad->dma_32bit_pfn;
        iovad->flush_cb = NULL;
        iovad->fq = NULL;
        iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
        rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
        rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
        init_iova_rcaches(iovad);
}
EXPORT_SYMBOL_GPL(init_iova_domain);

bool has_iova_flush_queue(struct iova_domain *iovad)
{
        return !!iovad->fq;
}

static void free_iova_flush_queue(struct iova_domain *iovad)
{
        if (!has_iova_flush_queue(iovad))
                return;

        if (timer_pending(&iovad->fq_timer))
                del_timer(&iovad->fq_timer);

        fq_destroy_all_entries(iovad);

        free_percpu(iovad->fq);

        iovad->fq         = NULL;
        iovad->flush_cb   = NULL;
        iovad->entry_dtor = NULL;
}

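/**
 * init_iova_flush_queue - set up deferred flushing for an iova domain
 * @iovad: - iova domain in question
 * @flush_cb: - callback which flushes the IOTLB for the whole domain
 * @entry_dtor: - optional callback invoked on each entry's data when the
 *      entry is finally freed
 *
 * Allocates the per-CPU flush queues used by queue_iova() and sets up the
 * flush timer. Returns 0 on success or -ENOMEM.
 */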
int init_iova_flush_queue(struct iova_domain *iovad,
                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
        struct iova_fq __percpu *queue;
        int cpu;

        atomic64_set(&iovad->fq_flush_start_cnt,  0);
        atomic64_set(&iovad->fq_flush_finish_cnt, 0);

        queue = alloc_percpu(struct iova_fq);
        if (!queue)
                return -ENOMEM;

        iovad->flush_cb   = flush_cb;
        iovad->entry_dtor = entry_dtor;

        for_each_possible_cpu(cpu) {
                struct iova_fq *fq;

                fq = per_cpu_ptr(queue, cpu);
                fq->head = 0;
                fq->tail = 0;

                spin_lock_init(&fq->lock);
        }

        smp_wmb();

        iovad->fq = queue;

        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
        atomic_set(&iovad->fq_timer_on, 0);

        return 0;
}
EXPORT_SYMBOL_GPL(init_iova_flush_queue);

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
        if (limit_pfn <= iovad->dma_32bit_pfn)
                return iovad->cached32_node;

        return iovad->cached_node;
}

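/*
 * Remember the most recently inserted node on either side of the 32-bit
 * boundary, so that the next allocation can resume the backwards rbtree
 * walk from there rather than from the anchor.
 */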
static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
        if (new->pfn_hi < iovad->dma_32bit_pfn)
                iovad->cached32_node = &new->node;
        else
                iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
        struct iova *cached_iova;

        cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
        if (free == cached_iova ||
            (free->pfn_hi < iovad->dma_32bit_pfn &&
             free->pfn_lo >= cached_iova->pfn_lo)) {
                iovad->cached32_node = rb_next(&free->node);
                iovad->max32_alloc_size = iovad->dma_32bit_pfn;
        }

        cached_iova = rb_entry(iovad->cached_node, struct iova, node);
        if (free->pfn_lo >= cached_iova->pfn_lo)
                iovad->cached_node = rb_next(&free->node);
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
                   struct rb_node *start)
{
        struct rb_node **new, *parent = NULL;

        new = (start) ? &start : &(root->rb_node);
        /* Figure out where to put new node */
        while (*new) {
                struct iova *this = rb_entry(*new, struct iova, node);

                parent = *new;

                if (iova->pfn_lo < this->pfn_lo)
                        new = &((*new)->rb_left);
                else if (iova->pfn_lo > this->pfn_lo)
                        new = &((*new)->rb_right);
                else {
                        WARN_ON(1); /* this should not happen */
                        return;
                }
        }
        /* Add new node and rebalance tree. */
        rb_link_node(&iova->node, parent, new);
        rb_insert_color(&iova->node, root);
}

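/*
 * Find a free range of @size pfns below @limit_pfn by walking the rbtree
 * backwards from the cached node towards start_pfn, retrying once from
 * limit_pfn if the first pass fails, and insert the new node on success.
 * Returns 0 on success or -ENOMEM if no suitable range exists.
 */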
static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                unsigned long size, unsigned long limit_pfn,
                        struct iova *new, bool size_aligned)
{
        struct rb_node *curr, *prev;
        struct iova *curr_iova;
        unsigned long flags;
        unsigned long new_pfn, retry_pfn;
        unsigned long align_mask = ~0UL;
        unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

        if (size_aligned)
                align_mask <<= fls_long(size - 1);

        /* Walk the tree backwards */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        if (limit_pfn <= iovad->dma_32bit_pfn &&
                        size >= iovad->max32_alloc_size)
                goto iova32_full;

        curr = __get_cached_rbnode(iovad, limit_pfn);
        curr_iova = rb_entry(curr, struct iova, node);
        retry_pfn = curr_iova->pfn_hi + 1;

retry:
        do {
                high_pfn = min(high_pfn, curr_iova->pfn_lo);
                new_pfn = (high_pfn - size) & align_mask;
                prev = curr;
                curr = rb_prev(curr);
                curr_iova = rb_entry(curr, struct iova, node);
        } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

        if (high_pfn < size || new_pfn < low_pfn) {
                if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
                        high_pfn = limit_pfn;
                        low_pfn = retry_pfn;
                        curr = &iovad->anchor.node;
                        curr_iova = rb_entry(curr, struct iova, node);
                        goto retry;
                }
                iovad->max32_alloc_size = size;
                goto iova32_full;
        }

        /* pfn_lo will point to size aligned address if size_aligned is set */
        new->pfn_lo = new_pfn;
        new->pfn_hi = new->pfn_lo + size - 1;

        /* If we have 'prev', it's a valid place to start the insertion. */
        iova_insert_rbtree(&iovad->rbroot, new, prev);
        __cached_rbnode_insert_update(iovad, new);

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return 0;

iova32_full:
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

struct iova *alloc_iova_mem(void)
{
        return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}
EXPORT_SYMBOL(alloc_iova_mem);

void free_iova_mem(struct iova *iova)
{
        if (iova->pfn_lo != IOVA_ANCHOR)
                kmem_cache_free(iova_cache, iova);
}
EXPORT_SYMBOL(free_iova_mem);

int iova_cache_get(void)
{
        mutex_lock(&iova_cache_mutex);
        if (!iova_cache_users) {
                iova_cache = kmem_cache_create(
                        "iommu_iova", sizeof(struct iova), 0,
                        SLAB_HWCACHE_ALIGN, NULL);
                if (!iova_cache) {
                        mutex_unlock(&iova_cache_mutex);
                        pr_err("Couldn't create iova cache\n");
                        return -ENOMEM;
                }
        }

        iova_cache_users++;
        mutex_unlock(&iova_cache_mutex);

        return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
        mutex_lock(&iova_cache_mutex);
        if (WARN_ON(!iova_cache_users)) {
                mutex_unlock(&iova_cache_mutex);
                return;
        }
        iova_cache_users--;
        if (!iova_cache_users)
                kmem_cache_destroy(iova_cache);
        mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if a size-aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
        unsigned long limit_pfn,
        bool size_aligned)
{
        struct iova *new_iova;
        int ret;

        new_iova = alloc_iova_mem();
        if (!new_iova)
                return NULL;

        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
                        new_iova, size_aligned);

        if (ret) {
                free_iova_mem(new_iova);
                return NULL;
        }

        return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
        struct rb_node *node = iovad->rbroot.rb_node;

        assert_spin_locked(&iovad->iova_rbtree_lock);

        while (node) {
                struct iova *iova = rb_entry(node, struct iova, node);

                if (pfn < iova->pfn_lo)
                        node = node->rb_left;
                else if (pfn > iova->pfn_hi)
                        node = node->rb_right;
                else
                        return iova;    /* pfn falls within iova's range */
        }

        return NULL;
}

static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
{
        assert_spin_locked(&iovad->iova_rbtree_lock);
        __cached_rbnode_delete_update(iovad, iova);
        rb_erase(&iova->node, &iovad->rbroot);
        free_iova_mem(iova);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
        unsigned long flags;
        struct iova *iova;

        /* Take the lock so that no other thread is manipulating the rbtree */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        iova = private_find_iova(iovad, pfn);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
        unsigned long flags;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        private_free_iova(iovad, iova);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds the iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
        struct iova *iova = find_iova(iovad, pfn);

        if (iova)
                __free_iova(iovad, iova);

}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
                unsigned long limit_pfn, bool flush_rcache)
{
        unsigned long iova_pfn;
        struct iova *new_iova;

        iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
        if (iova_pfn)
                return iova_pfn;

retry:
        new_iova = alloc_iova(iovad, size, limit_pfn, true);
        if (!new_iova) {
                unsigned int cpu;

                if (!flush_rcache)
                        return 0;

                /* Try replenishing IOVAs by flushing rcache. */
                flush_rcache = false;
                for_each_online_cpu(cpu)
                        free_cpu_cached_iovas(cpu, iovad);
                free_global_cached_iovas(iovad);
                goto retry;
        }

        return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
        if (iova_rcache_insert(iovad, pfn, size))
                return;

        free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
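
/*
 * Illustrative only: a DMA-API style caller would typically convert byte
 * sizes and limits into pfns with iova_shift() and pair the two helpers
 * above (the local variable names here are hypothetical):
 *
 *      pfn = alloc_iova_fast(iovad, nr_pages, dma_limit >> shift, true);
 *      if (pfn)
 *              ... map, use, unmap ...
 *      free_iova_fast(iovad, pfn, nr_pages);
 */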

#define fq_ring_for_each(i, fq) \
        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
        assert_spin_locked(&fq->lock);
        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned fq_ring_add(struct iova_fq *fq)
{
        unsigned idx = fq->tail;

        assert_spin_locked(&fq->lock);

        fq->tail = (idx + 1) % IOVA_FQ_SIZE;

        return idx;
}

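/*
 * Release every flush-queue entry whose IOTLB flush has already completed
 * (counter behind fq_flush_finish_cnt): run the entry_dtor, if any, and
 * return the IOVA range to the rcache.
 */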
static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
{
        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
        unsigned idx;

        assert_spin_locked(&fq->lock);

        fq_ring_for_each(idx, fq) {

                if (fq->entries[idx].counter >= counter)
                        break;

                if (iovad->entry_dtor)
                        iovad->entry_dtor(fq->entries[idx].data);

                free_iova_fast(iovad,
                               fq->entries[idx].iova_pfn,
                               fq->entries[idx].pages);

                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
        }
}

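/* Trigger one domain-wide IOTLB flush and account for it in the counters. */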
static void iova_domain_flush(struct iova_domain *iovad)
{
        atomic64_inc(&iovad->fq_flush_start_cnt);
        iovad->flush_cb(iovad);
        atomic64_inc(&iovad->fq_flush_finish_cnt);
}

static void fq_destroy_all_entries(struct iova_domain *iovad)
{
        int cpu;

        /*
         * This code runs when the iova_domain is being destroyed, so don't
         * bother to free iovas, just call the entry_dtor on all remaining
         * entries.
         */
        if (!iovad->entry_dtor)
                return;

        for_each_possible_cpu(cpu) {
                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
                int idx;

                fq_ring_for_each(idx, fq)
                        iovad->entry_dtor(fq->entries[idx].data);
        }
}

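/*
 * Timer callback: flush the IOTLB once, then drain every CPU's flush queue
 * of the entries made safe to free by that flush.
 */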
static void fq_flush_timeout(struct timer_list *t)
{
        struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
        int cpu;

        atomic_set(&iovad->fq_timer_on, 0);
        iova_domain_flush(iovad);

        for_each_possible_cpu(cpu) {
                unsigned long flags;
                struct iova_fq *fq;

                fq = per_cpu_ptr(iovad->fq, cpu);
                spin_lock_irqsave(&fq->lock, flags);
                fq_ring_free(iovad, fq);
                spin_unlock_irqrestore(&fq->lock, flags);
        }
}

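/**
 * queue_iova - defer freeing of an iova range until after an IOTLB flush
 * @iovad: - iova domain in question
 * @pfn: - pfn of the range
 * @pages: - # of pages in the range
 * @data: - opaque cookie handed to the entry_dtor
 *
 * Adds the range to this CPU's flush queue, flushing synchronously if the
 * queue is full, and arms the flush timer if it is not already pending.
 */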
void queue_iova(struct iova_domain *iovad,
                unsigned long pfn, unsigned long pages,
                unsigned long data)
{
        struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
        unsigned long flags;
        unsigned idx;

        spin_lock_irqsave(&fq->lock, flags);

        /*
         * First remove all entries from the flush queue that have already been
         * flushed out on another CPU. This makes the fq_full() check below less
         * likely to be true.
         */
        fq_ring_free(iovad, fq);

        if (fq_full(fq)) {
                iova_domain_flush(iovad);
                fq_ring_free(iovad, fq);
        }

        idx = fq_ring_add(fq);

        fq->entries[idx].iova_pfn = pfn;
        fq->entries[idx].pages    = pages;
        fq->entries[idx].data     = data;
        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);

        spin_unlock_irqrestore(&fq->lock, flags);

        /* Avoid false sharing as much as possible. */
        if (!atomic_read(&iovad->fq_timer_on) &&
            !atomic_xchg(&iovad->fq_timer_on, 1))
                mod_timer(&iovad->fq_timer,
                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
EXPORT_SYMBOL_GPL(queue_iova);

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
        struct iova *iova, *tmp;

        free_iova_flush_queue(iovad);
        free_iova_rcaches(iovad);
        rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
                free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova = rb_entry(node, struct iova, node);

        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
                return 1;
        return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova;

        iova = alloc_iova_mem();
        if (iova) {
                iova->pfn_lo = pfn_lo;
                iova->pfn_hi = pfn_hi;
        }

        return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova;

        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
        if (iova)
                iova_insert_rbtree(&iovad->rbroot, iova, NULL);

        return iova;
}

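/*
 * Extend @iova downwards to cover *pfn_lo if necessary; if the requested
 * range also extends above @iova, advance *pfn_lo past it so the caller
 * can reserve the remaining tail separately.
 */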
static void
__adjust_overlap_range(struct iova *iova,
        unsigned long *pfn_lo, unsigned long *pfn_hi)
{
        if (*pfn_lo < iova->pfn_lo)
                iova->pfn_lo = *pfn_lo;
        if (*pfn_hi > iova->pfn_hi)
                *pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this address is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct rb_node *node;
        unsigned long flags;
        struct iova *iova;
        unsigned int overlap = 0;

        /* Don't allow nonsensical pfns */
        if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
                return NULL;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
                        iova = rb_entry(node, struct iova, node);
                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
                        if ((pfn_lo >= iova->pfn_lo) &&
                                (pfn_hi <= iova->pfn_hi))
                                goto finish;
                        overlap = 1;

                } else if (overlap)
                                break;
        }

        /*
         * We are here either because this is the first reserved node
         * or because we need to insert the remaining non-overlapping
         * address range.
         */
        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);

/**
 * copy_reserved_iova - copies the reserved iovas between domains
 * @from: - source domain to copy from
 * @to: - destination domain to copy to
 * This function copies reserved iovas from one domain to
 * the other.
 */
void
copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
{
        unsigned long flags;
        struct rb_node *node;

        spin_lock_irqsave(&from->iova_rbtree_lock, flags);
        for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
                struct iova *iova = rb_entry(node, struct iova, node);
                struct iova *new_iova;

                if (iova->pfn_lo == IOVA_ANCHOR)
                        continue;

                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
                if (!new_iova)
                        pr_err("Reserve iova range %lx@%lx failed\n",
                               iova->pfn_hi - iova->pfn_lo + 1, iova->pfn_lo);
        }
        spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(copy_reserved_iova);

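/*
 * Carve the range [pfn_lo, pfn_hi] out of @iova: the iova is removed from
 * the tree, trimmed to that range, and any remainder below or above it is
 * re-inserted as new nodes. Returns the trimmed iova, or NULL if the extra
 * nodes could not be allocated.
 */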
struct iova *
split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
                      unsigned long pfn_lo, unsigned long pfn_hi)
{
        unsigned long flags;
        struct iova *prev = NULL, *next = NULL;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        if (iova->pfn_lo < pfn_lo) {
                prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
                if (prev == NULL)
                        goto error;
        }
        if (iova->pfn_hi > pfn_hi) {
                next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
                if (next == NULL)
                        goto error;
        }

        __cached_rbnode_delete_update(iovad, iova);
        rb_erase(&iova->node, &iovad->rbroot);

        if (prev) {
                iova_insert_rbtree(&iovad->rbroot, prev, NULL);
                iova->pfn_lo = pfn_lo;
        }
        if (next) {
                iova_insert_rbtree(&iovad->rbroot, next, NULL);
                iova->pfn_hi = pfn_hi;
        }
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

        return iova;

error:
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        if (prev)
                free_iova_mem(prev);
        return NULL;
}

/*
 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

#define IOVA_MAG_SIZE 128

struct iova_magazine {
        unsigned long size;
        unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
        spinlock_t lock;
        struct iova_magazine *loaded;
        struct iova_magazine *prev;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
        return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
        kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
        unsigned long flags;
        int i;

        if (!mag)
                return;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

        for (i = 0; i < mag->size; ++i) {
                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

                if (WARN_ON(!iova))
                        continue;

                private_free_iova(iovad, iova);
        }

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

        mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
        return (mag && mag->size == IOVA_MAG_SIZE);
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
        return (!mag || mag->size == 0);
}

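/* Pop the newest cached pfn that does not exceed @limit_pfn. */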
static unsigned long iova_magazine_pop(struct iova_magazine *mag,
                                       unsigned long limit_pfn)
{
        int i;
        unsigned long pfn;

        BUG_ON(iova_magazine_empty(mag));

        /* Only fall back to the rbtree if we have no suitable pfns at all */
        for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
                if (i == 0)
                        return 0;

        /* Swap it to pop it */
        pfn = mag->pfns[i];
        mag->pfns[i] = mag->pfns[--mag->size];

        return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
        BUG_ON(iova_magazine_full(mag));

        mag->pfns[mag->size++] = pfn;
}

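/*
 * Set up the per-size rcaches: for each cached order, an empty global depot
 * plus a pair of per-CPU magazines ("loaded" and "prev").
 */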
static void init_iova_rcaches(struct iova_domain *iovad)
{
        struct iova_cpu_rcache *cpu_rcache;
        struct iova_rcache *rcache;
        unsigned int cpu;
        int i;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                spin_lock_init(&rcache->lock);
                rcache->depot_size = 0;
                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
                if (WARN_ON(!rcache->cpu_rcaches))
                        continue;
                for_each_possible_cpu(cpu) {
                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                        spin_lock_init(&cpu_rcache->lock);
                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
                }
        }
}

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space, and free_iova_fast() (our only caller) will then return the IOVA
 * range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
                                 struct iova_rcache *rcache,
                                 unsigned long iova_pfn)
{
        struct iova_magazine *mag_to_free = NULL;
        struct iova_cpu_rcache *cpu_rcache;
        bool can_insert = false;
        unsigned long flags;

        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);

        if (!iova_magazine_full(cpu_rcache->loaded)) {
                can_insert = true;
        } else if (!iova_magazine_full(cpu_rcache->prev)) {
                swap(cpu_rcache->prev, cpu_rcache->loaded);
                can_insert = true;
        } else {
                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

                if (new_mag) {
                        spin_lock(&rcache->lock);
                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
                                rcache->depot[rcache->depot_size++] =
                                                cpu_rcache->loaded;
                        } else {
                                mag_to_free = cpu_rcache->loaded;
                        }
                        spin_unlock(&rcache->lock);

                        cpu_rcache->loaded = new_mag;
                        can_insert = true;
                }
        }

        if (can_insert)
                iova_magazine_push(cpu_rcache->loaded, iova_pfn);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);

        if (mag_to_free) {
                iova_magazine_free_pfns(mag_to_free, iovad);
                iova_magazine_free(mag_to_free);
        }

        return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
                               unsigned long size)
{
        unsigned int log_size = order_base_2(size);

        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return false;

        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
                                       unsigned long limit_pfn)
{
        struct iova_cpu_rcache *cpu_rcache;
        unsigned long iova_pfn = 0;
        bool has_pfn = false;
        unsigned long flags;

        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);

        if (!iova_magazine_empty(cpu_rcache->loaded)) {
                has_pfn = true;
        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
                swap(cpu_rcache->prev, cpu_rcache->loaded);
                has_pfn = true;
        } else {
                spin_lock(&rcache->lock);
                if (rcache->depot_size > 0) {
                        iova_magazine_free(cpu_rcache->loaded);
                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
                        has_pfn = true;
                }
                spin_unlock(&rcache->lock);
        }

        if (has_pfn)
                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);

        return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn)
{
        unsigned int log_size = order_base_2(size);

        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return 0;

        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
        struct iova_rcache *rcache;
        struct iova_cpu_rcache *cpu_rcache;
        unsigned int cpu;
        int i, j;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                for_each_possible_cpu(cpu) {
                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                        iova_magazine_free(cpu_rcache->loaded);
                        iova_magazine_free(cpu_rcache->prev);
                }
                free_percpu(rcache->cpu_rcaches);
                for (j = 0; j < rcache->depot_size; ++j)
                        iova_magazine_free(rcache->depot[j]);
        }
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
        struct iova_cpu_rcache *cpu_rcache;
        struct iova_rcache *rcache;
        unsigned long flags;
        int i;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                spin_lock_irqsave(&cpu_rcache->lock, flags);
                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
        }
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
        struct iova_rcache *rcache;
        unsigned long flags;
        int i, j;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                spin_lock_irqsave(&rcache->lock, flags);
                for (j = 0; j < rcache->depot_size; ++j) {
                        iova_magazine_free_pfns(rcache->depot[j], iovad);
                        iova_magazine_free(rcache->depot[j]);
                }
                rcache->depot_size = 0;
                spin_unlock_irqrestore(&rcache->lock, flags);
        }
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");