drivers/iommu/iova.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR     ~0UL

static bool iova_rcache_insert(struct iova_domain *iovad,
                               unsigned long pfn,
                               unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(struct timer_list *t);

static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
        struct iova_domain *iovad;

        iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

        free_cpu_cached_iovas(cpu, iovad);
        return 0;
}

static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
        return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
        unsigned long start_pfn)
{
        /*
         * IOVA granularity will normally be equal to the smallest
         * supported IOMMU page size; both *must* be capable of
         * representing individual CPU pages exactly.
         */
        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

        spin_lock_init(&iovad->iova_rbtree_lock);
        iovad->rbroot = RB_ROOT;
        iovad->cached_node = &iovad->anchor.node;
        iovad->cached32_node = &iovad->anchor.node;
        iovad->granule = granule;
        iovad->start_pfn = start_pfn;
        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
        iovad->max32_alloc_size = iovad->dma_32bit_pfn;
        iovad->flush_cb = NULL;
        iovad->fq = NULL;
        iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
        rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
        rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
        cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead);
        init_iova_rcaches(iovad);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
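
/*
 * Illustrative usage sketch (not from this file; the names are hypothetical
 * and SZ_4K comes from <linux/sizes.h>). A typical caller picks the smallest
 * IOMMU page size as the granule and starts at PFN 1 so that IOVA 0 is never
 * handed out:
 *
 *        static struct iova_domain example_iovad;
 *
 *        static void example_domain_init(void)
 *        {
 *                init_iova_domain(&example_iovad, SZ_4K, 1);
 *        }
 */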

static bool has_iova_flush_queue(struct iova_domain *iovad)
{
        return !!iovad->fq;
}

static void free_iova_flush_queue(struct iova_domain *iovad)
{
        if (!has_iova_flush_queue(iovad))
                return;

        if (timer_pending(&iovad->fq_timer))
                del_timer(&iovad->fq_timer);

        fq_destroy_all_entries(iovad);

        free_percpu(iovad->fq);

        iovad->fq         = NULL;
        iovad->flush_cb   = NULL;
        iovad->entry_dtor = NULL;
}

int init_iova_flush_queue(struct iova_domain *iovad,
                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
        struct iova_fq __percpu *queue;
        int cpu;

        atomic64_set(&iovad->fq_flush_start_cnt,  0);
        atomic64_set(&iovad->fq_flush_finish_cnt, 0);

        queue = alloc_percpu(struct iova_fq);
        if (!queue)
                return -ENOMEM;

        iovad->flush_cb   = flush_cb;
        iovad->entry_dtor = entry_dtor;

        for_each_possible_cpu(cpu) {
                struct iova_fq *fq;

                fq = per_cpu_ptr(queue, cpu);
                fq->head = 0;
                fq->tail = 0;

                spin_lock_init(&fq->lock);
        }

        smp_wmb();

        iovad->fq = queue;

        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
        atomic_set(&iovad->fq_timer_on, 0);

        return 0;
}
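
/*
 * Illustrative sketch of wiring up a flush queue (not from this file; names
 * are hypothetical and the callback signatures are inferred from the calls
 * to flush_cb() and entry_dtor() later in this file). The flush callback
 * would typically issue a domain-wide IOTLB invalidation; the destructor
 * releases whatever per-entry data the driver passed to queue_iova():
 *
 *        static void example_flush_cb(struct iova_domain *iovad)
 *        {
 *                // flush the IOTLB for the whole domain here
 *        }
 *
 *        static void example_entry_dtor(unsigned long data)
 *        {
 *                // free driver-private data, e.g. a freelist of pages
 *        }
 *
 *        err = init_iova_flush_queue(iovad, example_flush_cb,
 *                                    example_entry_dtor);
 *
 * A nonzero return means the per-CPU queues could not be allocated and the
 * caller should fall back to freeing IOVAs synchronously.
 */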

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
        if (limit_pfn <= iovad->dma_32bit_pfn)
                return iovad->cached32_node;

        return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
        if (new->pfn_hi < iovad->dma_32bit_pfn)
                iovad->cached32_node = &new->node;
        else
                iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
        struct iova *cached_iova;

        cached_iova = to_iova(iovad->cached32_node);
        if (free == cached_iova ||
            (free->pfn_hi < iovad->dma_32bit_pfn &&
             free->pfn_lo >= cached_iova->pfn_lo)) {
                iovad->cached32_node = rb_next(&free->node);
                iovad->max32_alloc_size = iovad->dma_32bit_pfn;
        }

        cached_iova = to_iova(iovad->cached_node);
        if (free->pfn_lo >= cached_iova->pfn_lo)
                iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
        struct rb_node *node, *next;
        /*
         * Ideally what we'd like to judge here is whether limit_pfn is close
         * enough to the highest-allocated IOVA that starting the allocation
         * walk from the anchor node will be quicker than this initial work to
         * find an exact starting point (especially if that ends up being the
         * anchor node anyway). This is an incredibly crude approximation which
         * only really helps the most likely case, but is at least trivially easy.
         */
        if (limit_pfn > iovad->dma_32bit_pfn)
                return &iovad->anchor.node;

        node = iovad->rbroot.rb_node;
        while (to_iova(node)->pfn_hi < limit_pfn)
                node = node->rb_right;

search_left:
        while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
                node = node->rb_left;

        if (!node->rb_left)
                return node;

        next = node->rb_left;
        while (next->rb_right) {
                next = next->rb_right;
                if (to_iova(next)->pfn_lo >= limit_pfn) {
                        node = next;
                        goto search_left;
                }
        }

        return node;
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
                   struct rb_node *start)
{
        struct rb_node **new, *parent = NULL;

        new = (start) ? &start : &(root->rb_node);
        /* Figure out where to put new node */
        while (*new) {
                struct iova *this = to_iova(*new);

                parent = *new;

                if (iova->pfn_lo < this->pfn_lo)
                        new = &((*new)->rb_left);
                else if (iova->pfn_lo > this->pfn_lo)
                        new = &((*new)->rb_right);
                else {
                        WARN_ON(1); /* this should not happen */
                        return;
                }
        }
        /* Add new node and rebalance tree. */
        rb_link_node(&iova->node, parent, new);
        rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                unsigned long size, unsigned long limit_pfn,
                        struct iova *new, bool size_aligned)
{
        struct rb_node *curr, *prev;
        struct iova *curr_iova;
        unsigned long flags;
        unsigned long new_pfn, retry_pfn;
        unsigned long align_mask = ~0UL;
        unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

        if (size_aligned)
                align_mask <<= fls_long(size - 1);

        /* Walk the tree backwards */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        if (limit_pfn <= iovad->dma_32bit_pfn &&
                        size >= iovad->max32_alloc_size)
                goto iova32_full;

        curr = __get_cached_rbnode(iovad, limit_pfn);
        curr_iova = to_iova(curr);
        retry_pfn = curr_iova->pfn_hi + 1;

retry:
        do {
                high_pfn = min(high_pfn, curr_iova->pfn_lo);
                new_pfn = (high_pfn - size) & align_mask;
                prev = curr;
                curr = rb_prev(curr);
                curr_iova = to_iova(curr);
        } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

        if (high_pfn < size || new_pfn < low_pfn) {
                if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
                        high_pfn = limit_pfn;
                        low_pfn = retry_pfn;
                        curr = iova_find_limit(iovad, limit_pfn);
                        curr_iova = to_iova(curr);
                        goto retry;
                }
                iovad->max32_alloc_size = size;
                goto iova32_full;
        }

        /* pfn_lo will point to size aligned address if size_aligned is set */
        new->pfn_lo = new_pfn;
        new->pfn_hi = new->pfn_lo + size - 1;

        /* If we have 'prev', it's a valid place to start the insertion. */
        iova_insert_rbtree(&iovad->rbroot, new, prev);
        __cached_rbnode_insert_update(iovad, new);

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return 0;

iova32_full:
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return -ENOMEM;
}
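
/*
 * Worked example of the size-aligned path above (illustrative values):
 * for size = 8 pfns, fls_long(8 - 1) = 3, so align_mask = ~0UL << 3 keeps
 * only multiples of 8. With high_pfn = 0x10005 the candidate becomes
 * (0x10005 - 8) & align_mask = 0xfff8, i.e. the highest 8-aligned start
 * whose range still ends below high_pfn; the backwards walk then keeps
 * lowering high_pfn until the candidate no longer overlaps an allocated
 * node and is still >= low_pfn.
 */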

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
        return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
        if (iova->pfn_lo != IOVA_ANCHOR)
                kmem_cache_free(iova_cache, iova);
}

int iova_cache_get(void)
{
        mutex_lock(&iova_cache_mutex);
        if (!iova_cache_users) {
                int ret;

                ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
                                        iova_cpuhp_dead);
                if (ret) {
                        mutex_unlock(&iova_cache_mutex);
                        pr_err("Couldn't register cpuhp handler\n");
                        return ret;
                }

                iova_cache = kmem_cache_create(
                        "iommu_iova", sizeof(struct iova), 0,
                        SLAB_HWCACHE_ALIGN, NULL);
                if (!iova_cache) {
                        cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
                        mutex_unlock(&iova_cache_mutex);
                        pr_err("Couldn't create iova cache\n");
                        return -ENOMEM;
                }
        }

        iova_cache_users++;
        mutex_unlock(&iova_cache_mutex);

        return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
        mutex_lock(&iova_cache_mutex);
        if (WARN_ON(!iova_cache_users)) {
                mutex_unlock(&iova_cache_mutex);
                return;
        }
        iova_cache_users--;
        if (!iova_cache_users) {
                cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
                kmem_cache_destroy(iova_cache);
        }
        mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
        unsigned long limit_pfn,
        bool size_aligned)
{
        struct iova *new_iova;
        int ret;

        new_iova = alloc_iova_mem();
        if (!new_iova)
                return NULL;

        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
                        new_iova, size_aligned);

        if (ret) {
                free_iova_mem(new_iova);
                return NULL;
        }

        return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
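
/*
 * Illustrative sketch of the slow-path API (not from this file; names are
 * hypothetical, iova_shift()/iova_dma_addr() are the helpers from
 * <linux/iova.h>): allocate a size-aligned range below a device's DMA limit,
 * map it, and hand back a bus address:
 *
 *        static dma_addr_t example_map(struct iova_domain *iovad,
 *                                      unsigned long nrpages, u64 dma_limit)
 *        {
 *                struct iova *iova;
 *
 *                iova = alloc_iova(iovad, nrpages,
 *                                  dma_limit >> iova_shift(iovad), true);
 *                if (!iova)
 *                        return 0;
 *                // ... create the IOMMU mappings for the range here ...
 *                return iova_dma_addr(iovad, iova);
 *        }
 *
 * The matching teardown unmaps the range and returns it with
 * __free_iova(iovad, iova) or free_iova(iovad, pfn).
 */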

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
        struct rb_node *node = iovad->rbroot.rb_node;

        assert_spin_locked(&iovad->iova_rbtree_lock);

        while (node) {
                struct iova *iova = to_iova(node);

                if (pfn < iova->pfn_lo)
                        node = node->rb_left;
                else if (pfn > iova->pfn_hi)
                        node = node->rb_right;
                else
                        return iova;    /* pfn falls within iova's range */
        }

        return NULL;
}

static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
{
        assert_spin_locked(&iovad->iova_rbtree_lock);
        __cached_rbnode_delete_update(iovad, iova);
        rb_erase(&iova->node, &iovad->rbroot);
        free_iova_mem(iova);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
        unsigned long flags;
        struct iova *iova;

        /* Take the lock so that no other thread is manipulating the rbtree */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        iova = private_find_iova(iovad, pfn);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain.
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
        unsigned long flags;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        private_free_iova(iovad, iova);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that was allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
        unsigned long flags;
        struct iova *iova;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        iova = private_find_iova(iovad, pfn);
        if (iova)
                private_free_iova(iovad, iova);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
                unsigned long limit_pfn, bool flush_rcache)
{
        unsigned long iova_pfn;
        struct iova *new_iova;

        iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
        if (iova_pfn)
                return iova_pfn;

retry:
        new_iova = alloc_iova(iovad, size, limit_pfn, true);
        if (!new_iova) {
                unsigned int cpu;

                if (!flush_rcache)
                        return 0;

                /* Try replenishing IOVAs by flushing rcache. */
                flush_rcache = false;
                for_each_online_cpu(cpu)
                        free_cpu_cached_iovas(cpu, iovad);
                free_global_cached_iovas(iovad);
                goto retry;
        }

        return new_iova->pfn_lo;
}

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that was allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
        if (iova_rcache_insert(iovad, pfn, size))
                return;

        free_iova(iovad, pfn);
}
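
/*
 * Illustrative sketch of the rcache fast path (not from this file; names
 * and values are hypothetical). Callers usually round the length up to a
 * power of two so the allocation and the later free hit the same rcache
 * bucket:
 *
 *        unsigned long shift = iova_shift(iovad);
 *        unsigned long nrpages = roundup_pow_of_two(iova_align(iovad, size) >> shift);
 *        unsigned long pfn;
 *
 *        pfn = alloc_iova_fast(iovad, nrpages, dma_limit >> shift, true);
 *        if (!pfn)
 *                return 0;
 *        // ... map and use [pfn << shift, (pfn + nrpages) << shift) ...
 *        free_iova_fast(iovad, pfn, nrpages);
 */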

#define fq_ring_for_each(i, fq) \
        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
        assert_spin_locked(&fq->lock);
        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned fq_ring_add(struct iova_fq *fq)
{
        unsigned idx = fq->tail;

        assert_spin_locked(&fq->lock);

        fq->tail = (idx + 1) % IOVA_FQ_SIZE;

        return idx;
}

static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
{
        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
        unsigned idx;

        assert_spin_locked(&fq->lock);

        fq_ring_for_each(idx, fq) {
                if (fq->entries[idx].counter >= counter)
                        break;

                if (iovad->entry_dtor)
                        iovad->entry_dtor(fq->entries[idx].data);

                free_iova_fast(iovad,
                               fq->entries[idx].iova_pfn,
                               fq->entries[idx].pages);

                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
        }
}

static void iova_domain_flush(struct iova_domain *iovad)
{
        atomic64_inc(&iovad->fq_flush_start_cnt);
        iovad->flush_cb(iovad);
        atomic64_inc(&iovad->fq_flush_finish_cnt);
}

static void fq_destroy_all_entries(struct iova_domain *iovad)
{
        int cpu;

        /*
         * This code runs when the iova_domain is being destroyed, so don't
         * bother to free iovas, just call the entry_dtor on all remaining
         * entries.
         */
        if (!iovad->entry_dtor)
                return;

        for_each_possible_cpu(cpu) {
                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
                int idx;

                fq_ring_for_each(idx, fq)
                        iovad->entry_dtor(fq->entries[idx].data);
        }
}

static void fq_flush_timeout(struct timer_list *t)
{
        struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
        int cpu;

        atomic_set(&iovad->fq_timer_on, 0);
        iova_domain_flush(iovad);

        for_each_possible_cpu(cpu) {
                unsigned long flags;
                struct iova_fq *fq;

                fq = per_cpu_ptr(iovad->fq, cpu);
                spin_lock_irqsave(&fq->lock, flags);
                fq_ring_free(iovad, fq);
                spin_unlock_irqrestore(&fq->lock, flags);
        }
}

void queue_iova(struct iova_domain *iovad,
                unsigned long pfn, unsigned long pages,
                unsigned long data)
{
        struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
        unsigned long flags;
        unsigned idx;

        spin_lock_irqsave(&fq->lock, flags);

        /*
         * First remove all entries from the flush queue that have already been
         * flushed out on another CPU. This makes the fq_full() check below less
         * likely to be true.
         */
        fq_ring_free(iovad, fq);

        if (fq_full(fq)) {
                iova_domain_flush(iovad);
                fq_ring_free(iovad, fq);
        }

        idx = fq_ring_add(fq);

        fq->entries[idx].iova_pfn = pfn;
        fq->entries[idx].pages    = pages;
        fq->entries[idx].data     = data;
        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);

        spin_unlock_irqrestore(&fq->lock, flags);

        /* Avoid false sharing as much as possible. */
        if (!atomic_read(&iovad->fq_timer_on) &&
            !atomic_xchg(&iovad->fq_timer_on, 1))
                mod_timer(&iovad->fq_timer,
                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
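
/*
 * Illustrative sketch of the deferred-free path (not from this file; names
 * are hypothetical). Instead of freeing immediately, a driver that batches
 * IOTLB invalidation queues the range; fq_flush_timeout() or a full ring
 * later calls the flush callback and only then recycles the pfns through
 * fq_ring_free() -> free_iova_fast():
 *
 *        static void example_unmap(struct iova_domain *iovad,
 *                                  unsigned long pfn, unsigned long nrpages,
 *                                  unsigned long driver_data)
 *        {
 *                // tear down the IOMMU page-table entries first
 *                queue_iova(iovad, pfn, nrpages, driver_data);
 *        }
 */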

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the IOVAs in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
        struct iova *iova, *tmp;

        cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
                                            &iovad->cpuhp_dead);

        free_iova_flush_queue(iovad);
        free_iova_rcaches(iovad);
        rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
                free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova = to_iova(node);

        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
                return 1;
        return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova;

        iova = alloc_iova_mem();
        if (iova) {
                iova->pfn_lo = pfn_lo;
                iova->pfn_hi = pfn_hi;
        }

        return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova;

        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
        if (iova)
                iova_insert_rbtree(&iovad->rbroot, iova, NULL);

        return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
        unsigned long *pfn_lo, unsigned long *pfn_hi)
{
        if (*pfn_lo < iova->pfn_lo)
                iova->pfn_lo = *pfn_lo;
        if (*pfn_hi > iova->pfn_hi)
                *pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher page frame address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that it is never handed out as part of alloc_iova().
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct rb_node *node;
        unsigned long flags;
        struct iova *iova;
        unsigned int overlap = 0;

        /* Don't allow nonsensical pfns */
        if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
                return NULL;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
                        iova = to_iova(node);
                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
                        if ((pfn_lo >= iova->pfn_lo) &&
                            (pfn_hi <= iova->pfn_hi))
                                goto finish;
                        overlap = 1;
                } else if (overlap) {
                        break;
                }
        }

        /*
         * We are here either because this is the first reserved range
         * or because we need to insert the remaining non-overlapping
         * address range.
         */
        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
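
/*
 * Illustrative sketch (not from this file; the address range is just an
 * example): carve a hole out of the allocatable space, e.g. the x86
 * interrupt/MSI window, so that alloc_iova() can never hand out addresses
 * overlapping it:
 *
 *        reserve_iova(iovad, 0xfee00000UL >> iova_shift(iovad),
 *                     0xfeefffffUL >> iova_shift(iovad));
 */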

/*
 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

#define IOVA_MAG_SIZE 128

struct iova_magazine {
        unsigned long size;
        unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
        spinlock_t lock;
        struct iova_magazine *loaded;
        struct iova_magazine *prev;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
        return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
        kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
        unsigned long flags;
        int i;

        if (!mag)
                return;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

        for (i = 0; i < mag->size; ++i) {
                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

                if (WARN_ON(!iova))
                        continue;

                private_free_iova(iovad, iova);
        }

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

        mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
        return (mag && mag->size == IOVA_MAG_SIZE);
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
        return (!mag || mag->size == 0);
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
                                       unsigned long limit_pfn)
{
        int i;
        unsigned long pfn;

        BUG_ON(iova_magazine_empty(mag));

        /* Only fall back to the rbtree if we have no suitable pfns at all */
        for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
                if (i == 0)
                        return 0;

        /* Swap it to pop it */
        pfn = mag->pfns[i];
        mag->pfns[i] = mag->pfns[--mag->size];

        return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
        BUG_ON(iova_magazine_full(mag));

        mag->pfns[mag->size++] = pfn;
}

static void init_iova_rcaches(struct iova_domain *iovad)
{
        struct iova_cpu_rcache *cpu_rcache;
        struct iova_rcache *rcache;
        unsigned int cpu;
        int i;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                spin_lock_init(&rcache->lock);
                rcache->depot_size = 0;
                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
                if (WARN_ON(!rcache->cpu_rcaches))
                        continue;
                for_each_possible_cpu(cpu) {
                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                        spin_lock_init(&cpu_rcache->lock);
                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
                }
        }
}

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space; free_iova_fast() (our only caller) will then fall back to
 * free_iova() and return the IOVA range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
                                 struct iova_rcache *rcache,
                                 unsigned long iova_pfn)
{
        struct iova_magazine *mag_to_free = NULL;
        struct iova_cpu_rcache *cpu_rcache;
        bool can_insert = false;
        unsigned long flags;

        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);

        if (!iova_magazine_full(cpu_rcache->loaded)) {
                can_insert = true;
        } else if (!iova_magazine_full(cpu_rcache->prev)) {
                swap(cpu_rcache->prev, cpu_rcache->loaded);
                can_insert = true;
        } else {
                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

                if (new_mag) {
                        spin_lock(&rcache->lock);
                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
                                rcache->depot[rcache->depot_size++] =
                                                cpu_rcache->loaded;
                        } else {
                                mag_to_free = cpu_rcache->loaded;
                        }
                        spin_unlock(&rcache->lock);

                        cpu_rcache->loaded = new_mag;
                        can_insert = true;
                }
        }

        if (can_insert)
                iova_magazine_push(cpu_rcache->loaded, iova_pfn);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);

        if (mag_to_free) {
                iova_magazine_free_pfns(mag_to_free, iovad);
                iova_magazine_free(mag_to_free);
        }

        return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
                               unsigned long size)
{
        unsigned int log_size = order_base_2(size);

        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return false;

        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
                                       unsigned long limit_pfn)
{
        struct iova_cpu_rcache *cpu_rcache;
        unsigned long iova_pfn = 0;
        bool has_pfn = false;
        unsigned long flags;

        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);

        if (!iova_magazine_empty(cpu_rcache->loaded)) {
                has_pfn = true;
        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
                swap(cpu_rcache->prev, cpu_rcache->loaded);
                has_pfn = true;
        } else {
                spin_lock(&rcache->lock);
                if (rcache->depot_size > 0) {
                        iova_magazine_free(cpu_rcache->loaded);
                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
                        has_pfn = true;
                }
                spin_unlock(&rcache->lock);
        }

        if (has_pfn)
                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);

        return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn)
{
        unsigned int log_size = order_base_2(size);

        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return 0;

        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}
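
/*
 * Worked example of the bucketing above (illustrative): a request for
 * size = 6 pfns gives log_size = order_base_2(6) = 3, i.e. the bucket of
 * 8-pfn ranges, and iova_magazine_pop() only returns a cached pfn with
 * pfn <= limit_pfn - size, so the whole range stays below the caller's
 * limit. Requests larger than 1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1) pfns
 * bypass the rcache entirely and always fall back to the rbtree.
 */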

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
        struct iova_rcache *rcache;
        struct iova_cpu_rcache *cpu_rcache;
        unsigned int cpu;
        int i, j;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                for_each_possible_cpu(cpu) {
                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                        iova_magazine_free(cpu_rcache->loaded);
                        iova_magazine_free(cpu_rcache->prev);
                }
                free_percpu(rcache->cpu_rcaches);
                for (j = 0; j < rcache->depot_size; ++j)
                        iova_magazine_free(rcache->depot[j]);
        }
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
        struct iova_cpu_rcache *cpu_rcache;
        struct iova_rcache *rcache;
        unsigned long flags;
        int i;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                spin_lock_irqsave(&cpu_rcache->lock, flags);
                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
        }
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
        struct iova_rcache *rcache;
        unsigned long flags;
        int i, j;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                spin_lock_irqsave(&rcache->lock, flags);
                for (j = 0; j < rcache->depot_size; ++j) {
                        iova_magazine_free_pfns(rcache->depot[j], iovad);
                        iova_magazine_free(rcache->depot[j]);
                }
                rcache->depot_size = 0;
                spin_unlock_irqrestore(&rcache->lock, flags);
        }
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");