// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for s390 PCI devices
 *
 * Copyright IBM Corp. 2015
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/iommu-helper.h>
#include <linux/sizes.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <asm/pci_dma.h>

#include "dma-iommu.h"

static const struct iommu_ops s390_iommu_ops;

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;

static u64 s390_iommu_aperture;
static u32 s390_iommu_aperture_factor = 1;

struct s390_domain {
        struct iommu_domain     domain;
        struct list_head        devices;
        struct zpci_iommu_ctrs  ctrs;
        unsigned long           *dma_table;
        spinlock_t              list_lock;
        struct rcu_head         rcu;
};

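/*
 * The hardware walks a three-level I/O translation table: a region
 * table whose entries point to segment tables, whose entries in turn
 * point to 4K page tables. calc_rtx(), calc_sx() and calc_px() extract
 * the region-table, segment-table and page-table index from an IOVA.
 */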
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
        return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
}

static inline unsigned int calc_sx(dma_addr_t ptr)
{
        return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
}

static inline unsigned int calc_px(dma_addr_t ptr)
{
        return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
}

static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
{
        *entry &= ZPCI_PTE_FLAG_MASK;
        *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
}

static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
{
        *entry &= ZPCI_RTE_FLAG_MASK;
        *entry |= (sto & ZPCI_RTE_ADDR_MASK);
        *entry |= ZPCI_TABLE_TYPE_RTX;
}

static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
{
        *entry &= ZPCI_STE_FLAG_MASK;
        *entry |= (pto & ZPCI_STE_ADDR_MASK);
        *entry |= ZPCI_TABLE_TYPE_SX;
}

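/*
 * Beyond the address bits set above, entries carry valid/invalid and
 * protected/unprotected state bits; the helpers below toggle those
 * without disturbing the rest of the entry.
 */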
static inline void validate_rt_entry(unsigned long *entry)
{
        *entry &= ~ZPCI_TABLE_VALID_MASK;
        *entry &= ~ZPCI_TABLE_OFFSET_MASK;
        *entry |= ZPCI_TABLE_VALID;
        *entry |= ZPCI_TABLE_LEN_RTX;
}

static inline void validate_st_entry(unsigned long *entry)
{
        *entry &= ~ZPCI_TABLE_VALID_MASK;
        *entry |= ZPCI_TABLE_VALID;
}

static inline void invalidate_pt_entry(unsigned long *entry)
{
        WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
        *entry &= ~ZPCI_PTE_VALID_MASK;
        *entry |= ZPCI_PTE_INVALID;
}

static inline void validate_pt_entry(unsigned long *entry)
{
        WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
        *entry &= ~ZPCI_PTE_VALID_MASK;
        *entry |= ZPCI_PTE_VALID;
}

static inline void entry_set_protected(unsigned long *entry)
{
        *entry &= ~ZPCI_TABLE_PROT_MASK;
        *entry |= ZPCI_TABLE_PROTECTED;
}

static inline void entry_clr_protected(unsigned long *entry)
{
        *entry &= ~ZPCI_TABLE_PROT_MASK;
        *entry |= ZPCI_TABLE_UNPROTECTED;
}

static inline int reg_entry_isvalid(unsigned long entry)
{
        return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
}

static inline int pt_entry_isvalid(unsigned long entry)
{
        return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}

static inline unsigned long *get_rt_sto(unsigned long entry)
{
        if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
                return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
        else
                return NULL;
}

static inline unsigned long *get_st_pto(unsigned long entry)
{
        if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
                return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
        else
                return NULL;
}

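/*
 * Region and segment tables share one format and therefore one slab
 * cache; page tables have their own size and alignment requirements
 * and get a separate cache.
 */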
static int __init dma_alloc_cpu_table_caches(void)
{
        dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
                                                   ZPCI_TABLE_SIZE,
                                                   ZPCI_TABLE_ALIGN,
                                                   0, NULL);
        if (!dma_region_table_cache)
                return -ENOMEM;

        dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
                                                 ZPCI_PT_SIZE,
                                                 ZPCI_PT_ALIGN,
                                                 0, NULL);
        if (!dma_page_table_cache) {
                kmem_cache_destroy(dma_region_table_cache);
                return -ENOMEM;
        }
        return 0;
}

static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_region_table_cache, gfp);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
                *entry = ZPCI_TABLE_INVALID;
        return table;
}

static void dma_free_cpu_table(void *table)
{
        kmem_cache_free(dma_region_table_cache, table);
}

static void dma_free_page_table(void *table)
{
        kmem_cache_free(dma_page_table_cache, table);
}

static void dma_free_seg_table(unsigned long entry)
{
        unsigned long *sto = get_rt_sto(entry);
        int sx;

        for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
                if (reg_entry_isvalid(sto[sx]))
                        dma_free_page_table(get_st_pto(sto[sx]));

        dma_free_cpu_table(sto);
}

static void dma_cleanup_tables(unsigned long *table)
{
        int rtx;

        if (!table)
                return;

        for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
                if (reg_entry_isvalid(table[rtx]))
                        dma_free_seg_table(table[rtx]);

        dma_free_cpu_table(table);
}

static unsigned long *dma_alloc_page_table(gfp_t gfp)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_page_table_cache, gfp);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
                *entry = ZPCI_PTE_INVALID;
        return table;
}

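/*
 * Lockless table population: read the entry once, allocate a new
 * lower-level table if it is still invalid, and publish it with
 * cmpxchg(). If another CPU won the race, free our copy and use theirs.
 */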
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
{
        unsigned long old_rte, rte;
        unsigned long *sto;

        rte = READ_ONCE(*rtep);
        if (reg_entry_isvalid(rte)) {
                sto = get_rt_sto(rte);
        } else {
                sto = dma_alloc_cpu_table(gfp);
                if (!sto)
                        return NULL;

                set_rt_sto(&rte, virt_to_phys(sto));
                validate_rt_entry(&rte);
                entry_clr_protected(&rte);

                old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
                if (old_rte != ZPCI_TABLE_INVALID) {
                        /* Someone else was faster, use theirs */
                        dma_free_cpu_table(sto);
                        sto = get_rt_sto(old_rte);
                }
        }
        return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
{
        unsigned long old_ste, ste;
        unsigned long *pto;

        ste = READ_ONCE(*step);
        if (reg_entry_isvalid(ste)) {
                pto = get_st_pto(ste);
        } else {
                pto = dma_alloc_page_table(gfp);
                if (!pto)
                        return NULL;
                set_st_pto(&ste, virt_to_phys(pto));
                validate_st_entry(&ste);
                entry_clr_protected(&ste);

                old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
                if (old_ste != ZPCI_TABLE_INVALID) {
                        /* Someone else was faster, use theirs */
                        dma_free_page_table(pto);
                        pto = get_st_pto(old_ste);
                }
        }
        return pto;
}

static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
{
        unsigned long *sto, *pto;
        unsigned int rtx, sx, px;

        rtx = calc_rtx(dma_addr);
        sto = dma_get_seg_table_origin(&rto[rtx], gfp);
        if (!sto)
                return NULL;

        sx = calc_sx(dma_addr);
        pto = dma_get_page_table_origin(&sto[sx], gfp);
        if (!pto)
                return NULL;

        px = calc_px(dma_addr);
        return &pto[px];
}

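/*
 * Build the new entry value in a local copy and publish it with a
 * single xchg() so that concurrent hardware walks never see a
 * half-updated entry.
 */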
static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
{
        unsigned long pte;

        pte = READ_ONCE(*ptep);
        if (flags & ZPCI_PTE_INVALID) {
                invalidate_pt_entry(&pte);
        } else {
                set_pt_pfaa(&pte, page_addr);
                validate_pt_entry(&pte);
        }

        if (flags & ZPCI_TABLE_PROTECTED)
                entry_set_protected(&pte);
        else
                entry_clr_protected(&pte);

        xchg(ptep, pte);
}

static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
        return container_of(dom, struct s390_domain, domain);
}

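/*
 * All zPCI DMA is cache coherent. Deferred IOTLB flushing is offered
 * for all function types except ISM, which needs synchronous
 * invalidation.
 */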
static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
{
        struct zpci_dev *zdev = to_zpci_dev(dev);

        switch (cap) {
        case IOMMU_CAP_CACHE_COHERENCY:
                return true;
        case IOMMU_CAP_DEFERRED_FLUSH:
                return zdev->pft != PCI_FUNC_TYPE_ISM;
        default:
                return false;
        }
}

static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
{
        struct s390_domain *s390_domain;

        s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
        if (!s390_domain)
                return NULL;

        s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
        if (!s390_domain->dma_table) {
                kfree(s390_domain);
                return NULL;
        }
        s390_domain->domain.geometry.force_aperture = true;
        s390_domain->domain.geometry.aperture_start = 0;
        s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;

        spin_lock_init(&s390_domain->list_lock);
        INIT_LIST_HEAD_RCU(&s390_domain->devices);

        return &s390_domain->domain;
}

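/*
 * Freeing the domain is deferred by an RCU grace period so that the
 * lockless walks of the device list in the flush paths can complete
 * before the translation tables and the domain itself go away.
 */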
static void s390_iommu_rcu_free_domain(struct rcu_head *head)
{
        struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);

        dma_cleanup_tables(s390_domain->dma_table);
        kfree(s390_domain);
}

static void s390_domain_free(struct iommu_domain *domain)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);

        rcu_read_lock();
        WARN_ON(!list_empty(&s390_domain->devices));
        rcu_read_unlock();

        call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}

static void s390_iommu_detach_device(struct iommu_domain *domain,
                                     struct device *dev)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        struct zpci_dev *zdev = to_zpci_dev(dev);
        unsigned long flags;

        spin_lock_irqsave(&s390_domain->list_lock, flags);
        list_del_rcu(&zdev->iommu_list);
        spin_unlock_irqrestore(&s390_domain->list_lock, flags);

        zpci_unregister_ioat(zdev, 0);
        zdev->s390_domain = NULL;
        zdev->dma_table = NULL;
}

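/*
 * Attaching registers the domain's translation table with the function
 * (zpci_register_ioat) and links the device into the domain's
 * RCU-protected device list so that IOTLB flushes reach it.
 */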
static int s390_iommu_attach_device(struct iommu_domain *domain,
                                    struct device *dev)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        struct zpci_dev *zdev = to_zpci_dev(dev);
        unsigned long flags;
        u8 status;
        int cc;

        if (!zdev)
                return -ENODEV;

        if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
                domain->geometry.aperture_end < zdev->start_dma))
                return -EINVAL;

        if (zdev->s390_domain)
                s390_iommu_detach_device(&zdev->s390_domain->domain, dev);

        cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
                                virt_to_phys(s390_domain->dma_table), &status);
        /*
         * If the device is undergoing error recovery the reset code
         * will re-establish the new domain.
         */
        if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
                return -EIO;

        zdev->dma_table = s390_domain->dma_table;
        zdev->s390_domain = s390_domain;

        spin_lock_irqsave(&s390_domain->list_lock, flags);
        list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
        spin_unlock_irqrestore(&s390_domain->list_lock, flags);

        return 0;
}

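/*
 * IOVAs below start_dma and above end_dma cannot be translated by the
 * device, so report those ranges as reserved regions.
 */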
static void s390_iommu_get_resv_regions(struct device *dev,
                                        struct list_head *list)
{
        struct zpci_dev *zdev = to_zpci_dev(dev);
        struct iommu_resv_region *region;

        if (zdev->start_dma) {
                region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
                                                 IOMMU_RESV_RESERVED, GFP_KERNEL);
                if (!region)
                        return;
                list_add_tail(&region->list, list);
        }

        if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
                region = iommu_alloc_resv_region(zdev->end_dma + 1,
                                                 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
                                                 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
                if (!region)
                        return;
                list_add_tail(&region->list, list);
        }
}

static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
        struct zpci_dev *zdev;

        if (!dev_is_pci(dev))
                return ERR_PTR(-ENODEV);

        zdev = to_zpci_dev(dev);

        if (zdev->start_dma > zdev->end_dma ||
            zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
                return ERR_PTR(-EINVAL);

        if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
                zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;

        if (zdev->tlb_refresh)
                dev->iommu->shadow_on_flush = 1;

        return &zdev->iommu_dev;
}

static void s390_iommu_release_device(struct device *dev)
{
        struct zpci_dev *zdev = to_zpci_dev(dev);

        /*
         * release_device is expected to detach any domain currently attached
         * to the device, but keep it attached to other devices in the group.
         */
        if (zdev)
                s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
}

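/*
 * zpci_refresh_trans() issues an RPCIT (Refresh PCI Translations)
 * operation for the given IOVA range; zpci_refresh_all() covers the
 * function's entire DMA aperture.
 */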
static int zpci_refresh_all(struct zpci_dev *zdev)
{
        return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
                                  zdev->end_dma - zdev->start_dma + 1);
}

static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        struct zpci_dev *zdev;

        rcu_read_lock();
        list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
                atomic64_inc(&s390_domain->ctrs.global_rpcits);
                zpci_refresh_all(zdev);
        }
        rcu_read_unlock();
}

static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
                                  struct iommu_iotlb_gather *gather)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        size_t size = gather->end - gather->start + 1;
        struct zpci_dev *zdev;

        /* If nothing was added to the gather there is nothing to flush */
        if (!gather->end)
                return;

        rcu_read_lock();
        list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
                atomic64_inc(&s390_domain->ctrs.sync_rpcits);
                zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
                                   size);
        }
        rcu_read_unlock();
}

static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
                                     unsigned long iova, size_t size)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        struct zpci_dev *zdev;
        int ret = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
                if (!zdev->tlb_refresh)
                        continue;
                atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
                ret = zpci_refresh_trans((u64)zdev->fh << 32,
                                         iova, size);
                /*
                 * Let the hypervisor discover invalidated entries,
                 * allowing it to free IOVAs and unpin pages.
                 */
                if (ret == -ENOMEM) {
                        ret = zpci_refresh_all(zdev);
                        if (ret)
                                break;
                }
        }
        rcu_read_unlock();

        return ret;
}

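/*
 * Establish translations for nr_pages consecutive 4K pages starting at
 * physical address pa under IOVA dma_addr. On allocation failure, any
 * entries created so far are invalidated again.
 */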
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
                                     phys_addr_t pa, dma_addr_t dma_addr,
                                     unsigned long nr_pages, int flags,
                                     gfp_t gfp)
{
        phys_addr_t page_addr = pa & PAGE_MASK;
        unsigned long *entry;
        unsigned long i;
        int rc;

        for (i = 0; i < nr_pages; i++) {
                entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
                                           gfp);
                if (unlikely(!entry)) {
                        rc = -ENOMEM;
                        goto undo_cpu_trans;
                }
                dma_update_cpu_trans(entry, page_addr, flags);
                page_addr += PAGE_SIZE;
                dma_addr += PAGE_SIZE;
        }

        return 0;

undo_cpu_trans:
        while (i-- > 0) {
                dma_addr -= PAGE_SIZE;
                entry = dma_walk_cpu_trans(s390_domain->dma_table,
                                           dma_addr, gfp);
                if (!entry)
                        break;
                dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
        }

        return rc;
}

static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
                                       dma_addr_t dma_addr, unsigned long nr_pages)
{
        unsigned long *entry;
        unsigned long i;
        int rc = 0;

        for (i = 0; i < nr_pages; i++) {
                entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
                                           GFP_ATOMIC);
                if (unlikely(!entry)) {
                        rc = -EINVAL;
                        break;
                }
                dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
                dma_addr += PAGE_SIZE;
        }

        return rc;
}

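/*
 * Only 4K pages are supported, matching pgsize_bitmap in
 * s390_iommu_ops; mappings without IOMMU_WRITE are made read-only
 * through the protection bit.
 */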
static int s390_iommu_map_pages(struct iommu_domain *domain,
                                unsigned long iova, phys_addr_t paddr,
                                size_t pgsize, size_t pgcount,
                                int prot, gfp_t gfp, size_t *mapped)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        size_t size = pgcount << __ffs(pgsize);
        int flags = ZPCI_PTE_VALID, rc = 0;

        if (pgsize != SZ_4K)
                return -EINVAL;

        if (iova < s390_domain->domain.geometry.aperture_start ||
            (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
                return -EINVAL;

        if (!IS_ALIGNED(iova | paddr, pgsize))
                return -EINVAL;

        if (!(prot & IOMMU_WRITE))
                flags |= ZPCI_TABLE_PROTECTED;

        rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
                                       pgcount, flags, gfp);
        if (!rc) {
                *mapped = size;
                atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
        }

        return rc;
}

static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
                                           dma_addr_t iova)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        unsigned long *rto, *sto, *pto;
        unsigned long ste, pte, rte;
        unsigned int rtx, sx, px;
        phys_addr_t phys = 0;

        if (iova < domain->geometry.aperture_start ||
            iova > domain->geometry.aperture_end)
                return 0;

        rtx = calc_rtx(iova);
        sx = calc_sx(iova);
        px = calc_px(iova);
        rto = s390_domain->dma_table;

        rte = READ_ONCE(rto[rtx]);
        if (reg_entry_isvalid(rte)) {
                sto = get_rt_sto(rte);
                ste = READ_ONCE(sto[sx]);
                if (reg_entry_isvalid(ste)) {
                        pto = get_st_pto(ste);
                        pte = READ_ONCE(pto[px]);
                        if (pt_entry_isvalid(pte))
                                phys = pte & ZPCI_PTE_ADDR_MASK;
                }
        }

        return phys;
}

static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
                                     unsigned long iova,
                                     size_t pgsize, size_t pgcount,
                                     struct iommu_iotlb_gather *gather)
{
        struct s390_domain *s390_domain = to_s390_domain(domain);
        size_t size = pgcount << __ffs(pgsize);
        int rc;

        if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
            (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
                return 0;

        rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
        if (rc)
                return 0;

        iommu_iotlb_gather_add_range(gather, iova, size);
        atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);

        return size;
}

static void s390_iommu_probe_finalize(struct device *dev)
{
        iommu_setup_dma_ops(dev, 0, U64_MAX);
}

struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
        if (!zdev || !zdev->s390_domain)
                return NULL;
        return &zdev->s390_domain->ctrs;
}

int zpci_init_iommu(struct zpci_dev *zdev)
{
        u64 aperture_size;
        int rc = 0;

        rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
                                    "s390-iommu.%08x", zdev->fid);
        if (rc)
                goto out_err;

        rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
        if (rc)
                goto out_sysfs;

        zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
        aperture_size = min3(s390_iommu_aperture,
                             ZPCI_TABLE_SIZE_RT - zdev->start_dma,
                             zdev->end_dma - zdev->start_dma + 1);
        zdev->end_dma = zdev->start_dma + aperture_size - 1;

        return 0;

out_sysfs:
        iommu_device_sysfs_remove(&zdev->iommu_dev);

out_err:
        return rc;
}

void zpci_destroy_iommu(struct zpci_dev *zdev)
{
        iommu_device_unregister(&zdev->iommu_dev);
        iommu_device_sysfs_remove(&zdev->iommu_dev);
}

static int __init s390_iommu_setup(char *str)
{
        if (!strcmp(str, "strict")) {
                pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
                iommu_set_dma_strict();
        }
        return 1;
}

__setup("s390_iommu=", s390_iommu_setup);

static int __init s390_iommu_aperture_setup(char *str)
{
        if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
                s390_iommu_aperture_factor = 1;
        return 1;
}

__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);

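/*
 * The IOVA aperture defaults to the amount of usable memory
 * (high_memory) scaled by s390_iommu_aperture_factor; a factor of 0
 * lifts the limit entirely.
 */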
static int __init s390_iommu_init(void)
{
        iommu_dma_forcedac = true;
        s390_iommu_aperture = (u64)virt_to_phys(high_memory);
        if (!s390_iommu_aperture_factor)
                s390_iommu_aperture = ULONG_MAX;
        else
                s390_iommu_aperture *= s390_iommu_aperture_factor;

        return dma_alloc_cpu_table_caches();
}
subsys_initcall(s390_iommu_init);

static const struct iommu_ops s390_iommu_ops = {
        .capable = s390_iommu_capable,
        .domain_alloc_paging = s390_domain_alloc_paging,
        .probe_device = s390_iommu_probe_device,
        .probe_finalize = s390_iommu_probe_finalize,
        .release_device = s390_iommu_release_device,
        .device_group = generic_device_group,
        .pgsize_bitmap = SZ_4K,
        .get_resv_regions = s390_iommu_get_resv_regions,
        .default_domain_ops = &(const struct iommu_domain_ops) {
                .attach_dev      = s390_iommu_attach_device,
                .map_pages       = s390_iommu_map_pages,
                .unmap_pages     = s390_iommu_unmap_pages,
                .flush_iotlb_all = s390_iommu_flush_iotlb_all,
                .iotlb_sync      = s390_iommu_iotlb_sync,
                .iotlb_sync_map  = s390_iommu_iotlb_sync_map,
                .iova_to_phys    = s390_iommu_iova_to_phys,
                .free            = s390_domain_free,
        }
};