Linux 6.9-rc1
[linux-2.6-microblaze.git] / arch / s390 / mm / hugetlbpage.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  IBM System z Huge TLB Page Support for Kernel.
4  *
5  *    Copyright IBM Corp. 2007,2020
6  *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
7  */
8
9 #define KMSG_COMPONENT "hugetlb"
10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
12 #include <asm/pgalloc.h>
13 #include <linux/mm.h>
14 #include <linux/hugetlb.h>
15 #include <linux/mman.h>
16 #include <linux/sched/mm.h>
17 #include <linux/security.h>
18
/*
 * Relocate a single flag bit: when the bit selected by the single-bit
 * bitmask "a" is set in "x", the result is the single-bit bitmask "b",
 * otherwise the result is 0. The selected bit is first shifted down to
 * bit position 0 and then up to the position of "b".
 */
#define move_set_bit(x, a, b)	((((x) & (a)) >> ilog2(a)) << ilog2(b))
24
25 static inline unsigned long __pte_to_rste(pte_t pte)
26 {
27         unsigned long rste;
28
29         /*
30          * Convert encoding               pte bits      pmd / pud bits
31          *                              lIR.uswrdy.p    dy..R...I...wr
32          * empty                        010.000000.0 -> 00..0...1...00
33          * prot-none, clean, old        111.000000.1 -> 00..1...1...00
34          * prot-none, clean, young      111.000001.1 -> 01..1...1...00
35          * prot-none, dirty, old        111.000010.1 -> 10..1...1...00
36          * prot-none, dirty, young      111.000011.1 -> 11..1...1...00
37          * read-only, clean, old        111.000100.1 -> 00..1...1...01
38          * read-only, clean, young      101.000101.1 -> 01..1...0...01
39          * read-only, dirty, old        111.000110.1 -> 10..1...1...01
40          * read-only, dirty, young      101.000111.1 -> 11..1...0...01
41          * read-write, clean, old       111.001100.1 -> 00..1...1...11
42          * read-write, clean, young     101.001101.1 -> 01..1...0...11
43          * read-write, dirty, old       110.001110.1 -> 10..0...1...11
44          * read-write, dirty, young     100.001111.1 -> 11..0...0...11
45          * HW-bits: R read-only, I invalid
46          * SW-bits: p present, y young, d dirty, r read, w write, s special,
47          *          u unused, l large
48          */
49         if (pte_present(pte)) {
50                 rste = pte_val(pte) & PAGE_MASK;
51                 rste |= move_set_bit(pte_val(pte), _PAGE_READ,
52                                      _SEGMENT_ENTRY_READ);
53                 rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
54                                      _SEGMENT_ENTRY_WRITE);
55                 rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
56                                      _SEGMENT_ENTRY_INVALID);
57                 rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
58                                      _SEGMENT_ENTRY_PROTECT);
59                 rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
60                                      _SEGMENT_ENTRY_DIRTY);
61                 rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
62                                      _SEGMENT_ENTRY_YOUNG);
63 #ifdef CONFIG_MEM_SOFT_DIRTY
64                 rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
65                                      _SEGMENT_ENTRY_SOFT_DIRTY);
66 #endif
67                 rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
68                                      _SEGMENT_ENTRY_NOEXEC);
69         } else
70                 rste = _SEGMENT_ENTRY_EMPTY;
71         return rste;
72 }
73
74 static inline pte_t __rste_to_pte(unsigned long rste)
75 {
76         unsigned long pteval;
77         int present;
78
79         if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
80                 present = pud_present(__pud(rste));
81         else
82                 present = pmd_present(__pmd(rste));
83
84         /*
85          * Convert encoding             pmd / pud bits      pte bits
86          *                              dy..R...I...wr    lIR.uswrdy.p
87          * empty                        00..0...1...00 -> 010.000000.0
88          * prot-none, clean, old        00..1...1...00 -> 111.000000.1
89          * prot-none, clean, young      01..1...1...00 -> 111.000001.1
90          * prot-none, dirty, old        10..1...1...00 -> 111.000010.1
91          * prot-none, dirty, young      11..1...1...00 -> 111.000011.1
92          * read-only, clean, old        00..1...1...01 -> 111.000100.1
93          * read-only, clean, young      01..1...0...01 -> 101.000101.1
94          * read-only, dirty, old        10..1...1...01 -> 111.000110.1
95          * read-only, dirty, young      11..1...0...01 -> 101.000111.1
96          * read-write, clean, old       00..1...1...11 -> 111.001100.1
97          * read-write, clean, young     01..1...0...11 -> 101.001101.1
98          * read-write, dirty, old       10..0...1...11 -> 110.001110.1
99          * read-write, dirty, young     11..0...0...11 -> 100.001111.1
100          * HW-bits: R read-only, I invalid
101          * SW-bits: p present, y young, d dirty, r read, w write, s special,
102          *          u unused, l large
103          */
104         if (present) {
105                 pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
106                 pteval |= _PAGE_LARGE | _PAGE_PRESENT;
107                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ);
108                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE);
109                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID);
110                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT);
111                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY);
112                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG);
113 #ifdef CONFIG_MEM_SOFT_DIRTY
114                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
115 #endif
116                 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
117         } else
118                 pteval = _PAGE_INVALID;
119         return __pte(pteval);
120 }
121
122 static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
123 {
124         struct page *page;
125         unsigned long size, paddr;
126
127         if (!mm_uses_skeys(mm) ||
128             rste & _SEGMENT_ENTRY_INVALID)
129                 return;
130
131         if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
132                 page = pud_page(__pud(rste));
133                 size = PUD_SIZE;
134                 paddr = rste & PUD_MASK;
135         } else {
136                 page = pmd_page(__pmd(rste));
137                 size = PMD_SIZE;
138                 paddr = rste & PMD_MASK;
139         }
140
141         if (!test_and_set_bit(PG_arch_1, &page->flags))
142                 __storage_key_init_range(paddr, paddr + size - 1);
143 }
144
145 void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
146                      pte_t *ptep, pte_t pte)
147 {
148         unsigned long rste;
149
150         rste = __pte_to_rste(pte);
151         if (!MACHINE_HAS_NX)
152                 rste &= ~_SEGMENT_ENTRY_NOEXEC;
153
154         /* Set correct table type for 2G hugepages */
155         if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
156                 if (likely(pte_present(pte)))
157                         rste |= _REGION3_ENTRY_LARGE;
158                 rste |= _REGION_ENTRY_TYPE_R3;
159         } else if (likely(pte_present(pte)))
160                 rste |= _SEGMENT_ENTRY_LARGE;
161
162         clear_huge_pte_skeys(mm, rste);
163         set_pte(ptep, __pte(rste));
164 }
165
166 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
167                      pte_t *ptep, pte_t pte, unsigned long sz)
168 {
169         __set_huge_pte_at(mm, addr, ptep, pte);
170 }
171
172 pte_t huge_ptep_get(pte_t *ptep)
173 {
174         return __rste_to_pte(pte_val(*ptep));
175 }
176
177 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
178                               unsigned long addr, pte_t *ptep)
179 {
180         pte_t pte = huge_ptep_get(ptep);
181         pmd_t *pmdp = (pmd_t *) ptep;
182         pud_t *pudp = (pud_t *) ptep;
183
184         if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
185                 pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
186         else
187                 pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
188         return pte;
189 }
190
191 pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
192                         unsigned long addr, unsigned long sz)
193 {
194         pgd_t *pgdp;
195         p4d_t *p4dp;
196         pud_t *pudp;
197         pmd_t *pmdp = NULL;
198
199         pgdp = pgd_offset(mm, addr);
200         p4dp = p4d_alloc(mm, pgdp, addr);
201         if (p4dp) {
202                 pudp = pud_alloc(mm, p4dp, addr);
203                 if (pudp) {
204                         if (sz == PUD_SIZE)
205                                 return (pte_t *) pudp;
206                         else if (sz == PMD_SIZE)
207                                 pmdp = pmd_alloc(mm, pudp, addr);
208                 }
209         }
210         return (pte_t *) pmdp;
211 }
212
213 pte_t *huge_pte_offset(struct mm_struct *mm,
214                        unsigned long addr, unsigned long sz)
215 {
216         pgd_t *pgdp;
217         p4d_t *p4dp;
218         pud_t *pudp;
219         pmd_t *pmdp = NULL;
220
221         pgdp = pgd_offset(mm, addr);
222         if (pgd_present(*pgdp)) {
223                 p4dp = p4d_offset(pgdp, addr);
224                 if (p4d_present(*p4dp)) {
225                         pudp = pud_offset(p4dp, addr);
226                         if (pud_present(*pudp)) {
227                                 if (pud_leaf(*pudp))
228                                         return (pte_t *) pudp;
229                                 pmdp = pmd_offset(pudp, addr);
230                         }
231                 }
232         }
233         return (pte_t *) pmdp;
234 }
235
236 int pmd_huge(pmd_t pmd)
237 {
238         return pmd_leaf(pmd);
239 }
240
241 int pud_huge(pud_t pud)
242 {
243         return pud_leaf(pud);
244 }
245
246 bool __init arch_hugetlb_valid_size(unsigned long size)
247 {
248         if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
249                 return true;
250         else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
251                 return true;
252         else
253                 return false;
254 }
255
256 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
257                 unsigned long addr, unsigned long len,
258                 unsigned long pgoff, unsigned long flags)
259 {
260         struct hstate *h = hstate_file(file);
261         struct vm_unmapped_area_info info;
262
263         info.flags = 0;
264         info.length = len;
265         info.low_limit = current->mm->mmap_base;
266         info.high_limit = TASK_SIZE;
267         info.align_mask = PAGE_MASK & ~huge_page_mask(h);
268         info.align_offset = 0;
269         return vm_unmapped_area(&info);
270 }
271
272 static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
273                 unsigned long addr0, unsigned long len,
274                 unsigned long pgoff, unsigned long flags)
275 {
276         struct hstate *h = hstate_file(file);
277         struct vm_unmapped_area_info info;
278         unsigned long addr;
279
280         info.flags = VM_UNMAPPED_AREA_TOPDOWN;
281         info.length = len;
282         info.low_limit = PAGE_SIZE;
283         info.high_limit = current->mm->mmap_base;
284         info.align_mask = PAGE_MASK & ~huge_page_mask(h);
285         info.align_offset = 0;
286         addr = vm_unmapped_area(&info);
287
288         /*
289          * A failed mmap() very likely causes application failure,
290          * so fall back to the bottom-up function here. This scenario
291          * can happen with large stack limits and large mmap()
292          * allocations.
293          */
294         if (addr & ~PAGE_MASK) {
295                 VM_BUG_ON(addr != -ENOMEM);
296                 info.flags = 0;
297                 info.low_limit = TASK_UNMAPPED_BASE;
298                 info.high_limit = TASK_SIZE;
299                 addr = vm_unmapped_area(&info);
300         }
301
302         return addr;
303 }
304
305 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
306                 unsigned long len, unsigned long pgoff, unsigned long flags)
307 {
308         struct hstate *h = hstate_file(file);
309         struct mm_struct *mm = current->mm;
310         struct vm_area_struct *vma;
311
312         if (len & ~huge_page_mask(h))
313                 return -EINVAL;
314         if (len > TASK_SIZE - mmap_min_addr)
315                 return -ENOMEM;
316
317         if (flags & MAP_FIXED) {
318                 if (prepare_hugepage_range(file, addr, len))
319                         return -EINVAL;
320                 goto check_asce_limit;
321         }
322
323         if (addr) {
324                 addr = ALIGN(addr, huge_page_size(h));
325                 vma = find_vma(mm, addr);
326                 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
327                     (!vma || addr + len <= vm_start_gap(vma)))
328                         goto check_asce_limit;
329         }
330
331         if (mm->get_unmapped_area == arch_get_unmapped_area)
332                 addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
333                                 pgoff, flags);
334         else
335                 addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
336                                 pgoff, flags);
337         if (offset_in_page(addr))
338                 return addr;
339
340 check_asce_limit:
341         return check_asce_limit(mm, addr, len);
342 }