arch/s390/mm/vmem.c
// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

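/*
 * Allocate pages for page tables: use the buddy allocator once the slab
 * allocator is up, fall back to memblock during early boot.
 */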
static void __ref *vmem_alloc_pages(unsigned int order)
{
        unsigned long size = PAGE_SIZE << order;

        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
        return memblock_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
                return;
        free_pages(addr, order);
}

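/*
 * Allocate a region/segment (crst) table and initialize all of its entries
 * with the given value.
 */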
void *vmem_crst_alloc(unsigned long val)
{
        unsigned long *table;

        table = vmem_alloc_pages(CRST_ALLOC_ORDER);
        if (table)
                crst_table_init(table, val);
        return table;
}

pte_t __ref *vmem_pte_alloc(void)
{
        unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
        pte_t *pte;

        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm);
        else
                pte = (pte_t *) memblock_alloc(size, size);
        if (!pte)
                return NULL;
        memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
        return pte;
}

static void vmem_pte_free(unsigned long *table)
{
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(virt_to_page(table))))
                return;
        page_table_free(&init_mm, table);
}

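/*
 * Marker written into the unused parts of a vmemmap range that is backed by
 * a large frame; a PMD range consisting entirely of this marker can be freed
 * again (see vmemmap_unuse_sub_pmd()).
 */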
#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_sub_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_sub_pmd_start;

static void vmemmap_flush_unused_sub_pmd(void)
{
        if (!unused_sub_pmd_start)
                return;
        memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
               ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
        unused_sub_pmd_start = 0;
}

static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
{
        /*
         * As we expect to add in the same granularity as we remove, it's
         * sufficient to mark only some piece used to block the memmap page from
         * getting removed (just in case the memmap never gets initialized,
         * e.g., because the memory block never gets onlined).
         */
        memset((void *)start, 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
        /*
         * We only optimize if the new used range directly follows the
         * previously unused range (esp., when populating consecutive sections).
         */
        if (unused_sub_pmd_start == start) {
                unused_sub_pmd_start = end;
                if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
                        unused_sub_pmd_start = 0;
                return;
        }
        vmemmap_flush_unused_sub_pmd();
        vmemmap_mark_sub_pmd_used(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
        unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

        vmemmap_flush_unused_sub_pmd();

        /* Could be our memmap page is filled with PAGE_UNUSED already ... */
        vmemmap_mark_sub_pmd_used(start, end);

        /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
        if (!IS_ALIGNED(start, PMD_SIZE))
                memset((void *)page, PAGE_UNUSED, start - page);
        /*
         * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
         * consecutive sections. For the last added PMD, remember the last
         * unused range in the populated PMD.
         */
        if (!IS_ALIGNED(end, PMD_SIZE))
                unused_sub_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
        unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

        vmemmap_flush_unused_sub_pmd();
        memset((void *)start, PAGE_UNUSED, end - start);
        return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}

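/*
 * Walk and modify the page table entries below one PMD: populate them when
 * adding (backing the vmemmap with freshly allocated pages, or pointing the
 * identity map at the address itself), clear them when removing. "direct"
 * selects the identity (1:1) mapping, !"direct" the vmemmap.
 */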
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, bool add, bool direct)
{
        unsigned long prot, pages = 0;
        int ret = -ENOMEM;
        pte_t *pte;

        prot = pgprot_val(PAGE_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_PAGE_NOEXEC;

        pte = pte_offset_kernel(pmd, addr);
        for (; addr < end; addr += PAGE_SIZE, pte++) {
                if (!add) {
                        if (pte_none(*pte))
                                continue;
                        if (!direct)
                                vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
                        pte_clear(&init_mm, addr, pte);
                } else if (pte_none(*pte)) {
                        if (!direct) {
                                void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

                                if (!new_page)
                                        goto out;
                                pte_val(*pte) = __pa(new_page) | prot;
                        } else {
                                pte_val(*pte) = __pa(addr) | prot;
                        }
                } else {
                        continue;
                }
                pages++;
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
        return ret;
}

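/* Free the PTE table behind @pmd if none of its entries is in use anymore. */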
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
        pte_t *pte;
        int i;

        /* We can safely assume this is fully in 1:1 mapping & vmemmap area */
        pte = pte_offset_kernel(pmd, start);
        for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
                if (!pte_none(*pte))
                        return;
        }
        vmem_pte_free((unsigned long *) pmd_deref(*pmd));
        pmd_clear(pmd);
}

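/*
 * Walk and modify the PMD entries below one PUD. With EDAT1 the identity
 * mapping and the vmemmap are backed by 1MB segments where possible;
 * otherwise a PTE table is allocated and modify_pte_table() does the work.
 */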
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                                  unsigned long end, bool add, bool direct)
{
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
        pmd_t *pmd;
        pte_t *pte;

        prot = pgprot_val(SEGMENT_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_SEGMENT_ENTRY_NOEXEC;

        pmd = pmd_offset(pud, addr);
        for (; addr < end; addr = next, pmd++) {
                next = pmd_addr_end(addr, end);
                if (!add) {
                        if (pmd_none(*pmd))
                                continue;
                        if (pmd_large(*pmd)) {
                                if (IS_ALIGNED(addr, PMD_SIZE) &&
                                    IS_ALIGNED(next, PMD_SIZE)) {
                                        if (!direct)
                                                vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
                                        pmd_clear(pmd);
                                        pages++;
                                } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
                                        vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
                                        pmd_clear(pmd);
                                }
                                continue;
                        }
                } else if (pmd_none(*pmd)) {
                        if (IS_ALIGNED(addr, PMD_SIZE) &&
                            IS_ALIGNED(next, PMD_SIZE) &&
                            MACHINE_HAS_EDAT1 && addr && direct &&
                            !debug_pagealloc_enabled()) {
                                pmd_val(*pmd) = __pa(addr) | prot;
                                pages++;
                                continue;
                        } else if (!direct && MACHINE_HAS_EDAT1) {
                                void *new_page;

                                /*
                                 * Use 1MB frames for vmemmap if available. We
                                 * always use large frames even if they are only
                                 * partially used. Otherwise we would also need
                                 * page tables, since vmemmap_populate gets
                                 * called for each section separately.
                                 */
                                new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
                                if (new_page) {
                                        pmd_val(*pmd) = __pa(new_page) | prot;
                                        if (!IS_ALIGNED(addr, PMD_SIZE) ||
                                            !IS_ALIGNED(next, PMD_SIZE)) {
                                                vmemmap_use_new_sub_pmd(addr, next);
                                        }
                                        continue;
                                }
                        }
                        pte = vmem_pte_alloc();
                        if (!pte)
                                goto out;
                        pmd_populate(&init_mm, pmd, pte);
                } else if (pmd_large(*pmd)) {
                        if (!direct)
                                vmemmap_use_sub_pmd(addr, next);
                        continue;
                }
                ret = modify_pte_table(pmd, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pte_table(pmd, addr & PMD_MASK);
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
        return ret;
}

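/* Free the PMD table behind @pud if none of its entries is in use anymore. */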
static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
        const unsigned long end = start + PUD_SIZE;
        pmd_t *pmd;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif
        pmd = pmd_offset(pud, start);
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                if (!pmd_none(*pmd))
                        return;
        vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
        pud_clear(pud);
}

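/*
 * Walk and modify the PUD entries below one P4D. With EDAT2 the identity
 * mapping can be backed by 2GB region-third entries where possible;
 * otherwise a PMD table is allocated and modify_pmd_table() does the work.
 */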
static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
                            bool add, bool direct)
{
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
        pud_t *pud;
        pmd_t *pmd;

        prot = pgprot_val(REGION3_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_REGION_ENTRY_NOEXEC;
        pud = pud_offset(p4d, addr);
        for (; addr < end; addr = next, pud++) {
                next = pud_addr_end(addr, end);
                if (!add) {
                        if (pud_none(*pud))
                                continue;
                        if (pud_large(*pud)) {
                                if (IS_ALIGNED(addr, PUD_SIZE) &&
                                    IS_ALIGNED(next, PUD_SIZE)) {
                                        pud_clear(pud);
                                        pages++;
                                }
                                continue;
                        }
                } else if (pud_none(*pud)) {
                        if (IS_ALIGNED(addr, PUD_SIZE) &&
                            IS_ALIGNED(next, PUD_SIZE) &&
                            MACHINE_HAS_EDAT2 && addr && direct &&
                            !debug_pagealloc_enabled()) {
                                pud_val(*pud) = __pa(addr) | prot;
                                pages++;
                                continue;
                        }
                        pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        if (!pmd)
                                goto out;
                        pud_populate(&init_mm, pud, pmd);
                } else if (pud_large(*pud)) {
                        continue;
                }
                ret = modify_pmd_table(pud, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pmd_table(pud, addr & PUD_MASK);
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
        return ret;
}

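/* Free the PUD table behind @p4d if none of its entries is in use anymore. */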
static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
        const unsigned long end = start + P4D_SIZE;
        pud_t *pud;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif

        pud = pud_offset(p4d, start);
        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                if (!pud_none(*pud))
                        return;
        }
        vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
        p4d_clear(p4d);
}

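/* Walk and modify the P4D entries below one PGD; no large mappings exist at this level. */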
static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
                            bool add, bool direct)
{
        unsigned long next;
        int ret = -ENOMEM;
        p4d_t *p4d;
        pud_t *pud;

        p4d = p4d_offset(pgd, addr);
        for (; addr < end; addr = next, p4d++) {
                next = p4d_addr_end(addr, end);
                if (!add) {
                        if (p4d_none(*p4d))
                                continue;
                } else if (p4d_none(*p4d)) {
                        pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
                        if (!pud)
                                goto out;
                        p4d_populate(&init_mm, p4d, pud);
                }
                ret = modify_pud_table(p4d, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pud_table(p4d, addr & P4D_MASK);
        }
        ret = 0;
out:
        return ret;
}

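/* Free the P4D table behind @pgd if none of its entries is in use anymore. */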
static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
        const unsigned long end = start + PGDIR_SIZE;
        p4d_t *p4d;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif

        p4d = p4d_offset(pgd, start);
        for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
                if (!p4d_none(*p4d))
                        return;
        }
        vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
        pgd_clear(pgd);
}

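/*
 * Central page table walker: adds or removes a page-aligned range, either in
 * the identity (direct) mapping or in the vmemmap. All callers except the
 * early boot path in vmem_map_init() serialize via vmem_mutex. Removal
 * flushes the TLB for the whole range at the end.
 */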
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
                            bool direct)
{
        unsigned long addr, next;
        int ret = -ENOMEM;
        pgd_t *pgd;
        p4d_t *p4d;

        if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
                return -EINVAL;
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
                pgd = pgd_offset_k(addr);

                if (!add) {
                        if (pgd_none(*pgd))
                                continue;
                } else if (pgd_none(*pgd)) {
                        p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
                        if (!p4d)
                                goto out;
                        pgd_populate(&init_mm, pgd, p4d);
                }
                ret = modify_p4d_table(pgd, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_p4d_table(pgd, addr & PGDIR_MASK);
        }
        ret = 0;
out:
        if (!add)
                flush_tlb_kernel_range(start, end);
        return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
        return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
        return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
        return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
        remove_pagetable(start, start + size, true);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
{
        int ret;

        mutex_lock(&vmem_mutex);
        /* We don't care about the node, just use NUMA_NO_NODE on allocations */
        ret = add_pagetable(start, end, false);
        if (ret)
                remove_pagetable(start, end, false);
        mutex_unlock(&vmem_mutex);
        return ret;
}

void vmemmap_free(unsigned long start, unsigned long end,
                  struct vmem_altmap *altmap)
{
        mutex_lock(&vmem_mutex);
        remove_pagetable(start, end, false);
        mutex_unlock(&vmem_mutex);
}

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
        mutex_lock(&vmem_mutex);
        vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
}

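/*
 * Memory can only be hotplugged below VMEM_MAX_PHYS, the end of the
 * 1:1 mapped area.
 */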
struct range arch_get_mappable_range(void)
{
        struct range mhp_range;

        mhp_range.start = 0;
        mhp_range.end = VMEM_MAX_PHYS - 1;
        return mhp_range;
}

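/*
 * Add a physical memory range to the 1:1 mapping, after checking it against
 * the architecturally mappable range.
 */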
int vmem_add_mapping(unsigned long start, unsigned long size)
{
        struct range range = arch_get_mappable_range();
        int ret;

        if (start < range.start ||
            start + size > range.end + 1 ||
            start + size < start)
                return -ERANGE;

        mutex_lock(&vmem_mutex);
        ret = vmem_add_range(start, size);
        if (ret)
                vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
        return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for the vmemmap so that
 * additional memory segments can be hotplugged later.
 */
void __init vmem_map_init(void)
{
        phys_addr_t base, end;
        u64 i;

        for_each_mem_range(i, &base, &end)
                vmem_add_range(base, end - base);
        __set_memory((unsigned long)_stext,
                     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory((unsigned long)_etext,
                     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
                     SET_MEMORY_RO);
        __set_memory((unsigned long)_sinittext,
                     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);

        /* we need lowcore executable for our LPSWE instructions */
        set_memory_x(0, 1);

        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
}