arch/s390/mm/vmem.c
// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

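/* Serializes runtime changes to the identity (1:1) mapping and the vmemmap. */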
static DEFINE_MUTEX(vmem_mutex);

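/*
 * Page allocation helpers: early callers (before the slab allocator is up)
 * are served from memblock, later callers from the buddy allocator. Boot
 * memory is never expected to be freed again, hence the PageReserved()
 * check in vmem_free_pages() below.
 */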
static void __ref *vmem_alloc_pages(unsigned int order)
{
        unsigned long size = PAGE_SIZE << order;

        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
        return (void *) memblock_phys_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
                return;
        free_pages(addr, order);
}

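/*
 * Region and segment tables ("crst") are 16KB on s390 (four pages, hence
 * CRST_ALLOC_ORDER) and hold 2048 eight-byte entries; crst_table_init()
 * fills a new table with the given empty entry value.
 */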
void *vmem_crst_alloc(unsigned long val)
{
        unsigned long *table;

        table = vmem_alloc_pages(CRST_ALLOC_ORDER);
        if (table)
                crst_table_init(table, val);
        return table;
}

pte_t __ref *vmem_pte_alloc(void)
{
        unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
        pte_t *pte;

        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm);
        else
                pte = (pte_t *) memblock_phys_alloc(size, size);
        if (!pte)
                return NULL;
        memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
        return pte;
}

static void vmem_pte_free(unsigned long *table)
{
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(virt_to_page(table))))
                return;
        page_table_free(&init_mm, table);
}

#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_pmd_start;
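
/*
 * Rough arithmetic (assuming the common, but configuration dependent, case
 * of a 64 byte struct page): one PMD-sized (1MB) vmemmap block holds 16384
 * struct pages and thus describes 64MB of memory. Whenever the vmemmap of a
 * hot-added or hot-removed range does not start or end on a PMD boundary,
 * only part of such a block is used; the PAGE_UNUSED pattern marks the
 * unused parts so the block can be freed once it is completely unused.
 */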

static void vmemmap_flush_unused_pmd(void)
{
        if (!unused_pmd_start)
                return;
        memset(__va(unused_pmd_start), PAGE_UNUSED,
               ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
        unused_pmd_start = 0;
}

static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
        /*
         * As we expect to add in the same granularity as we remove, it's
         * sufficient to mark only some piece used to block the memmap page from
         * getting removed (just in case the memmap never gets initialized,
         * e.g., because the memory block never gets onlined).
         */
        memset(__va(start), 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
        /*
         * We only optimize if the new used range directly follows the
         * previously unused range (esp., when populating consecutive sections).
         */
        if (unused_pmd_start == start) {
                unused_pmd_start = end;
                if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
                        unused_pmd_start = 0;
                return;
        }
        vmemmap_flush_unused_pmd();
        __vmemmap_use_sub_pmd(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
        void *page = __va(ALIGN_DOWN(start, PMD_SIZE));

        vmemmap_flush_unused_pmd();

        /* Could be our memmap page is filled with PAGE_UNUSED already ... */
        __vmemmap_use_sub_pmd(start, end);

        /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
        if (!IS_ALIGNED(start, PMD_SIZE))
                memset(page, PAGE_UNUSED, start - __pa(page));
        /*
         * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
         * consecutive sections. Remember for the last added PMD the last
         * unused range in the populated PMD.
         */
        if (!IS_ALIGNED(end, PMD_SIZE))
                unused_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
        void *page = __va(ALIGN_DOWN(start, PMD_SIZE));

        vmemmap_flush_unused_pmd();
        memset(__va(start), PAGE_UNUSED, end - start);
        return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
}

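/*
 * The modify_*_table() helpers below each walk one level of the kernel page
 * tables. "add" selects between populating and unpopulating the range,
 * "direct" distinguishes the 1:1 (identity) mapping from the vmemmap: for
 * the identity mapping the physical address equals the virtual address and
 * no backing pages are allocated or freed, while vmemmap mappings are backed
 * by pages from vmemmap_alloc_block().
 */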
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, bool add, bool direct)
{
        unsigned long prot, pages = 0;
        int ret = -ENOMEM;
        pte_t *pte;

        prot = pgprot_val(PAGE_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_PAGE_NOEXEC;

        pte = pte_offset_kernel(pmd, addr);
        for (; addr < end; addr += PAGE_SIZE, pte++) {
                if (!add) {
                        if (pte_none(*pte))
                                continue;
                        if (!direct)
                                vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
                        pte_clear(&init_mm, addr, pte);
                } else if (pte_none(*pte)) {
                        if (!direct) {
                                void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

                                if (!new_page)
                                        goto out;
                                pte_val(*pte) = __pa(new_page) | prot;
                        } else {
                                pte_val(*pte) = addr | prot;
                        }
                } else {
                        continue;
                }
                pages++;
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
        return ret;
}

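/*
 * The try_free_*_table() helpers free a now possibly empty lower level table
 * after an unmap operation: if every entry is none, the table itself is
 * released and the entry in the next higher level table is cleared.
 */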
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
        pte_t *pte;
        int i;

        /* We can safely assume this is fully in 1:1 mapping & vmemmap area */
        pte = pte_offset_kernel(pmd, start);
        for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
                if (!pte_none(*pte))
                        return;
        }
        vmem_pte_free(__va(pmd_deref(*pmd)));
        pmd_clear(pmd);
}

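/*
 * With EDAT1 the hardware supports 1MB segment (PMD) frames. They are used
 * for the identity mapping where possible, and for the vmemmap even if a
 * frame ends up only partially used (see the PAGE_UNUSED tracking above).
 */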
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                                  unsigned long end, bool add, bool direct)
{
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
        pmd_t *pmd;
        pte_t *pte;

        prot = pgprot_val(SEGMENT_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_SEGMENT_ENTRY_NOEXEC;

        pmd = pmd_offset(pud, addr);
        for (; addr < end; addr = next, pmd++) {
                next = pmd_addr_end(addr, end);
                if (!add) {
                        if (pmd_none(*pmd))
                                continue;
                        if (pmd_large(*pmd) && !add) {
                                if (IS_ALIGNED(addr, PMD_SIZE) &&
                                    IS_ALIGNED(next, PMD_SIZE)) {
                                        if (!direct)
                                                vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
                                        pmd_clear(pmd);
                                        pages++;
                                } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
                                        vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
                                        pmd_clear(pmd);
                                }
                                continue;
                        }
                } else if (pmd_none(*pmd)) {
                        if (IS_ALIGNED(addr, PMD_SIZE) &&
                            IS_ALIGNED(next, PMD_SIZE) &&
                            MACHINE_HAS_EDAT1 && addr && direct &&
                            !debug_pagealloc_enabled()) {
                                pmd_val(*pmd) = addr | prot;
                                pages++;
                                continue;
                        } else if (!direct && MACHINE_HAS_EDAT1) {
                                void *new_page;

                                /*
                                 * Use 1MB frames for vmemmap if available. We
                                 * always use large frames even if they are only
                                 * partially used. Otherwise we would also have
                                 * page tables, since vmemmap_populate() gets
                                 * called for each section separately.
                                 */
                                new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
                                if (new_page) {
                                        pmd_val(*pmd) = __pa(new_page) | prot;
                                        if (!IS_ALIGNED(addr, PMD_SIZE) ||
                                            !IS_ALIGNED(next, PMD_SIZE)) {
                                                vmemmap_use_new_sub_pmd(addr, next);
                                        }
                                        continue;
                                }
                        }
                        pte = vmem_pte_alloc();
                        if (!pte)
                                goto out;
                        pmd_populate(&init_mm, pmd, pte);
                } else if (pmd_large(*pmd)) {
                        if (!direct)
                                vmemmap_use_sub_pmd(addr, next);
                        continue;
                }
                ret = modify_pte_table(pmd, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pte_table(pmd, addr & PMD_MASK);
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
        return ret;
}

static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
        const unsigned long end = start + PUD_SIZE;
        pmd_t *pmd;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif
        pmd = pmd_offset(pud, start);
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                if (!pmd_none(*pmd))
                        return;
        vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
        pud_clear(pud);
}

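/*
 * With EDAT2 the hardware additionally supports 2GB region third (PUD)
 * frames; they are only used for the identity mapping, never for the
 * vmemmap.
 */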
static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
                            bool add, bool direct)
{
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
        pud_t *pud;
        pmd_t *pmd;

        prot = pgprot_val(REGION3_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_REGION_ENTRY_NOEXEC;
        pud = pud_offset(p4d, addr);
        for (; addr < end; addr = next, pud++) {
                next = pud_addr_end(addr, end);
                if (!add) {
                        if (pud_none(*pud))
                                continue;
                        if (pud_large(*pud)) {
                                if (IS_ALIGNED(addr, PUD_SIZE) &&
                                    IS_ALIGNED(next, PUD_SIZE)) {
                                        pud_clear(pud);
                                        pages++;
                                }
                                continue;
                        }
                } else if (pud_none(*pud)) {
                        if (IS_ALIGNED(addr, PUD_SIZE) &&
                            IS_ALIGNED(next, PUD_SIZE) &&
                            MACHINE_HAS_EDAT2 && addr && direct &&
                            !debug_pagealloc_enabled()) {
                                pud_val(*pud) = addr | prot;
                                pages++;
                                continue;
                        }
                        pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        if (!pmd)
                                goto out;
                        pud_populate(&init_mm, pud, pmd);
                } else if (pud_large(*pud)) {
                        continue;
                }
                ret = modify_pmd_table(pud, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pmd_table(pud, addr & PUD_MASK);
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
        return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
        const unsigned long end = start + P4D_SIZE;
        pud_t *pud;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif

        pud = pud_offset(p4d, start);
        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                if (!pud_none(*pud))
                        return;
        }
        vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
        p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
                            bool add, bool direct)
{
        unsigned long next;
        int ret = -ENOMEM;
        p4d_t *p4d;
        pud_t *pud;

        p4d = p4d_offset(pgd, addr);
        for (; addr < end; addr = next, p4d++) {
                next = p4d_addr_end(addr, end);
                if (!add) {
                        if (p4d_none(*p4d))
                                continue;
                } else if (p4d_none(*p4d)) {
                        pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
                        if (!pud)
                                goto out;
                        p4d_populate(&init_mm, p4d, pud);
                }
                ret = modify_pud_table(p4d, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pud_table(p4d, addr & P4D_MASK);
        }
        ret = 0;
out:
        return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
        const unsigned long end = start + PGDIR_SIZE;
        p4d_t *p4d;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif

        p4d = p4d_offset(pgd, start);
        for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
                if (!p4d_none(*p4d))
                        return;
        }
        vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
        pgd_clear(pgd);
}

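/*
 * Top level entry point for both populating and unpopulating a range:
 * walks pgd -> p4d -> pud -> pmd -> pte, allocating intermediate tables as
 * needed on add and freeing empty ones on remove. Removal always finishes
 * with a TLB flush of the range.
 */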
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
                            bool direct)
{
        unsigned long addr, next;
        int ret = -ENOMEM;
        pgd_t *pgd;
        p4d_t *p4d;

        if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
                return -EINVAL;
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
                pgd = pgd_offset_k(addr);

                if (!add) {
                        if (pgd_none(*pgd))
                                continue;
                } else if (pgd_none(*pgd)) {
                        p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
                        if (!p4d)
                                goto out;
                        pgd_populate(&init_mm, pgd, p4d);
                }
                ret = modify_p4d_table(pgd, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_p4d_table(pgd, addr & PGDIR_MASK);
        }
        ret = 0;
out:
        if (!add)
                flush_tlb_kernel_range(start, end);
        return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
        return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
        return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
        return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
        remove_pagetable(start, start + size, true);
}

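/*
 * vmemmap_populate() and vmemmap_free() are the architecture hooks used by
 * the generic sparse vmemmap code when memory sections are added or removed;
 * they map and unmap the struct page array for the affected range.
 */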
/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
{
        int ret;

        mutex_lock(&vmem_mutex);
        /* We don't care about the node, just use NUMA_NO_NODE on allocations */
        ret = add_pagetable(start, end, false);
        if (ret)
                remove_pagetable(start, end, false);
        mutex_unlock(&vmem_mutex);
        return ret;
}

void vmemmap_free(unsigned long start, unsigned long end,
                  struct vmem_altmap *altmap)
{
        mutex_lock(&vmem_mutex);
        remove_pagetable(start, end, false);
        mutex_unlock(&vmem_mutex);
}

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
        mutex_lock(&vmem_mutex);
        vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
}

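/*
 * vmem_add_mapping() and vmem_remove_mapping() maintain the 1:1 mapping for
 * additional memory segments (e.g. DCSS segments mapped by the extmem code).
 * The range is checked against VMEM_MAX_PHYS and, on failure, any partially
 * created mapping is torn down again.
 */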
int vmem_add_mapping(unsigned long start, unsigned long size)
{
        int ret;

        if (start + size > VMEM_MAX_PHYS ||
            start + size < start)
                return -ERANGE;

        mutex_lock(&vmem_mutex);
        ret = vmem_add_range(start, size);
        if (ret)
                vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
        return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
        struct memblock_region *reg;

        for_each_memblock(memory, reg)
                vmem_add_range(reg->base, reg->size);
        __set_memory((unsigned long)_stext,
                     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory((unsigned long)_etext,
                     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
                     SET_MEMORY_RO);
        __set_memory((unsigned long)_sinittext,
                     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);

        /* we need lowcore executable for our LPSWE instructions */
        set_memory_x(0, 1);

        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
}