arch, drivers: replace for_each_memblock() with for_each_mem_range()
[linux-2.6-microblaze.git] arch/s390/mm/vmem.c
// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

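/*
 * Allocate pages from the buddy allocator once the slab allocator is up,
 * or directly from memblock during early boot.
 */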
static void __ref *vmem_alloc_pages(unsigned int order)
{
        unsigned long size = PAGE_SIZE << order;

        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
        return (void *) memblock_phys_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
                return;
        free_pages(addr, order);
}

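/* Allocate a region/segment (crst) table and initialize all entries to @val. */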
void *vmem_crst_alloc(unsigned long val)
{
        unsigned long *table;

        table = vmem_alloc_pages(CRST_ALLOC_ORDER);
        if (table)
                crst_table_init(table, val);
        return table;
}

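/*
 * Allocate a page table with all entries marked invalid. Falls back to
 * memblock as long as the slab allocator is not yet available.
 */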
pte_t __ref *vmem_pte_alloc(void)
{
        unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
        pte_t *pte;

        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm);
        else
                pte = (pte_t *) memblock_phys_alloc(size, size);
        if (!pte)
                return NULL;
        memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
        return pte;
}

static void vmem_pte_free(unsigned long *table)
{
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(virt_to_page(table))))
                return;
        page_table_free(&init_mm, table);
}

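/*
 * vmemmap pages that back not (yet) used parts of a PMD-mapped range are
 * filled with the PAGE_UNUSED pattern. Once a whole PMD-mapped range
 * consists of this pattern only, its backing memory can be freed again.
 */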
#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which has not yet been memset(PAGE_UNUSED),
 * ranges from unused_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_pmd_start;

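/* Apply the deferred PAGE_UNUSED marking up to the next PMD boundary. */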
static void vmemmap_flush_unused_pmd(void)
{
        if (!unused_pmd_start)
                return;
        memset(__va(unused_pmd_start), PAGE_UNUSED,
               ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
        unused_pmd_start = 0;
}

static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
        /*
         * As we expect to add in the same granularity as we remove, it's
         * sufficient to mark only some piece used to block the memmap page from
         * getting removed (just in case the memmap never gets initialized,
         * e.g., because the memory block never gets onlined).
         */
        memset(__va(start), 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
        /*
         * We only optimize if the new used range directly follows the
         * previously unused range (esp., when populating consecutive sections).
         */
        if (unused_pmd_start == start) {
                unused_pmd_start = end;
                if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
                        unused_pmd_start = 0;
                return;
        }
        vmemmap_flush_unused_pmd();
        __vmemmap_use_sub_pmd(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
        void *page = __va(ALIGN_DOWN(start, PMD_SIZE));

        vmemmap_flush_unused_pmd();

        /* Could be our memmap page is filled with PAGE_UNUSED already ... */
        __vmemmap_use_sub_pmd(start, end);

        /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
        if (!IS_ALIGNED(start, PMD_SIZE))
                memset(page, PAGE_UNUSED, start - __pa(page));
        /*
         * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
         * consecutive sections. Remember for the last added PMD the last
         * unused range in the populated PMD.
         */
        if (!IS_ALIGNED(end, PMD_SIZE))
                unused_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
        void *page = __va(ALIGN_DOWN(start, PMD_SIZE));

        vmemmap_flush_unused_pmd();
        memset(__va(start), PAGE_UNUSED, end - start);
        return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
}

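/*
 * Add (@add) or remove pte entries for the range @addr..@end below @pmd.
 * For the identity mapping (@direct) the entries map @addr 1:1; for the
 * vmemmap the backing pages are allocated and freed here as well.
 */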
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, bool add, bool direct)
{
        unsigned long prot, pages = 0;
        int ret = -ENOMEM;
        pte_t *pte;

        prot = pgprot_val(PAGE_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_PAGE_NOEXEC;

        pte = pte_offset_kernel(pmd, addr);
        for (; addr < end; addr += PAGE_SIZE, pte++) {
                if (!add) {
                        if (pte_none(*pte))
                                continue;
                        if (!direct)
                                vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
                        pte_clear(&init_mm, addr, pte);
                } else if (pte_none(*pte)) {
                        if (!direct) {
                                void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

                                if (!new_page)
                                        goto out;
                                pte_val(*pte) = __pa(new_page) | prot;
                        } else {
                                pte_val(*pte) = addr | prot;
                        }
                } else {
                        continue;
                }
                pages++;
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
        return ret;
}

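/* Free the page table behind @pmd if none of its entries are in use. */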
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
        pte_t *pte;
        int i;

        /* We can safely assume this is fully in 1:1 mapping & vmemmap area */
        pte = pte_offset_kernel(pmd, start);
        for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
                if (!pte_none(*pte))
                        return;
        }
        vmem_pte_free(__va(pmd_deref(*pmd)));
        pmd_clear(pmd);
}

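/*
 * Same as modify_pte_table(), one level up: where alignment and EDAT1
 * permit, whole 1MB segments are mapped (or freed) directly instead of
 * descending to the pte level.
 */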
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                                  unsigned long end, bool add, bool direct)
{
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
        pmd_t *pmd;
        pte_t *pte;

        prot = pgprot_val(SEGMENT_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_SEGMENT_ENTRY_NOEXEC;

        pmd = pmd_offset(pud, addr);
        for (; addr < end; addr = next, pmd++) {
                next = pmd_addr_end(addr, end);
                if (!add) {
                        if (pmd_none(*pmd))
                                continue;
                        if (pmd_large(*pmd) && !add) {
                                if (IS_ALIGNED(addr, PMD_SIZE) &&
                                    IS_ALIGNED(next, PMD_SIZE)) {
                                        if (!direct)
                                                vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
                                        pmd_clear(pmd);
                                        pages++;
                                } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
                                        vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
                                        pmd_clear(pmd);
                                }
                                continue;
                        }
                } else if (pmd_none(*pmd)) {
                        if (IS_ALIGNED(addr, PMD_SIZE) &&
                            IS_ALIGNED(next, PMD_SIZE) &&
                            MACHINE_HAS_EDAT1 && addr && direct &&
                            !debug_pagealloc_enabled()) {
                                pmd_val(*pmd) = addr | prot;
                                pages++;
                                continue;
                        } else if (!direct && MACHINE_HAS_EDAT1) {
                                void *new_page;

                                /*
                                 * Use 1MB frames for vmemmap if available. We
                                 * always use large frames even if they are only
                                 * partially used. Otherwise we would also end
                                 * up with page tables, since vmemmap_populate
                                 * gets called for each section separately.
                                 */
                                new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
                                if (new_page) {
                                        pmd_val(*pmd) = __pa(new_page) | prot;
                                        if (!IS_ALIGNED(addr, PMD_SIZE) ||
                                            !IS_ALIGNED(next, PMD_SIZE)) {
                                                vmemmap_use_new_sub_pmd(addr, next);
                                        }
                                        continue;
                                }
                        }
                        pte = vmem_pte_alloc();
                        if (!pte)
                                goto out;
                        pmd_populate(&init_mm, pmd, pte);
                } else if (pmd_large(*pmd)) {
                        if (!direct)
                                vmemmap_use_sub_pmd(addr, next);
                        continue;
                }
                ret = modify_pte_table(pmd, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pte_table(pmd, addr & PMD_MASK);
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
        return ret;
}

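/* Free the pmd table behind @pud if all of its entries are empty. */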
static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
        const unsigned long end = start + PUD_SIZE;
        pmd_t *pmd;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif
        pmd = pmd_offset(pud, start);
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                if (!pmd_none(*pmd))
                        return;
        vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
        pud_clear(pud);
}

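/*
 * Same one level up again: the identity mapping may use 2GB region-third
 * entries when EDAT2 is available and the range is suitably aligned.
 */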
static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
                            bool add, bool direct)
{
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
        pud_t *pud;
        pmd_t *pmd;

        prot = pgprot_val(REGION3_KERNEL);
        if (!MACHINE_HAS_NX)
                prot &= ~_REGION_ENTRY_NOEXEC;
        pud = pud_offset(p4d, addr);
        for (; addr < end; addr = next, pud++) {
                next = pud_addr_end(addr, end);
                if (!add) {
                        if (pud_none(*pud))
                                continue;
                        if (pud_large(*pud)) {
                                if (IS_ALIGNED(addr, PUD_SIZE) &&
                                    IS_ALIGNED(next, PUD_SIZE)) {
                                        pud_clear(pud);
                                        pages++;
                                }
                                continue;
                        }
                } else if (pud_none(*pud)) {
                        if (IS_ALIGNED(addr, PUD_SIZE) &&
                            IS_ALIGNED(next, PUD_SIZE) &&
                            MACHINE_HAS_EDAT2 && addr && direct &&
                            !debug_pagealloc_enabled()) {
                                pud_val(*pud) = addr | prot;
                                pages++;
                                continue;
                        }
                        pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        if (!pmd)
                                goto out;
                        pud_populate(&init_mm, pud, pmd);
                } else if (pud_large(*pud)) {
                        continue;
                }
                ret = modify_pmd_table(pud, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pmd_table(pud, addr & PUD_MASK);
        }
        ret = 0;
out:
        if (direct)
                update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
        return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
        const unsigned long end = start + P4D_SIZE;
        pud_t *pud;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif

        pud = pud_offset(p4d, start);
        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                if (!pud_none(*pud))
                        return;
        }
        vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
        p4d_clear(p4d);
}

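/* Walk or allocate the pud-level tables for @addr..@end and descend. */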
static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
                            bool add, bool direct)
{
        unsigned long next;
        int ret = -ENOMEM;
        p4d_t *p4d;
        pud_t *pud;

        p4d = p4d_offset(pgd, addr);
        for (; addr < end; addr = next, p4d++) {
                next = p4d_addr_end(addr, end);
                if (!add) {
                        if (p4d_none(*p4d))
                                continue;
                } else if (p4d_none(*p4d)) {
                        pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
                        if (!pud)
                                goto out;
                        p4d_populate(&init_mm, p4d, pud);
                }
                ret = modify_pud_table(p4d, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_pud_table(p4d, addr & P4D_MASK);
        }
        ret = 0;
out:
        return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
        const unsigned long end = start + PGDIR_SIZE;
        p4d_t *p4d;
        int i;

        /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
        if (end > VMALLOC_START)
                return;
#ifdef CONFIG_KASAN
        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
                return;
#endif

        p4d = p4d_offset(pgd, start);
        for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
                if (!p4d_none(*p4d))
                        return;
        }
        vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
        pgd_clear(pgd);
}

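/*
 * Add (@add) or remove mappings for the page-aligned range @start..@end,
 * either in the identity mapping (@direct) or in the vmemmap. Empty page
 * tables are freed and the TLB is flushed after removal.
 */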
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
                            bool direct)
{
        unsigned long addr, next;
        int ret = -ENOMEM;
        pgd_t *pgd;
        p4d_t *p4d;

        if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
                return -EINVAL;
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
                pgd = pgd_offset_k(addr);

                if (!add) {
                        if (pgd_none(*pgd))
                                continue;
                } else if (pgd_none(*pgd)) {
                        p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
                        if (!p4d)
                                goto out;
                        pgd_populate(&init_mm, pgd, p4d);
                }
                ret = modify_p4d_table(pgd, addr, next, add, direct);
                if (ret)
                        goto out;
                if (!add)
                        try_free_p4d_table(pgd, addr & PGDIR_MASK);
        }
        ret = 0;
out:
        if (!add)
                flush_tlb_kernel_range(start, end);
        return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
        return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
        return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
        return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
        remove_pagetable(start, start + size, true);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
{
        int ret;

        mutex_lock(&vmem_mutex);
        /* We don't care about the node, just use NUMA_NO_NODE on allocations */
        ret = add_pagetable(start, end, false);
        if (ret)
                remove_pagetable(start, end, false);
        mutex_unlock(&vmem_mutex);
        return ret;
}

void vmemmap_free(unsigned long start, unsigned long end,
                  struct vmem_altmap *altmap)
{
        mutex_lock(&vmem_mutex);
        remove_pagetable(start, end, false);
        mutex_unlock(&vmem_mutex);
}

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
        mutex_lock(&vmem_mutex);
        vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
}

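/*
 * Add a physical memory range to the 1:1 mapping, after checking that it
 * fits below VMEM_MAX_PHYS. Partially established mappings are torn down
 * again on failure.
 */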
int vmem_add_mapping(unsigned long start, unsigned long size)
{
        int ret;

        if (start + size > VMEM_MAX_PHYS ||
            start + size < start)
                return -ERANGE;

        mutex_lock(&vmem_mutex);
        ret = vmem_add_range(start, size);
        if (ret)
                vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
        return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for the vmemmap so that
 * additional memory segments can be hotplugged.
 */
void __init vmem_map_init(void)
{
        phys_addr_t base, end;
        u64 i;

        for_each_mem_range(i, &base, &end)
                vmem_add_range(base, end - base);
        __set_memory((unsigned long)_stext,
                     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory((unsigned long)_etext,
                     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
                     SET_MEMORY_RO);
        __set_memory((unsigned long)_sinittext,
                     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);

        /* We need the lowcore executable for our LPSWE instructions */
        set_memory_x(0, 1);

        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
}