Merge tag 'compat-ioctl-fix' of git://git.kernel.org:/pub/scm/linux/kernel/git/arnd...
[linux-2.6-microblaze.git] / arch / mips / mm / init.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (C) 1994 - 2000 Ralf Baechle
7  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
8  * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
9  * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
10  */
11 #include <linux/bug.h>
12 #include <linux/init.h>
13 #include <linux/export.h>
14 #include <linux/signal.h>
15 #include <linux/sched.h>
16 #include <linux/smp.h>
17 #include <linux/kernel.h>
18 #include <linux/errno.h>
19 #include <linux/string.h>
20 #include <linux/types.h>
21 #include <linux/pagemap.h>
22 #include <linux/ptrace.h>
23 #include <linux/mman.h>
24 #include <linux/mm.h>
25 #include <linux/memblock.h>
26 #include <linux/highmem.h>
27 #include <linux/swap.h>
28 #include <linux/proc_fs.h>
29 #include <linux/pfn.h>
30 #include <linux/hardirq.h>
31 #include <linux/gfp.h>
32 #include <linux/kcore.h>
33 #include <linux/initrd.h>
34
35 #include <asm/bootinfo.h>
36 #include <asm/cachectl.h>
37 #include <asm/cpu.h>
38 #include <asm/dma.h>
39 #include <asm/kmap_types.h>
40 #include <asm/maar.h>
41 #include <asm/mmu_context.h>
42 #include <asm/sections.h>
43 #include <asm/pgtable.h>
44 #include <asm/pgalloc.h>
45 #include <asm/tlb.h>
46 #include <asm/fixmap.h>
47
48 /*
49  * We have up to 8 empty zeroed pages so we can map one of the right colour
50  * when needed.  This is necessary only on R4000 / R4400 SC and MC versions
51  * where we have to avoid VCED / VCEI exceptions for good performance at
52  * any price.  Since page is never written to after the initialization we
53  * don't have to care about aliases on other CPUs.
54  */
/* Address of the zeroed page(s); assigned by setup_zero_pages(). */
unsigned long empty_zero_page;
/* Mask covering the zero-page allocation; computed by setup_zero_pages(). */
unsigned long zero_page_mask;
EXPORT_SYMBOL_GPL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
58
59 /*
60  * Not static inline because used by IP27 special magic initialization code
61  */
62 void setup_zero_pages(void)
63 {
64         unsigned int order, i;
65         struct page *page;
66
67         if (cpu_has_vce)
68                 order = 3;
69         else
70                 order = 0;
71
72         empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
73         if (!empty_zero_page)
74                 panic("Oh boy, that early out of memory?");
75
76         page = virt_to_page((void *)empty_zero_page);
77         split_page(page, order);
78         for (i = 0; i < (1 << order); i++, page++)
79                 mark_page_reserved(page);
80
81         zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
82 }
83
84 static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
85 {
86         enum fixed_addresses idx;
87         unsigned int uninitialized_var(old_mmid);
88         unsigned long vaddr, flags, entrylo;
89         unsigned long old_ctx;
90         pte_t pte;
91         int tlbidx;
92
93         BUG_ON(Page_dcache_dirty(page));
94
95         preempt_disable();
96         pagefault_disable();
97         idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
98         idx += in_interrupt() ? FIX_N_COLOURS : 0;
99         vaddr = __fix_to_virt(FIX_CMAP_END - idx);
100         pte = mk_pte(page, prot);
101 #if defined(CONFIG_XPA)
102         entrylo = pte_to_entrylo(pte.pte_high);
103 #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
104         entrylo = pte.pte_high;
105 #else
106         entrylo = pte_to_entrylo(pte_val(pte));
107 #endif
108
109         local_irq_save(flags);
110         old_ctx = read_c0_entryhi();
111         write_c0_entryhi(vaddr & (PAGE_MASK << 1));
112         write_c0_entrylo0(entrylo);
113         write_c0_entrylo1(entrylo);
114         if (cpu_has_mmid) {
115                 old_mmid = read_c0_memorymapid();
116                 write_c0_memorymapid(MMID_KERNEL_WIRED);
117         }
118 #ifdef CONFIG_XPA
119         if (cpu_has_xpa) {
120                 entrylo = (pte.pte_low & _PFNX_MASK);
121                 writex_c0_entrylo0(entrylo);
122                 writex_c0_entrylo1(entrylo);
123         }
124 #endif
125         tlbidx = num_wired_entries();
126         write_c0_wired(tlbidx + 1);
127         write_c0_index(tlbidx);
128         mtc0_tlbw_hazard();
129         tlb_write_indexed();
130         tlbw_use_hazard();
131         write_c0_entryhi(old_ctx);
132         if (cpu_has_mmid)
133                 write_c0_memorymapid(old_mmid);
134         local_irq_restore(flags);
135
136         return (void*) vaddr;
137 }
138
139 void *kmap_coherent(struct page *page, unsigned long addr)
140 {
141         return __kmap_pgprot(page, addr, PAGE_KERNEL);
142 }
143
144 void *kmap_noncoherent(struct page *page, unsigned long addr)
145 {
146         return __kmap_pgprot(page, addr, PAGE_KERNEL_NC);
147 }
148
/*
 * Tear down the most recently wired TLB entry: shrink the wired count by
 * one, overwrite that slot with a unique EntryHi and zeroed EntryLo values,
 * then re-enable page faults and preemption.
 */
void kunmap_coherent(void)
{
        unsigned long irq_flags, saved_entryhi;
        unsigned int last_wired;

        local_irq_save(irq_flags);
        saved_entryhi = read_c0_entryhi();

        /* Drop the last wired slot and blank the entry that lived there. */
        last_wired = num_wired_entries() - 1;
        write_c0_wired(last_wired);
        write_c0_index(last_wired);
        write_c0_entryhi(UNIQUE_ENTRYHI(last_wired));
        write_c0_entrylo0(0);
        write_c0_entrylo1(0);
        mtc0_tlbw_hazard();
        tlb_write_indexed();
        tlbw_use_hazard();

        write_c0_entryhi(saved_entryhi);
        local_irq_restore(irq_flags);

        pagefault_enable();
        preempt_enable();
}
170
171 void copy_user_highpage(struct page *to, struct page *from,
172         unsigned long vaddr, struct vm_area_struct *vma)
173 {
174         void *vfrom, *vto;
175
176         vto = kmap_atomic(to);
177         if (cpu_has_dc_aliases &&
178             page_mapcount(from) && !Page_dcache_dirty(from)) {
179                 vfrom = kmap_coherent(from, vaddr);
180                 copy_page(vto, vfrom);
181                 kunmap_coherent();
182         } else {
183                 vfrom = kmap_atomic(from);
184                 copy_page(vto, vfrom);
185                 kunmap_atomic(vfrom);
186         }
187         if ((!cpu_has_ic_fills_f_dc) ||
188             pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
189                 flush_data_cache_page((unsigned long)vto);
190         kunmap_atomic(vto);
191         /* Make sure this page is cleared on other CPU's too before using it */
192         smp_wmb();
193 }
194
195 void copy_to_user_page(struct vm_area_struct *vma,
196         struct page *page, unsigned long vaddr, void *dst, const void *src,
197         unsigned long len)
198 {
199         if (cpu_has_dc_aliases &&
200             page_mapcount(page) && !Page_dcache_dirty(page)) {
201                 void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
202                 memcpy(vto, src, len);
203                 kunmap_coherent();
204         } else {
205                 memcpy(dst, src, len);
206                 if (cpu_has_dc_aliases)
207                         SetPageDcacheDirty(page);
208         }
209         if (vma->vm_flags & VM_EXEC)
210                 flush_cache_page(vma, vaddr, page_to_pfn(page));
211 }
212
213 void copy_from_user_page(struct vm_area_struct *vma,
214         struct page *page, unsigned long vaddr, void *dst, const void *src,
215         unsigned long len)
216 {
217         if (cpu_has_dc_aliases &&
218             page_mapcount(page) && !Page_dcache_dirty(page)) {
219                 void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
220                 memcpy(dst, vfrom, len);
221                 kunmap_coherent();
222         } else {
223                 memcpy(dst, src, len);
224                 if (cpu_has_dc_aliases)
225                         SetPageDcacheDirty(page);
226         }
227 }
228 EXPORT_SYMBOL_GPL(copy_from_user_page);
229
230 void __init fixrange_init(unsigned long start, unsigned long end,
231         pgd_t *pgd_base)
232 {
233 #ifdef CONFIG_HIGHMEM
234         pgd_t *pgd;
235         pud_t *pud;
236         pmd_t *pmd;
237         pte_t *pte;
238         int i, j, k;
239         unsigned long vaddr;
240
241         vaddr = start;
242         i = pgd_index(vaddr);
243         j = pud_index(vaddr);
244         k = pmd_index(vaddr);
245         pgd = pgd_base + i;
246
247         for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
248                 pud = (pud_t *)pgd;
249                 for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) {
250                         pmd = (pmd_t *)pud;
251                         for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) {
252                                 if (pmd_none(*pmd)) {
253                                         pte = (pte_t *) memblock_alloc_low(PAGE_SIZE,
254                                                                            PAGE_SIZE);
255                                         if (!pte)
256                                                 panic("%s: Failed to allocate %lu bytes align=%lx\n",
257                                                       __func__, PAGE_SIZE,
258                                                       PAGE_SIZE);
259
260                                         set_pmd(pmd, __pmd((unsigned long)pte));
261                                         BUG_ON(pte != pte_offset_kernel(pmd, 0));
262                                 }
263                                 vaddr += PMD_SIZE;
264                         }
265                         k = 0;
266                 }
267                 j = 0;
268         }
269 #endif
270 }
271
272 struct maar_walk_info {
273         struct maar_config cfg[16];
274         unsigned int num_cfg;
275 };
276
277 static int maar_res_walk(unsigned long start_pfn, unsigned long nr_pages,
278                          void *data)
279 {
280         struct maar_walk_info *wi = data;
281         struct maar_config *cfg = &wi->cfg[wi->num_cfg];
282         unsigned int maar_align;
283
284         /* MAAR registers hold physical addresses right shifted by 4 bits */
285         maar_align = BIT(MIPS_MAAR_ADDR_SHIFT + 4);
286
287         /* Fill in the MAAR config entry */
288         cfg->lower = ALIGN(PFN_PHYS(start_pfn), maar_align);
289         cfg->upper = ALIGN_DOWN(PFN_PHYS(start_pfn + nr_pages), maar_align) - 1;
290         cfg->attrs = MIPS_MAAR_S;
291
292         /* Ensure we don't overflow the cfg array */
293         if (!WARN_ON(wi->num_cfg >= ARRAY_SIZE(wi->cfg)))
294                 wi->num_cfg++;
295
296         return 0;
297 }
298
299
300 unsigned __weak platform_maar_init(unsigned num_pairs)
301 {
302         unsigned int num_configured;
303         struct maar_walk_info wi;
304
305         wi.num_cfg = 0;
306         walk_system_ram_range(0, max_pfn, &wi, maar_res_walk);
307
308         num_configured = maar_config(wi.cfg, wi.num_cfg, num_pairs);
309         if (num_configured < wi.num_cfg)
310                 pr_warn("Not enough MAAR pairs (%u) for all memory regions (%u)\n",
311                         num_pairs, wi.num_cfg);
312
313         return num_configured;
314 }
315
316 void maar_init(void)
317 {
318         unsigned num_maars, used, i;
319         phys_addr_t lower, upper, attr;
320         static struct {
321                 struct maar_config cfgs[3];
322                 unsigned used;
323         } recorded = { { { 0 } }, 0 };
324
325         if (!cpu_has_maar)
326                 return;
327
328         /* Detect the number of MAARs */
329         write_c0_maari(~0);
330         back_to_back_c0_hazard();
331         num_maars = read_c0_maari() + 1;
332
333         /* MAARs should be in pairs */
334         WARN_ON(num_maars % 2);
335
336         /* Set MAARs using values we recorded already */
337         if (recorded.used) {
338                 used = maar_config(recorded.cfgs, recorded.used, num_maars / 2);
339                 BUG_ON(used != recorded.used);
340         } else {
341                 /* Configure the required MAARs */
342                 used = platform_maar_init(num_maars / 2);
343         }
344
345         /* Disable any further MAARs */
346         for (i = (used * 2); i < num_maars; i++) {
347                 write_c0_maari(i);
348                 back_to_back_c0_hazard();
349                 write_c0_maar(0);
350                 back_to_back_c0_hazard();
351         }
352
353         if (recorded.used)
354                 return;
355
356         pr_info("MAAR configuration:\n");
357         for (i = 0; i < num_maars; i += 2) {
358                 write_c0_maari(i);
359                 back_to_back_c0_hazard();
360                 upper = read_c0_maar();
361
362                 write_c0_maari(i + 1);
363                 back_to_back_c0_hazard();
364                 lower = read_c0_maar();
365
366                 attr = lower & upper;
367                 lower = (lower & MIPS_MAAR_ADDR) << 4;
368                 upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
369
370                 pr_info("  [%d]: ", i / 2);
371                 if (!(attr & MIPS_MAAR_VL)) {
372                         pr_cont("disabled\n");
373                         continue;
374                 }
375
376                 pr_cont("%pa-%pa", &lower, &upper);
377
378                 if (attr & MIPS_MAAR_S)
379                         pr_cont(" speculate");
380
381                 pr_cont("\n");
382
383                 /* Record the setup for use on secondary CPUs */
384                 if (used <= ARRAY_SIZE(recorded.cfgs)) {
385                         recorded.cfgs[recorded.used].lower = lower;
386                         recorded.cfgs[recorded.used].upper = upper;
387                         recorded.cfgs[recorded.used].attrs = attr;
388                         recorded.used++;
389                 }
390         }
391 }
392
393 #ifndef CONFIG_NEED_MULTIPLE_NODES
394 void __init paging_init(void)
395 {
396         unsigned long max_zone_pfns[MAX_NR_ZONES];
397
398         pagetable_init();
399
400 #ifdef CONFIG_HIGHMEM
401         kmap_init();
402 #endif
403 #ifdef CONFIG_ZONE_DMA
404         max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
405 #endif
406 #ifdef CONFIG_ZONE_DMA32
407         max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
408 #endif
409         max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
410 #ifdef CONFIG_HIGHMEM
411         max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
412
413         if (cpu_has_dc_aliases && max_low_pfn != highend_pfn) {
414                 printk(KERN_WARNING "This processor doesn't support highmem."
415                        " %ldk highmem ignored\n",
416                        (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10));
417                 max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn;
418         }
419 #endif
420
421         free_area_init_nodes(max_zone_pfns);
422 }
423
424 #ifdef CONFIG_64BIT
/* kcore list entry that mem_init() may register for the CKSEG0 range. */
425 static struct kcore_list kcore_kseg0;
426 #endif
427
428 static inline void __init mem_init_free_highmem(void)
429 {
430 #ifdef CONFIG_HIGHMEM
431         unsigned long tmp;
432
433         if (cpu_has_dc_aliases)
434                 return;
435
436         for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
437                 struct page *page = pfn_to_page(tmp);
438
439                 if (!memblock_is_memory(PFN_PHYS(tmp)))
440                         SetPageReserved(page);
441                 else
442                         free_highmem_page(page);
443         }
444 #endif
445 }
446
447 void __init mem_init(void)
448 {
449         /*
450          * When _PFN_SHIFT is greater than PAGE_SHIFT we won't have enough PTE
451          * bits to hold a full 32b physical address on MIPS32 systems.
452          */
453         BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT));
454
455 #ifdef CONFIG_HIGHMEM
456 #ifdef CONFIG_DISCONTIGMEM
457 #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
458 #endif
459         max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;
460 #else
461         max_mapnr = max_low_pfn;
462 #endif
463         high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
464
465         maar_init();
466         memblock_free_all();
467         setup_zero_pages();     /* Setup zeroed pages.  */
468         mem_init_free_highmem();
469         mem_init_print_info(NULL);
470
471 #ifdef CONFIG_64BIT
472         if ((unsigned long) &_text > (unsigned long) CKSEG0)
473                 /* The -4 is a hack so that user tools don't have to handle
474                    the overflow.  */
475                 kclist_add(&kcore_kseg0, (void *) CKSEG0,
476                                 0x80000000 - 4, KCORE_TEXT);
477 #endif
478 }
479 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
480
481 void free_init_pages(const char *what, unsigned long begin, unsigned long end)
482 {
483         unsigned long pfn;
484
485         for (pfn = PFN_UP(begin); pfn < PFN_DOWN(end); pfn++) {
486                 struct page *page = pfn_to_page(pfn);
487                 void *addr = phys_to_virt(PFN_PHYS(pfn));
488
489                 memset(addr, POISON_FREE_INITMEM, PAGE_SIZE);
490                 free_reserved_page(page);
491         }
492         printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
493 }
494
495 void (*free_init_pages_eva)(void *begin, void *end) = NULL;
496
497 void __ref free_initmem(void)
498 {
499         prom_free_prom_memory();
500         /*
501          * Let the platform define a specific function to free the
502          * init section since EVA may have used any possible mapping
503          * between virtual and physical addresses.
504          */
505         if (free_init_pages_eva)
506                 free_init_pages_eva((void *)&__init_begin, (void *)&__init_end);
507         else
508                 free_initmem_default(POISON_FREE_INITMEM);
509 }
510
511 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
 * Per-CPU offset table; setup_per_cpu_areas() fills in one entry for every
 * possible CPU.
 */
512 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
513 EXPORT_SYMBOL(__per_cpu_offset);
514
515 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
516 {
517         return node_distance(cpu_to_node(from), cpu_to_node(to));
518 }
519
520 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
521                                        size_t align)
522 {
523         return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
524                                       MEMBLOCK_ALLOC_ACCESSIBLE,
525                                       cpu_to_node(cpu));
526 }
527
528 static void __init pcpu_fc_free(void *ptr, size_t size)
529 {
530         memblock_free_early(__pa(ptr), size);
531 }
532
533 void __init setup_per_cpu_areas(void)
534 {
535         unsigned long delta;
536         unsigned int cpu;
537         int rc;
538
539         /*
540          * Always reserve area for module percpu variables.  That's
541          * what the legacy allocator did.
542          */
543         rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
544                                     PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
545                                     pcpu_cpu_distance,
546                                     pcpu_fc_alloc, pcpu_fc_free);
547         if (rc < 0)
548                 panic("Failed to initialize percpu areas.");
549
550         delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
551         for_each_possible_cpu(cpu)
552                 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
553 }
554 #endif
555
556 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
/*
 * One slot per CPU; only defined when CONFIG_MIPS_PGD_C0_CONTEXT is off.
 * NOTE(review): presumably holds each CPU's current PGD - it is not written
 * in this part of the file, confirm against the users.
 */
557 unsigned long pgd_current[NR_CPUS];
558 #endif
559
560 /*
561  * Align swapper_pg_dir to 64K, which allows its address to be loaded
562  * with a single LUI instruction in the TLB handlers.  If we used
563  * __aligned(64K), its size would get rounded up to the alignment
564  * size, and waste space.  So we place it in its own section and align
565  * it in the linker script.
566  */
567 pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
/*
 * Page-aligned, zero-initialised (bss) tables for each page-table level that
 * is not folded.
 * NOTE(review): going by the names these back "invalid" entries at each
 * level - confirm against the page-table code.
 */
568 #ifndef __PAGETABLE_PUD_FOLDED
569 pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
570 #endif
571 #ifndef __PAGETABLE_PMD_FOLDED
572 pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
573 EXPORT_SYMBOL_GPL(invalid_pmd_table);
574 #endif
575 pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
576 EXPORT_SYMBOL(invalid_pte_table);