powerpc/mm: Move nohash specifics in subdirectory mm/nohash
author     Christophe Leroy <christophe.leroy@c-s.fr>
           Fri, 29 Mar 2019 10:00:02 +0000
committer  Michael Ellerman <mpe@ellerman.id.au>
           Thu, 2 May 2019 15:20:22 +0000 (01:20 +1000)
Many files in arch/powerpc/mm are only for nohash. This patch
creates a subdirectory for them.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Shorten new filenames]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
22 files changed:
arch/powerpc/mm/40x_mmu.c [deleted file]
arch/powerpc/mm/44x_mmu.c [deleted file]
arch/powerpc/mm/8xx_mmu.c [deleted file]
arch/powerpc/mm/Makefile
arch/powerpc/mm/fsl_booke_mmu.c [deleted file]
arch/powerpc/mm/hugetlbpage-book3e.c [deleted file]
arch/powerpc/mm/mmu_context_nohash.c [deleted file]
arch/powerpc/mm/nohash/40x.c [new file with mode: 0644]
arch/powerpc/mm/nohash/44x.c [new file with mode: 0644]
arch/powerpc/mm/nohash/8xx.c [new file with mode: 0644]
arch/powerpc/mm/nohash/Makefile [new file with mode: 0644]
arch/powerpc/mm/nohash/book3e_hugetlbpage.c [new file with mode: 0644]
arch/powerpc/mm/nohash/book3e_pgtable.c [new file with mode: 0644]
arch/powerpc/mm/nohash/fsl_booke.c [new file with mode: 0644]
arch/powerpc/mm/nohash/mmu_context.c [new file with mode: 0644]
arch/powerpc/mm/nohash/tlb.c [new file with mode: 0644]
arch/powerpc/mm/nohash/tlb_low.S [new file with mode: 0644]
arch/powerpc/mm/nohash/tlb_low_64e.S [new file with mode: 0644]
arch/powerpc/mm/pgtable-book3e.c [deleted file]
arch/powerpc/mm/tlb_low_64e.S [deleted file]
arch/powerpc/mm/tlb_nohash.c [deleted file]
arch/powerpc/mm/tlb_nohash_low.S [deleted file]

diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
deleted file mode 100644 (file)
index 460459b..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- *  -- paulus
- *
- *  Derived from arch/ppc/mm/init.c:
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
- *    Copyright (C) 1996 Paul Mackerras
- *
- *  Derived from "arch/i386/mm/init.c"
- *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/smp.h>
-#include <asm/bootx.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-
-#include <mm/mmu_decl.h>
-
-extern int __map_without_ltlbs;
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
-       /*
-        * The Zone Protection Register (ZPR) defines how protection will
-        * be applied to every page which is a member of a given zone. At
-        * present, we utilize only two of the 4xx's zones.
-        * The zone index bits (of ZSEL) in the PTE are used for software
-        * indicators, except the LSB.  For user access, zone 1 is used,
-        * for kernel access, zone 0 is used.  We set all but zone 1
-        * to zero, allowing only kernel access as indicated in the PTE.
-        * For zone 1, we set a 01 binary (a value of 10 will not work)
-        * to allow user access as indicated in the PTE.  This also allows
-        * kernel access as indicated in the PTE.
-        */
-
-        mtspr(SPRN_ZPR, 0x10000000);
-
-       flush_instruction_cache();
-
-       /*
-        * Set up the real-mode cache parameters for the exception vector
-        * handlers (which are run in real-mode).
-        */
-
-        mtspr(SPRN_DCWR, 0x00000000);  /* All caching is write-back */
-
-        /*
-        * Cache instruction and data space where the exception
-        * vectors and the kernel live in real-mode.
-        */
-
-        mtspr(SPRN_DCCR, 0xFFFF0000);  /* 2GByte of data space at 0x0. */
-        mtspr(SPRN_ICCR, 0xFFFF0000);  /* 2GByte of instr. space at 0x0. */
-}
-
-#define LARGE_PAGE_SIZE_16M    (1<<24)
-#define LARGE_PAGE_SIZE_4M     (1<<22)
-
-unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
-{
-       unsigned long v, s, mapped;
-       phys_addr_t p;
-
-       v = KERNELBASE;
-       p = 0;
-       s = total_lowmem;
-
-       if (__map_without_ltlbs)
-               return 0;
-
-       while (s >= LARGE_PAGE_SIZE_16M) {
-               pmd_t *pmdp;
-               unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE;
-
-               pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
-               *pmdp++ = __pmd(val);
-               *pmdp++ = __pmd(val);
-               *pmdp++ = __pmd(val);
-               *pmdp++ = __pmd(val);
-
-               v += LARGE_PAGE_SIZE_16M;
-               p += LARGE_PAGE_SIZE_16M;
-               s -= LARGE_PAGE_SIZE_16M;
-       }
-
-       while (s >= LARGE_PAGE_SIZE_4M) {
-               pmd_t *pmdp;
-               unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE;
-
-               pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
-               *pmdp = __pmd(val);
-
-               v += LARGE_PAGE_SIZE_4M;
-               p += LARGE_PAGE_SIZE_4M;
-               s -= LARGE_PAGE_SIZE_4M;
-       }
-
-       mapped = total_lowmem - s;
-
-       /* If the size of RAM is not an exact power of two, we may not
-        * have covered RAM in its entirety with 16 and 4 MiB
-        * pages. Consequently, restrict the top end of RAM currently
-        * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
-        * coverage with normal-sized pages (or other reasons) do not
-        * attempt to allocate outside the allowed range.
-        */
-       memblock_set_current_limit(mapped);
-
-       return mapped;
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
-                               phys_addr_t first_memblock_size)
-{
-       /* We don't currently support the first MEMBLOCK not mapping 0
-        * physical on those processors
-        */
-       BUG_ON(first_memblock_base != 0);
-
-       /* 40x can only access 16MB at the moment (see head_40x.S) */
-       memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-}
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
deleted file mode 100644 (file)
index c07983e..0000000
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Modifications by Matt Porter (mporter@mvista.com) to support
- * PPC44x Book E processors.
- *
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- *  -- paulus
- *
- *  Derived from arch/ppc/mm/init.c:
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
- *    Copyright (C) 1996 Paul Mackerras
- *
- *  Derived from "arch/i386/mm/init.c"
- *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/init.h>
-#include <linux/memblock.h>
-
-#include <asm/mmu.h>
-#include <asm/page.h>
-#include <asm/cacheflush.h>
-#include <asm/code-patching.h>
-
-#include <mm/mmu_decl.h>
-
-/* Used by the 44x TLB replacement exception handler.
- * Just needed it declared someplace.
- */
-unsigned int tlb_44x_index; /* = 0 */
-unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
-int icache_44x_need_flush;
-
-unsigned long tlb_47x_boltmap[1024/8];
-
-static void ppc44x_update_tlb_hwater(void)
-{
-       /* The TLB miss handlers hard code the watermark in a cmpli
-        * instruction to improve performance rather than loading it
-        * from the global variable. Thus, we patch the instructions
-        * in the 2 TLB miss handlers when updating the value
-        */
-       modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater);
-       modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater);
-}
-
-/*
- * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU
- */
-static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
-{
-       unsigned int entry = tlb_44x_hwater--;
-
-       ppc44x_update_tlb_hwater();
-
-       mtspr(SPRN_MMUCR, 0);
-
-       __asm__ __volatile__(
-               "tlbwe  %2,%3,%4\n"
-               "tlbwe  %1,%3,%5\n"
-               "tlbwe  %0,%3,%6\n"
-       :
-       : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
-         "r" (phys),
-         "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
-         "r" (entry),
-         "i" (PPC44x_TLB_PAGEID),
-         "i" (PPC44x_TLB_XLAT),
-         "i" (PPC44x_TLB_ATTRIB));
-}
-
-static int __init ppc47x_find_free_bolted(void)
-{
-       unsigned int mmube0 = mfspr(SPRN_MMUBE0);
-       unsigned int mmube1 = mfspr(SPRN_MMUBE1);
-
-       if (!(mmube0 & MMUBE0_VBE0))
-               return 0;
-       if (!(mmube0 & MMUBE0_VBE1))
-               return 1;
-       if (!(mmube0 & MMUBE0_VBE2))
-               return 2;
-       if (!(mmube1 & MMUBE1_VBE3))
-               return 3;
-       if (!(mmube1 & MMUBE1_VBE4))
-               return 4;
-       if (!(mmube1 & MMUBE1_VBE5))
-               return 5;
-       return -1;
-}
-
-static void __init ppc47x_update_boltmap(void)
-{
-       unsigned int mmube0 = mfspr(SPRN_MMUBE0);
-       unsigned int mmube1 = mfspr(SPRN_MMUBE1);
-
-       if (mmube0 & MMUBE0_VBE0)
-               __set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff,
-                         tlb_47x_boltmap);
-       if (mmube0 & MMUBE0_VBE1)
-               __set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff,
-                         tlb_47x_boltmap);
-       if (mmube0 & MMUBE0_VBE2)
-               __set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff,
-                         tlb_47x_boltmap);
-       if (mmube1 & MMUBE1_VBE3)
-               __set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff,
-                         tlb_47x_boltmap);
-       if (mmube1 & MMUBE1_VBE4)
-               __set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff,
-                         tlb_47x_boltmap);
-       if (mmube1 & MMUBE1_VBE5)
-               __set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff,
-                         tlb_47x_boltmap);
-}
-
-/*
- * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
- */
-static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
-{
-       unsigned int rA;
-       int bolted;
-
-       /* Base rA is HW way select, way 0, bolted bit set */
-       rA = 0x88000000;
-
-       /* Look for a bolted entry slot */
-       bolted = ppc47x_find_free_bolted();
-       BUG_ON(bolted < 0);
-
-       /* Insert bolted slot number */
-       rA |= bolted << 24;
-
-       pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n",
-                virt, phys, bolted);
-
-       mtspr(SPRN_MMUCR, 0);
-
-       __asm__ __volatile__(
-               "tlbwe  %2,%3,0\n"
-               "tlbwe  %1,%3,1\n"
-               "tlbwe  %0,%3,2\n"
-               :
-               : "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR |
-                      PPC47x_TLB2_SX
-#ifdef CONFIG_SMP
-                      | PPC47x_TLB2_M
-#endif
-                      ),
-                 "r" (phys),
-                 "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M),
-                 "r" (rA));
-}
-
-void __init MMU_init_hw(void)
-{
-       /* This is not useful on 47x but won't hurt either */
-       ppc44x_update_tlb_hwater();
-
-       flush_instruction_cache();
-}
-
-unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
-{
-       unsigned long addr;
-       unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
-
-       /* Pin in enough TLBs to cover any lowmem not covered by the
-        * initial 256M mapping established in head_44x.S */
-       for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
-            addr += PPC_PIN_SIZE) {
-               if (mmu_has_feature(MMU_FTR_TYPE_47x))
-                       ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
-               else
-                       ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
-       }
-       if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
-               ppc47x_update_boltmap();
-
-#ifdef DEBUG
-               {
-                       int i;
-
-                       printk(KERN_DEBUG "bolted entries: ");
-                       for (i = 0; i < 255; i++) {
-                               if (test_bit(i, tlb_47x_boltmap))
-                                       printk("%d ", i);
-                       }
-                       printk("\n");
-               }
-#endif /* DEBUG */
-       }
-       return total_lowmem;
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
-                               phys_addr_t first_memblock_size)
-{
-       u64 size;
-
-#ifndef CONFIG_NONSTATIC_KERNEL
-       /* We don't currently support the first MEMBLOCK not mapping 0
-        * physical on those processors
-        */
-       BUG_ON(first_memblock_base != 0);
-#endif
-
-       /* 44x has a 256M TLB entry pinned at boot */
-       size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE));
-       memblock_set_current_limit(first_memblock_base + size);
-}
-
-#ifdef CONFIG_SMP
-void __init mmu_init_secondary(int cpu)
-{
-       unsigned long addr;
-       unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
-
-       /* Pin in enough TLBs to cover any lowmem not covered by the
-        * initial 256M mapping established in head_44x.S
-        *
-        * WARNING: This is called with only the first 256M of the
-        * linear mapping in the TLB and we can't take faults yet
-        * so beware of what this code uses. It runs off a temporary
-        * stack. current (r2) isn't initialized, smp_processor_id()
-        * will not work, current thread info isn't accessible, ...
-        */
-       for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
-            addr += PPC_PIN_SIZE) {
-               if (mmu_has_feature(MMU_FTR_TYPE_47x))
-                       ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
-               else
-                       ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
-       }
-}
-#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
deleted file mode 100644 (file)
index 70d55b6..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * This file contains the routines for initializing the MMU
- * on the 8xx series of chips.
- *  -- christophe
- *
- *  Derived from arch/powerpc/mm/40x_mmu.c:
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/memblock.h>
-#include <linux/mmu_context.h>
-#include <asm/fixmap.h>
-#include <asm/code-patching.h>
-
-#include <mm/mmu_decl.h>
-
-#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
-
-extern int __map_without_ltlbs;
-
-static unsigned long block_mapped_ram;
-
-/*
- * Return PA for this VA if it is in an area mapped with LTLBs.
- * Otherwise, returns 0
- */
-phys_addr_t v_block_mapped(unsigned long va)
-{
-       unsigned long p = PHYS_IMMR_BASE;
-
-       if (__map_without_ltlbs)
-               return 0;
-       if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
-               return p + va - VIRT_IMMR_BASE;
-       if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
-               return __pa(va);
-       return 0;
-}
-
-/*
- * Return VA for a given PA mapped with LTLBs or 0 if not mapped
- */
-unsigned long p_block_mapped(phys_addr_t pa)
-{
-       unsigned long p = PHYS_IMMR_BASE;
-
-       if (__map_without_ltlbs)
-               return 0;
-       if (pa >= p && pa < p + IMMR_SIZE)
-               return VIRT_IMMR_BASE + pa - p;
-       if (pa < block_mapped_ram)
-               return (unsigned long)__va(pa);
-       return 0;
-}
-
-#define LARGE_PAGE_SIZE_8M     (1<<23)
-
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
-       /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
-       if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
-               unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
-               unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
-               int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28;
-               unsigned long addr = 0;
-               unsigned long mem = total_lowmem;
-
-               for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
-                       mtspr(SPRN_MD_CTR, ctr | (i << 8));
-                       mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
-                       mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
-                       mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
-                       addr += LARGE_PAGE_SIZE_8M;
-                       mem -= LARGE_PAGE_SIZE_8M;
-               }
-       }
-}
-
-static void __init mmu_mapin_immr(void)
-{
-       unsigned long p = PHYS_IMMR_BASE;
-       unsigned long v = VIRT_IMMR_BASE;
-       int offset;
-
-       for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
-               map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
-}
-
-static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
-{
-       modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
-}
-
-static void mmu_patch_addis(s32 *site, long simm)
-{
-       unsigned int instr = *(unsigned int *)patch_site_addr(site);
-
-       instr &= 0xffff0000;
-       instr |= ((unsigned long)simm) >> 16;
-       patch_instruction_site(site, instr);
-}
-
-unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
-{
-       unsigned long mapped;
-
-       if (__map_without_ltlbs) {
-               mapped = 0;
-               mmu_mapin_immr();
-               if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR))
-                       patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
-               if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-                       mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
-       } else {
-               mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
-               if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-                       mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
-                                           _ALIGN(__pa(_einittext), 8 << 20));
-       }
-
-       mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
-       mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped);
-
-       /* If the size of RAM is not an exact power of two, we may not
-        * have covered RAM in its entirety with 8 MiB
-        * pages. Consequently, restrict the top end of RAM currently
-        * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
-        * coverage with normal-sized pages (or other reasons) do not
-        * attempt to allocate outside the allowed range.
-        */
-       if (mapped)
-               memblock_set_current_limit(mapped);
-
-       block_mapped_ram = mapped;
-
-       return mapped;
-}
-
-void mmu_mark_initmem_nx(void)
-{
-       if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
-               mmu_patch_addis(&patch__itlbmiss_linmem_top8,
-                               -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
-       if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-               mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
-}
-
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void mmu_mark_rodata_ro(void)
-{
-       if (CONFIG_DATA_SHIFT < 23)
-               mmu_patch_addis(&patch__dtlbmiss_romem_top8,
-                               -__pa(((unsigned long)_sinittext) &
-                                     ~(LARGE_PAGE_SIZE_8M - 1)));
-       mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
-}
-#endif
-
-void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
-                                      phys_addr_t first_memblock_size)
-{
-       /* We don't currently support the first MEMBLOCK not mapping 0
-        * physical on those processors
-        */
-       BUG_ON(first_memblock_base != 0);
-
-       /* 8xx can only access 32MB at the moment */
-       memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000));
-}
-
-/*
- * Set up to use a given MMU context.
- * id is context number, pgd is PGD pointer.
- *
- * We place the physical address of the new task page directory loaded
- * into the MMU base register, and set the ASID compare register with
- * the new "context."
- */
-void set_context(unsigned long id, pgd_t *pgd)
-{
-       s16 offset = (s16)(__pa(swapper_pg_dir));
-
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is passed as second argument.
-        */
-       if (IS_ENABLED(CONFIG_BDI_SWITCH))
-               abatron_pteptrs[1] = pgd;
-
-       /* Register M_TWB will contain base address of level 1 table minus the
-        * lower part of the kernel PGDIR base address, so that all accesses to
-        * level 1 table are done relative to lower part of kernel PGDIR base
-        * address.
-        */
-       mtspr(SPRN_M_TWB, __pa(pgd) - offset);
-
-       /* Update context */
-       mtspr(SPRN_M_CASID, id - 1);
-       /* sync */
-       mb();
-}
-
-void flush_instruction_cache(void)
-{
-       isync();
-       mtspr(SPRN_IC_CST, IDC_INVALL);
-       isync();
-}
-
-#ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
-{
-       if (disabled)
-               return;
-
-       pr_info("Activating Kernel Userspace Execution Prevention\n");
-
-       mtspr(SPRN_MI_AP, MI_APG_KUEP);
-}
-#endif
-
-#ifdef CONFIG_PPC_KUAP
-void __init setup_kuap(bool disabled)
-{
-       pr_info("Activating Kernel Userspace Access Protection\n");
-
-       if (disabled)
-               pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
-
-       mtspr(SPRN_MD_AP, MD_APG_KUAP);
-}
-#endif
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 68cb1e8..08557ba 100644 (file)
@@ -8,30 +8,15 @@ ccflags-$(CONFIG_PPC64)       := $(NO_MINIMAL_TOC)
 obj-y                          := fault.o mem.o pgtable.o mmap.o \
                                   init_$(BITS).o pgtable_$(BITS).o \
                                   init-common.o mmu_context.o drmem.o
-obj-$(CONFIG_PPC_MMU_NOHASH)   += mmu_context_nohash.o tlb_nohash.o \
-                                  tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E)       += tlb_low_$(BITS)e.o
-obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
+obj-$(CONFIG_PPC_MMU_NOHASH)   += nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)    += book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)    += book3s64/
 obj-$(CONFIG_PPC_BOOK3S_64)    += pgtable-frag.o
 obj-$(CONFIG_PPC32)            += pgtable-frag.o
-obj-$(CONFIG_40x)              += 40x_mmu.o
-obj-$(CONFIG_44x)              += 44x_mmu.o
-obj-$(CONFIG_PPC_8xx)          += 8xx_mmu.o
-obj-$(CONFIG_PPC_FSL_BOOK3E)   += fsl_booke_mmu.o
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES)    += slice.o
 obj-y                          += hugetlbpage.o
-ifdef CONFIG_HUGETLB_PAGE
-obj-$(CONFIG_PPC_BOOK3E_MMU)   += hugetlbpage-book3e.o
-endif
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)          += highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
 obj-$(CONFIG_PPC_PTDUMP)       += ptdump/
-
-# Disable kcov instrumentation on sensitive code
-# This is necessary for booting with kcov enabled on book3e machines
-KCOV_INSTRUMENT_tlb_nohash.o := n
-KCOV_INSTRUMENT_fsl_booke_mmu.o := n
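
The new arch/powerpc/mm/nohash/Makefile created by this patch is listed in the change summary but its contents are not reproduced in this excerpt. Reconstructed from the rules removed from arch/powerpc/mm/Makefile above, with the filename shortening applied, it should look roughly like the sketch below; the exact ordering, the SPDX line and the whitespace are assumptions. The common objects can move to obj-y because the parent Makefile now only descends into nohash/ when CONFIG_PPC_MMU_NOHASH is set.

    # SPDX-License-Identifier: GPL-2.0

    ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)

    # always built when this directory is entered (CONFIG_PPC_MMU_NOHASH)
    obj-y                          += mmu_context.o tlb.o tlb_low.o
    obj-$(CONFIG_PPC_BOOK3E)       += tlb_low_64e.o
    obj-$(CONFIG_PPC_BOOK3E_64)    += book3e_pgtable.o
    obj-$(CONFIG_40x)              += 40x.o
    obj-$(CONFIG_44x)              += 44x.o
    obj-$(CONFIG_PPC_8xx)          += 8xx.o
    obj-$(CONFIG_PPC_FSL_BOOK3E)   += fsl_booke.o
    ifdef CONFIG_HUGETLB_PAGE
    obj-$(CONFIG_PPC_BOOK3E_MMU)   += book3e_hugetlbpage.o
    endif

    # Disable kcov instrumentation on sensitive code
    # This is necessary for booting with kcov enabled on book3e machines
    KCOV_INSTRUMENT_tlb.o := n
    KCOV_INSTRUMENT_fsl_booke.o := n
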
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
deleted file mode 100644 (file)
index 71a1a36..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
- * E500 Book E processors.
- *
- * Copyright 2004,2010 Freescale Semiconductor, Inc.
- *
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- *  -- paulus
- *
- *  Derived from arch/ppc/mm/init.c:
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
- *    Copyright (C) 1996 Paul Mackerras
- *
- *  Derived from "arch/i386/mm/init.c"
- *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-#include <asm/paca.h>
-
-#include <mm/mmu_decl.h>
-
-unsigned int tlbcam_index;
-
-#define NUM_TLBCAMS    (64)
-struct tlbcam TLBCAM[NUM_TLBCAMS];
-
-struct tlbcamrange {
-       unsigned long start;
-       unsigned long limit;
-       phys_addr_t phys;
-} tlbcam_addrs[NUM_TLBCAMS];
-
-unsigned long tlbcam_sz(int idx)
-{
-       return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
-}
-
-#ifdef CONFIG_FSL_BOOKE
-/*
- * Return PA for this VA if it is mapped by a CAM, or 0
- */
-phys_addr_t v_block_mapped(unsigned long va)
-{
-       int b;
-       for (b = 0; b < tlbcam_index; ++b)
-               if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
-                       return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
-       return 0;
-}
-
-/*
- * Return VA for a given PA or 0 if not mapped
- */
-unsigned long p_block_mapped(phys_addr_t pa)
-{
-       int b;
-       for (b = 0; b < tlbcam_index; ++b)
-               if (pa >= tlbcam_addrs[b].phys
-                       && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
-                             +tlbcam_addrs[b].phys)
-                       return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
-       return 0;
-}
-#endif
-
-/*
- * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
- * in particular size must be a power of 4 between 4k and the max supported by
- * an implementation; max may further be limited by what can be represented in
- * an unsigned long (for example, 32-bit implementations cannot support a 4GB
- * size).
- */
-static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
-               unsigned long size, unsigned long flags, unsigned int pid)
-{
-       unsigned int tsize;
-
-       tsize = __ilog2(size) - 10;
-
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-       if ((flags & _PAGE_NO_CACHE) == 0)
-               flags |= _PAGE_COHERENT;
-#endif
-
-       TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
-       TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
-       TLBCAM[index].MAS2 = virt & PAGE_MASK;
-
-       TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
-       TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
-       TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
-       TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
-       TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
-
-       TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR;
-       TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
-       if (mmu_has_feature(MMU_FTR_BIG_PHYS))
-               TLBCAM[index].MAS7 = (u64)phys >> 32;
-
-       /* Below is unlikely -- only for large user pages or similar */
-       if (pte_user(__pte(flags))) {
-          TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
-          TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
-       }
-
-       tlbcam_addrs[index].start = virt;
-       tlbcam_addrs[index].limit = virt + size - 1;
-       tlbcam_addrs[index].phys = phys;
-}
-
-unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
-                         phys_addr_t phys)
-{
-       unsigned int camsize = __ilog2(ram);
-       unsigned int align = __ffs(virt | phys);
-       unsigned long max_cam;
-
-       if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
-               /* Convert (4^max) kB to (2^max) bytes */
-               max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
-               camsize &= ~1U;
-               align &= ~1U;
-       } else {
-               /* Convert (2^max) kB to (2^max) bytes */
-               max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
-       }
-
-       if (camsize > align)
-               camsize = align;
-       if (camsize > max_cam)
-               camsize = max_cam;
-
-       return 1UL << camsize;
-}
-
-static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
-                                       unsigned long ram, int max_cam_idx,
-                                       bool dryrun)
-{
-       int i;
-       unsigned long amount_mapped = 0;
-
-       /* Calculate CAM values */
-       for (i = 0; ram && i < max_cam_idx; i++) {
-               unsigned long cam_sz;
-
-               cam_sz = calc_cam_sz(ram, virt, phys);
-               if (!dryrun)
-                       settlbcam(i, virt, phys, cam_sz,
-                                 pgprot_val(PAGE_KERNEL_X), 0);
-
-               ram -= cam_sz;
-               amount_mapped += cam_sz;
-               virt += cam_sz;
-               phys += cam_sz;
-       }
-
-       if (dryrun)
-               return amount_mapped;
-
-       loadcam_multi(0, i, max_cam_idx);
-       tlbcam_index = i;
-
-#ifdef CONFIG_PPC64
-       get_paca()->tcd.esel_next = i;
-       get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
-       get_paca()->tcd.esel_first = i;
-#endif
-
-       return amount_mapped;
-}
-
-unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun)
-{
-       unsigned long virt = PAGE_OFFSET;
-       phys_addr_t phys = memstart_addr;
-
-       return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun);
-}
-
-#ifdef CONFIG_PPC32
-
-#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
-#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
-#endif
-
-unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
-{
-       return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
-}
-
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
-       flush_instruction_cache();
-}
-
-void __init adjust_total_lowmem(void)
-{
-       unsigned long ram;
-       int i;
-
-       /* adjust lowmem size to __max_low_memory */
-       ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
-
-       i = switch_to_as1();
-       __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false);
-       restore_to_as0(i, 0, 0, 1);
-
-       pr_info("Memory CAM mapping: ");
-       for (i = 0; i < tlbcam_index - 1; i++)
-               pr_cont("%lu/", tlbcam_sz(i) >> 20);
-       pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
-               (unsigned int)((total_lowmem - __max_low_memory) >> 20));
-
-       memblock_set_current_limit(memstart_addr + __max_low_memory);
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
-                               phys_addr_t first_memblock_size)
-{
-       phys_addr_t limit = first_memblock_base + first_memblock_size;
-
-       /* 64M mapped initially according to head_fsl_booke.S */
-       memblock_set_current_limit(min_t(u64, limit, 0x04000000));
-}
-
-#ifdef CONFIG_RELOCATABLE
-int __initdata is_second_reloc;
-notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
-{
-       unsigned long base = KERNELBASE;
-
-       kernstart_addr = start;
-       if (is_second_reloc) {
-               virt_phys_offset = PAGE_OFFSET - memstart_addr;
-               return;
-       }
-
-       /*
-        * Relocatable kernel support based on processing of dynamic
-        * relocation entries. Before we get the real memstart_addr,
-        * we will compute the virt_phys_offset like this:
-        * virt_phys_offset = stext.run - kernstart_addr
-        *
-        * stext.run = (KERNELBASE & ~0x3ffffff) +
-        *                              (kernstart_addr & 0x3ffffff)
-        * When we relocate, we have :
-        *
-        *      (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
-        *
-        * hence:
-        *  virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
-        *                              (kernstart_addr & ~0x3ffffff)
-        *
-        */
-       start &= ~0x3ffffff;
-       base &= ~0x3ffffff;
-       virt_phys_offset = base - start;
-       early_get_first_memblock_info(__va(dt_ptr), NULL);
-       /*
-        * We now get the memstart_addr, then we should check if this
-        * address is the same as what the PAGE_OFFSET map to now. If
-        * not we have to change the map of PAGE_OFFSET to memstart_addr
-        * and do a second relocation.
-        */
-       if (start != memstart_addr) {
-               int n;
-               long offset = start - memstart_addr;
-
-               is_second_reloc = 1;
-               n = switch_to_as1();
-               /* map a 64M area for the second relocation */
-               if (memstart_addr > start)
-                       map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM,
-                                       false);
-               else
-                       map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
-                                       0x4000000, CONFIG_LOWMEM_CAM_NUM,
-                                       false);
-               restore_to_as0(n, offset, __va(dt_ptr), 1);
-               /* We should never reach here */
-               panic("Relocation error");
-       }
-}
-#endif
-#endif
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
deleted file mode 100644 (file)
index f84ec46..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * PPC Huge TLB Page Support for Book3E MMU
- *
- * Copyright (C) 2009 David Gibson, IBM Corporation.
- * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
- *
- */
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-
-#include <asm/mmu.h>
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#ifdef CONFIG_PPC64
-static inline int tlb1_next(void)
-{
-       struct paca_struct *paca = get_paca();
-       struct tlb_core_data *tcd;
-       int this, next;
-
-       tcd = paca->tcd_ptr;
-       this = tcd->esel_next;
-
-       next = this + 1;
-       if (next >= tcd->esel_max)
-               next = tcd->esel_first;
-
-       tcd->esel_next = next;
-       return this;
-}
-#else
-static inline int tlb1_next(void)
-{
-       int index, ncams;
-
-       ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
-
-       index = this_cpu_read(next_tlbcam_idx);
-
-       /* Just round-robin the entries and wrap when we hit the end */
-       if (unlikely(index == ncams - 1))
-               __this_cpu_write(next_tlbcam_idx, tlbcam_index);
-       else
-               __this_cpu_inc(next_tlbcam_idx);
-
-       return index;
-}
-#endif /* !PPC64 */
-#endif /* FSL */
-
-static inline int mmu_get_tsize(int psize)
-{
-       return mmu_psize_defs[psize].enc;
-}
-
-#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64)
-#include <asm/paca.h>
-
-static inline void book3e_tlb_lock(void)
-{
-       struct paca_struct *paca = get_paca();
-       unsigned long tmp;
-       int token = smp_processor_id() + 1;
-
-       /*
-        * Besides being unnecessary in the absence of SMT, this
-        * check prevents trying to do lbarx/stbcx. on e5500 which
-        * doesn't implement either feature.
-        */
-       if (!cpu_has_feature(CPU_FTR_SMT))
-               return;
-
-       asm volatile("1: lbarx %0, 0, %1;"
-                    "cmpwi %0, 0;"
-                    "bne 2f;"
-                    "stbcx. %2, 0, %1;"
-                    "bne 1b;"
-                    "b 3f;"
-                    "2: lbzx %0, 0, %1;"
-                    "cmpwi %0, 0;"
-                    "bne 2b;"
-                    "b 1b;"
-                    "3:"
-                    : "=&r" (tmp)
-                    : "r" (&paca->tcd_ptr->lock), "r" (token)
-                    : "memory");
-}
-
-static inline void book3e_tlb_unlock(void)
-{
-       struct paca_struct *paca = get_paca();
-
-       if (!cpu_has_feature(CPU_FTR_SMT))
-               return;
-
-       isync();
-       paca->tcd_ptr->lock = 0;
-}
-#else
-static inline void book3e_tlb_lock(void)
-{
-}
-
-static inline void book3e_tlb_unlock(void)
-{
-}
-#endif
-
-static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
-{
-       int found = 0;
-
-       mtspr(SPRN_MAS6, pid << 16);
-       if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
-               asm volatile(
-                       "li     %0,0\n"
-                       "tlbsx. 0,%1\n"
-                       "bne    1f\n"
-                       "li     %0,1\n"
-                       "1:\n"
-                       : "=&r"(found) : "r"(ea));
-       } else {
-               asm volatile(
-                       "tlbsx  0,%1\n"
-                       "mfspr  %0,0x271\n"
-                       "srwi   %0,%0,31\n"
-                       : "=&r"(found) : "r"(ea));
-       }
-
-       return found;
-}
-
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
-                           pte_t pte)
-{
-       unsigned long mas1, mas2;
-       u64 mas7_3;
-       unsigned long psize, tsize, shift;
-       unsigned long flags;
-       struct mm_struct *mm;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       int index;
-#endif
-
-       if (unlikely(is_kernel_addr(ea)))
-               return;
-
-       mm = vma->vm_mm;
-
-       psize = vma_mmu_pagesize(vma);
-       shift = __ilog2(psize);
-       tsize = shift - 10;
-       /*
-        * We can't be interrupted while we're setting up the MAS
-        * registers or after we've confirmed that no tlb exists.
-        */
-       local_irq_save(flags);
-
-       book3e_tlb_lock();
-
-       if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
-               book3e_tlb_unlock();
-               local_irq_restore(flags);
-               return;
-       }
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       /* We have to use the CAM(TLB1) on FSL parts for hugepages */
-       index = tlb1_next();
-       mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
-#endif
-
-       mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
-       mas2 = ea & ~((1UL << shift) - 1);
-       mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
-       mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
-       mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
-       if (!pte_dirty(pte))
-               mas7_3 &= ~(MAS3_SW|MAS3_UW);
-
-       mtspr(SPRN_MAS1, mas1);
-       mtspr(SPRN_MAS2, mas2);
-
-       if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
-               mtspr(SPRN_MAS7_MAS3, mas7_3);
-       } else {
-               if (mmu_has_feature(MMU_FTR_BIG_PHYS))
-                       mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
-               mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
-       }
-
-       asm volatile ("tlbwe");
-
-       book3e_tlb_unlock();
-       local_irq_restore(flags);
-}
-
-void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
-       struct hstate *hstate = hstate_file(vma->vm_file);
-       unsigned long tsize = huge_page_shift(hstate) - 10;
-
-       __flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
-}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
deleted file mode 100644 (file)
index ae4505d..0000000
+++ /dev/null
@@ -1,497 +0,0 @@
-/*
- * This file contains the routines for handling the MMU on those
- * PowerPC implementations where the MMU is not using the hash
- * table, such as 8xx, 4xx, BookE's etc...
- *
- * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
- *                IBM Corp.
- *
- *  Derived from previous arch/powerpc/mm/mmu_context.c
- *  and arch/powerpc/include/asm/mmu_context.h
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- * TODO:
- *
- *   - The global context lock will not scale very well
- *   - The maps should be dynamically allocated to allow for processors
- *     that support more PID bits at runtime
- *   - Implement flush_tlb_mm() by making the context stale and picking
- *     a new one
- *   - More aggressively clear stale map bits and maybe find some way to
- *     also clear mm->cpu_vm_mask bits when processes are migrated
- */
-
-//#define DEBUG_MAP_CONSISTENCY
-//#define DEBUG_CLAMP_LAST_CONTEXT   31
-//#define DEBUG_HARDER
-
-/* We don't use DEBUG because it tends to be compiled in always nowadays
- * and this would generate way too much output
- */
-#ifdef DEBUG_HARDER
-#define pr_hard(args...)       printk(KERN_DEBUG args)
-#define pr_hardcont(args...)   printk(KERN_CONT args)
-#else
-#define pr_hard(args...)       do { } while(0)
-#define pr_hardcont(args...)   do { } while(0)
-#endif
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/memblock.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/slab.h>
-
-#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
-
-#include <mm/mmu_decl.h>
-
-/*
- * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
- * A better way would be to keep track of tasks that own contexts, and implement
- * an LRU usage. That way very active tasks don't always have to pay the TLB
- * reload overhead. The kernel pages are mapped shared, so the kernel can run on
- * behalf of any task that makes a kernel entry. Shared does not mean they are
- * not protected, just that the ASID comparison is not performed. -- Dan
- *
- * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
- * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
- * is disabled, so we can use a TID of zero to represent all kernel pages as
- * shared among all contexts. -- Dan
- *
- * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
- * normally never have to steal though the facility is present if needed.
- * -- BenH
- */
-#define FIRST_CONTEXT 1
-#ifdef DEBUG_CLAMP_LAST_CONTEXT
-#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
-#elif defined(CONFIG_PPC_8xx)
-#define LAST_CONTEXT 16
-#elif defined(CONFIG_PPC_47x)
-#define LAST_CONTEXT 65535
-#else
-#define LAST_CONTEXT 255
-#endif
-
-static unsigned int next_context, nr_free_contexts;
-static unsigned long *context_map;
-#ifdef CONFIG_SMP
-static unsigned long *stale_map[NR_CPUS];
-#endif
-static struct mm_struct **context_mm;
-static DEFINE_RAW_SPINLOCK(context_lock);
-
-#define CTX_MAP_SIZE   \
-       (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
-
-
-/* Steal a context from a task that has one at the moment.
- *
- * This is used when we are running out of available PID numbers
- * on the processors.
- *
- * This isn't an LRU system, it just frees up each context in
- * turn (sort-of pseudo-random replacement :).  This would be the
- * place to implement an LRU scheme if anyone was motivated to do it.
- *  -- paulus
- *
- * For context stealing, we use a slightly different approach for
- * SMP and UP. Basically, the UP one is simpler and doesn't use
- * the stale map as we can just flush the local CPU
- *  -- benh
- */
-#ifdef CONFIG_SMP
-static unsigned int steal_context_smp(unsigned int id)
-{
-       struct mm_struct *mm;
-       unsigned int cpu, max, i;
-
-       max = LAST_CONTEXT - FIRST_CONTEXT;
-
-       /* Attempt to free next_context first and then loop until we manage */
-       while (max--) {
-               /* Pick up the victim mm */
-               mm = context_mm[id];
-
-               /* We have a candidate victim, check if it's active, on SMP
-                * we cannot steal active contexts
-                */
-               if (mm->context.active) {
-                       id++;
-                       if (id > LAST_CONTEXT)
-                               id = FIRST_CONTEXT;
-                       continue;
-               }
-               pr_hardcont(" | steal %d from 0x%p", id, mm);
-
-               /* Mark this mm as having no context anymore */
-               mm->context.id = MMU_NO_CONTEXT;
-
-               /* Mark it stale on all CPUs that used this mm. For threaded
-                * implementations, we set it on all threads on each core
-                * represented in the mask. A future implementation will use
-                * a core map instead but this will do for now.
-                */
-               for_each_cpu(cpu, mm_cpumask(mm)) {
-                       for (i = cpu_first_thread_sibling(cpu);
-                            i <= cpu_last_thread_sibling(cpu); i++) {
-                               if (stale_map[i])
-                                       __set_bit(id, stale_map[i]);
-                       }
-                       cpu = i - 1;
-               }
-               return id;
-       }
-
-       /* This will happen if you have more CPUs than available contexts,
-        * all we can do here is wait a bit and try again
-        */
-       raw_spin_unlock(&context_lock);
-       cpu_relax();
-       raw_spin_lock(&context_lock);
-
-       /* This will cause the caller to try again */
-       return MMU_NO_CONTEXT;
-}
-#endif  /* CONFIG_SMP */
-
-static unsigned int steal_all_contexts(void)
-{
-       struct mm_struct *mm;
-#ifdef CONFIG_SMP
-       int cpu = smp_processor_id();
-#endif
-       unsigned int id;
-
-       for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
-               /* Pick up the victim mm */
-               mm = context_mm[id];
-
-               pr_hardcont(" | steal %d from 0x%p", id, mm);
-
-               /* Mark this mm as having no context anymore */
-               mm->context.id = MMU_NO_CONTEXT;
-               if (id != FIRST_CONTEXT) {
-                       context_mm[id] = NULL;
-                       __clear_bit(id, context_map);
-#ifdef DEBUG_MAP_CONSISTENCY
-                       mm->context.active = 0;
-#endif
-               }
-#ifdef CONFIG_SMP
-               __clear_bit(id, stale_map[cpu]);
-#endif
-       }
-
-       /* Flush the TLB for all contexts (not to be used on SMP) */
-       _tlbil_all();
-
-       nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
-
-       return FIRST_CONTEXT;
-}
-
-/* Note that this will also be called on SMP if all other CPUs are
- * offlined, which means that it may be called for cpu != 0. For
- * this to work, we somewhat assume that CPUs that are onlined
- * come up with a fully clean TLB (or are cleaned when offlined)
- */
-static unsigned int steal_context_up(unsigned int id)
-{
-       struct mm_struct *mm;
-#ifdef CONFIG_SMP
-       int cpu = smp_processor_id();
-#endif
-
-       /* Pick up the victim mm */
-       mm = context_mm[id];
-
-       pr_hardcont(" | steal %d from 0x%p", id, mm);
-
-       /* Flush the TLB for that context */
-       local_flush_tlb_mm(mm);
-
-       /* Mark this mm as having no context anymore */
-       mm->context.id = MMU_NO_CONTEXT;
-
-       /* XXX This clear should ultimately be part of local_flush_tlb_mm */
-#ifdef CONFIG_SMP
-       __clear_bit(id, stale_map[cpu]);
-#endif
-
-       return id;
-}
-
-#ifdef DEBUG_MAP_CONSISTENCY
-static void context_check_map(void)
-{
-       unsigned int id, nrf, nact;
-
-       nrf = nact = 0;
-       for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
-               int used = test_bit(id, context_map);
-               if (!used)
-                       nrf++;
-               if (used != (context_mm[id] != NULL))
-                       pr_err("MMU: Context %d is %s and MM is %p !\n",
-                              id, used ? "used" : "free", context_mm[id]);
-               if (context_mm[id] != NULL)
-                       nact += context_mm[id]->context.active;
-       }
-       if (nrf != nr_free_contexts) {
-               pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
-                      nr_free_contexts, nrf);
-               nr_free_contexts = nrf;
-       }
-       if (nact > num_online_cpus())
-               pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
-                      nact, num_online_cpus());
-       if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
-               pr_err("MMU: Context 0 has been freed !!!\n");
-}
-#else
-static void context_check_map(void) { }
-#endif
-
-void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
-                       struct task_struct *tsk)
-{
-       unsigned int id;
-#ifdef CONFIG_SMP
-       unsigned int i, cpu = smp_processor_id();
-#endif
-       unsigned long *map;
-
-       /* No lockless fast path .. yet */
-       raw_spin_lock(&context_lock);
-
-       pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
-               cpu, next, next->context.active, next->context.id);
-
-#ifdef CONFIG_SMP
-       /* Mark us active and the previous one not anymore */
-       next->context.active++;
-       if (prev) {
-               pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
-               WARN_ON(prev->context.active < 1);
-               prev->context.active--;
-       }
-
- again:
-#endif /* CONFIG_SMP */
-
-       /* If we already have a valid assigned context, skip all that */
-       id = next->context.id;
-       if (likely(id != MMU_NO_CONTEXT)) {
-#ifdef DEBUG_MAP_CONSISTENCY
-               if (context_mm[id] != next)
-                       pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
-                              next, id, id, context_mm[id]);
-#endif
-               goto ctxt_ok;
-       }
-
-       /* We really don't have a context, let's try to acquire one */
-       id = next_context;
-       if (id > LAST_CONTEXT)
-               id = FIRST_CONTEXT;
-       map = context_map;
-
-       /* No more free contexts, let's try to steal one */
-       if (nr_free_contexts == 0) {
-#ifdef CONFIG_SMP
-               if (num_online_cpus() > 1) {
-                       id = steal_context_smp(id);
-                       if (id == MMU_NO_CONTEXT)
-                               goto again;
-                       goto stolen;
-               }
-#endif /* CONFIG_SMP */
-               if (IS_ENABLED(CONFIG_PPC_8xx))
-                       id = steal_all_contexts();
-               else
-                       id = steal_context_up(id);
-               goto stolen;
-       }
-       nr_free_contexts--;
-
-       /* We know there's at least one free context, try to find it */
-       while (__test_and_set_bit(id, map)) {
-               id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
-               if (id > LAST_CONTEXT)
-                       id = FIRST_CONTEXT;
-       }
- stolen:
-       next_context = id + 1;
-       context_mm[id] = next;
-       next->context.id = id;
-       pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
-
-       context_check_map();
- ctxt_ok:
-
-       /* If that context got marked stale on this CPU, then flush the
-        * local TLB for it and unmark it before we use it
-        */
-#ifdef CONFIG_SMP
-       if (test_bit(id, stale_map[cpu])) {
-               pr_hardcont(" | stale flush %d [%d..%d]",
-                           id, cpu_first_thread_sibling(cpu),
-                           cpu_last_thread_sibling(cpu));
-
-               local_flush_tlb_mm(next);
-
-               /* XXX This clear should ultimately be part of local_flush_tlb_mm */
-               for (i = cpu_first_thread_sibling(cpu);
-                    i <= cpu_last_thread_sibling(cpu); i++) {
-                       if (stale_map[i])
-                               __clear_bit(id, stale_map[i]);
-               }
-       }
-#endif
-
-       /* Flick the MMU and release lock */
-       pr_hardcont(" -> %d\n", id);
-       set_context(id, next->pgd);
-       raw_spin_unlock(&context_lock);
-}
-
-/*
- * Set up the context for a new address space.
- */
-int init_new_context(struct task_struct *t, struct mm_struct *mm)
-{
-       pr_hard("initing context for mm @%p\n", mm);
-
-       /*
-        * We have MMU_NO_CONTEXT set to be ~0. Hence check
-        * explicitly against context.id == 0. This ensures that we properly
-        * initialize context slice details for newly allocated mm's (which will
-        * have id == 0) and don't alter context slice inherited via fork (which
-        * will have id != 0).
-        */
-       if (mm->context.id == 0)
-               slice_init_new_context_exec(mm);
-       mm->context.id = MMU_NO_CONTEXT;
-       mm->context.active = 0;
-       pte_frag_set(&mm->context, NULL);
-       return 0;
-}
-
-/*
- * We're finished using the context for an address space.
- */
-void destroy_context(struct mm_struct *mm)
-{
-       unsigned long flags;
-       unsigned int id;
-
-       if (mm->context.id == MMU_NO_CONTEXT)
-               return;
-
-       WARN_ON(mm->context.active != 0);
-
-       raw_spin_lock_irqsave(&context_lock, flags);
-       id = mm->context.id;
-       if (id != MMU_NO_CONTEXT) {
-               __clear_bit(id, context_map);
-               mm->context.id = MMU_NO_CONTEXT;
-#ifdef DEBUG_MAP_CONSISTENCY
-               mm->context.active = 0;
-#endif
-               context_mm[id] = NULL;
-               nr_free_contexts++;
-       }
-       raw_spin_unlock_irqrestore(&context_lock, flags);
-}
-
-#ifdef CONFIG_SMP
-static int mmu_ctx_cpu_prepare(unsigned int cpu)
-{
-       /* We don't touch the CPU 0 map, it's allocated at boot and kept
-        * around forever
-        */
-       if (cpu == boot_cpuid)
-               return 0;
-
-       pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
-       stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
-       return 0;
-}
-
-static int mmu_ctx_cpu_dead(unsigned int cpu)
-{
-#ifdef CONFIG_HOTPLUG_CPU
-       if (cpu == boot_cpuid)
-               return 0;
-
-       pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
-       kfree(stale_map[cpu]);
-       stale_map[cpu] = NULL;
-
-       /* We also clear the cpu_vm_mask bits of CPUs going away */
-       clear_tasks_mm_cpumask(cpu);
-#endif
-       return 0;
-}
-
-#endif /* CONFIG_SMP */
-
-/*
- * Initialize the context management stuff.
- */
-void __init mmu_context_init(void)
-{
-       /* Mark init_mm as being active on all possible CPUs since
-        * we'll get called with prev == init_mm the first time
-        * we schedule on a given CPU
-        */
-       init_mm.context.active = NR_CPUS;
-
-       /*
-        * Allocate the maps used by context management
-        */
-       context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
-       if (!context_map)
-               panic("%s: Failed to allocate %zu bytes\n", __func__,
-                     CTX_MAP_SIZE);
-       context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
-                                   SMP_CACHE_BYTES);
-       if (!context_mm)
-               panic("%s: Failed to allocate %zu bytes\n", __func__,
-                     sizeof(void *) * (LAST_CONTEXT + 1));
-#ifdef CONFIG_SMP
-       stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
-       if (!stale_map[boot_cpuid])
-               panic("%s: Failed to allocate %zu bytes\n", __func__,
-                     CTX_MAP_SIZE);
-
-       cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
-                                 "powerpc/mmu/ctx:prepare",
-                                 mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
-#endif
-
-       printk(KERN_INFO
-              "MMU: Allocated %zu bytes of context maps for %d contexts\n",
-              2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
-              LAST_CONTEXT - FIRST_CONTEXT + 1);
-
-       /*
-        * Some processors have too few contexts to reserve one for
-        * init_mm, and require using context 0 for a normal task.
-        * Other processors reserve the use of context zero for the kernel.
-        * This code assumes FIRST_CONTEXT < 32.
-        */
-       context_map[0] = (1 << FIRST_CONTEXT) - 1;
-       next_context = FIRST_CONTEXT;
-       nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
-}
diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c
new file mode 100644 (file)
index 0000000..460459b
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+
+#include <mm/mmu_decl.h>
+
+extern int __map_without_ltlbs;
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+       /*
+        * The Zone Protection Register (ZPR) defines how protection will
+        * be applied to every page which is a member of a given zone. At
+        * present, we utilize only two of the 4xx's zones.
+        * The zone index bits (of ZSEL) in the PTE are used for software
+        * indicators, except the LSB.  For user access, zone 1 is used,
+        * for kernel access, zone 0 is used.  We set all but zone 1
+        * to zero, allowing only kernel access as indicated in the PTE.
+        * For zone 1, we set a 01 binary (a value of 10 will not work)
+        * to allow user access as indicated in the PTE.  This also allows
+        * kernel access as indicated in the PTE.
+        */
+
+        mtspr(SPRN_ZPR, 0x10000000);
+
+       flush_instruction_cache();
+
+       /*
+        * Set up the real-mode cache parameters for the exception vector
+        * handlers (which are run in real-mode).
+        */
+
+        mtspr(SPRN_DCWR, 0x00000000);  /* All caching is write-back */
+
+        /*
+        * Cache instruction and data space where the exception
+        * vectors and the kernel live in real-mode.
+        */
+
+        mtspr(SPRN_DCCR, 0xFFFF0000);  /* 2GByte of data space at 0x0. */
+        mtspr(SPRN_ICCR, 0xFFFF0000);  /* 2GByte of instr. space at 0x0. */
+}
+
+#define LARGE_PAGE_SIZE_16M    (1<<24)
+#define LARGE_PAGE_SIZE_4M     (1<<22)
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+       unsigned long v, s, mapped;
+       phys_addr_t p;
+
+       v = KERNELBASE;
+       p = 0;
+       s = total_lowmem;
+
+       if (__map_without_ltlbs)
+               return 0;
+
+       while (s >= LARGE_PAGE_SIZE_16M) {
+               pmd_t *pmdp;
+               unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE;
+
+               pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
+               *pmdp++ = __pmd(val);
+               *pmdp++ = __pmd(val);
+               *pmdp++ = __pmd(val);
+               *pmdp++ = __pmd(val);
+
+               v += LARGE_PAGE_SIZE_16M;
+               p += LARGE_PAGE_SIZE_16M;
+               s -= LARGE_PAGE_SIZE_16M;
+       }
+
+       while (s >= LARGE_PAGE_SIZE_4M) {
+               pmd_t *pmdp;
+               unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE;
+
+               pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
+               *pmdp = __pmd(val);
+
+               v += LARGE_PAGE_SIZE_4M;
+               p += LARGE_PAGE_SIZE_4M;
+               s -= LARGE_PAGE_SIZE_4M;
+       }
+
+       mapped = total_lowmem - s;
+
+       /* If the size of RAM is not an exact power of two, we may not
+        * have covered RAM in its entirety with 16 and 4 MiB
+        * pages. Consequently, restrict the top end of RAM currently
+        * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
+        * coverage with normal-sized pages (or other reasons) do not
+        * attempt to allocate outside the allowed range.
+        */
+       memblock_set_current_limit(mapped);
+
+       return mapped;
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+                               phys_addr_t first_memblock_size)
+{
+       /* We don't currently support the first MEMBLOCK not mapping 0
+        * physical on those processors
+        */
+       BUG_ON(first_memblock_base != 0);
+
+       /* 40x can only access 16MB at the moment (see head_40x.S) */
+       memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
+}
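For readers unfamiliar with the 40x mapping strategy, here is a minimal, standalone sketch (not part of the patch) of the arithmetic in mmu_mapin_ram() above: lowmem is covered with 16MB pinned entries first, then 4MB ones, and whatever remains is left for normal-sized pages, which is why memblock_set_current_limit() is called with the block-mapped amount. The 211MB lowmem figure is just an illustrative assumption.

#include <stdio.h>

#define LARGE_PAGE_SIZE_16M	(1UL << 24)
#define LARGE_PAGE_SIZE_4M	(1UL << 22)

/* Mirror of the mapping loops in the 40x mmu_mapin_ram() above: how much
 * of 'lowmem' ends up block-mapped with 16MB then 4MB entries. */
static unsigned long block_mapped_40x(unsigned long lowmem)
{
	unsigned long s = lowmem;

	while (s >= LARGE_PAGE_SIZE_16M)
		s -= LARGE_PAGE_SIZE_16M;	/* one 16MB mapping per pass */
	while (s >= LARGE_PAGE_SIZE_4M)
		s -= LARGE_PAGE_SIZE_4M;	/* then 4MB mappings */

	return lowmem - s;			/* the tail 's' needs normal pages */
}

int main(void)
{
	unsigned long lowmem = 211UL << 20;	/* 211MB, an illustrative value */

	printf("block mapped: %lu MB of %lu MB\n",
	       block_mapped_40x(lowmem) >> 20, lowmem >> 20);
	return 0;
}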
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
new file mode 100644 (file)
index 0000000..c07983e
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Modifications by Matt Porter (mporter@mvista.com) to support
+ * PPC44x Book E processors.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/memblock.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+
+#include <mm/mmu_decl.h>
+
+/* Used by the 44x TLB replacement exception handler.
+ * It just needs to be declared someplace.
+ */

+unsigned int tlb_44x_index; /* = 0 */
+unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
+int icache_44x_need_flush;
+
+unsigned long tlb_47x_boltmap[1024/8];
+
+static void ppc44x_update_tlb_hwater(void)
+{
+       /* The TLB miss handlers hard-code the watermark in a cmpli
+        * instruction to improve performance rather than loading it
+        * from the global variable. Thus, we patch the instructions
+        * in the two TLB miss handlers whenever the value is updated.
+        */
+       modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater);
+       modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater);
+}
+
+/*
+ * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU
+ */
+static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
+{
+       unsigned int entry = tlb_44x_hwater--;
+
+       ppc44x_update_tlb_hwater();
+
+       mtspr(SPRN_MMUCR, 0);
+
+       __asm__ __volatile__(
+               "tlbwe  %2,%3,%4\n"
+               "tlbwe  %1,%3,%5\n"
+               "tlbwe  %0,%3,%6\n"
+       :
+       : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+         "r" (phys),
+         "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
+         "r" (entry),
+         "i" (PPC44x_TLB_PAGEID),
+         "i" (PPC44x_TLB_XLAT),
+         "i" (PPC44x_TLB_ATTRIB));
+}
+
+static int __init ppc47x_find_free_bolted(void)
+{
+       unsigned int mmube0 = mfspr(SPRN_MMUBE0);
+       unsigned int mmube1 = mfspr(SPRN_MMUBE1);
+
+       if (!(mmube0 & MMUBE0_VBE0))
+               return 0;
+       if (!(mmube0 & MMUBE0_VBE1))
+               return 1;
+       if (!(mmube0 & MMUBE0_VBE2))
+               return 2;
+       if (!(mmube1 & MMUBE1_VBE3))
+               return 3;
+       if (!(mmube1 & MMUBE1_VBE4))
+               return 4;
+       if (!(mmube1 & MMUBE1_VBE5))
+               return 5;
+       return -1;
+}
+
+static void __init ppc47x_update_boltmap(void)
+{
+       unsigned int mmube0 = mfspr(SPRN_MMUBE0);
+       unsigned int mmube1 = mfspr(SPRN_MMUBE1);
+
+       if (mmube0 & MMUBE0_VBE0)
+               __set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff,
+                         tlb_47x_boltmap);
+       if (mmube0 & MMUBE0_VBE1)
+               __set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff,
+                         tlb_47x_boltmap);
+       if (mmube0 & MMUBE0_VBE2)
+               __set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff,
+                         tlb_47x_boltmap);
+       if (mmube1 & MMUBE1_VBE3)
+               __set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff,
+                         tlb_47x_boltmap);
+       if (mmube1 & MMUBE1_VBE4)
+               __set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff,
+                         tlb_47x_boltmap);
+       if (mmube1 & MMUBE1_VBE5)
+               __set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff,
+                         tlb_47x_boltmap);
+}
+
+/*
+ * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
+ */
+static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
+{
+       unsigned int rA;
+       int bolted;
+
+       /* Base rA is HW way select, way 0, bolted bit set */
+       rA = 0x88000000;
+
+       /* Look for a bolted entry slot */
+       bolted = ppc47x_find_free_bolted();
+       BUG_ON(bolted < 0);
+
+       /* Insert bolted slot number */
+       rA |= bolted << 24;
+
+       pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n",
+                virt, phys, bolted);
+
+       mtspr(SPRN_MMUCR, 0);
+
+       __asm__ __volatile__(
+               "tlbwe  %2,%3,0\n"
+               "tlbwe  %1,%3,1\n"
+               "tlbwe  %0,%3,2\n"
+               :
+               : "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR |
+                      PPC47x_TLB2_SX
+#ifdef CONFIG_SMP
+                      | PPC47x_TLB2_M
+#endif
+                      ),
+                 "r" (phys),
+                 "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M),
+                 "r" (rA));
+}
+
+void __init MMU_init_hw(void)
+{
+       /* This is not useful on 47x but won't hurt either */
+       ppc44x_update_tlb_hwater();
+
+       flush_instruction_cache();
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+       unsigned long addr;
+       unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
+
+       /* Pin in enough TLBs to cover any lowmem not covered by the
+        * initial 256M mapping established in head_44x.S */
+       for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
+            addr += PPC_PIN_SIZE) {
+               if (mmu_has_feature(MMU_FTR_TYPE_47x))
+                       ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
+               else
+                       ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
+       }
+       if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
+               ppc47x_update_boltmap();
+
+#ifdef DEBUG
+               {
+                       int i;
+
+                       printk(KERN_DEBUG "bolted entries: ");
+                       for (i = 0; i < 255; i++) {
+                               if (test_bit(i, tlb_47x_boltmap))
+                                       printk("%d ", i);
+                       }
+                       printk("\n");
+               }
+#endif /* DEBUG */
+       }
+       return total_lowmem;
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+                               phys_addr_t first_memblock_size)
+{
+       u64 size;
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+       /* We don't currently support the first MEMBLOCK not mapping 0
+        * physical on those processors
+        */
+       BUG_ON(first_memblock_base != 0);
+#endif
+
+       /* 44x has a 256M TLB entry pinned at boot */
+       size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE));
+       memblock_set_current_limit(first_memblock_base + size);
+}
+
+#ifdef CONFIG_SMP
+void __init mmu_init_secondary(int cpu)
+{
+       unsigned long addr;
+       unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
+
+       /* Pin in enough TLBs to cover any lowmem not covered by the
+        * initial 256M mapping established in head_44x.S
+        *
+        * WARNING: This is called with only the first 256M of the
+        * linear mapping in the TLB and we can't take faults yet
+        * so beware of what this code uses. It runs off a temporary
+        * stack. current (r2) isn't initialized, smp_processor_id()
+        * will not work, current thread info isn't accessible, ...
+        */
+       for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
+            addr += PPC_PIN_SIZE) {
+               if (mmu_has_feature(MMU_FTR_TYPE_47x))
+                       ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
+               else
+                       ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
+       }
+}
+#endif /* CONFIG_SMP */
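As a side note (not part of the patch), a small standalone sketch of the pinning walk shared by mmu_mapin_ram() and mmu_init_secondary() above: beyond the initial 256MB mapped in head_44x.S, each further 256MB slice of lowmem gets one pinned TLB entry. The 256MB PPC_PIN_SIZE and the 768MB lowmem figure are assumptions made for the example.

#include <stdio.h>

#define PPC_PIN_SIZE	(1UL << 28)	/* assume 256MB pinned entries, as on 44x */

int main(void)
{
	/* Illustrative values: memstart at 0, 768MB of lowmem */
	unsigned long memstart = 0;
	unsigned long lowmem_end = 768UL << 20;
	unsigned long addr;

	/* Same walk as above: the first 256MB is already mapped by
	 * head_44x.S, every further 256MB chunk gets its own pin. */
	for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end; addr += PPC_PIN_SIZE)
		printf("pin 256MB entry at phys 0x%08lx\n", addr);

	return 0;
}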
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
new file mode 100644 (file)
index 0000000..70d55b6
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 8xx series of chips.
+ *  -- christophe
+ *
+ *  Derived from arch/powerpc/mm/40x_mmu.c:
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/memblock.h>
+#include <linux/mmu_context.h>
+#include <asm/fixmap.h>
+#include <asm/code-patching.h>
+
+#include <mm/mmu_decl.h>
+
+#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
+
+extern int __map_without_ltlbs;
+
+static unsigned long block_mapped_ram;
+
+/*
+ * Return PA for this VA if it is in an area mapped with LTLBs.
+ * Otherwise, returns 0
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+       unsigned long p = PHYS_IMMR_BASE;
+
+       if (__map_without_ltlbs)
+               return 0;
+       if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
+               return p + va - VIRT_IMMR_BASE;
+       if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
+               return __pa(va);
+       return 0;
+}
+
+/*
+ * Return VA for a given PA mapped with LTLBs or 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+       unsigned long p = PHYS_IMMR_BASE;
+
+       if (__map_without_ltlbs)
+               return 0;
+       if (pa >= p && pa < p + IMMR_SIZE)
+               return VIRT_IMMR_BASE + pa - p;
+       if (pa < block_mapped_ram)
+               return (unsigned long)__va(pa);
+       return 0;
+}
+
+#define LARGE_PAGE_SIZE_8M     (1<<23)
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+       /* Pin up to the first three 8MB pages after the IMMR in the DTLB table */
+       if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
+               unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
+               unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
+               int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28;
+               unsigned long addr = 0;
+               unsigned long mem = total_lowmem;
+
+               for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
+                       mtspr(SPRN_MD_CTR, ctr | (i << 8));
+                       mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
+                       mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+                       mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
+                       addr += LARGE_PAGE_SIZE_8M;
+                       mem -= LARGE_PAGE_SIZE_8M;
+               }
+       }
+}
+
+static void __init mmu_mapin_immr(void)
+{
+       unsigned long p = PHYS_IMMR_BASE;
+       unsigned long v = VIRT_IMMR_BASE;
+       int offset;
+
+       for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
+               map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
+}
+
+static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
+{
+       modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
+}
+
+static void mmu_patch_addis(s32 *site, long simm)
+{
+       unsigned int instr = *(unsigned int *)patch_site_addr(site);
+
+       instr &= 0xffff0000;
+       instr |= ((unsigned long)simm) >> 16;
+       patch_instruction_site(site, instr);
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+       unsigned long mapped;
+
+       if (__map_without_ltlbs) {
+               mapped = 0;
+               mmu_mapin_immr();
+               if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR))
+                       patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
+               if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+                       mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
+       } else {
+               mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
+               if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+                       mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
+                                           _ALIGN(__pa(_einittext), 8 << 20));
+       }
+
+       mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
+       mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped);
+
+       /* If the size of RAM is not an exact power of two, we may not
+        * have covered RAM in its entirety with 8 MiB
+        * pages. Consequently, restrict the top end of RAM currently
+        * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
+        * coverage with normal-sized pages (or other reasons) do not
+        * attempt to allocate outside the allowed range.
+        */
+       if (mapped)
+               memblock_set_current_limit(mapped);
+
+       block_mapped_ram = mapped;
+
+       return mapped;
+}
+
+void mmu_mark_initmem_nx(void)
+{
+       if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
+               mmu_patch_addis(&patch__itlbmiss_linmem_top8,
+                               -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
+       if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+               mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mmu_mark_rodata_ro(void)
+{
+       if (CONFIG_DATA_SHIFT < 23)
+               mmu_patch_addis(&patch__dtlbmiss_romem_top8,
+                               -__pa(((unsigned long)_sinittext) &
+                                     ~(LARGE_PAGE_SIZE_8M - 1)));
+       mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+}
+#endif
+
+void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
+                                      phys_addr_t first_memblock_size)
+{
+       /* We don't currently support the first MEMBLOCK not mapping 0
+        * physical on those processors
+        */
+       BUG_ON(first_memblock_base != 0);
+
+       /* 8xx can only access 32MB at the moment */
+       memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000));
+}
+
+/*
+ * Set up to use a given MMU context.
+ * id is context number, pgd is PGD pointer.
+ *
+ * We place the physical address of the new task page directory loaded
+ * into the MMU base register, and set the ASID compare register with
+ * the new "context."
+ */
+void set_context(unsigned long id, pgd_t *pgd)
+{
+       s16 offset = (s16)(__pa(swapper_pg_dir));
+
+       /* Context switch the PTE pointer for the Abatron BDI2000.
+        * The PGDIR is passed as second argument.
+        */
+       if (IS_ENABLED(CONFIG_BDI_SWITCH))
+               abatron_pteptrs[1] = pgd;
+
+       /* Register M_TWB will contain base address of level 1 table minus the
+        * lower part of the kernel PGDIR base address, so that all accesses to
+        * level 1 table are done relative to lower part of kernel PGDIR base
+        * address.
+        */
+       mtspr(SPRN_M_TWB, __pa(pgd) - offset);
+
+       /* Update context */
+       mtspr(SPRN_M_CASID, id - 1);
+       /* sync */
+       mb();
+}
+
+void flush_instruction_cache(void)
+{
+       isync();
+       mtspr(SPRN_IC_CST, IDC_INVALL);
+       isync();
+}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+       if (disabled)
+               return;
+
+       pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+       mtspr(SPRN_MI_AP, MI_APG_KUEP);
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+       pr_info("Activating Kernel Userspace Access Protection\n");
+
+       if (disabled)
+               pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+
+       mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+#endif
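To illustrate the patching done by mmu_patch_addis() and mmu_patch_cmp_limit() above (this sketch is not part of the patch): the low 16 bits of an existing instruction word are replaced with the upper half of the new limit. The instruction word and the 24MB limit below are made-up example values, not real patch sites.

#include <stdio.h>

/* Mirror of mmu_patch_addis(): keep the opcode and register fields in the
 * top half of the instruction, splice in a new 16-bit immediate. */
static unsigned int patch_imm16(unsigned int instr, unsigned long limit)
{
	instr &= 0xffff0000;			/* drop the old immediate */
	instr |= (limit >> 16) & 0xffff;	/* upper half of the new limit */
	return instr;
}

int main(void)
{
	unsigned int insn = 0x3d200000;		/* "addis r9,0,0", a stand-in instruction */
	unsigned long limit = 24UL << 20;	/* e.g. 24MB of block-mapped RAM */

	printf("patched instruction: 0x%08x\n", patch_imm16(insn, limit));
	return 0;
}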
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
new file mode 100644 (file)
index 0000000..b2228ff
--- /dev/null
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)
+
+obj-y                          += mmu_context.o tlb.o tlb_low.o
+obj-$(CONFIG_PPC_BOOK3E_64)    += tlb_low_64e.o book3e_pgtable.o
+obj-$(CONFIG_40x)              += 40x.o
+obj-$(CONFIG_44x)              += 44x.o
+obj-$(CONFIG_PPC_8xx)          += 8xx.o
+obj-$(CONFIG_PPC_FSL_BOOK3E)   += fsl_booke.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_BOOK3E_MMU)   += book3e_hugetlbpage.o
+endif
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_fsl_booke.o := n
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
new file mode 100644 (file)
index 0000000..f84ec46
--- /dev/null
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PPC Huge TLB Page Support for Book3E MMU
+ *
+ * Copyright (C) 2009 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
+ *
+ */
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC64
+static inline int tlb1_next(void)
+{
+       struct paca_struct *paca = get_paca();
+       struct tlb_core_data *tcd;
+       int this, next;
+
+       tcd = paca->tcd_ptr;
+       this = tcd->esel_next;
+
+       next = this + 1;
+       if (next >= tcd->esel_max)
+               next = tcd->esel_first;
+
+       tcd->esel_next = next;
+       return this;
+}
+#else
+static inline int tlb1_next(void)
+{
+       int index, ncams;
+
+       ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+       index = this_cpu_read(next_tlbcam_idx);
+
+       /* Just round-robin the entries and wrap when we hit the end */
+       if (unlikely(index == ncams - 1))
+               __this_cpu_write(next_tlbcam_idx, tlbcam_index);
+       else
+               __this_cpu_inc(next_tlbcam_idx);
+
+       return index;
+}
+#endif /* !PPC64 */
+#endif /* FSL */
+
+static inline int mmu_get_tsize(int psize)
+{
+       return mmu_psize_defs[psize].enc;
+}
+
+#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64)
+#include <asm/paca.h>
+
+static inline void book3e_tlb_lock(void)
+{
+       struct paca_struct *paca = get_paca();
+       unsigned long tmp;
+       int token = smp_processor_id() + 1;
+
+       /*
+        * Besides being unnecessary in the absence of SMT, this
+        * check prevents trying to do lbarx/stbcx. on e5500 which
+        * doesn't implement either feature.
+        */
+       if (!cpu_has_feature(CPU_FTR_SMT))
+               return;
+
+       asm volatile("1: lbarx %0, 0, %1;"
+                    "cmpwi %0, 0;"
+                    "bne 2f;"
+                    "stbcx. %2, 0, %1;"
+                    "bne 1b;"
+                    "b 3f;"
+                    "2: lbzx %0, 0, %1;"
+                    "cmpwi %0, 0;"
+                    "bne 2b;"
+                    "b 1b;"
+                    "3:"
+                    : "=&r" (tmp)
+                    : "r" (&paca->tcd_ptr->lock), "r" (token)
+                    : "memory");
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+       struct paca_struct *paca = get_paca();
+
+       if (!cpu_has_feature(CPU_FTR_SMT))
+               return;
+
+       isync();
+       paca->tcd_ptr->lock = 0;
+}
+#else
+static inline void book3e_tlb_lock(void)
+{
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+}
+#endif
+
+static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
+{
+       int found = 0;
+
+       mtspr(SPRN_MAS6, pid << 16);
+       if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
+               asm volatile(
+                       "li     %0,0\n"
+                       "tlbsx. 0,%1\n"
+                       "bne    1f\n"
+                       "li     %0,1\n"
+                       "1:\n"
+                       : "=&r"(found) : "r"(ea));
+       } else {
+               asm volatile(
+                       "tlbsx  0,%1\n"
+                       "mfspr  %0,0x271\n"
+                       "srwi   %0,%0,31\n"
+                       : "=&r"(found) : "r"(ea));
+       }
+
+       return found;
+}
+
+void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
+                           pte_t pte)
+{
+       unsigned long mas1, mas2;
+       u64 mas7_3;
+       unsigned long psize, tsize, shift;
+       unsigned long flags;
+       struct mm_struct *mm;
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       int index;
+#endif
+
+       if (unlikely(is_kernel_addr(ea)))
+               return;
+
+       mm = vma->vm_mm;
+
+       psize = vma_mmu_pagesize(vma);
+       shift = __ilog2(psize);
+       tsize = shift - 10;
+       /*
+        * We can't be interrupted while we're setting up the MAS
+        * registers or after we've confirmed that no TLB entry exists.
+        */
+       local_irq_save(flags);
+
+       book3e_tlb_lock();
+
+       if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
+               book3e_tlb_unlock();
+               local_irq_restore(flags);
+               return;
+       }
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       /* We have to use the CAM(TLB1) on FSL parts for hugepages */
+       index = tlb1_next();
+       mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
+#endif
+
+       mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
+       mas2 = ea & ~((1UL << shift) - 1);
+       mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+       mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
+       mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
+       if (!pte_dirty(pte))
+               mas7_3 &= ~(MAS3_SW|MAS3_UW);
+
+       mtspr(SPRN_MAS1, mas1);
+       mtspr(SPRN_MAS2, mas2);
+
+       if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
+               mtspr(SPRN_MAS7_MAS3, mas7_3);
+       } else {
+               if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+                       mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+               mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
+       }
+
+       asm volatile ("tlbwe");
+
+       book3e_tlb_unlock();
+       local_irq_restore(flags);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+       struct hstate *hstate = hstate_file(vma->vm_file);
+       unsigned long tsize = huge_page_shift(hstate) - 10;
+
+       __flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
+}
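For reference (not part of the patch), a tiny standalone sketch of the tsize arithmetic used by book3e_hugetlb_preload() and flush_hugetlb_page() above: the MAS1 TSIZE value is log2 of the page size minus 10, i.e. log2 of the size in KB. The page sizes listed are just examples.

#include <stdio.h>

/* tsize as computed above: __ilog2(psize) - 10, i.e. log2(size in KB) */
static unsigned long hugepage_tsize(unsigned long psize)
{
	unsigned long shift = 0;

	while ((1UL << shift) < psize)	/* __ilog2() for a power-of-two size */
		shift++;
	return shift - 10;
}

int main(void)
{
	unsigned long sizes[] = { 4UL << 20, 16UL << 20, 256UL << 20 };
	int i;

	for (i = 0; i < 3; i++)
		printf("%4lu MB page -> tsize %lu\n",
		       sizes[i] >> 20, hugepage_tsize(sizes[i]));
	return 0;
}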
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
new file mode 100644 (file)
index 0000000..f296c2e
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/dma.h>
+
+#include <mm/mmu_decl.h>
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different
+ */
+int __meminit vmemmap_create_mapping(unsigned long start,
+                                    unsigned long page_size,
+                                    unsigned long phys)
+{
+       /* Create a PTE encoding without page size */
+       unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
+               _PAGE_KERNEL_RW;
+
+       /* PTEs only contain page size encodings up to 32M */
+       BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+
+       /* Encode the size in the PTE */
+       flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+
+       /* For each PTE for that area, map things. Note that we don't
+        * increment phys because all PTEs are of the large size and
+        * thus must have the low bits clear
+        */
+       for (i = 0; i < page_size; i += PAGE_SIZE)
+               BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags)));
+
+       return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+                           unsigned long page_size)
+{
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static __ref void *early_alloc_pgtable(unsigned long size)
+{
+       void *ptr;
+
+       ptr = memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT,
+                                    __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE);
+
+       if (!ptr)
+               panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%lx\n",
+                     __func__, size, size, __pa(MAX_DMA_ADDRESS));
+
+       return ptr;
+}
+
+/*
+ * map_kernel_page is currently only called by __ioremap.
+ * It adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it.
+ */
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
+
+       BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
+       if (slab_is_available()) {
+               pgdp = pgd_offset_k(ea);
+               pudp = pud_alloc(&init_mm, pgdp, ea);
+               if (!pudp)
+                       return -ENOMEM;
+               pmdp = pmd_alloc(&init_mm, pudp, ea);
+               if (!pmdp)
+                       return -ENOMEM;
+               ptep = pte_alloc_kernel(pmdp, ea);
+               if (!ptep)
+                       return -ENOMEM;
+       } else {
+               pgdp = pgd_offset_k(ea);
+#ifndef __PAGETABLE_PUD_FOLDED
+               if (pgd_none(*pgdp)) {
+                       pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+                       pgd_populate(&init_mm, pgdp, pudp);
+               }
+#endif /* !__PAGETABLE_PUD_FOLDED */
+               pudp = pud_offset(pgdp, ea);
+               if (pud_none(*pudp)) {
+                       pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+                       pud_populate(&init_mm, pudp, pmdp);
+               }
+               pmdp = pmd_offset(pudp, ea);
+               if (!pmd_present(*pmdp)) {
+                       ptep = early_alloc_pgtable(PAGE_SIZE);
+                       pmd_populate_kernel(&init_mm, pmdp, ptep);
+               }
+               ptep = pte_offset_kernel(pmdp, ea);
+       }
+       set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
+
+       smp_wmb();
+       return 0;
+}
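A standalone sketch (not part of the patch) of the PTE construction in vmemmap_create_mapping() above: the per-size 'enc' value, which must fit in 4 bits, is shifted into bits 8..11 of the PTE flags on top of the base kernel flags. The flag values below are placeholders, not the real _PAGE_* definitions.

#include <assert.h>
#include <stdio.h>

/* Placeholder bits standing in for _PAGE_PRESENT, _PAGE_ACCESSED and
 * _PAGE_KERNEL_RW; the real values live in the pte headers. */
#define FAKE_PAGE_PRESENT	0x00000001UL
#define FAKE_PAGE_ACCESSED	0x00000004UL
#define FAKE_PAGE_KERNEL_RW	0x00100000UL

/* Mirror of the flag construction above: base kernel flags plus the
 * page-size encoding in bits 8..11 of the PTE. */
static unsigned long vmemmap_pte_flags(unsigned int enc)
{
	assert(enc <= 0xf);	/* PTEs only encode page sizes up to 32M */

	return FAKE_PAGE_PRESENT | FAKE_PAGE_ACCESSED | FAKE_PAGE_KERNEL_RW |
	       ((unsigned long)enc << 8);
}

int main(void)
{
	printf("flags with enc=7: 0x%08lx\n", vmemmap_pte_flags(7));
	return 0;
}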
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
new file mode 100644 (file)
index 0000000..71a1a36
--- /dev/null
@@ -0,0 +1,326 @@
+/*
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
+ * E500 Book E processors.
+ *
+ * Copyright 2004,2010 Freescale Semiconductor, Inc.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include <asm/paca.h>
+
+#include <mm/mmu_decl.h>
+
+unsigned int tlbcam_index;
+
+#define NUM_TLBCAMS    (64)
+struct tlbcam TLBCAM[NUM_TLBCAMS];
+
+struct tlbcamrange {
+       unsigned long start;
+       unsigned long limit;
+       phys_addr_t phys;
+} tlbcam_addrs[NUM_TLBCAMS];
+
+unsigned long tlbcam_sz(int idx)
+{
+       return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
+}
+
+#ifdef CONFIG_FSL_BOOKE
+/*
+ * Return PA for this VA if it is mapped by a CAM, or 0
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+       int b;
+       for (b = 0; b < tlbcam_index; ++b)
+               if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
+                       return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
+       return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+       int b;
+       for (b = 0; b < tlbcam_index; ++b)
+               if (pa >= tlbcam_addrs[b].phys
+                       && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
+                             +tlbcam_addrs[b].phys)
+                       return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
+       return 0;
+}
+#endif
+
+/*
+ * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
+ * in particular size must be a power of 4 between 4k and the max supported by
+ * an implementation; max may further be limited by what can be represented in
+ * an unsigned long (for example, 32-bit implementations cannot support a 4GB
+ * size).
+ */
+static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+               unsigned long size, unsigned long flags, unsigned int pid)
+{
+       unsigned int tsize;
+
+       tsize = __ilog2(size) - 10;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+       if ((flags & _PAGE_NO_CACHE) == 0)
+               flags |= _PAGE_COHERENT;
+#endif
+
+       TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
+       TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
+       TLBCAM[index].MAS2 = virt & PAGE_MASK;
+
+       TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
+       TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
+       TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
+       TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
+       TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
+
+       TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR;
+       TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
+       if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+               TLBCAM[index].MAS7 = (u64)phys >> 32;
+
+       /* Below is unlikely -- only for large user pages or similar */
+       if (pte_user(__pte(flags))) {
+          TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
+          TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
+       }
+
+       tlbcam_addrs[index].start = virt;
+       tlbcam_addrs[index].limit = virt + size - 1;
+       tlbcam_addrs[index].phys = phys;
+}
+
+unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+                         phys_addr_t phys)
+{
+       unsigned int camsize = __ilog2(ram);
+       unsigned int align = __ffs(virt | phys);
+       unsigned long max_cam;
+
+       if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+               /* Convert (4^max) kB to (2^max) bytes */
+               max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
+               camsize &= ~1U;
+               align &= ~1U;
+       } else {
+               /* Convert (2^max) kB to (2^max) bytes */
+               max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
+       }
+
+       if (camsize > align)
+               camsize = align;
+       if (camsize > max_cam)
+               camsize = max_cam;
+
+       return 1UL << camsize;
+}
+
+static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
+                                       unsigned long ram, int max_cam_idx,
+                                       bool dryrun)
+{
+       int i;
+       unsigned long amount_mapped = 0;
+
+       /* Calculate CAM values */
+       for (i = 0; ram && i < max_cam_idx; i++) {
+               unsigned long cam_sz;
+
+               cam_sz = calc_cam_sz(ram, virt, phys);
+               if (!dryrun)
+                       settlbcam(i, virt, phys, cam_sz,
+                                 pgprot_val(PAGE_KERNEL_X), 0);
+
+               ram -= cam_sz;
+               amount_mapped += cam_sz;
+               virt += cam_sz;
+               phys += cam_sz;
+       }
+
+       if (dryrun)
+               return amount_mapped;
+
+       loadcam_multi(0, i, max_cam_idx);
+       tlbcam_index = i;
+
+#ifdef CONFIG_PPC64
+       get_paca()->tcd.esel_next = i;
+       get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+       get_paca()->tcd.esel_first = i;
+#endif
+
+       return amount_mapped;
+}
+
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun)
+{
+       unsigned long virt = PAGE_OFFSET;
+       phys_addr_t phys = memstart_addr;
+
+       return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun);
+}
+
+#ifdef CONFIG_PPC32
+
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+       return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+       flush_instruction_cache();
+}
+
+void __init adjust_total_lowmem(void)
+{
+       unsigned long ram;
+       int i;
+
+       /* adjust lowmem size to __max_low_memory */
+       ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
+
+       i = switch_to_as1();
+       __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false);
+       restore_to_as0(i, 0, 0, 1);
+
+       pr_info("Memory CAM mapping: ");
+       for (i = 0; i < tlbcam_index - 1; i++)
+               pr_cont("%lu/", tlbcam_sz(i) >> 20);
+       pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
+               (unsigned int)((total_lowmem - __max_low_memory) >> 20));
+
+       memblock_set_current_limit(memstart_addr + __max_low_memory);
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+                               phys_addr_t first_memblock_size)
+{
+       phys_addr_t limit = first_memblock_base + first_memblock_size;
+
+       /* 64M mapped initially according to head_fsl_booke.S */
+       memblock_set_current_limit(min_t(u64, limit, 0x04000000));
+}
+
+#ifdef CONFIG_RELOCATABLE
+int __initdata is_second_reloc;
+notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
+{
+       unsigned long base = KERNELBASE;
+
+       kernstart_addr = start;
+       if (is_second_reloc) {
+               virt_phys_offset = PAGE_OFFSET - memstart_addr;
+               return;
+       }
+
+       /*
+        * Relocatable kernel support based on processing of dynamic
+        * relocation entries. Before we get the real memstart_addr,
+        * we will compute the virt_phys_offset like this:
+        * virt_phys_offset = stext.run - kernstart_addr
+        *
+        * stext.run = (KERNELBASE & ~0x3ffffff) +
+        *                              (kernstart_addr & 0x3ffffff)
+        * When we relocate, we have :
+        *
+        *      (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
+        *
+        * hence:
+        *  virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
+        *                              (kernstart_addr & ~0x3ffffff)
+        *
+        */
+       start &= ~0x3ffffff;
+       base &= ~0x3ffffff;
+       virt_phys_offset = base - start;
+       early_get_first_memblock_info(__va(dt_ptr), NULL);
+       /*
+        * We now get the memstart_addr, then we should check if this
+        * address is the same as what the PAGE_OFFSET map to now. If
+        * not we have to change the map of PAGE_OFFSET to memstart_addr
+        * and do a second relocation.
+        */
+       if (start != memstart_addr) {
+               int n;
+               long offset = start - memstart_addr;
+
+               is_second_reloc = 1;
+               n = switch_to_as1();
+               /* map a 64M area for the second relocation */
+               if (memstart_addr > start)
+                       map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM,
+                                       false);
+               else
+                       map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
+                                       0x4000000, CONFIG_LOWMEM_CAM_NUM,
+                                       false);
+               restore_to_as0(n, offset, __va(dt_ptr), 1);
+               /* We should never reach here */
+               panic("Relocation error");
+       }
+}
+#endif
+#endif
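To make the CAM sizing above concrete, here is a small standalone sketch (not part of the patch) of the calc_cam_sz() logic: the entry size is the largest power of two not exceeding the remaining RAM, the virt/phys alignment, or the hardware maximum, and on MAV 1.0 implementations the exponent is rounded down to an even value because entry sizes are powers of 4. The 768MB RAM size, the PAGE_OFFSET-style virtual base and the 1GB hardware maximum in main() are illustrative assumptions.

#include <stdio.h>

static unsigned int ilog2_ul(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* Mirror of calc_cam_sz(): clamp the size exponent to the virt/phys
 * alignment and to the hardware maximum, keeping it even on MAV 1.0. */
static unsigned long cam_sz(unsigned long ram, unsigned long virt,
			    unsigned long phys, unsigned int max_cam, int mav1)
{
	unsigned int camsize = ilog2_ul(ram);
	unsigned int align = ilog2_ul((virt | phys) & ~((virt | phys) - 1));

	if (mav1) {
		camsize &= ~1U;
		align &= ~1U;
	}
	if (camsize > align)
		camsize = align;
	if (camsize > max_cam)
		camsize = max_cam;

	return 1UL << camsize;
}

int main(void)
{
	/* 768MB still to map at virt 0xc0000000, phys 0, 1GB max entry, MAV 1.0 */
	unsigned long sz = cam_sz(768UL << 20, 0xc0000000UL, 0, 30, 1);

	printf("first CAM entry: %lu MB\n", sz >> 20);
	return 0;
}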
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
new file mode 100644 (file)
index 0000000..ae4505d
--- /dev/null
@@ -0,0 +1,497 @@
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU is not using the hash
+ * table, such as 8xx, 4xx, BookE's etc...
+ *
+ * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
+ *                IBM Corp.
+ *
+ *  Derived from previous arch/powerpc/mm/mmu_context.c
+ *  and arch/powerpc/include/asm/mmu_context.h
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * TODO:
+ *
+ *   - The global context lock will not scale very well
+ *   - The maps should be dynamically allocated to allow for processors
+ *     that support more PID bits at runtime
+ *   - Implement flush_tlb_mm() by making the context stale and picking
+ *     a new one
+ *   - More aggressively clear stale map bits and maybe find some way to
+ *     also clear mm->cpu_vm_mask bits when processes are migrated
+ */
+
+//#define DEBUG_MAP_CONSISTENCY
+//#define DEBUG_CLAMP_LAST_CONTEXT   31
+//#define DEBUG_HARDER
+
+/* We don't use DEBUG because it tends to always be compiled in nowadays,
+ * and this would generate way too much output.
+ */
+#ifdef DEBUG_HARDER
+#define pr_hard(args...)       printk(KERN_DEBUG args)
+#define pr_hardcont(args...)   printk(KERN_CONT args)
+#else
+#define pr_hard(args...)       do { } while(0)
+#define pr_hardcont(args...)   do { } while(0)
+#endif
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/memblock.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
+ * A better way would be to keep track of tasks that own contexts, and implement
+ * an LRU usage. That way very active tasks don't always have to pay the TLB
+ * reload overhead. The kernel pages are mapped shared, so the kernel can run on
+ * behalf of any task that makes a kernel entry. Shared does not mean they are
+ * not protected, just that the ASID comparison is not performed. -- Dan
+ *
+ * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
+ * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
+ * is disabled, so we can use a TID of zero to represent all kernel pages as
+ * shared among all contexts. -- Dan
+ *
+ * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
+ * normally never have to steal, though the facility is present if needed.
+ * -- BenH
+ */
+#define FIRST_CONTEXT 1
+#ifdef DEBUG_CLAMP_LAST_CONTEXT
+#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
+#elif defined(CONFIG_PPC_8xx)
+#define LAST_CONTEXT 16
+#elif defined(CONFIG_PPC_47x)
+#define LAST_CONTEXT 65535
+#else
+#define LAST_CONTEXT 255
+#endif
+
+static unsigned int next_context, nr_free_contexts;
+static unsigned long *context_map;
+#ifdef CONFIG_SMP
+static unsigned long *stale_map[NR_CPUS];
+#endif
+static struct mm_struct **context_mm;
+static DEFINE_RAW_SPINLOCK(context_lock);
+
+#define CTX_MAP_SIZE   \
+       (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
+
+
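As a quick standalone illustration (not part of the patch) of what CTX_MAP_SIZE above works out to for the different LAST_CONTEXT values: one bit per context id, rounded up to whole unsigned longs. A 32-bit long, matching the ppc32 targets, is an assumption of the example.

#include <stdio.h>

/* Mirror of CTX_MAP_SIZE above, with 32-bit longs assumed: one bit per
 * context id 0..last_context, rounded up to a whole number of longs. */
static unsigned long ctx_map_size(unsigned long last_context)
{
	return 4 * (last_context / 32 + 1);
}

int main(void)
{
	printf("8xx  (LAST_CONTEXT 16):    %lu bytes\n", ctx_map_size(16));
	printf("44x  (LAST_CONTEXT 255):   %lu bytes\n", ctx_map_size(255));
	printf("47x  (LAST_CONTEXT 65535): %lu bytes\n", ctx_map_size(65535));
	return 0;
}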
+/* Steal a context from a task that has one at the moment.
+ *
+ * This is used when we are running out of available PID numbers
+ * on the processors.
+ *
+ * This isn't an LRU system, it just frees up each context in
+ * turn (sort-of pseudo-random replacement :).  This would be the
+ * place to implement an LRU scheme if anyone was motivated to do it.
+ *  -- paulus
+ *
+ * For context stealing, we use a slightly different approach for
+ * SMP and UP. Basically, the UP one is simpler and doesn't use
+ * the stale map as we can just flush the local CPU
+ *  -- benh
+ */
+#ifdef CONFIG_SMP
+static unsigned int steal_context_smp(unsigned int id)
+{
+       struct mm_struct *mm;
+       unsigned int cpu, max, i;
+
+       max = LAST_CONTEXT - FIRST_CONTEXT;
+
+       /* Attempt to free next_context first and then loop until we manage */
+       while (max--) {
+               /* Pick up the victim mm */
+               mm = context_mm[id];
+
+               /* We have a candidate victim; check if it's active, since on
+                * SMP we cannot steal active contexts.
+                */
+               if (mm->context.active) {
+                       id++;
+                       if (id > LAST_CONTEXT)
+                               id = FIRST_CONTEXT;
+                       continue;
+               }
+               pr_hardcont(" | steal %d from 0x%p", id, mm);
+
+               /* Mark this mm as having no context anymore */
+               mm->context.id = MMU_NO_CONTEXT;
+
+               /* Mark it stale on all CPUs that used this mm. For threaded
+                * implementations, we set it on all threads on each core
+                * represented in the mask. A future implementation will use
+                * a core map instead but this will do for now.
+                */
+               for_each_cpu(cpu, mm_cpumask(mm)) {
+                       for (i = cpu_first_thread_sibling(cpu);
+                            i <= cpu_last_thread_sibling(cpu); i++) {
+                               if (stale_map[i])
+                                       __set_bit(id, stale_map[i]);
+                       }
+                       cpu = i - 1;
+               }
+               return id;
+       }
+
+       /* This will happen if you have more CPUs than available contexts;
+        * all we can do here is wait a bit and try again.
+        */
+       raw_spin_unlock(&context_lock);
+       cpu_relax();
+       raw_spin_lock(&context_lock);
+
+       /* This will cause the caller to try again */
+       return MMU_NO_CONTEXT;
+}
+#endif  /* CONFIG_SMP */
+
+static unsigned int steal_all_contexts(void)
+{
+       struct mm_struct *mm;
+#ifdef CONFIG_SMP
+       int cpu = smp_processor_id();
+#endif
+       unsigned int id;
+
+       for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+               /* Pick up the victim mm */
+               mm = context_mm[id];
+
+               pr_hardcont(" | steal %d from 0x%p", id, mm);
+
+               /* Mark this mm as having no context anymore */
+               mm->context.id = MMU_NO_CONTEXT;
+               if (id != FIRST_CONTEXT) {
+                       context_mm[id] = NULL;
+                       __clear_bit(id, context_map);
+#ifdef DEBUG_MAP_CONSISTENCY
+                       mm->context.active = 0;
+#endif
+               }
+#ifdef CONFIG_SMP
+               __clear_bit(id, stale_map[cpu]);
+#endif
+       }
+
+       /* Flush the TLB for all contexts (not to be used on SMP) */
+       _tlbil_all();
+
+       nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
+
+       return FIRST_CONTEXT;
+}
+
+/* Note that this will also be called on SMP if all other CPUs are
+ * offlined, which means that it may be called for cpu != 0. For
+ * this to work, we somewhat assume that CPUs that are onlined
+ * come up with a fully clean TLB (or are cleaned when offlined)
+ */
+static unsigned int steal_context_up(unsigned int id)
+{
+       struct mm_struct *mm;
+#ifdef CONFIG_SMP
+       int cpu = smp_processor_id();
+#endif
+
+       /* Pick up the victim mm */
+       mm = context_mm[id];
+
+       pr_hardcont(" | steal %d from 0x%p", id, mm);
+
+       /* Flush the TLB for that context */
+       local_flush_tlb_mm(mm);
+
+       /* Mark this mm as having no context anymore */
+       mm->context.id = MMU_NO_CONTEXT;
+
+       /* XXX This clear should ultimately be part of local_flush_tlb_mm */
+#ifdef CONFIG_SMP
+       __clear_bit(id, stale_map[cpu]);
+#endif
+
+       return id;
+}
+
+#ifdef DEBUG_MAP_CONSISTENCY
+static void context_check_map(void)
+{
+       unsigned int id, nrf, nact;
+
+       nrf = nact = 0;
+       for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+               int used = test_bit(id, context_map);
+               if (!used)
+                       nrf++;
+               if (used != (context_mm[id] != NULL))
+                       pr_err("MMU: Context %d is %s and MM is %p !\n",
+                              id, used ? "used" : "free", context_mm[id]);
+               if (context_mm[id] != NULL)
+                       nact += context_mm[id]->context.active;
+       }
+       if (nrf != nr_free_contexts) {
+               pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
+                      nr_free_contexts, nrf);
+               nr_free_contexts = nrf;
+       }
+       if (nact > num_online_cpus())
+               pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
+                      nact, num_online_cpus());
+       if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
+               pr_err("MMU: Context 0 has been freed !!!\n");
+}
+#else
+static void context_check_map(void) { }
+#endif
+
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+                       struct task_struct *tsk)
+{
+       unsigned int id;
+#ifdef CONFIG_SMP
+       unsigned int i, cpu = smp_processor_id();
+#endif
+       unsigned long *map;
+
+       /* No lockless fast path .. yet */
+       raw_spin_lock(&context_lock);
+
+       pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
+               cpu, next, next->context.active, next->context.id);
+
+#ifdef CONFIG_SMP
+       /* Mark us active and the previous one not anymore */
+       next->context.active++;
+       if (prev) {
+               pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
+               WARN_ON(prev->context.active < 1);
+               prev->context.active--;
+       }
+
+ again:
+#endif /* CONFIG_SMP */
+
+       /* If we already have a valid assigned context, skip all that */
+       id = next->context.id;
+       if (likely(id != MMU_NO_CONTEXT)) {
+#ifdef DEBUG_MAP_CONSISTENCY
+               if (context_mm[id] != next)
+                       pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
+                              next, id, id, context_mm[id]);
+#endif
+               goto ctxt_ok;
+       }
+
+       /* We really don't have a context, let's try to acquire one */
+       id = next_context;
+       if (id > LAST_CONTEXT)
+               id = FIRST_CONTEXT;
+       map = context_map;
+
+       /* No more free contexts, let's try to steal one */
+       if (nr_free_contexts == 0) {
+#ifdef CONFIG_SMP
+               if (num_online_cpus() > 1) {
+                       id = steal_context_smp(id);
+                       if (id == MMU_NO_CONTEXT)
+                               goto again;
+                       goto stolen;
+               }
+#endif /* CONFIG_SMP */
+               if (IS_ENABLED(CONFIG_PPC_8xx))
+                       id = steal_all_contexts();
+               else
+                       id = steal_context_up(id);
+               goto stolen;
+       }
+       nr_free_contexts--;
+
+       /* We know there's at least one free context, try to find it */
+       while (__test_and_set_bit(id, map)) {
+               id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
+               if (id > LAST_CONTEXT)
+                       id = FIRST_CONTEXT;
+       }
+ stolen:
+       next_context = id + 1;
+       context_mm[id] = next;
+       next->context.id = id;
+       pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
+
+       context_check_map();
+ ctxt_ok:
+
+       /* If that context got marked stale on this CPU, then flush the
+        * local TLB for it and unmark it before we use it
+        */
+#ifdef CONFIG_SMP
+       if (test_bit(id, stale_map[cpu])) {
+               pr_hardcont(" | stale flush %d [%d..%d]",
+                           id, cpu_first_thread_sibling(cpu),
+                           cpu_last_thread_sibling(cpu));
+
+               local_flush_tlb_mm(next);
+
+               /* XXX This clear should ultimately be part of local_flush_tlb_mm */
+               for (i = cpu_first_thread_sibling(cpu);
+                    i <= cpu_last_thread_sibling(cpu); i++) {
+                       if (stale_map[i])
+                               __clear_bit(id, stale_map[i]);
+               }
+       }
+#endif
+
+       /* Flick the MMU and release lock */
+       pr_hardcont(" -> %d\n", id);
+       set_context(id, next->pgd);
+       raw_spin_unlock(&context_lock);
+}
+
+/*
+ * Set up the context for a new address space.
+ */
+int init_new_context(struct task_struct *t, struct mm_struct *mm)
+{
+       pr_hard("initing context for mm @%p\n", mm);
+
+       /*
+        * We have MMU_NO_CONTEXT set to be ~0. Hence check
+        * explicitly against context.id == 0. This ensures that we properly
+        * initialize context slice details for newly allocated mm's (which will
+        * have id == 0) and don't alter context slice inherited via fork (which
+        * will have id != 0).
+        */
+       if (mm->context.id == 0)
+               slice_init_new_context_exec(mm);
+       mm->context.id = MMU_NO_CONTEXT;
+       mm->context.active = 0;
+       pte_frag_set(&mm->context, NULL);
+       return 0;
+}
+
+/*
+ * We're finished using the context for an address space.
+ */
+void destroy_context(struct mm_struct *mm)
+{
+       unsigned long flags;
+       unsigned int id;
+
+       if (mm->context.id == MMU_NO_CONTEXT)
+               return;
+
+       WARN_ON(mm->context.active != 0);
+
+       raw_spin_lock_irqsave(&context_lock, flags);
+       id = mm->context.id;
+       if (id != MMU_NO_CONTEXT) {
+               __clear_bit(id, context_map);
+               mm->context.id = MMU_NO_CONTEXT;
+#ifdef DEBUG_MAP_CONSISTENCY
+               mm->context.active = 0;
+#endif
+               context_mm[id] = NULL;
+               nr_free_contexts++;
+       }
+       raw_spin_unlock_irqrestore(&context_lock, flags);
+}
+
+#ifdef CONFIG_SMP
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
+{
+       /* We don't touch the boot CPU map, it's allocated at boot and kept
+        * around forever
+        */
+       if (cpu == boot_cpuid)
+               return 0;
+
+       pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
+       stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+       return 0;
+}
+
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+       if (cpu == boot_cpuid)
+               return 0;
+
+       pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
+       kfree(stale_map[cpu]);
+       stale_map[cpu] = NULL;
+
+       /* We also clear the cpu_vm_mask bits of CPUs going away */
+       clear_tasks_mm_cpumask(cpu);
+#endif
+       return 0;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Initialize the context management stuff.
+ */
+void __init mmu_context_init(void)
+{
+       /* Mark init_mm as being active on all possible CPUs since
+        * we'll get called with prev == init_mm the first time
+        * we schedule on a given CPU
+        */
+       init_mm.context.active = NR_CPUS;
+
+       /*
+        * Allocate the maps used by context management
+        */
+       context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+       if (!context_map)
+               panic("%s: Failed to allocate %zu bytes\n", __func__,
+                     CTX_MAP_SIZE);
+       context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
+                                   SMP_CACHE_BYTES);
+       if (!context_mm)
+               panic("%s: Failed to allocate %zu bytes\n", __func__,
+                     sizeof(void *) * (LAST_CONTEXT + 1));
+#ifdef CONFIG_SMP
+       stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+       if (!stale_map[boot_cpuid])
+               panic("%s: Failed to allocate %zu bytes\n", __func__,
+                     CTX_MAP_SIZE);
+
+       cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+                                 "powerpc/mmu/ctx:prepare",
+                                 mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
+#endif
+
+       printk(KERN_INFO
+              "MMU: Allocated %zu bytes of context maps for %d contexts\n",
+              2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
+              LAST_CONTEXT - FIRST_CONTEXT + 1);
+
+       /*
+        * Some processors have too few contexts to reserve one for
+        * init_mm, and require using context 0 for a normal task.
+        * Other processors reserve the use of context zero for the kernel.
+        * This code assumes FIRST_CONTEXT < 32.
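+        * For example, FIRST_CONTEXT == 1 gives (1 << 1) - 1 = 0x1, which
+        * reserves context 0 for the kernel; FIRST_CONTEXT == 0 reserves
+        * nothing.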
+        */
+       context_map[0] = (1 << FIRST_CONTEXT) - 1;
+       next_context = FIRST_CONTEXT;
+       nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
+}
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
new file mode 100644 (file)
index 0000000..704e613
--- /dev/null
@@ -0,0 +1,810 @@
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU does not use a hash table to store virtual to
+ * physical translations (i.e. SW loaded TLBs or Book3E compliant
+ * processors; this does -not- include 603 however, which shares the
+ * implementation with hash based processors)
+ *
+ *  -- BenH
+ *
+ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
+ *                     IBM Corp.
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/hugetlb.h>
+
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/code-patching.h>
+#include <asm/cputhreads.h>
+#include <asm/hugetlb.h>
+#include <asm/paca.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * This struct lists the sw-supported page sizes.  The hardware MMU may support
+ * other sizes not listed here.   The .ind field is only used on MMUs that have
+ * indirect page table entries.
+ */
+#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx)
+#ifdef CONFIG_PPC_FSL_BOOK3E
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+       [MMU_PAGE_4K] = {
+               .shift  = 12,
+               .enc    = BOOK3E_PAGESZ_4K,
+       },
+       [MMU_PAGE_2M] = {
+               .shift  = 21,
+               .enc    = BOOK3E_PAGESZ_2M,
+       },
+       [MMU_PAGE_4M] = {
+               .shift  = 22,
+               .enc    = BOOK3E_PAGESZ_4M,
+       },
+       [MMU_PAGE_16M] = {
+               .shift  = 24,
+               .enc    = BOOK3E_PAGESZ_16M,
+       },
+       [MMU_PAGE_64M] = {
+               .shift  = 26,
+               .enc    = BOOK3E_PAGESZ_64M,
+       },
+       [MMU_PAGE_256M] = {
+               .shift  = 28,
+               .enc    = BOOK3E_PAGESZ_256M,
+       },
+       [MMU_PAGE_1G] = {
+               .shift  = 30,
+               .enc    = BOOK3E_PAGESZ_1GB,
+       },
+};
+#elif defined(CONFIG_PPC_8xx)
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+       /* we only manage 4k and 16k pages as normal pages */
+#ifdef CONFIG_PPC_4K_PAGES
+       [MMU_PAGE_4K] = {
+               .shift  = 12,
+       },
+#else
+       [MMU_PAGE_16K] = {
+               .shift  = 14,
+       },
+#endif
+       [MMU_PAGE_512K] = {
+               .shift  = 19,
+       },
+       [MMU_PAGE_8M] = {
+               .shift  = 23,
+       },
+};
+#else
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+       [MMU_PAGE_4K] = {
+               .shift  = 12,
+               .ind    = 20,
+               .enc    = BOOK3E_PAGESZ_4K,
+       },
+       [MMU_PAGE_16K] = {
+               .shift  = 14,
+               .enc    = BOOK3E_PAGESZ_16K,
+       },
+       [MMU_PAGE_64K] = {
+               .shift  = 16,
+               .ind    = 28,
+               .enc    = BOOK3E_PAGESZ_64K,
+       },
+       [MMU_PAGE_1M] = {
+               .shift  = 20,
+               .enc    = BOOK3E_PAGESZ_1M,
+       },
+       [MMU_PAGE_16M] = {
+               .shift  = 24,
+               .ind    = 36,
+               .enc    = BOOK3E_PAGESZ_16M,
+       },
+       [MMU_PAGE_256M] = {
+               .shift  = 28,
+               .enc    = BOOK3E_PAGESZ_256M,
+       },
+       [MMU_PAGE_1G] = {
+               .shift  = 30,
+               .enc    = BOOK3E_PAGESZ_1GB,
+       },
+};
+#endif /* CONFIG_PPC_FSL_BOOK3E */
+
+static inline int mmu_get_tsize(int psize)
+{
+       return mmu_psize_defs[psize].enc;
+}
+#else
+static inline int mmu_get_tsize(int psize)
+{
+       /* This isn't used on !Book3E for now */
+       return 0;
+}
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+/* The variables below are currently only used on 64-bit Book3E
+ * though this will probably be made common with other nohash
+ * implementations at some point
+ */
+#ifdef CONFIG_PPC64
+
+int mmu_linear_psize;          /* Page size used for the linear mapping */
+int mmu_pte_psize;             /* Page size used for PTE pages */
+int mmu_vmemmap_psize;         /* Page size used for the virtual mem map */
+int book3e_htw_mode;           /* HW tablewalk?  Value is PPC_HTW_* */
+unsigned long linear_map_top;  /* Top of linear mapping */
+
+
+/*
+ * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
+ * exceptions.  This is used for bolted and e6500 TLB miss handlers which
+ * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
+ * this is set to zero.
+ */
+int extlb_level_exc;
+
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
+DEFINE_PER_CPU(int, next_tlbcam_idx);
+EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
+#endif
+
+/*
+ * Base TLB flushing operations:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ *  - local_* variants of page and mm only apply to the current
+ *    processor
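+ *
+ *  On UP builds the non-local mm/page variants are provided by the
+ *  tlbflush.h header as aliases of the local ones.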
+ */
+
+/*
+ * These are the base non-SMP variants of page and mm flushing
+ */
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+       unsigned int pid;
+
+       preempt_disable();
+       pid = mm->context.id;
+       if (pid != MMU_NO_CONTEXT)
+               _tlbil_pid(pid);
+       preempt_enable();
+}
+EXPORT_SYMBOL(local_flush_tlb_mm);
+
+void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+                           int tsize, int ind)
+{
+       unsigned int pid;
+
+       preempt_disable();
+       pid = mm ? mm->context.id : 0;
+       if (pid != MMU_NO_CONTEXT)
+               _tlbil_va(vmaddr, pid, tsize, ind);
+       preempt_enable();
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+       __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+                              mmu_get_tsize(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page);
+
+/*
+ * And here are the SMP non-local implementations
+ */
+#ifdef CONFIG_SMP
+
+static DEFINE_RAW_SPINLOCK(tlbivax_lock);
+
+struct tlb_flush_param {
+       unsigned long addr;
+       unsigned int pid;
+       unsigned int tsize;
+       unsigned int ind;
+};
+
+static void do_flush_tlb_mm_ipi(void *param)
+{
+       struct tlb_flush_param *p = param;
+
+       _tlbil_pid(p ? p->pid : 0);
+}
+
+static void do_flush_tlb_page_ipi(void *param)
+{
+       struct tlb_flush_param *p = param;
+
+       _tlbil_va(p->addr, p->pid, p->tsize, p->ind);
+}
+
+
+/* Note on invalidations and PID:
+ *
+ * We snapshot the PID with preempt disabled. At this point, it can still
+ * change either because:
+ * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
+ * - we are invalidating some target that isn't currently running here
+ *   and is concurrently acquiring a new PID on another CPU
+ * - some other CPU is re-acquiring a lost PID for this mm
+ * etc...
+ *
+ * However, this shouldn't be a problem as we only guarantee
+ * invalidation of TLB entries present prior to this call, so we
+ * don't care about the PID changing, and invalidating a stale PID
+ * is generally harmless.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+       unsigned int pid;
+
+       preempt_disable();
+       pid = mm->context.id;
+       if (unlikely(pid == MMU_NO_CONTEXT))
+               goto no_context;
+       if (!mm_is_core_local(mm)) {
+               struct tlb_flush_param p = { .pid = pid };
+               /* Ignores smp_processor_id() even if set. */
+               smp_call_function_many(mm_cpumask(mm),
+                                      do_flush_tlb_mm_ipi, &p, 1);
+       }
+       _tlbil_pid(pid);
+ no_context:
+       preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_mm);
+
+void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+                     int tsize, int ind)
+{
+       struct cpumask *cpu_mask;
+       unsigned int pid;
+
+       /*
+        * This function as well as __local_flush_tlb_page() must only be called
+        * for user contexts.
+        */
+       if (WARN_ON(!mm))
+               return;
+
+       preempt_disable();
+       pid = mm->context.id;
+       if (unlikely(pid == MMU_NO_CONTEXT))
+               goto bail;
+       cpu_mask = mm_cpumask(mm);
+       if (!mm_is_core_local(mm)) {
+               /* If broadcast tlbivax is supported, use it */
+               if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
+                       int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
+                       if (lock)
+                               raw_spin_lock(&tlbivax_lock);
+                       _tlbivax_bcast(vmaddr, pid, tsize, ind);
+                       if (lock)
+                               raw_spin_unlock(&tlbivax_lock);
+                       goto bail;
+               } else {
+                       struct tlb_flush_param p = {
+                               .pid = pid,
+                               .addr = vmaddr,
+                               .tsize = tsize,
+                               .ind = ind,
+                       };
+                       /* Ignores smp_processor_id() even if set in cpu_mask */
+                       smp_call_function_many(cpu_mask,
+                                              do_flush_tlb_page_ipi, &p, 1);
+               }
+       }
+       _tlbil_va(vmaddr, pid, tsize, ind);
+ bail:
+       preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+       if (vma && is_vm_hugetlb_page(vma))
+               flush_hugetlb_page(vma, vmaddr);
+#endif
+
+       __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+                        mmu_get_tsize(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC_47x
+void __init early_init_mmu_47x(void)
+{
+#ifdef CONFIG_SMP
+       unsigned long root = of_get_flat_dt_root();
+       if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
+               mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
+#endif /* CONFIG_SMP */
+}
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Flush kernel TLB entries in the given range
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_SMP
+       preempt_disable();
+       smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
+       _tlbil_pid(0);
+       preempt_enable();
+#else
+       _tlbil_pid(0);
+#endif
+}
+EXPORT_SYMBOL(flush_tlb_kernel_range);
+
+/*
+ * Currently, for range flushing, we just do a full mm flush. This should
+ * be optimized based on a threshold on the size of the range, since
+ * some implementations can stack multiple tlbivax before a tlbsync, but
+ * for now we keep it that way
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+                    unsigned long end)
+{
+       if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
+               flush_tlb_page(vma, start);
+       else
+               flush_tlb_mm(vma->vm_mm);
+}
+EXPORT_SYMBOL(flush_tlb_range);
+
+void tlb_flush(struct mmu_gather *tlb)
+{
+       flush_tlb_mm(tlb->mm);
+}
+
+/*
+ * Below are functions specific to the 64-bit variant of Book3E though that
+ * may change in the future
+ */
+
+#ifdef CONFIG_PPC64
+
+/*
+ * Handling of virtual linear page tables or indirect TLB entries
+ * flushing when PTE pages are freed
+ */
+void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+       int tsize = mmu_psize_defs[mmu_pte_psize].enc;
+
+       if (book3e_htw_mode != PPC_HTW_NONE) {
+               unsigned long start = address & PMD_MASK;
+               unsigned long end = address + PMD_SIZE;
+               unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+
+               /* This isn't optimal, ideally we would factor out the
+                * whole preempt & CPU mask mucking around, or even the IPI,
+                * but it will do for now
+                */
+               while (start < end) {
+                       __flush_tlb_page(tlb->mm, start, tsize, 1);
+                       start += size;
+               }
+       } else {
+               unsigned long rmask = 0xf000000000000000ul;
+               unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+               unsigned long vpte = address & ~rmask;
+
+#ifdef CONFIG_PPC_64K_PAGES
+               vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
+#else
+               vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
+#endif
+               vpte |= rid;
+               __flush_tlb_page(tlb->mm, vpte, tsize, 0);
+       }
+}
+
+static void setup_page_sizes(void)
+{
+       unsigned int tlb0cfg;
+       unsigned int tlb0ps;
+       unsigned int eptcfg;
+       int i, psize;
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+       int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
+
+       if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+               unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
+               unsigned int min_pg, max_pg;
+
+               min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
+               max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
+
+               for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+                       struct mmu_psize_def *def;
+                       unsigned int shift;
+
+                       def = &mmu_psize_defs[psize];
+                       shift = def->shift;
+
+                       if (shift == 0 || shift & 1)
+                               continue;
+
+                       /* adjust to be in terms of 4^shift Kb */
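+                       /* e.g. 4MB pages: shift 22, (22 - 10) >> 1 = 6 and 4^6 KB = 4MB */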
+                       shift = (shift - 10) >> 1;
+
+                       if ((shift >= min_pg) && (shift <= max_pg))
+                               def->flags |= MMU_PAGE_SIZE_DIRECT;
+               }
+
+               goto out;
+       }
+
+       if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
+               u32 tlb1cfg, tlb1ps;
+
+               tlb0cfg = mfspr(SPRN_TLB0CFG);
+               tlb1cfg = mfspr(SPRN_TLB1CFG);
+               tlb1ps = mfspr(SPRN_TLB1PS);
+               eptcfg = mfspr(SPRN_EPTCFG);
+
+               if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
+                       book3e_htw_mode = PPC_HTW_E6500;
+
+               /*
+                * We expect 4K subpage size and unrestricted indirect size.
+                * The lack of a restriction on indirect size is a Freescale
+                * extension, indicated by PSn = 0 but SPSn != 0.
+                */
+               if (eptcfg != 2)
+                       book3e_htw_mode = PPC_HTW_NONE;
+
+               for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+                       struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+                       if (!def->shift)
+                               continue;
+
+                       if (tlb1ps & (1U << (def->shift - 10))) {
+                               def->flags |= MMU_PAGE_SIZE_DIRECT;
+
+                               if (book3e_htw_mode && psize == MMU_PAGE_2M)
+                                       def->flags |= MMU_PAGE_SIZE_INDIRECT;
+                       }
+               }
+
+               goto out;
+       }
+#endif
+
+       tlb0cfg = mfspr(SPRN_TLB0CFG);
+       tlb0ps = mfspr(SPRN_TLB0PS);
+       eptcfg = mfspr(SPRN_EPTCFG);
+
+       /* Look for supported direct sizes */
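+       /* Each bit n set in TLBnPS advertises a 2^n KB direct page size,
+        * hence the shift - 10 conversion from a byte shift to a KB shift
+        * below.
+        */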
+       for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+               struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+               if (tlb0ps & (1U << (def->shift - 10)))
+                       def->flags |= MMU_PAGE_SIZE_DIRECT;
+       }
+
+       /* Indirect page sizes supported ? */
+       if ((tlb0cfg & TLBnCFG_IND) == 0 ||
+           (tlb0cfg & TLBnCFG_PT) == 0)
+               goto out;
+
+       book3e_htw_mode = PPC_HTW_IBM;
+
+       /* Now, we only deal with one IND page size for each
+        * direct size. Hopefully all implementations today are
+        * unambiguous, but we might want to be careful in the
+        * future.
+        */
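+       /* EPTCFG packs up to three (SPSn, PSn) pairs of 5-bit fields, each
+        * encoding log2 of a size in KB: PSn is an indirect page size and
+        * SPSn the sub-page size it translates.
+        */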
+       for (i = 0; i < 3; i++) {
+               unsigned int ps, sps;
+
+               sps = eptcfg & 0x1f;
+               eptcfg >>= 5;
+               ps = eptcfg & 0x1f;
+               eptcfg >>= 5;
+               if (!ps || !sps)
+                       continue;
+               for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+                       struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+                       if (ps == (def->shift - 10))
+                               def->flags |= MMU_PAGE_SIZE_INDIRECT;
+                       if (sps == (def->shift - 10))
+                               def->ind = ps + 10;
+               }
+       }
+
+out:
+       /* Cleanup array and print summary */
+       pr_info("MMU: Supported page sizes\n");
+       for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+               struct mmu_psize_def *def = &mmu_psize_defs[psize];
+               const char *__page_type_names[] = {
+                       "unsupported",
+                       "direct",
+                       "indirect",
+                       "direct & indirect"
+               };
+               if (def->flags == 0) {
+                       def->shift = 0; 
+                       continue;
+               }
+               pr_info("  %8ld KB as %s\n", 1ul << (def->shift - 10),
+                       __page_type_names[def->flags & 0x3]);
+       }
+}
+
+static void setup_mmu_htw(void)
+{
+       /*
+        * If we want to use HW tablewalk, enable it by patching the TLB miss
+        * handlers to branch to the one dedicated to it.
+        */
+
+       switch (book3e_htw_mode) {
+       case PPC_HTW_IBM:
+               patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
+               patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
+               break;
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       case PPC_HTW_E6500:
+               extlb_level_exc = EX_TLB_SIZE;
+               patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
+               patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
+               break;
+#endif
+       }
+       pr_info("MMU: Book3E HW tablewalk %s\n",
+               book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void early_init_this_mmu(void)
+{
+       unsigned int mas4;
+
+       /* Set MAS4 based on page table setting */
+
+       mas4 = 0x4 << MAS4_WIMGED_SHIFT;
+       switch (book3e_htw_mode) {
+       case PPC_HTW_E6500:
+               mas4 |= MAS4_INDD;
+               mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
+               mas4 |= MAS4_TLBSELD(1);
+               mmu_pte_psize = MMU_PAGE_2M;
+               break;
+
+       case PPC_HTW_IBM:
+               mas4 |= MAS4_INDD;
+#ifdef CONFIG_PPC_64K_PAGES
+               mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
+               mmu_pte_psize = MMU_PAGE_256M;
+#else
+               mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
+               mmu_pte_psize = MMU_PAGE_1M;
+#endif
+               break;
+
+       case PPC_HTW_NONE:
+#ifdef CONFIG_PPC_64K_PAGES
+               mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
+#else
+               mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+#endif
+               mmu_pte_psize = mmu_virtual_psize;
+               break;
+       }
+       mtspr(SPRN_MAS4, mas4);
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+               unsigned int num_cams;
+               int __maybe_unused cpu = smp_processor_id();
+               bool map = true;
+
+               /* use a quarter of the TLBCAM for bolted linear map */
+               num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+               /*
+                * Only do the mapping once per core, or else the
+                * transient mapping would cause problems.
+                */
+#ifdef CONFIG_SMP
+               if (hweight32(get_tensr()) > 1)
+                       map = false;
+#endif
+
+               if (map)
+                       linear_map_top = map_mem_in_cams(linear_map_top,
+                                                        num_cams, false);
+       }
+#endif
+
+       /* A sync won't hurt us after mucking around with
+        * the MMU configuration
+        */
+       mb();
+}
+
+static void __init early_init_mmu_global(void)
+{
+       /* XXX This will have to be decided at runtime, but right
+        * now our boot and TLB miss code hard-wires it. Ideally
+        * we should find out a suitable page size and patch the
+        * TLB miss code (either that or use the PACA to store
+        * the value we want)
+        */
+       mmu_linear_psize = MMU_PAGE_1G;
+
+       /* XXX This should be decided at runtime based on supported
+        * page sizes in the TLB, but for now let's assume 16M is
+        * always there and a good fit (which it probably is)
+        *
+        * Freescale booke only supports 4K pages in TLB0, so use that.
+        */
+       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+               mmu_vmemmap_psize = MMU_PAGE_4K;
+       else
+               mmu_vmemmap_psize = MMU_PAGE_16M;
+
+       /* XXX This code only checks for TLB 0 capabilities and doesn't
+        *     check what page size combos are supported by the HW. It
+        *     also doesn't handle the case where a separate array holds
+        *     the IND entries from the array loaded by the PT.
+        */
+       /* Look for supported page sizes */
+       setup_page_sizes();
+
+       /* Look for HW tablewalk support */
+       setup_mmu_htw();
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+               if (book3e_htw_mode == PPC_HTW_NONE) {
+                       extlb_level_exc = EX_TLB_SIZE;
+                       patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+                       patch_exception(0x1e0,
+                               exc_instruction_tlb_miss_bolted_book3e);
+               }
+       }
+#endif
+
+       /* Set the global containing the top of the linear mapping
+        * for use by the TLB miss code
+        */
+       linear_map_top = memblock_end_of_DRAM();
+}
+
+static void __init early_mmu_set_memory_limit(void)
+{
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+               /*
+                * Limit memory so we don't have linear faults.
+                * Unlike memblock_set_current_limit, which limits
+                * memory available during early boot, this permanently
+                * reduces the memory available to Linux.  We need to
+                * do this because highmem is not supported on 64-bit.
+                */
+               memblock_enforce_memory_limit(linear_map_top);
+       }
+#endif
+
+       memblock_set_current_limit(linear_map_top);
+}
+
+/* boot cpu only */
+void __init early_init_mmu(void)
+{
+       early_init_mmu_global();
+       early_init_this_mmu();
+       early_mmu_set_memory_limit();
+}
+
+void early_init_mmu_secondary(void)
+{
+       early_init_this_mmu();
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+                               phys_addr_t first_memblock_size)
+{
+       /* On non-FSL Embedded 64-bit, we adjust the RMA size to match
+        * the bolted TLB entry. We know for now that only 1G
+        * entries are supported though that may eventually
+        * change.
+        *
+        * On FSL Embedded 64-bit, usually all RAM is bolted, but with
+        * unusual memory sizes it's possible for some RAM to not be mapped
+        * (such RAM is not used at all by Linux, since we don't support
+        * highmem on 64-bit).  We limit ppc64_rma_size to what would be
+        * mappable if this memblock is the only one.  Additional memblocks
+        * can only increase, not decrease, the amount that ends up getting
+        * mapped.  We still limit max to 1G even if we'll eventually map
+        * more.  This is due to what the early init code is set up to do.
+        *
+        * We crop it to the size of the first MEMBLOCK to
+        * avoid going over total available memory just in case...
+        */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+               unsigned long linear_sz;
+               unsigned int num_cams;
+
+               /* use a quarter of the TLBCAM for bolted linear map */
+               num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+               linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
+                                           true);
+
+               ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
+       } else
+#endif
+               ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+
+       /* Finally limit subsequent allocations */
+       memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
+}
+#else /* ! CONFIG_PPC64 */
+void __init early_init_mmu(void)
+{
+#ifdef CONFIG_PPC_47x
+       early_init_mmu_47x();
+#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+#if defined(CONFIG_PPC_8xx)
+       init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
+#endif
+}
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
new file mode 100644 (file)
index 0000000..e066a65
--- /dev/null
@@ -0,0 +1,491 @@
+/*
+ * This file contains low-level functions for performing various
+ * types of TLB invalidations on various processors with no hash
+ * table.
+ *
+ * This file implements the following functions for all no-hash
+ * processors. Some aren't implemented for some variants. Some
+ * are inline in tlbflush.h
+ *
+ *     - tlbil_va
+ *     - tlbil_pid
+ *     - tlbil_all
+ *     - tlbivax_bcast
+ *
+ * Code mostly moved over from misc_32.S
+ *
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Partially rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor.h>
+#include <asm/bug.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#if defined(CONFIG_40x)
+
+/*
+ * 40x implementation needs only tlbil_va
+ */
+_GLOBAL(__tlbil_va)
+       /* We run the search with interrupts disabled because we have to change
+        * the PID and I don't want to preempt when that happens.
+        */
+       mfmsr   r5
+       mfspr   r6,SPRN_PID
+       wrteei  0
+       mtspr   SPRN_PID,r4
+       tlbsx.  r3, 0, r3
+       mtspr   SPRN_PID,r6
+       wrtee   r5
+       bne     1f
+       sync
+       /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
+        * clear. Since 25 is the V bit in the TLB_TAG, loading this value
+        * will invalidate the TLB entry. */
+       tlbwe   r3, r3, TLB_TAG
+       isync
+1:     blr
+
+#elif defined(CONFIG_PPC_8xx)
+
+/*
+ * Nothing to do for 8xx, everything is inline
+ */
+
+#elif defined(CONFIG_44x) /* Includes 47x */
+
+/*
+ * 440 implementation uses tlbsx/we for tlbil_va and a full sweep
+ * of the TLB for everything else.
+ */
+_GLOBAL(__tlbil_va)
+       mfspr   r5,SPRN_MMUCR
+       mfmsr   r10
+
+       /*
+        * We write 16 bits of STID since 47x supports that much; we
+        * should never be passed out-of-bounds values on 440 (hopefully)
+        */
+       rlwimi  r5,r4,0,16,31
+
+       /* We have to run the search with interrupts disabled, otherwise
+        * an interrupt which causes a TLB miss can clobber the MMUCR
+        * between the mtspr and the tlbsx.
+        *
+        * Critical and Machine Check interrupts take care of saving
+        * and restoring MMUCR, so only normal interrupts have to be
+        * taken care of.
+        */
+       wrteei  0
+       mtspr   SPRN_MMUCR,r5
+       tlbsx.  r6,0,r3
+       bne     10f
+       sync
+BEGIN_MMU_FTR_SECTION
+       b       2f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+       /* On 440 there are only 64 TLB entries, so r3 < 64, which means bit
+        * 22 is clear.  Since 22 is the V bit in the TLB_PAGEID, loading this
+        * value will invalidate the TLB entry.
+        */
+       tlbwe   r6,r6,PPC44x_TLB_PAGEID
+       isync
+10:    wrtee   r10
+       blr
+2:
+#ifdef CONFIG_PPC_47x
+       oris    r7,r6,0x8000    /* specify way explicitly */
+       clrrwi  r4,r3,12        /* get an EPN for the hashing with V = 0 */
+       ori     r4,r4,PPC47x_TLBE_SIZE
+       tlbwe   r4,r7,0         /* write it */
+       isync
+       wrtee   r10
+       blr
+#else /* CONFIG_PPC_47x */
+1:     trap
+       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
+#endif /* !CONFIG_PPC_47x */
+
+_GLOBAL(_tlbil_all)
+_GLOBAL(_tlbil_pid)
+BEGIN_MMU_FTR_SECTION
+       b       2f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+       li      r3,0
+       sync
+
+       /* Load high watermark */
+       lis     r4,tlb_44x_hwater@ha
+       lwz     r5,tlb_44x_hwater@l(r4)
+
+1:     tlbwe   r3,r3,PPC44x_TLB_PAGEID
+       addi    r3,r3,1
+       cmpw    0,r3,r5
+       ble     1b
+
+       isync
+       blr
+2:
+#ifdef CONFIG_PPC_47x
+       /* 476 variant. There's no simple way to do this, so hopefully
+        * we'll manage to limit the number of such full invalidates
+        */
+       mfmsr   r11             /* Interrupts off */
+       wrteei  0
+       li      r3,-1           /* Current set */
+       lis     r10,tlb_47x_boltmap@h
+       ori     r10,r10,tlb_47x_boltmap@l
+       lis     r7,0x8000       /* Specify way explicitly */
+
+       b       9f              /* For each set */
+
+1:     li      r9,4            /* Number of ways */
+       li      r4,0            /* Current way */
+       li      r6,0            /* Default entry value 0 */
+       andi.   r0,r8,1         /* Check if way 0 is bolted */
+       mtctr   r9              /* Load way counter */
+       bne-    3f              /* Bolted, skip loading it */
+
+2:     /* For each way */
+       or      r5,r3,r4        /* Make way|index for tlbre */
+       rlwimi  r5,r5,16,8,15   /* Copy index into position */
+       tlbre   r6,r5,0         /* Read entry */
+3:     addis   r4,r4,0x2000    /* Next way */
+       andi.   r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */
+       beq     4f              /* Nope, skip it */
+       rlwimi  r7,r5,0,1,2     /* Insert way number */
+       rlwinm  r6,r6,0,21,19   /* Clear V */
+       tlbwe   r6,r7,0         /* Write it */
+4:     bdnz    2b              /* Loop for each way */
+       srwi    r8,r8,1         /* Next boltmap bit */
+9:     cmpwi   cr1,r3,255      /* Last set done ? */
+       addi    r3,r3,1         /* Next set */
+       beq     cr1,1f          /* End of loop */
+       andi.   r0,r3,0x1f      /* Need to load a new boltmap word ? */
+       bne     1b              /* No, loop */
+       lwz     r8,0(r10)       /* Load boltmap entry */
+       addi    r10,r10,4       /* Next word */
+       b       1b              /* Then loop */
+1:     isync                   /* Sync shadows */
+       wrtee   r11
+#else /* CONFIG_PPC_47x */
+1:     trap
+       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
+#endif /* !CONFIG_PPC_47x */
+       blr
+
+#ifdef CONFIG_PPC_47x
+
+/*
+ * _tlbivax_bcast is only on 47x. We don't bother doing a runtime
+ * check though, it will blow up soon enough if we mistakenly try
+ * to use it on a 440.
+ */
+_GLOBAL(_tlbivax_bcast)
+       mfspr   r5,SPRN_MMUCR
+       mfmsr   r10
+       rlwimi  r5,r4,0,16,31
+       wrteei  0
+       mtspr   SPRN_MMUCR,r5
+       isync
+       PPC_TLBIVAX(0, R3)
+       isync
+       eieio
+       tlbsync
+BEGIN_FTR_SECTION
+       b       1f
+END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
+       sync
+       wrtee   r10
+       blr
+/*
+ * DD2 HW could hang if an instruction fetch happens before msync completes.
+ * Touch enough instruction cache lines to ensure cache hits
+ */
+1:     mflr    r9
+       bl      2f
+2:     mflr    r6
+       li      r7,32
+       PPC_ICBT(0,R6,R7)               /* touch next cache line */
+       add     r6,r6,r7
+       PPC_ICBT(0,R6,R7)               /* touch next cache line */
+       add     r6,r6,r7
+       PPC_ICBT(0,R6,R7)               /* touch next cache line */
+       sync
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       mtlr    r9
+       wrtee   r10
+       blr
+#endif /* CONFIG_PPC_47x */
+
+#elif defined(CONFIG_FSL_BOOKE)
+/*
+ * FSL BookE implementations.
+ *
+ * Since feature sections are using _SECTION_ELSE we need
+ * to have the larger code path before the _SECTION_ELSE
+ */
+
+/*
+ * Flush MMU TLB on the local processor
+ */
+_GLOBAL(_tlbil_all)
+BEGIN_MMU_FTR_SECTION
+       li      r3,(MMUCSR0_TLBFI)@l
+       mtspr   SPRN_MMUCSR0, r3
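+       /* TLBFI is cleared by hardware when the invalidation completes */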
+1:
+       mfspr   r3,SPRN_MMUCSR0
+       andi.   r3,r3,MMUCSR0_TLBFI@l
+       bne     1b
+MMU_FTR_SECTION_ELSE
+       PPC_TLBILX_ALL(0,R0)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
+       msync
+       isync
+       blr
+
+_GLOBAL(_tlbil_pid)
+BEGIN_MMU_FTR_SECTION
+       slwi    r3,r3,16
+       mfmsr   r10
+       wrteei  0
+       mfspr   r4,SPRN_MAS6    /* save MAS6 */
+       mtspr   SPRN_MAS6,r3
+       PPC_TLBILX_PID(0,R0)
+       mtspr   SPRN_MAS6,r4    /* restore MAS6 */
+       wrtee   r10
+MMU_FTR_SECTION_ELSE
+       li      r3,(MMUCSR0_TLBFI)@l
+       mtspr   SPRN_MMUCSR0, r3
+1:
+       mfspr   r3,SPRN_MMUCSR0
+       andi.   r3,r3,MMUCSR0_TLBFI@l
+       bne     1b
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX)
+       msync
+       isync
+       blr
+
+/*
+ * Flush MMU TLB for a particular address, but only on the local processor
+ * (no broadcast)
+ */
+_GLOBAL(__tlbil_va)
+       mfmsr   r10
+       wrteei  0
+       slwi    r4,r4,16
+       ori     r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l
+       mtspr   SPRN_MAS6,r4            /* assume AS=0 for now */
+BEGIN_MMU_FTR_SECTION
+       tlbsx   0,r3
+       mfspr   r4,SPRN_MAS1            /* check valid */
+       andis.  r3,r4,MAS1_VALID@h
+       beq     1f
+       rlwinm  r4,r4,0,1,31
+       mtspr   SPRN_MAS1,r4
+       tlbwe
+MMU_FTR_SECTION_ELSE
+       PPC_TLBILX_VA(0,R3)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
+       msync
+       isync
+1:     wrtee   r10
+       blr
+#elif defined(CONFIG_PPC_BOOK3E)
+/*
+ * New Book3E (>= 2.06) implementation
+ *
+ * Note: We may be able to get away without the interrupt masking stuff
+ * if we save/restore MAS6 on exceptions that might modify it
+ */
+_GLOBAL(_tlbil_pid)
+       slwi    r4,r3,MAS6_SPID_SHIFT
+       mfmsr   r10
+       wrteei  0
+       mtspr   SPRN_MAS6,r4
+       PPC_TLBILX_PID(0,R0)
+       wrtee   r10
+       msync
+       isync
+       blr
+
+_GLOBAL(_tlbil_pid_noind)
+       slwi    r4,r3,MAS6_SPID_SHIFT
+       mfmsr   r10
+       ori     r4,r4,MAS6_SIND
+       wrteei  0
+       mtspr   SPRN_MAS6,r4
+       PPC_TLBILX_PID(0,R0)
+       wrtee   r10
+       msync
+       isync
+       blr
+
+_GLOBAL(_tlbil_all)
+       PPC_TLBILX_ALL(0,R0)
+       msync
+       isync
+       blr
+
+_GLOBAL(_tlbil_va)
+       mfmsr   r10
+       wrteei  0
+       cmpwi   cr0,r6,0
+       slwi    r4,r4,MAS6_SPID_SHIFT
+       rlwimi  r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
+       beq     1f
+       rlwimi  r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
+1:     mtspr   SPRN_MAS6,r4            /* assume AS=0 for now */
+       PPC_TLBILX_VA(0,R3)
+       msync
+       isync
+       wrtee   r10
+       blr
+
+_GLOBAL(_tlbivax_bcast)
+       mfmsr   r10
+       wrteei  0
+       cmpwi   cr0,r6,0
+       slwi    r4,r4,MAS6_SPID_SHIFT
+       rlwimi  r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
+       beq     1f
+       rlwimi  r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
+1:     mtspr   SPRN_MAS6,r4            /* assume AS=0 for now */
+       PPC_TLBIVAX(0,R3)
+       eieio
+       tlbsync
+       sync
+       wrtee   r10
+       blr
+
+_GLOBAL(set_context)
+#ifdef CONFIG_BDI_SWITCH
+       /* Context switch the PTE pointer for the Abatron BDI2000.
+        * The PGDIR is the second parameter.
+        */
+       lis     r5, abatron_pteptrs@h
+       ori     r5, r5, abatron_pteptrs@l
+       stw     r4, 0x4(r5)
+#endif
+       mtspr   SPRN_PID,r3
+       isync                   /* Force context change */
+       blr
+#else
+#error Unsupported processor type !
+#endif
+
+#if defined(CONFIG_PPC_FSL_BOOK3E)
+/*
+ * extern void loadcam_entry(unsigned int index)
+ *
+ * Load TLBCAM[index] entry into the L2 CAM MMU
+ * Must preserve r7, r8, r9, and r10
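+ * (loadcam_multi below keeps live values in those registers across the call)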
+ */
+_GLOBAL(loadcam_entry)
+       mflr    r5
+       LOAD_REG_ADDR_PIC(r4, TLBCAM)
+       mtlr    r5
+       mulli   r5,r3,TLBCAM_SIZE
+       add     r3,r5,r4
+       lwz     r4,TLBCAM_MAS0(r3)
+       mtspr   SPRN_MAS0,r4
+       lwz     r4,TLBCAM_MAS1(r3)
+       mtspr   SPRN_MAS1,r4
+       PPC_LL  r4,TLBCAM_MAS2(r3)
+       mtspr   SPRN_MAS2,r4
+       lwz     r4,TLBCAM_MAS3(r3)
+       mtspr   SPRN_MAS3,r4
+BEGIN_MMU_FTR_SECTION
+       lwz     r4,TLBCAM_MAS7(r3)
+       mtspr   SPRN_MAS7,r4
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
+       isync
+       tlbwe
+       isync
+       blr
+
+/*
+ * Load multiple TLB entries at once, using an alternate-space
+ * trampoline so that we don't have to care about whether the same
+ * TLB entry maps us before and after.
+ *
+ * r3 = first entry to write
+ * r4 = number of entries to write
+ * r5 = temporary tlb entry
+ */
+_GLOBAL(loadcam_multi)
+       mflr    r8
+
+       /*
+        * Set up temporary TLB entry that is the same as what we're
+        * running from, but in AS=1.
+        */
+       bl      1f
+1:     mflr    r6
+       tlbsx   0,r8
+       mfspr   r6,SPRN_MAS1
+       ori     r6,r6,MAS1_TS
+       mtspr   SPRN_MAS1,r6
+       mfspr   r6,SPRN_MAS0
+       rlwimi  r6,r5,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
+       mr      r7,r5
+       mtspr   SPRN_MAS0,r6
+       isync
+       tlbwe
+       isync
+
+       /* Switch to AS=1 */
+       mfmsr   r6
+       ori     r6,r6,MSR_IS|MSR_DS
+       mtmsr   r6
+       isync
+
+       mr      r9,r3
+       add     r10,r3,r4
+2:     bl      loadcam_entry
+       addi    r9,r9,1
+       cmpw    r9,r10
+       mr      r3,r9
+       blt     2b
+
+       /* Return to AS=0 and clear the temporary entry */
+       mfmsr   r6
+       rlwinm. r6,r6,0,~(MSR_IS|MSR_DS)
+       mtmsr   r6
+       isync
+
+       li      r6,0
+       mtspr   SPRN_MAS1,r6
+       rlwinm  r6,r7,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
+       oris    r6,r6,MAS0_TLBSEL(1)@h
+       mtspr   SPRN_MAS0,r6
+       isync
+       tlbwe
+       isync
+
+       mtlr    r8
+       blr
+#endif
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
new file mode 100644 (file)
index 0000000..9ed9006
--- /dev/null
@@ -0,0 +1,1280 @@
+/*
+ *  Low level TLB miss handlers for Book3E
+ *
+ *  Copyright (C) 2008-2009
+ *      Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/pgtable.h>
+#include <asm/exception-64e.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+#include <asm/feature-fixups.h>
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
+#else
+#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
+#endif
+#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
+#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
+#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
+
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with a bolted linear mapping          *
+ * No virtual page table, no nested TLB misses                        *
+ *                                                                    *
+ **********************************************************************/
+
+/*
+ * Note that, unlike non-bolted handlers, TLB_EXFRAME is not
+ * modified by the TLB miss handlers themselves, since the TLB miss
+ * handler code will not itself cause a recursive TLB miss.
+ *
+ * TLB_EXFRAME will be modified when crit/mc/debug exceptions are
+ * entered/exited.
+ */
+.macro tlb_prolog_bolted intnum addr
+       mtspr   SPRN_SPRG_GEN_SCRATCH,r12
+       mfspr   r12,SPRN_SPRG_TLB_EXFRAME
+       std     r13,EX_TLB_R13(r12)
+       std     r10,EX_TLB_R10(r12)
+       mfspr   r13,SPRN_SPRG_PACA
+
+       mfcr    r10
+       std     r11,EX_TLB_R11(r12)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+       mfspr   r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+       DO_KVM  \intnum, SPRN_SRR1
+       std     r16,EX_TLB_R16(r12)
+       mfspr   r16,\addr               /* get faulting address */
+       std     r14,EX_TLB_R14(r12)
+       ld      r14,PACAPGD(r13)
+       std     r15,EX_TLB_R15(r12)
+       std     r10,EX_TLB_CR(r12)
+#ifdef CONFIG_PPC_FSL_BOOK3E
+START_BTB_FLUSH_SECTION
+       mfspr r11, SPRN_SRR1
+       andi. r10,r11,MSR_PR
+       beq 1f
+       BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+       std     r7,EX_TLB_R7(r12)
+#endif
+       TLB_MISS_PROLOG_STATS
+.endm
+
+.macro tlb_epilog_bolted
+       ld      r14,EX_TLB_CR(r12)
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       ld      r7,EX_TLB_R7(r12)
+#endif
+       ld      r10,EX_TLB_R10(r12)
+       ld      r11,EX_TLB_R11(r12)
+       ld      r13,EX_TLB_R13(r12)
+       mtcr    r14
+       ld      r14,EX_TLB_R14(r12)
+       ld      r15,EX_TLB_R15(r12)
+       TLB_MISS_RESTORE_STATS
+       ld      r16,EX_TLB_R16(r12)
+       mfspr   r12,SPRN_SPRG_GEN_SCRATCH
+.endm
+
+/* Data TLB miss */
+       START_EXCEPTION(data_tlb_miss_bolted)
+       tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
+
+       /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
+
+       /* We do the user/kernel test for the PID here along with the RW test
+        */
+       /* We pre-test some combination of permissions to avoid double
+        * faults:
+        *
+        * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+        * ESR_ST   is 0x00800000
+        * _PAGE_BAP_SW is 0x00000010
+        * So the shift is >> 19. This tests for supervisor writeability.
+        * If the page happens to be supervisor writeable and not user
+        * writeable, we will take a new fault later, but that should be
+        * a rare enough case.
+        *
+        * We also move ESR_ST in _PAGE_DIRTY position
+        * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+        *
+        * MAS1 is preset for all we need except for TID that needs to
+        * be cleared for kernel translations
+        */
+
+       mfspr   r11,SPRN_ESR
+
+       srdi    r15,r16,60              /* get region */
+       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+       bne-    dtlb_miss_fault_bolted  /* Bail if fault addr is invalid */
+
+       rlwinm  r10,r11,32-19,27,27
+       rlwimi  r10,r11,32-16,19,19
+       cmpwi   r15,0                   /* user vs kernel check */
+       ori     r10,r10,_PAGE_PRESENT
+       oris    r11,r10,_PAGE_ACCESSED@h
+
+       TLB_MISS_STATS_SAVE_INFO_BOLTED
+       bne     tlb_miss_kernel_bolted
+
+tlb_miss_common_bolted:
+/*
+ * This is the guts of the TLB miss handler for bolted-linear.
+ * We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
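+       /* The rldicl below extracts the PGD index from the EA and scales it
+        * to a byte offset (index * 8) in one rotate-and-mask; the following
+        * clrrdi clears the stray low bits brought in by the rotate so r15
+        * can be used directly as an ldx offset.
+        */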
+       rldicl  r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+       cmpldi  cr0,r14,0
+       clrrdi  r15,r15,3
+       beq     tlb_miss_fault_bolted   /* No PGDIR, bail */
+
+BEGIN_MMU_FTR_SECTION
+       /* Set the TLB reservation and search for existing entry. Then load
+        * the entry.
+        */
+       PPC_TLBSRX_DOT(0,R16)
+       ldx     r14,r14,r15             /* grab pgd entry */
+       beq     tlb_miss_done_bolted    /* tlb exists already, bail */
+MMU_FTR_SECTION_ELSE
+       ldx     r14,r14,r15             /* grab pgd entry */
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
+
+#ifndef CONFIG_PPC_64K_PAGES
+       rldicl  r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+       clrrdi  r15,r15,3
+       cmpdi   cr0,r14,0
+       bge     tlb_miss_fault_bolted   /* Bad pgd entry or hugepage; bail */
+       ldx     r14,r14,r15             /* grab pud entry */
+#endif /* CONFIG_PPC_64K_PAGES */
+
+       rldicl  r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+       clrrdi  r15,r15,3
+       cmpdi   cr0,r14,0
+       bge     tlb_miss_fault_bolted
+       ldx     r14,r14,r15             /* Grab pmd entry */
+
+       rldicl  r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
+       clrrdi  r15,r15,3
+       cmpdi   cr0,r14,0
+       bge     tlb_miss_fault_bolted
+       ldx     r14,r14,r15             /* Grab PTE, normal (!huge) page */
+
+       /* Check if required permissions are met */
+       andc.   r15,r11,r14
+       rldicr  r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+       bne-    tlb_miss_fault_bolted
+
+       /* Now we build the MAS:
+        *
+        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
+        * MAS 1   :    Almost fully setup
+        *               - PID already updated by caller if necessary
+        *               - TSIZE need change if !base page size, not
+        *                 yet implemented for now
+        * MAS 2   :    Defaults not useful, need to be redone
+        * MAS 3+7 :    Needs to be done
+        */
+       clrrdi  r11,r16,12              /* Clear low crap in EA */
+       clrldi  r15,r15,12              /* Clear crap at the top */
+       rlwimi  r11,r14,32-19,27,31     /* Insert WIMGE */
+       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
+       mtspr   SPRN_MAS2,r11
+       andi.   r11,r14,_PAGE_DIRTY
+       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
+
+       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+       bne     1f
+       li      r11,MAS3_SW|MAS3_UW
+       andc    r15,r15,r11
+1:
+       mtspr   SPRN_MAS7_MAS3,r15
+       tlbwe
+
+tlb_miss_done_bolted:
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+       tlb_epilog_bolted
+       rfi
+
+itlb_miss_kernel_bolted:
+       li      r11,_PAGE_PRESENT|_PAGE_BAP_SX  /* Base perm */
+       oris    r11,r11,_PAGE_ACCESSED@h
+tlb_miss_kernel_bolted:
+       mfspr   r10,SPRN_MAS1
+       ld      r14,PACA_KERNELPGD(r13)
+       cmpldi  cr0,r15,8               /* Check for vmalloc region */
+       rlwinm  r10,r10,0,16,1          /* Clear TID */
+       mtspr   SPRN_MAS1,r10
+       beq+    tlb_miss_common_bolted
+
+tlb_miss_fault_bolted:
+       /* We need to check if it was an instruction miss */
+       andi.   r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
+       bne     itlb_miss_fault_bolted
+dtlb_miss_fault_bolted:
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+       tlb_epilog_bolted
+       b       exc_data_storage_book3e
+itlb_miss_fault_bolted:
+       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+       tlb_epilog_bolted
+       b       exc_instruction_storage_book3e
+
+/* Instruction TLB miss */
+       START_EXCEPTION(instruction_tlb_miss_bolted)
+       tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
+
+       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+       srdi    r15,r16,60              /* get region */
+       TLB_MISS_STATS_SAVE_INFO_BOLTED
+       bne-    itlb_miss_fault_bolted
+
+       li      r11,_PAGE_PRESENT|_PAGE_EXEC    /* Base perm */
+
+       /* We do the user/kernel test for the PID here along with the RW test
+        */
+
+       cmpldi  cr0,r15,0                       /* Check for user region */
+       oris    r11,r11,_PAGE_ACCESSED@h
+       beq     tlb_miss_common_bolted
+       b       itlb_miss_kernel_bolted
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+/*
+ * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
+ *
+ * Linear mapping is bolted: no virtual page table or nested TLB misses
+ * Indirect entries in TLB1, hardware loads resulting direct entries
+ *    into TLB0
+ * No HES or NV hint on TLB1, so we need to do software round-robin
+ * No tlbsrx., so we need a spinlock, and we have to deal
+ *    with MAS-damage caused by tlbsx
+ * 4K pages only
+ */
+
+       START_EXCEPTION(instruction_tlb_miss_e6500)
+       tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
+
+       ld      r11,PACA_TCD_PTR(r13)
+       srdi.   r15,r16,60              /* get region */
+       ori     r16,r16,1
+
+       TLB_MISS_STATS_SAVE_INFO_BOLTED
+       bne     tlb_miss_kernel_e6500   /* user/kernel test */
+
+       b       tlb_miss_common_e6500
+
+       START_EXCEPTION(data_tlb_miss_e6500)
+       tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
+
+       ld      r11,PACA_TCD_PTR(r13)
+       srdi.   r15,r16,60              /* get region */
+       rldicr  r16,r16,0,62
+
+       TLB_MISS_STATS_SAVE_INFO_BOLTED
+       bne     tlb_miss_kernel_e6500   /* user vs kernel check */
+
+/*
+ * This is the guts of the TLB miss handler for e6500 and derivatives.
+ * We are entered with:
+ *
+ * r16 = page of faulting address (low bit 0 if data, 1 if instruction)
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = tlb_per_core ptr
+ * r10 = crap (free to use)
+ * r7  = esel_next
+ */
+tlb_miss_common_e6500:
+       crmove  cr2*4+2,cr0*4+2         /* cr2.eq != 0 if kernel address */
+
+BEGIN_FTR_SECTION              /* CPU_FTR_SMT */
+       /*
+        * Search if we already have an indirect entry for that virtual
+        * address, and if we do, bail out.
+        *
+        * MAS6:IND should be already set based on MAS4
+        */
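+       /* Take the per-core TLB lock: the lock byte holds our CPU index
+        * plus one, so a recursive acquisition from crit/mcheck level on
+        * the same thread is detected below (cr1.eq gets set) and the
+        * eventual unlock is skipped for it.
+        */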
+       lhz     r10,PACAPACAINDEX(r13)
+       addi    r10,r10,1
+       crclr   cr1*4+eq        /* set cr1.eq = 0 for non-recursive */
+1:     lbarx   r15,0,r11
+       cmpdi   r15,0
+       bne     2f
+       stbcx.  r10,0,r11
+       bne     1b
+3:
+       .subsection 1
+2:     cmpd    cr1,r15,r10     /* recursive lock due to mcheck/crit/etc? */
+       beq     cr1,3b          /* unlock will happen if cr1.eq = 0 */
+10:    lbz     r15,0(r11)
+       cmpdi   r15,0
+       bne     10b
+       b       1b
+       .previous
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
+
+       lbz     r7,TCD_ESEL_NEXT(r11)
+
+BEGIN_FTR_SECTION              /* CPU_FTR_SMT */
+       /*
+        * Erratum A-008139 says that we can't use tlbwe to change
+        * an indirect entry in any way (including replacing or
+        * invalidating) if the other thread could be in the process
+        * of a lookup.  The workaround is to invalidate the entry
+        * with tlbilx before overwriting.
+        */
+
+       rlwinm  r10,r7,16,0xff0000
+       oris    r10,r10,MAS0_TLBSEL(1)@h
+       mtspr   SPRN_MAS0,r10
+       isync
+       tlbre
+       mfspr   r15,SPRN_MAS1
+       andis.  r15,r15,MAS1_VALID@h
+       beq     5f
+
+BEGIN_FTR_SECTION_NESTED(532)
+       mfspr   r10,SPRN_MAS8
+       rlwinm  r10,r10,0,0x80000fff  /* tgs,tlpid -> sgs,slpid */
+       mtspr   SPRN_MAS5,r10
+END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
+
+       mfspr   r10,SPRN_MAS1
+       rlwinm  r15,r10,0,0x3fff0000  /* tid -> spid */
+       rlwimi  r15,r10,20,0x00000003 /* ind,ts -> sind,sas */
+       mfspr   r10,SPRN_MAS6
+       mtspr   SPRN_MAS6,r15
+
+       mfspr   r15,SPRN_MAS2
+       isync
+       tlbilxva 0,r15
+       isync
+
+       mtspr   SPRN_MAS6,r10
+
+5:
+BEGIN_FTR_SECTION_NESTED(532)
+       li      r10,0
+       mtspr   SPRN_MAS8,r10
+       mtspr   SPRN_MAS5,r10
+END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
+
+       tlbsx   0,r16
+       mfspr   r10,SPRN_MAS1
+       andis.  r15,r10,MAS1_VALID@h
+       bne     tlb_miss_done_e6500
+FTR_SECTION_ELSE
+       mfspr   r10,SPRN_MAS1
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
+
+       oris    r10,r10,MAS1_VALID@h
+       beq     cr2,4f
+       rlwinm  r10,r10,0,16,1          /* Clear TID */
+4:     mtspr   SPRN_MAS1,r10
+
+       /* Now, we need to walk the page tables. First check if we are in
+        * range.
+        */
+       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+       bne-    tlb_miss_fault_e6500
+
+       rldicl  r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+       cmpldi  cr0,r14,0
+       clrrdi  r15,r15,3
+       beq-    tlb_miss_fault_e6500 /* No PGDIR, bail */
+       ldx     r14,r14,r15             /* grab pgd entry */
+
+       rldicl  r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+       clrrdi  r15,r15,3
+       cmpdi   cr0,r14,0
+       bge     tlb_miss_huge_e6500     /* Bad pgd entry or hugepage; bail */
+       ldx     r14,r14,r15             /* grab pud entry */
+
+       rldicl  r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+       clrrdi  r15,r15,3
+       cmpdi   cr0,r14,0
+       bge     tlb_miss_huge_e6500
+       ldx     r14,r14,r15             /* Grab pmd entry */
+
+       mfspr   r10,SPRN_MAS0
+       cmpdi   cr0,r14,0
+       bge     tlb_miss_huge_e6500
+
+       /* Now we build the MAS for a 2M indirect page:
+        *
+        * MAS 0   :    ESEL needs to be filled by software round-robin
+        * MAS 1   :    Fully set up
+        *               - PID already updated by caller if necessary
+        *               - TSIZE for now is base ind page size always
+        *               - TID already cleared if necessary
+        * MAS 2   :    Default not 2M-aligned, need to be redone
+        * MAS 3+7 :    Needs to be done
+        */
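+       /* For scale: 2M = 2^21, so the clrrdi by 21 below yields the
+        * 2M-aligned EPN that the indirect entry needs.
+        */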
+
+       ori     r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+       mtspr   SPRN_MAS7_MAS3,r14
+
+       clrrdi  r15,r16,21              /* make EA 2M-aligned */
+       mtspr   SPRN_MAS2,r15
+
+tlb_miss_huge_done_e6500:
+       lbz     r16,TCD_ESEL_MAX(r11)
+       lbz     r14,TCD_ESEL_FIRST(r11)
+       rlwimi  r10,r7,16,0x00ff0000    /* insert esel_next into MAS0 */
+       addi    r7,r7,1                 /* increment esel_next */
+       mtspr   SPRN_MAS0,r10
+       cmpw    r7,r16
+       iseleq  r7,r14,r7               /* if next == last use first */
+       stb     r7,TCD_ESEL_NEXT(r11)
+
+       tlbwe
+
+tlb_miss_done_e6500:
+       .macro  tlb_unlock_e6500
+BEGIN_FTR_SECTION
+       beq     cr1,1f          /* no unlock if lock was recursively grabbed */
+       li      r15,0
+       isync
+       stb     r15,0(r11)
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
+       .endm
+
+       tlb_unlock_e6500
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+       tlb_epilog_bolted
+       rfi
+
+tlb_miss_huge_e6500:
+       beq     tlb_miss_fault_e6500
+       li      r10,1
+       andi.   r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
+       rldimi  r14,r10,63,0            /* Set PD_HUGE */
+       xor     r14,r14,r15             /* Clear size bits */
+       ldx     r14,0,r14
+
+       /*
+        * Now we build the MAS for a huge page.
+        *
+        * MAS 0   :    ESEL needs to be filled by software round-robin
+        *               - can be handled by indirect code
+        * MAS 1   :    Need to clear IND and set TSIZE
+        * MAS 2,3+7:   Needs to be redone similar to non-tablewalk handler
+        */
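+       /* Illustration (assuming the power-of-two TSIZE encoding where
+        * size = 2^TSIZE KB): a 2M huge page has a shift of 21, and
+        * 21 - 10 = 11 encodes 2^11 KB = 2M.
+        */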
+
+       subi    r15,r15,10              /* Convert psize to tsize */
+       mfspr   r10,SPRN_MAS1
+       rlwinm  r10,r10,0,~MAS1_IND
+       rlwimi  r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
+       mtspr   SPRN_MAS1,r10
+
+       li      r10,-0x400
+       sld     r15,r10,r15             /* Generate mask based on size */
+       and     r10,r16,r15
+       rldicr  r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+       rlwimi  r10,r14,32-19,27,31     /* Insert WIMGE */
+       clrldi  r15,r15,PAGE_SHIFT      /* Clear crap at the top */
+       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
+       mtspr   SPRN_MAS2,r10
+       andi.   r10,r14,_PAGE_DIRTY
+       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
+
+       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+       bne     1f
+       li      r10,MAS3_SW|MAS3_UW
+       andc    r15,r15,r10
+1:
+       mtspr   SPRN_MAS7_MAS3,r15
+
+       mfspr   r10,SPRN_MAS0
+       b       tlb_miss_huge_done_e6500
+
+tlb_miss_kernel_e6500:
+       ld      r14,PACA_KERNELPGD(r13)
+       cmpldi  cr1,r15,8               /* Check for vmalloc region */
+       beq+    cr1,tlb_miss_common_e6500
+
+tlb_miss_fault_e6500:
+       tlb_unlock_e6500
+       /* We need to check if it was an instruction miss */
+       andi.   r16,r16,1
+       bne     itlb_miss_fault_e6500
+dtlb_miss_fault_e6500:
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+       tlb_epilog_bolted
+       b       exc_data_storage_book3e
+itlb_miss_fault_e6500:
+       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+       tlb_epilog_bolted
+       b       exc_instruction_storage_book3e
+#endif /* CONFIG_PPC_FSL_BOOK3E */
+
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with TLB reservation and HES support  *
+ *                                                                    *
+ **********************************************************************/
+
+
+/* Data TLB miss */
+       START_EXCEPTION(data_tlb_miss)
+       TLB_MISS_PROLOG
+
+       /* Now we handle the fault proper. We only save DEAR in the normal
+        * fault case since that's the only interesting value here.
+        * We could probably also optimize by not saving SRR0/1 in the
+        * linear mapping case, but I'll leave that for later
+        */
+       mfspr   r14,SPRN_ESR
+       mfspr   r16,SPRN_DEAR           /* get faulting address */
+       srdi    r15,r16,60              /* get region */
+       cmpldi  cr0,r15,0xc             /* linear mapping ? */
+       TLB_MISS_STATS_SAVE_INFO
+       beq     tlb_load_linear         /* yes -> go to linear map load */
+
+       /* The page tables are mapped virtually linear. At this point, though,
+        * we don't know whether we are trying to fault in a first level
+        * virtual address or a virtual page table address. We can get that
+        * from bit 0x1 of the region ID which we have set for a page table
+        */
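+       /* For reference, the top 4 EA bits used as the region ID here are
+        * 0x0 for user, 0x8 for vmalloc and 0xc for the linear mapping,
+        * so the virtual page tables end up in regions 0x1 and 0x9.
+        */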
+       andi.   r10,r15,0x1
+       bne-    virt_page_table_tlb_miss
+
+       std     r14,EX_TLB_ESR(r12);    /* save ESR */
+       std     r16,EX_TLB_DEAR(r12);   /* save DEAR */
+
+        /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
+       li      r11,_PAGE_PRESENT
+       oris    r11,r11,_PAGE_ACCESSED@h
+
+       /* We do the user/kernel test for the PID here along with the RW test
+        */
+       cmpldi  cr0,r15,0               /* Check for user region */
+
+       /* We pre-test some combination of permissions to avoid double
+        * faults:
+        *
+        * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+        * ESR_ST   is 0x00800000
+        * _PAGE_BAP_SW is 0x00000010
+        * So the shift is >> 19. This tests for supervisor writeability.
+        * If the page happens to be supervisor writeable and not user
+        * writeable, we will take a new fault later, but that should be
+        * a rare enough case.
+        *
+        * We also move ESR_ST in _PAGE_DIRTY position
+        * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+        *
+        * MAS1 is preset for all we need except for TID that needs to
+        * be cleared for kernel translations
+        */
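+       /* Worked example for the first insert below: ESR_ST is bit
+        * 0x00800000 and _PAGE_BAP_SW is bit 0x00000010, so a right
+        * shift by 19 lines them up; rlwimi expresses that as a rotate
+        * left by 32-19 with a single-bit mask.
+        */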
+       rlwimi  r11,r14,32-19,27,27
+       rlwimi  r11,r14,32-16,19,19
+       beq     normal_tlb_miss
+       /* XXX replace the RMW cycles with immediate loads + writes */
+1:     mfspr   r10,SPRN_MAS1
+       cmpldi  cr0,r15,8               /* Check for vmalloc region */
+       rlwinm  r10,r10,0,16,1          /* Clear TID */
+       mtspr   SPRN_MAS1,r10
+       beq+    normal_tlb_miss
+
+       /* We got a crappy address, just fault with whatever DEAR and ESR
+        * are here
+        */
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_data_storage_book3e
+
+/* Instruction TLB miss */
+       START_EXCEPTION(instruction_tlb_miss)
+       TLB_MISS_PROLOG
+
+       /* If we take a recursive fault, the second level handler may need
+        * to know whether we are handling a data or instruction fault in
+        * order to get to the right store fault handler. We provide that
+        * info by writing a crazy value in ESR in our exception frame
+        */
+       li      r14,-1  /* store to exception frame is done later */
+
+       /* Now we handle the fault proper. We only save DEAR in the non-
+        * linear mapping case since we know the linear mapping case will
+        * not re-enter. We could indeed optimize and also not save SRR0/1
+        * in the linear mapping case but I'll leave that for later
+        *
+        * Faulting address is SRR0 which is already in r16
+        */
+       srdi    r15,r16,60              /* get region */
+       cmpldi  cr0,r15,0xc             /* linear mapping ? */
+       TLB_MISS_STATS_SAVE_INFO
+       beq     tlb_load_linear         /* yes -> go to linear map load */
+
+       /* We do the user/kernel test for the PID here along with the RW test
+        */
+       li      r11,_PAGE_PRESENT|_PAGE_EXEC    /* Base perm */
+       oris    r11,r11,_PAGE_ACCESSED@h
+
+       cmpldi  cr0,r15,0                       /* Check for user region */
+       std     r14,EX_TLB_ESR(r12)             /* write crazy -1 to frame */
+       beq     normal_tlb_miss
+
+       li      r11,_PAGE_PRESENT|_PAGE_BAP_SX  /* Base perm */
+       oris    r11,r11,_PAGE_ACCESSED@h
+       /* XXX replace the RMW cycles with immediate loads + writes */
+       mfspr   r10,SPRN_MAS1
+       cmpldi  cr0,r15,8                       /* Check for vmalloc region */
+       rlwinm  r10,r10,0,16,1                  /* Clear TID */
+       mtspr   SPRN_MAS1,r10
+       beq+    normal_tlb_miss
+
+       /* We got a crappy address, just fault */
+       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_instruction_storage_book3e
+
+/*
+ * This is the guts of the first-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = region ID
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+normal_tlb_miss:
+       /* So we first construct the page table address. We do that by
+        * shifting the bottom of the address (not the region ID) by
+        * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
+        * or'ing the fourth high bit.
+        *
+        * NOTE: For 64K pages, we do things slightly differently in
+        * order to handle the weird page table format used by linux
+        */
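+       /* Concretely, with 4K pages and 8 bytes per PTE the pointer built
+        * below is roughly ((ea >> 9) & ~7UL) placed in the virtual page
+        * table region (region | 1), which the ori/sldi/or sequence
+        * constructs.
+        */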
+       ori     r10,r15,0x1
+#ifdef CONFIG_PPC_64K_PAGES
+       /* For the top bits, 16 bytes per PTE */
+       rldicl  r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
+       /* Now create the bottom bits as 0 in position 0x8000 and
+        * the rest calculated for 8 bytes per PTE
+        */
+       rldicl  r15,r16,64-(PAGE_SHIFT-3),64-15
+       /* Insert the bottom bits in */
+       rlwimi  r14,r15,0,16,31
+#else
+       rldicl  r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
+#endif
+       sldi    r15,r10,60
+       clrrdi  r14,r14,3
+       or      r10,r15,r14
+
+BEGIN_MMU_FTR_SECTION
+       /* Set the TLB reservation and search for existing entry. Then load
+        * the entry.
+        */
+       PPC_TLBSRX_DOT(0,R16)
+       ld      r14,0(r10)
+       beq     normal_tlb_miss_done
+MMU_FTR_SECTION_ELSE
+       ld      r14,0(r10)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
+
+finish_normal_tlb_miss:
+       /* Check if required permissions are met */
+       andc.   r15,r11,r14
+       bne-    normal_tlb_miss_access_fault
+
+       /* Now we build the MAS:
+        *
+        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
+        * MAS 1   :    Almost fully setup
+        *               - PID already updated by caller if necessary
+        *               - TSIZE need change if !base page size, not
+        *                 yet implemented for now
+        * MAS 2   :    Defaults not useful, need to be redone
+        * MAS 3+7 :    Needs to be done
+        *
+        * TODO: mix up code below for better scheduling
+        */
+       clrrdi  r11,r16,12              /* Clear low crap in EA */
+       rlwimi  r11,r14,32-19,27,31     /* Insert WIMGE */
+       mtspr   SPRN_MAS2,r11
+
+       /* Check page size, if not standard, update MAS1 */
+       rldicl  r11,r14,64-8,64-8
+#ifdef CONFIG_PPC_64K_PAGES
+       cmpldi  cr0,r11,BOOK3E_PAGESZ_64K
+#else
+       cmpldi  cr0,r11,BOOK3E_PAGESZ_4K
+#endif
+       beq-    1f
+       mfspr   r11,SPRN_MAS1
+       rlwimi  r11,r14,31,21,24
+       rlwinm  r11,r11,0,21,19
+       mtspr   SPRN_MAS1,r11
+1:
+       /* Move RPN in position */
+       rldicr  r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+       clrldi  r15,r11,12              /* Clear crap at the top */
+       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
+       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
+
+       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+       andi.   r11,r14,_PAGE_DIRTY
+       bne     1f
+       li      r11,MAS3_SW|MAS3_UW
+       andc    r15,r15,r11
+1:
+BEGIN_MMU_FTR_SECTION
+       srdi    r16,r15,32
+       mtspr   SPRN_MAS3,r15
+       mtspr   SPRN_MAS7,r16
+MMU_FTR_SECTION_ELSE
+       mtspr   SPRN_MAS7_MAS3,r15
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
+
+       tlbwe
+
+normal_tlb_miss_done:
+       /* We don't bother with restoring DEAR or ESR since we know we are
+        * level 0 and just going back to userland. They are only needed
+        * if you are going to take an access fault
+        */
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+       TLB_MISS_EPILOG_SUCCESS
+       rfi
+
+normal_tlb_miss_access_fault:
+       /* We need to check if it was an instruction miss */
+       andi.   r10,r11,_PAGE_EXEC
+       bne     1f
+       ld      r14,EX_TLB_DEAR(r12)
+       ld      r15,EX_TLB_ESR(r12)
+       mtspr   SPRN_DEAR,r14
+       mtspr   SPRN_ESR,r15
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_data_storage_book3e
+1:     TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the second-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = virtual page table faulting address
+ * r15 = region (top 4 bits of address)
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * Note that this should only ever be called as a second level handler
+ * with the current scheme when using SW load.
+ * That means we can always get the original fault DEAR at
+ * EX_TLB_DEAR-EX_TLB_SIZE(r12)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will restart the whole fault at level
+ * 0 so we don't care too much about clobbers
+ *
+ * XXX That code was written back when we couldn't clobber r14. We can now,
+ * so we could probably optimize things a bit
+ */
+virt_page_table_tlb_miss:
+       /* Are we hitting a kernel page table ? */
+       andi.   r10,r15,0x8
+
+       /* The cool thing now is that r10 contains 0 for user and 8 for kernel,
+        * and we happen to have the swapper_pg_dir at offset 8 from the user
+        * pgdir in the PACA :-).
+        */
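+       /* That is, r11 ends up as &paca (user) or &paca + 8 (kernel), so
+        * the PACAPGD(r11) load below picks up either the user pgd or the
+        * kernel pgd.
+        */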
+       add     r11,r10,r13
+
+       /* If kernel, we need to clear MAS1 TID */
+       beq     1f
+       /* XXX replace the RMW cycles with immediate loads + writes */
+       mfspr   r10,SPRN_MAS1
+       rlwinm  r10,r10,0,16,1                  /* Clear TID */
+       mtspr   SPRN_MAS1,r10
+1:
+BEGIN_MMU_FTR_SECTION
+       /* Search if we already have a TLB entry for that virtual address, and
+        * if we do, bail out.
+        */
+       PPC_TLBSRX_DOT(0,R16)
+       beq     virt_page_table_tlb_miss_done
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
+
+       /* Now, we need to walk the page tables. First check if we are in
+        * range.
+        */
+       rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+       bne-    virt_page_table_tlb_miss_fault
+
+       /* Get the PGD pointer */
+       ld      r15,PACAPGD(r11)
+       cmpldi  cr0,r15,0
+       beq-    virt_page_table_tlb_miss_fault
+
+       /* Get to PGD entry */
+       rldicl  r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
+       clrrdi  r10,r11,3
+       ldx     r15,r10,r15
+       cmpdi   cr0,r15,0
+       bge     virt_page_table_tlb_miss_fault
+
+#ifndef CONFIG_PPC_64K_PAGES
+       /* Get to PUD entry */
+       rldicl  r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
+       clrrdi  r10,r11,3
+       ldx     r15,r10,r15
+       cmpdi   cr0,r15,0
+       bge     virt_page_table_tlb_miss_fault
+#endif /* CONFIG_PPC_64K_PAGES */
+
+       /* Get to PMD entry */
+       rldicl  r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
+       clrrdi  r10,r11,3
+       ldx     r15,r10,r15
+       cmpdi   cr0,r15,0
+       bge     virt_page_table_tlb_miss_fault
+
+       /* Ok, we're all right, we can now create a kernel translation for
+        * a 4K or 64K page from r16 -> r15.
+        */
+       /* Now we build the MAS:
+        *
+        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
+        * MAS 1   :    Almost fully setup
+        *               - PID already updated by caller if necessary
+        *               - TSIZE for now is base page size always
+        * MAS 2   :    Use defaults
+        * MAS 3+7 :    Needs to be done
+        *
+        * So we only do MAS 2 and 3 for now...
+        */
+       clrldi  r11,r15,4               /* remove region ID from RPN */
+       ori     r10,r11,1               /* Or-in SR */
+
+BEGIN_MMU_FTR_SECTION
+       srdi    r16,r10,32
+       mtspr   SPRN_MAS3,r10
+       mtspr   SPRN_MAS7,r16
+MMU_FTR_SECTION_ELSE
+       mtspr   SPRN_MAS7_MAS3,r10
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
+
+       tlbwe
+
+BEGIN_MMU_FTR_SECTION
+virt_page_table_tlb_miss_done:
+
+       /* We have overridden MAS2:EPN but currently our primary TLB miss
+        * handler will always restore it, so that should not be an issue.
+        * If we ever optimize the primary handler to not write MAS2 in
+        * some cases, we'll have to restore MAS2:EPN here based on the
+        * original fault's DEAR. If we do that we have to modify the
+        * ITLB miss handler to also store SRR0 in the exception frame
+        * as DEAR.
+        *
+        * However, one nasty thing we did is we potentially cleared the
+        * TLB reservation. The trick here is that if we are not a level 0
+        * exception (i.e. we interrupted a TLB miss), we offset the return
+        * address by -4 in order to replay the tlbsrx. instruction there
+        */
+       subf    r10,r13,r12
+       cmpldi  cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+       bne-    1f
+       ld      r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
+       addi    r10,r11,-4
+       std     r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
+1:
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
+       /* Return to caller, normal case */
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
+       TLB_MISS_EPILOG_SUCCESS
+       rfi
+
+virt_page_table_tlb_miss_fault:
+       /* If we fault here, things are a little bit tricky. We need to call
+        * either data or instruction store fault, and we need to retrieve
+        * the original fault address and ESR (for data).
+        *
+        * The thing is, we know that in normal circumstances, this is
+        * always called as a second level tlb miss for SW load or as a first
+        * level TLB miss for HW load, so we should be able to peek at the
+        * relevant information in the first exception frame in the PACA.
+        *
+        * However, we do need to double check that, because we may just hit
+        * a stray kernel pointer or a userland attack trying to hit those
+        * areas. If that is the case, we do a data fault. (We can't get here
+        * from an instruction tlb miss anyway).
+        *
+        * Note also that when going to a fault, we must unwind the previous
+        * level as well. Since we are doing that, we don't need to clear or
+        * restore the TLB reservation either.
+        */
+       subf    r10,r13,r12
+       cmpldi  cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+       bne-    virt_page_table_tlb_miss_whacko_fault
+
+       /* We dig the original DEAR and ESR from slot 0 */
+       ld      r15,EX_TLB_DEAR+PACA_EXTLB(r13)
+       ld      r16,EX_TLB_ESR+PACA_EXTLB(r13)
+
+       /* We check for the "special" ESR value for instruction faults */
+       cmpdi   cr0,r16,-1
+       beq     1f
+       mtspr   SPRN_DEAR,r15
+       mtspr   SPRN_ESR,r16
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
+       TLB_MISS_EPILOG_ERROR
+       b       exc_data_storage_book3e
+1:     TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
+       TLB_MISS_EPILOG_ERROR
+       b       exc_instruction_storage_book3e
+
+virt_page_table_tlb_miss_whacko_fault:
+       /* The linear fault will restart everything so ESR and DEAR will
+        * not have been clobbered, let's just fault with what we have
+        */
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
+       TLB_MISS_EPILOG_ERROR
+       b       exc_data_storage_book3e
+
+
+/**************************************************************
+ *                                                            *
+ * TLB miss handling for Book3E with hw page table support    *
+ *                                                            *
+ **************************************************************/
+
+
+/* Data TLB miss */
+       START_EXCEPTION(data_tlb_miss_htw)
+       TLB_MISS_PROLOG
+
+       /* Now we handle the fault proper. We only save DEAR in the normal
+        * fault case since that's the only interesting value here.
+        * We could probably also optimize by not saving SRR0/1 in the
+        * linear mapping case, but I'll leave that for later
+        */
+       mfspr   r14,SPRN_ESR
+       mfspr   r16,SPRN_DEAR           /* get faulting address */
+       srdi    r11,r16,60              /* get region */
+       cmpldi  cr0,r11,0xc             /* linear mapping ? */
+       TLB_MISS_STATS_SAVE_INFO
+       beq     tlb_load_linear         /* yes -> go to linear map load */
+
+       /* We do the user/kernel test for the PID here along with the RW test
+        */
+       cmpldi  cr0,r11,0               /* Check for user region */
+       ld      r15,PACAPGD(r13)        /* Load user pgdir */
+       beq     htw_tlb_miss
+
+       /* XXX replace the RMW cycles with immediate loads + writes */
+1:     mfspr   r10,SPRN_MAS1
+       cmpldi  cr0,r11,8               /* Check for vmalloc region */
+       rlwinm  r10,r10,0,16,1          /* Clear TID */
+       mtspr   SPRN_MAS1,r10
+       ld      r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
+       beq+    htw_tlb_miss
+
+       /* We got a crappy address, just fault with whatever DEAR and ESR
+        * are here
+        */
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_data_storage_book3e
+
+/* Instruction TLB miss */
+       START_EXCEPTION(instruction_tlb_miss_htw)
+       TLB_MISS_PROLOG
+
+       /* If we take a recursive fault, the second level handler may need
+        * to know whether we are handling a data or instruction fault in
+        * order to get to the right store fault handler. We provide that
+        * info by keeping a crazy value for ESR in r14
+        */
+       li      r14,-1  /* store to exception frame is done later */
+
+       /* Now we handle the fault proper. We only save DEAR in the non-
+        * linear mapping case since we know the linear mapping case will
+        * not re-enter. We could indeed optimize and also not save SRR0/1
+        * in the linear mapping case but I'll leave that for later
+        *
+        * Faulting address is SRR0 which is already in r16
+        */
+       srdi    r11,r16,60              /* get region */
+       cmpldi  cr0,r11,0xc             /* linear mapping ? */
+       TLB_MISS_STATS_SAVE_INFO
+       beq     tlb_load_linear         /* yes -> go to linear map load */
+
+       /* We do the user/kernel test for the PID here along with the RW test
+        */
+       cmpldi  cr0,r11,0                       /* Check for user region */
+       ld      r15,PACAPGD(r13)                /* Load user pgdir */
+       beq     htw_tlb_miss
+
+       /* XXX replace the RMW cycles with immediate loads + writes */
+1:     mfspr   r10,SPRN_MAS1
+       cmpldi  cr0,r11,8                       /* Check for vmalloc region */
+       rlwinm  r10,r10,0,16,1                  /* Clear TID */
+       mtspr   SPRN_MAS1,r10
+       ld      r15,PACA_KERNELPGD(r13)         /* Load kernel pgdir */
+       beq+    htw_tlb_miss
+
+       /* We got a crappy address, just fault */
+       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the TLB miss handler for the hardware page table
+ * walk case (we load indirect entries). We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = PGD pointer
+ * r14 = ESR
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will save/restore things for us
+ */
+htw_tlb_miss:
+       /* Search if we already have a TLB entry for that virtual address, and
+        * if we do, bail out.
+        *
+        * MAS1:IND should be already set based on MAS4
+        */
+       PPC_TLBSRX_DOT(0,R16)
+       beq     htw_tlb_miss_done
+
+       /* Now, we need to walk the page tables. First check if we are in
+        * range.
+        */
+       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+       bne-    htw_tlb_miss_fault
+
+       /* Get the PGD pointer */
+       cmpldi  cr0,r15,0
+       beq-    htw_tlb_miss_fault
+
+       /* Get to PGD entry */
+       rldicl  r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
+       clrrdi  r10,r11,3
+       ldx     r15,r10,r15
+       cmpdi   cr0,r15,0
+       bge     htw_tlb_miss_fault
+
+#ifndef CONFIG_PPC_64K_PAGES
+       /* Get to PUD entry */
+       rldicl  r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
+       clrrdi  r10,r11,3
+       ldx     r15,r10,r15
+       cmpdi   cr0,r15,0
+       bge     htw_tlb_miss_fault
+#endif /* CONFIG_PPC_64K_PAGES */
+
+       /* Get to PMD entry */
+       rldicl  r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
+       clrrdi  r10,r11,3
+       ldx     r15,r10,r15
+       cmpdi   cr0,r15,0
+       bge     htw_tlb_miss_fault
+
+       /* Ok, we're all right, we can now create an indirect entry for
+        * a 1M or 256M page.
+        *
+        * One last trick: because we use "half" pages for the HTW
+        * (a 1M IND is 2K and a 256M IND is 32K), we need to account for
+        * an extra low-order bit in the RPN. For 64K pages there is no
+        * problem, as we already use 32K arrays (half PTE pages), but for
+        * 4K pages we need to extract a bit from the virtual address and
+        * insert it into the "PA52" bit of the RPN.
+        */
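+       /* Checking the numbers above: with 4K pages a 1M indirect entry
+        * covers 256 PTEs * 8 bytes = 2K, i.e. half of a 4K PTE page,
+        * hence the extra EA bit folded into the RPN just below; with 64K
+        * pages a 256M entry covers 4096 PTEs * 8 bytes = 32K.
+        */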
+#ifndef CONFIG_PPC_64K_PAGES
+       rlwimi  r15,r16,32-9,20,20
+#endif
+       /* Now we build the MAS:
+        *
+        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
+        * MAS 1   :    Almost fully setup
+        *               - PID already updated by caller if necessary
+        *               - TSIZE for now is base ind page size always
+        * MAS 2   :    Use defaults
+        * MAS 3+7 :    Needs to be done
+        */
+#ifdef CONFIG_PPC_64K_PAGES
+       ori     r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
+#else
+       ori     r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+#endif
+
+BEGIN_MMU_FTR_SECTION
+       srdi    r16,r10,32
+       mtspr   SPRN_MAS3,r10
+       mtspr   SPRN_MAS7,r16
+MMU_FTR_SECTION_ELSE
+       mtspr   SPRN_MAS7_MAS3,r10
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
+
+       tlbwe
+
+htw_tlb_miss_done:
+       /* We don't bother with restoring DEAR or ESR since we know we are
+        * level 0 and just going back to userland. They are only needed
+        * if you are going to take an access fault
+        */
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
+       TLB_MISS_EPILOG_SUCCESS
+       rfi
+
+htw_tlb_miss_fault:
+       /* We need to check if it was an instruction miss. We can tell
+        * because r14 would then contain -1
+        */
+       cmpdi   cr0,r14,-1
+       beq     1f
+       mtspr   SPRN_DEAR,r16
+       mtspr   SPRN_ESR,r14
+       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_data_storage_book3e
+1:     TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
+       TLB_MISS_EPILOG_ERROR
+       b       exc_instruction_storage_book3e
+
+/*
+ * This is the guts of "any" level TLB miss handler for kernel linear
+ * mapping misses. We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = ESR (data) or -1 (instruction)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * In addition we know that we will not re-enter, so in theory, we could
+ * use a simpler epilog not restoring SRR0/1 etc., but we'll do that later.
+ *
+ * We also need to be careful about MAS registers here & TLB reservation,
+ * as we know we'll have clobbered them if we interrupt the main TLB miss
+ * handlers in which case we probably want to do a full restart at level
+ * 0 rather than saving / restoring the MAS.
+ *
+ * Note: If we care about performance of that core, we can easily shuffle
+ *       a few things around
+ */
+tlb_load_linear:
+       /* For now, we assume the linear mapping is contiguous and stops at
+        * linear_map_top. We also assume the size is a multiple of 1G, thus
+        * we only use 1G pages for now. That might have to be changed in a
+        * final implementation, especially when dealing with hypervisors
+        */
+       ld      r11,PACATOC(r13)
+       ld      r11,linear_map_top@got(r11)
+       ld      r10,0(r11)
+       tovirt(10,10)
+       cmpld   cr0,r16,r10
+       bge     tlb_load_linear_fault
+
+       /* MAS1 needs a whole new setup. */
+       li      r15,(BOOK3E_PAGESZ_1GB<<MAS1_TSIZE_SHIFT)
+       oris    r15,r15,MAS1_VALID@h    /* MAS1 needs V and TSIZE */
+       mtspr   SPRN_MAS1,r15
+
+       /* Already somebody there ? */
+       PPC_TLBSRX_DOT(0,R16)
+       beq     tlb_load_linear_done
+
+       /* Now we build the remaining MAS. MAS0 and 2 should be fine
+        * with their defaults, which leaves us with MAS 3 and 7. The
+        * mapping is linear, so we just take the address, clear the
+        * region bits, and or in the permission bits which are currently
+        * hard wired
+        */
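+       /* That is: 1G = 2^30, so clear the low 30 bits of the EA, strip
+        * the top 4 region bits to get the physical address (assuming the
+        * linear map starts at physical 0), and OR in fixed SR/SW/SX
+        * permissions.
+        */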
+       clrrdi  r10,r16,30              /* 1G page index */
+       clrldi  r10,r10,4               /* clear region bits */
+       ori     r10,r10,MAS3_SR|MAS3_SW|MAS3_SX
+
+BEGIN_MMU_FTR_SECTION
+       srdi    r16,r10,32
+       mtspr   SPRN_MAS3,r10
+       mtspr   SPRN_MAS7,r16
+MMU_FTR_SECTION_ELSE
+       mtspr   SPRN_MAS7_MAS3,r10
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
+
+       tlbwe
+
+tlb_load_linear_done:
+       /* We use the "error" epilog for success as we do want to
+        * restore to the initial faulting context, whatever it was.
+        * We do that because we can't resume a fault within a TLB
+        * miss handler, due to MAS and TLB reservation being clobbered.
+        */
+       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
+       TLB_MISS_EPILOG_ERROR
+       rfi
+
+tlb_load_linear_fault:
+       /* We keep the DEAR and ESR around; this shouldn't have happened */
+       cmpdi   cr0,r14,-1
+       beq     1f
+       TLB_MISS_EPILOG_ERROR_SPECIAL
+       b       exc_data_storage_book3e
+1:     TLB_MISS_EPILOG_ERROR_SPECIAL
+       b       exc_instruction_storage_book3e
+
+
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+.tlb_stat_inc:
+1:     ldarx   r8,0,r9
+       addi    r8,r8,1
+       stdcx.  r8,0,r9
+       bne-    1b
+       blr
+#endif
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
deleted file mode 100644 (file)
index f296c2e..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2005, Paul Mackerras, IBM Corporation.
- * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
- * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/memblock.h>
-#include <asm/pgalloc.h>
-#include <asm/tlb.h>
-#include <asm/dma.h>
-
-#include <mm/mmu_decl.h>
-
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-/*
- * On Book3E CPUs, the vmemmap is currently mapped in the top half of
- * the vmalloc space using normal page tables, though the size of
- * pages encoded in the PTEs can be different
- */
-int __meminit vmemmap_create_mapping(unsigned long start,
-                                    unsigned long page_size,
-                                    unsigned long phys)
-{
-       /* Create a PTE encoding without page size */
-       unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
-               _PAGE_KERNEL_RW;
-
-       /* PTEs only contain page size encodings up to 32M */
-       BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
-
-       /* Encode the size in the PTE */
-       flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
-
-       /* For each PTE for that area, map things. Note that we don't
-        * increment phys because all PTEs are of the large size and
-        * thus must have the low bits clear
-        */
-       for (i = 0; i < page_size; i += PAGE_SIZE)
-               BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags)));
-
-       return 0;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-void vmemmap_remove_mapping(unsigned long start,
-                           unsigned long page_size)
-{
-}
-#endif
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-
-static __ref void *early_alloc_pgtable(unsigned long size)
-{
-       void *ptr;
-
-       ptr = memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT,
-                                    __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE);
-
-       if (!ptr)
-               panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%lx\n",
-                     __func__, size, size, __pa(MAX_DMA_ADDRESS));
-
-       return ptr;
-}
-
-/*
- * map_kernel_page currently only called by __ioremap
- * map_kernel_page adds an entry to the ioremap page table
- * and adds an entry to the HPT, possibly bolting it
- */
-int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
-{
-       pgd_t *pgdp;
-       pud_t *pudp;
-       pmd_t *pmdp;
-       pte_t *ptep;
-
-       BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
-       if (slab_is_available()) {
-               pgdp = pgd_offset_k(ea);
-               pudp = pud_alloc(&init_mm, pgdp, ea);
-               if (!pudp)
-                       return -ENOMEM;
-               pmdp = pmd_alloc(&init_mm, pudp, ea);
-               if (!pmdp)
-                       return -ENOMEM;
-               ptep = pte_alloc_kernel(pmdp, ea);
-               if (!ptep)
-                       return -ENOMEM;
-       } else {
-               pgdp = pgd_offset_k(ea);
-#ifndef __PAGETABLE_PUD_FOLDED
-               if (pgd_none(*pgdp)) {
-                       pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-                       pgd_populate(&init_mm, pgdp, pudp);
-               }
-#endif /* !__PAGETABLE_PUD_FOLDED */
-               pudp = pud_offset(pgdp, ea);
-               if (pud_none(*pudp)) {
-                       pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-                       pud_populate(&init_mm, pudp, pmdp);
-               }
-               pmdp = pmd_offset(pudp, ea);
-               if (!pmd_present(*pmdp)) {
-                       ptep = early_alloc_pgtable(PAGE_SIZE);
-                       pmd_populate_kernel(&init_mm, pmdp, ptep);
-               }
-               ptep = pte_offset_kernel(pmdp, ea);
-       }
-       set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
-
-       smp_wmb();
-       return 0;
-}
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
deleted file mode 100644 (file)
index 9ed9006..0000000
+++ /dev/null
@@ -1,1280 +0,0 @@
-/*
- *  Low level TLB miss handlers for Book3E
- *
- *  Copyright (C) 2008-2009
- *      Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <asm/processor.h>
-#include <asm/reg.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/pgtable.h>
-#include <asm/exception-64e.h>
-#include <asm/ppc-opcode.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_booke_hv_asm.h>
-#include <asm/feature-fixups.h>
-
-#ifdef CONFIG_PPC_64K_PAGES
-#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
-#else
-#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
-#endif
-#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
-#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
-#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
-
-/**********************************************************************
- *                                                                    *
- * TLB miss handling for Book3E with a bolted linear mapping          *
- * No virtual page table, no nested TLB misses                        *
- *                                                                    *
- **********************************************************************/
-
-/*
- * Note that, unlike non-bolted handlers, TLB_EXFRAME is not
- * modified by the TLB miss handlers themselves, since the TLB miss
- * handler code will not itself cause a recursive TLB miss.
- *
- * TLB_EXFRAME will be modified when crit/mc/debug exceptions are
- * entered/exited.
- */
-.macro tlb_prolog_bolted intnum addr
-       mtspr   SPRN_SPRG_GEN_SCRATCH,r12
-       mfspr   r12,SPRN_SPRG_TLB_EXFRAME
-       std     r13,EX_TLB_R13(r12)
-       std     r10,EX_TLB_R10(r12)
-       mfspr   r13,SPRN_SPRG_PACA
-
-       mfcr    r10
-       std     r11,EX_TLB_R11(r12)
-#ifdef CONFIG_KVM_BOOKE_HV
-BEGIN_FTR_SECTION
-       mfspr   r11, SPRN_SRR1
-END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
-#endif
-       DO_KVM  \intnum, SPRN_SRR1
-       std     r16,EX_TLB_R16(r12)
-       mfspr   r16,\addr               /* get faulting address */
-       std     r14,EX_TLB_R14(r12)
-       ld      r14,PACAPGD(r13)
-       std     r15,EX_TLB_R15(r12)
-       std     r10,EX_TLB_CR(r12)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-START_BTB_FLUSH_SECTION
-       mfspr r11, SPRN_SRR1
-       andi. r10,r11,MSR_PR
-       beq 1f
-       BTB_FLUSH(r10)
-1:
-END_BTB_FLUSH_SECTION
-       std     r7,EX_TLB_R7(r12)
-#endif
-       TLB_MISS_PROLOG_STATS
-.endm
-
-.macro tlb_epilog_bolted
-       ld      r14,EX_TLB_CR(r12)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       ld      r7,EX_TLB_R7(r12)
-#endif
-       ld      r10,EX_TLB_R10(r12)
-       ld      r11,EX_TLB_R11(r12)
-       ld      r13,EX_TLB_R13(r12)
-       mtcr    r14
-       ld      r14,EX_TLB_R14(r12)
-       ld      r15,EX_TLB_R15(r12)
-       TLB_MISS_RESTORE_STATS
-       ld      r16,EX_TLB_R16(r12)
-       mfspr   r12,SPRN_SPRG_GEN_SCRATCH
-.endm
-
-/* Data TLB miss */
-       START_EXCEPTION(data_tlb_miss_bolted)
-       tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
-
-       /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
-
-       /* We do the user/kernel test for the PID here along with the RW test
-        */
-       /* We pre-test some combination of permissions to avoid double
-        * faults:
-        *
-        * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
-        * ESR_ST   is 0x00800000
-        * _PAGE_BAP_SW is 0x00000010
-        * So the shift is >> 19. This tests for supervisor writeability.
-        * If the page happens to be supervisor writeable and not user
-        * writeable, we will take a new fault later, but that should be
-        * a rare enough case.
-        *
-        * We also move ESR_ST in _PAGE_DIRTY position
-        * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
-        *
-        * MAS1 is preset for all we need except for TID that needs to
-        * be cleared for kernel translations
-        */
-
-       mfspr   r11,SPRN_ESR
-
-       srdi    r15,r16,60              /* get region */
-       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
-       bne-    dtlb_miss_fault_bolted  /* Bail if fault addr is invalid */
-
-       rlwinm  r10,r11,32-19,27,27
-       rlwimi  r10,r11,32-16,19,19
-       cmpwi   r15,0                   /* user vs kernel check */
-       ori     r10,r10,_PAGE_PRESENT
-       oris    r11,r10,_PAGE_ACCESSED@h
-
-       TLB_MISS_STATS_SAVE_INFO_BOLTED
-       bne     tlb_miss_kernel_bolted
-
-tlb_miss_common_bolted:
-/*
- * This is the guts of the TLB miss handler for bolted-linear.
- * We are entered with:
- *
- * r16 = faulting address
- * r15 = crap (free to use)
- * r14 = page table base
- * r13 = PACA
- * r11 = PTE permission mask
- * r10 = crap (free to use)
- */
-       rldicl  r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
-       cmpldi  cr0,r14,0
-       clrrdi  r15,r15,3
-       beq     tlb_miss_fault_bolted   /* No PGDIR, bail */
-
-BEGIN_MMU_FTR_SECTION
-       /* Set the TLB reservation and search for existing entry. Then load
-        * the entry.
-        */
-       PPC_TLBSRX_DOT(0,R16)
-       ldx     r14,r14,r15             /* grab pgd entry */
-       beq     tlb_miss_done_bolted    /* tlb exists already, bail */
-MMU_FTR_SECTION_ELSE
-       ldx     r14,r14,r15             /* grab pgd entry */
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
-
-#ifndef CONFIG_PPC_64K_PAGES
-       rldicl  r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
-       clrrdi  r15,r15,3
-       cmpdi   cr0,r14,0
-       bge     tlb_miss_fault_bolted   /* Bad pgd entry or hugepage; bail */
-       ldx     r14,r14,r15             /* grab pud entry */
-#endif /* CONFIG_PPC_64K_PAGES */
-
-       rldicl  r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
-       clrrdi  r15,r15,3
-       cmpdi   cr0,r14,0
-       bge     tlb_miss_fault_bolted
-       ldx     r14,r14,r15             /* Grab pmd entry */
-
-       rldicl  r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
-       clrrdi  r15,r15,3
-       cmpdi   cr0,r14,0
-       bge     tlb_miss_fault_bolted
-       ldx     r14,r14,r15             /* Grab PTE, normal (!huge) page */
-
-       /* Check if required permissions are met */
-       andc.   r15,r11,r14
-       rldicr  r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
-       bne-    tlb_miss_fault_bolted
-
-       /* Now we build the MAS:
-        *
-        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
-        * MAS 1   :    Almost fully setup
-        *               - PID already updated by caller if necessary
-        *               - TSIZE need change if !base page size, not
-        *                 yet implemented for now
-        * MAS 2   :    Defaults not useful, need to be redone
-        * MAS 3+7 :    Needs to be done
-        */
-       clrrdi  r11,r16,12              /* Clear low crap in EA */
-       clrldi  r15,r15,12              /* Clear crap at the top */
-       rlwimi  r11,r14,32-19,27,31     /* Insert WIMGE */
-       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
-       mtspr   SPRN_MAS2,r11
-       andi.   r11,r14,_PAGE_DIRTY
-       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
-
-       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
-       bne     1f
-       li      r11,MAS3_SW|MAS3_UW
-       andc    r15,r15,r11
-1:
-       mtspr   SPRN_MAS7_MAS3,r15
-       tlbwe
-
-tlb_miss_done_bolted:
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
-       tlb_epilog_bolted
-       rfi
-
-itlb_miss_kernel_bolted:
-       li      r11,_PAGE_PRESENT|_PAGE_BAP_SX  /* Base perm */
-       oris    r11,r11,_PAGE_ACCESSED@h
-tlb_miss_kernel_bolted:
-       mfspr   r10,SPRN_MAS1
-       ld      r14,PACA_KERNELPGD(r13)
-       cmpldi  cr0,r15,8               /* Check for vmalloc region */
-       rlwinm  r10,r10,0,16,1          /* Clear TID */
-       mtspr   SPRN_MAS1,r10
-       beq+    tlb_miss_common_bolted
-
-tlb_miss_fault_bolted:
-       /* We need to check if it was an instruction miss */
-       andi.   r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
-       bne     itlb_miss_fault_bolted
-dtlb_miss_fault_bolted:
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
-       tlb_epilog_bolted
-       b       exc_data_storage_book3e
-itlb_miss_fault_bolted:
-       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
-       tlb_epilog_bolted
-       b       exc_instruction_storage_book3e
-
-/* Instruction TLB miss */
-       START_EXCEPTION(instruction_tlb_miss_bolted)
-       tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
-
-       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
-       srdi    r15,r16,60              /* get region */
-       TLB_MISS_STATS_SAVE_INFO_BOLTED
-       bne-    itlb_miss_fault_bolted
-
-       li      r11,_PAGE_PRESENT|_PAGE_EXEC    /* Base perm */
-
-       /* We do the user/kernel test for the PID here along with the RW test
-        */
-
-       cmpldi  cr0,r15,0                       /* Check for user region */
-       oris    r11,r11,_PAGE_ACCESSED@h
-       beq     tlb_miss_common_bolted
-       b       itlb_miss_kernel_bolted
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-/*
- * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
- *
- * Linear mapping is bolted: no virtual page table or nested TLB misses
- * Indirect entries in TLB1, hardware loads resulting direct entries
- *    into TLB0
- * No HES or NV hint on TLB1, so we need to do software round-robin
- * No tlbsrx. so we need a spinlock, and we have to deal
- *    with MAS-damage caused by tlbsx
- * 4K pages only
- */
-
-       START_EXCEPTION(instruction_tlb_miss_e6500)
-       tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
-
-       ld      r11,PACA_TCD_PTR(r13)
-       srdi.   r15,r16,60              /* get region */
-       ori     r16,r16,1
-
-       TLB_MISS_STATS_SAVE_INFO_BOLTED
-       bne     tlb_miss_kernel_e6500   /* user/kernel test */
-
-       b       tlb_miss_common_e6500
-
-       START_EXCEPTION(data_tlb_miss_e6500)
-       tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
-
-       ld      r11,PACA_TCD_PTR(r13)
-       srdi.   r15,r16,60              /* get region */
-       rldicr  r16,r16,0,62
-
-       TLB_MISS_STATS_SAVE_INFO_BOLTED
-       bne     tlb_miss_kernel_e6500   /* user vs kernel check */
-
-/*
- * This is the guts of the TLB miss handler for e6500 and derivatives.
- * We are entered with:
- *
- * r16 = page of faulting address (low bit 0 if data, 1 if instruction)
- * r15 = crap (free to use)
- * r14 = page table base
- * r13 = PACA
- * r11 = tlb_per_core ptr
- * r10 = crap (free to use)
- * r7  = esel_next
- */
-tlb_miss_common_e6500:
-       crmove  cr2*4+2,cr0*4+2         /* cr2.eq != 0 if kernel address */
-
-BEGIN_FTR_SECTION              /* CPU_FTR_SMT */
-       /*
-        * Search if we already have an indirect entry for that virtual
-        * address, and if we do, bail out.
-        *
-        * MAS6:IND should be already set based on MAS4
-        */
-       lhz     r10,PACAPACAINDEX(r13)
-       addi    r10,r10,1
-       crclr   cr1*4+eq        /* set cr1.eq = 0 for non-recursive */
-1:     lbarx   r15,0,r11
-       cmpdi   r15,0
-       bne     2f
-       stbcx.  r10,0,r11
-       bne     1b
-3:
-       .subsection 1
-2:     cmpd    cr1,r15,r10     /* recursive lock due to mcheck/crit/etc? */
-       beq     cr1,3b          /* unlock will happen if cr1.eq = 0 */
-10:    lbz     r15,0(r11)
-       cmpdi   r15,0
-       bne     10b
-       b       1b
-       .previous
-END_FTR_SECTION_IFSET(CPU_FTR_SMT)
-
-       lbz     r7,TCD_ESEL_NEXT(r11)
-
-BEGIN_FTR_SECTION              /* CPU_FTR_SMT */
-       /*
-        * Erratum A-008139 says that we can't use tlbwe to change
-        * an indirect entry in any way (including replacing or
-        * invalidating) if the other thread could be in the process
-        * of a lookup.  The workaround is to invalidate the entry
-        * with tlbilx before overwriting.
-        */
-
-       rlwinm  r10,r7,16,0xff0000
-       oris    r10,r10,MAS0_TLBSEL(1)@h
-       mtspr   SPRN_MAS0,r10
-       isync
-       tlbre
-       mfspr   r15,SPRN_MAS1
-       andis.  r15,r15,MAS1_VALID@h
-       beq     5f
-
-BEGIN_FTR_SECTION_NESTED(532)
-       mfspr   r10,SPRN_MAS8
-       rlwinm  r10,r10,0,0x80000fff  /* tgs,tlpid -> sgs,slpid */
-       mtspr   SPRN_MAS5,r10
-END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
-
-       mfspr   r10,SPRN_MAS1
-       rlwinm  r15,r10,0,0x3fff0000  /* tid -> spid */
-       rlwimi  r15,r10,20,0x00000003 /* ind,ts -> sind,sas */
-       mfspr   r10,SPRN_MAS6
-       mtspr   SPRN_MAS6,r15
-
-       mfspr   r15,SPRN_MAS2
-       isync
-       tlbilxva 0,r15
-       isync
-
-       mtspr   SPRN_MAS6,r10
-
-5:
-BEGIN_FTR_SECTION_NESTED(532)
-       li      r10,0
-       mtspr   SPRN_MAS8,r10
-       mtspr   SPRN_MAS5,r10
-END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
-
-       tlbsx   0,r16
-       mfspr   r10,SPRN_MAS1
-       andis.  r15,r10,MAS1_VALID@h
-       bne     tlb_miss_done_e6500
-FTR_SECTION_ELSE
-       mfspr   r10,SPRN_MAS1
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
-
-       oris    r10,r10,MAS1_VALID@h
-       beq     cr2,4f
-       rlwinm  r10,r10,0,16,1          /* Clear TID */
-4:     mtspr   SPRN_MAS1,r10
-
-       /* Now, we need to walk the page tables. First check if we are in
-        * range.
-        */
-       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
-       bne-    tlb_miss_fault_e6500
-
-       rldicl  r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
-       cmpldi  cr0,r14,0
-       clrrdi  r15,r15,3
-       beq-    tlb_miss_fault_e6500 /* No PGDIR, bail */
-       ldx     r14,r14,r15             /* grab pgd entry */
-
-       rldicl  r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
-       clrrdi  r15,r15,3
-       cmpdi   cr0,r14,0
-       bge     tlb_miss_huge_e6500     /* Bad pgd entry or hugepage; bail */
-       ldx     r14,r14,r15             /* grab pud entry */
-
-       rldicl  r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
-       clrrdi  r15,r15,3
-       cmpdi   cr0,r14,0
-       bge     tlb_miss_huge_e6500
-       ldx     r14,r14,r15             /* Grab pmd entry */
-
-       mfspr   r10,SPRN_MAS0
-       cmpdi   cr0,r14,0
-       bge     tlb_miss_huge_e6500
-
-       /* Now we build the MAS for a 2M indirect page:
-        *
-        * MAS 0   :    ESEL needs to be filled by software round-robin
-        * MAS 1   :    Fully set up
-        *               - PID already updated by caller if necessary
-        *               - TSIZE for now is base ind page size always
-        *               - TID already cleared if necessary
-        * MAS 2   :    Default not 2M-aligned, need to be redone
-        * MAS 3+7 :    Needs to be done
-        */
-
-       ori     r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-       mtspr   SPRN_MAS7_MAS3,r14
-
-       clrrdi  r15,r16,21              /* make EA 2M-aligned */
-       mtspr   SPRN_MAS2,r15
-
-tlb_miss_huge_done_e6500:
-       lbz     r16,TCD_ESEL_MAX(r11)
-       lbz     r14,TCD_ESEL_FIRST(r11)
-       rlwimi  r10,r7,16,0x00ff0000    /* insert esel_next into MAS0 */
-       addi    r7,r7,1                 /* increment esel_next */
-       mtspr   SPRN_MAS0,r10
-       cmpw    r7,r16
-       iseleq  r7,r14,r7               /* if next == last use first */
-       stb     r7,TCD_ESEL_NEXT(r11)
-
-       tlbwe
-
-tlb_miss_done_e6500:
-       .macro  tlb_unlock_e6500
-BEGIN_FTR_SECTION
-       beq     cr1,1f          /* no unlock if lock was recursively grabbed */
-       li      r15,0
-       isync
-       stb     r15,0(r11)
-1:
-END_FTR_SECTION_IFSET(CPU_FTR_SMT)
-       .endm
-
-       tlb_unlock_e6500
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
-       tlb_epilog_bolted
-       rfi
-
-tlb_miss_huge_e6500:
-       beq     tlb_miss_fault_e6500
-       li      r10,1
-       andi.   r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
-       rldimi  r14,r10,63,0            /* Set PD_HUGE */
-       xor     r14,r14,r15             /* Clear size bits */
-       ldx     r14,0,r14
-
-       /*
-        * Now we build the MAS for a huge page.
-        *
-        * MAS 0   :    ESEL needs to be filled by software round-robin
-        *               - can be handled by indirect code
-        * MAS 1   :    Need to clear IND and set TSIZE
-        * MAS 2,3+7:   Needs to be redone similar to non-tablewalk handler
-        */
-
-       subi    r15,r15,10              /* Convert psize to tsize */
-       mfspr   r10,SPRN_MAS1
-       rlwinm  r10,r10,0,~MAS1_IND
-       rlwimi  r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
-       mtspr   SPRN_MAS1,r10
-
-       li      r10,-0x400
-       sld     r15,r10,r15             /* Generate mask based on size */
-       and     r10,r16,r15
-       rldicr  r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
-       rlwimi  r10,r14,32-19,27,31     /* Insert WIMGE */
-       clrldi  r15,r15,PAGE_SHIFT      /* Clear crap at the top */
-       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
-       mtspr   SPRN_MAS2,r10
-       andi.   r10,r14,_PAGE_DIRTY
-       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
-
-       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
-       bne     1f
-       li      r10,MAS3_SW|MAS3_UW
-       andc    r15,r15,r10
-1:
-       mtspr   SPRN_MAS7_MAS3,r15
-
-       mfspr   r10,SPRN_MAS0
-       b       tlb_miss_huge_done_e6500
-
-tlb_miss_kernel_e6500:
-       ld      r14,PACA_KERNELPGD(r13)
-       cmpldi  cr1,r15,8               /* Check for vmalloc region */
-       beq+    cr1,tlb_miss_common_e6500
-
-tlb_miss_fault_e6500:
-       tlb_unlock_e6500
-       /* We need to check if it was an instruction miss */
-       andi.   r16,r16,1
-       bne     itlb_miss_fault_e6500
-dtlb_miss_fault_e6500:
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
-       tlb_epilog_bolted
-       b       exc_data_storage_book3e
-itlb_miss_fault_e6500:
-       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
-       tlb_epilog_bolted
-       b       exc_instruction_storage_book3e
-#endif /* CONFIG_PPC_FSL_BOOK3E */
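For readers following the e6500 path above: the victim entry (MAS0[ESEL]) is chosen by a software round-robin over the per-core TCD_ESEL_* fields, as done around tlb_miss_huge_done_e6500. The C sketch below mirrors that selection; the struct and field names are illustrative stand-ins, not the kernel's actual definitions.

struct tcd_sketch {                     /* hypothetical mirror of the TCD_ESEL_* fields */
	unsigned char esel_next;        /* next victim entry to use */
	unsigned char esel_max;         /* wrap point (one past the last usable entry) */
	unsigned char esel_first;       /* first usable entry */
};

static unsigned int pick_esel(struct tcd_sketch *tcd, unsigned int mas0)
{
	/* insert the current victim into MAS0[ESEL] (mask 0x00ff0000, as in the rlwimi) */
	mas0 = (mas0 & ~0x00ff0000u) | ((unsigned int)tcd->esel_next << 16);

	/* advance the round-robin, wrapping back to the first entry (the iseleq) */
	if (++tcd->esel_next == tcd->esel_max)
		tcd->esel_next = tcd->esel_first;

	return mas0;
}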
-
-/**********************************************************************
- *                                                                    *
- * TLB miss handling for Book3E with TLB reservation and HES support  *
- *                                                                    *
- **********************************************************************/
-
-
-/* Data TLB miss */
-       START_EXCEPTION(data_tlb_miss)
-       TLB_MISS_PROLOG
-
-       /* Now we handle the fault proper. We only save DEAR in the normal
-        * fault case since that's the only interesting value here.
-        * We could probably also optimize by not saving SRR0/1 in the
-        * linear mapping case, but I'll leave that for later
-        */
-       mfspr   r14,SPRN_ESR
-       mfspr   r16,SPRN_DEAR           /* get faulting address */
-       srdi    r15,r16,60              /* get region */
-       cmpldi  cr0,r15,0xc             /* linear mapping ? */
-       TLB_MISS_STATS_SAVE_INFO
-       beq     tlb_load_linear         /* yes -> go to linear map load */
-
-       /* The page tables are mapped virtually linear. At this point, though,
-        * we don't know whether we are trying to fault in a first level
-        * virtual address or a virtual page table address. We can get that
-        * from bit 0x1 of the region ID which we have set for a page table
-        */
-       andi.   r10,r15,0x1
-       bne-    virt_page_table_tlb_miss
-
-       std     r14,EX_TLB_ESR(r12);    /* save ESR */
-       std     r16,EX_TLB_DEAR(r12);   /* save DEAR */
-
-        /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
-       li      r11,_PAGE_PRESENT
-       oris    r11,r11,_PAGE_ACCESSED@h
-
-       /* We do the user/kernel test for the PID here along with the RW test
-        */
-       cmpldi  cr0,r15,0               /* Check for user region */
-
-       /* We pre-test some combination of permissions to avoid double
-        * faults:
-        *
-        * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
-        * ESR_ST   is 0x00800000
-        * _PAGE_BAP_SW is 0x00000010
-        * So the shift is >> 19. This tests for supervisor writeability.
-        * If the page happens to be supervisor writeable and not user
-        * writeable, we will take a new fault later, but that should be
-        * a rare enough case.
-        *
-        * We also move ESR_ST in _PAGE_DIRTY position
-        * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
-        *
-        * MAS1 is preset for all we need except for TID that needs to
-        * be cleared for kernel translations
-        */
-       rlwimi  r11,r14,32-19,27,27
-       rlwimi  r11,r14,32-16,19,19
-       beq     normal_tlb_miss
-       /* XXX replace the RMW cycles with immediate loads + writes */
-1:     mfspr   r10,SPRN_MAS1
-       cmpldi  cr0,r15,8               /* Check for vmalloc region */
-       rlwinm  r10,r10,0,16,1          /* Clear TID */
-       mtspr   SPRN_MAS1,r10
-       beq+    normal_tlb_miss
-
-       /* We got a crappy address, just fault with whatever DEAR and ESR
-        * are here
-        */
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_data_storage_book3e
-
-/* Instruction TLB miss */
-       START_EXCEPTION(instruction_tlb_miss)
-       TLB_MISS_PROLOG
-
-       /* If we take a recursive fault, the second level handler may need
-        * to know whether we are handling a data or instruction fault in
-        * order to get to the right store fault handler. We provide that
-        * info by writing a crazy value in ESR in our exception frame
-        */
-       li      r14,-1  /* store to exception frame is done later */
-
-       /* Now we handle the fault proper. We only save DEAR in the
-        * non-linear mapping case since we know the linear mapping case
-        * will not re-enter. We could indeed optimize and also not save
-        * SRR0/1 in the linear mapping case, but I'll leave that for later
-        *
-        * Faulting address is SRR0 which is already in r16
-        */
-       srdi    r15,r16,60              /* get region */
-       cmpldi  cr0,r15,0xc             /* linear mapping ? */
-       TLB_MISS_STATS_SAVE_INFO
-       beq     tlb_load_linear         /* yes -> go to linear map load */
-
-       /* We do the user/kernel test for the PID here along with the RW test
-        */
-       li      r11,_PAGE_PRESENT|_PAGE_EXEC    /* Base perm */
-       oris    r11,r11,_PAGE_ACCESSED@h
-
-       cmpldi  cr0,r15,0                       /* Check for user region */
-       std     r14,EX_TLB_ESR(r12)             /* write crazy -1 to frame */
-       beq     normal_tlb_miss
-
-       li      r11,_PAGE_PRESENT|_PAGE_BAP_SX  /* Base perm */
-       oris    r11,r11,_PAGE_ACCESSED@h
-       /* XXX replace the RMW cycles with immediate loads + writes */
-       mfspr   r10,SPRN_MAS1
-       cmpldi  cr0,r15,8                       /* Check for vmalloc region */
-       rlwinm  r10,r10,0,16,1                  /* Clear TID */
-       mtspr   SPRN_MAS1,r10
-       beq+    normal_tlb_miss
-
-       /* We got a crappy address, just fault */
-       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_instruction_storage_book3e
-
-/*
- * This is the guts of the first-level TLB miss handler for direct
- * misses. We are entered with:
- *
- * r16 = faulting address
- * r15 = region ID
- * r14 = crap (free to use)
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = PTE permission mask
- * r10 = crap (free to use)
- */
-normal_tlb_miss:
-       /* So we first construct the page table address. We do that by
-        * shifting the bottom of the address (not the region ID) by
-        * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
-        * or'ing the fourth high bit.
-        *
-        * NOTE: For 64K pages, we do things slightly differently in
-        * order to handle the weird page table format used by linux
-        */
-       ori     r10,r15,0x1
-#ifdef CONFIG_PPC_64K_PAGES
-       /* For the top bits, 16 bytes per PTE */
-       rldicl  r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
-       /* Now create the bottom bits as 0 in position 0x8000 and
-        * the rest calculated for 8 bytes per PTE
-        */
-       rldicl  r15,r16,64-(PAGE_SHIFT-3),64-15
-       /* Insert the bottom bits in */
-       rlwimi  r14,r15,0,16,31
-#else
-       rldicl  r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
-#endif
-       sldi    r15,r10,60
-       clrrdi  r14,r14,3
-       or      r10,r15,r14
-
-BEGIN_MMU_FTR_SECTION
-       /* Set the TLB reservation and search for existing entry. Then load
-        * the entry.
-        */
-       PPC_TLBSRX_DOT(0,R16)
-       ld      r14,0(r10)
-       beq     normal_tlb_miss_done
-MMU_FTR_SECTION_ELSE
-       ld      r14,0(r10)
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
-
-finish_normal_tlb_miss:
-       /* Check if required permissions are met */
-       andc.   r15,r11,r14
-       bne-    normal_tlb_miss_access_fault
-
-       /* Now we build the MAS:
-        *
-        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
-        * MAS 1   :    Almost fully setup
-        *               - PID already updated by caller if necessary
-        *               - TSIZE needs changing if not the base page size
-        *                 (not yet implemented)
-        * MAS 2   :    Defaults not useful, need to be redone
-        * MAS 3+7 :    Needs to be done
-        *
-        * TODO: mix up code below for better scheduling
-        */
-       clrrdi  r11,r16,12              /* Clear low crap in EA */
-       rlwimi  r11,r14,32-19,27,31     /* Insert WIMGE */
-       mtspr   SPRN_MAS2,r11
-
-       /* Check page size, if not standard, update MAS1 */
-       rldicl  r11,r14,64-8,64-8
-#ifdef CONFIG_PPC_64K_PAGES
-       cmpldi  cr0,r11,BOOK3E_PAGESZ_64K
-#else
-       cmpldi  cr0,r11,BOOK3E_PAGESZ_4K
-#endif
-       beq-    1f
-       mfspr   r11,SPRN_MAS1
-       rlwimi  r11,r14,31,21,24
-       rlwinm  r11,r11,0,21,19
-       mtspr   SPRN_MAS1,r11
-1:
-       /* Move RPN in position */
-       rldicr  r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
-       clrldi  r15,r11,12              /* Clear crap at the top */
-       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
-       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
-
-       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
-       andi.   r11,r14,_PAGE_DIRTY
-       bne     1f
-       li      r11,MAS3_SW|MAS3_UW
-       andc    r15,r15,r11
-1:
-BEGIN_MMU_FTR_SECTION
-       srdi    r16,r15,32
-       mtspr   SPRN_MAS3,r15
-       mtspr   SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
-       mtspr   SPRN_MAS7_MAS3,r15
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
-
-       tlbwe
-
-normal_tlb_miss_done:
-       /* We don't bother with restoring DEAR or ESR since we know we are
-        * level 0 and just going back to userland. They are only needed
-        * if you are going to take an access fault
-        */
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
-       TLB_MISS_EPILOG_SUCCESS
-       rfi
-
-normal_tlb_miss_access_fault:
-       /* We need to check if it was an instruction miss */
-       andi.   r10,r11,_PAGE_EXEC
-       bne     1f
-       ld      r14,EX_TLB_DEAR(r12)
-       ld      r15,EX_TLB_ESR(r12)
-       mtspr   SPRN_DEAR,r14
-       mtspr   SPRN_ESR,r15
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_data_storage_book3e
-1:     TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_instruction_storage_book3e
-
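A sketch of the virtual-linear PTE pointer construction done at the top of normal_tlb_miss, for the CONFIG_PPC_4K_PAGES layout (8 bytes per PTE). The helper name and hard-coded page shift are illustrative assumptions, not kernel code.

#define SKETCH_PAGE_SHIFT	12	/* assumed 4K pages */

static unsigned long vpte_pointer(unsigned long ea, unsigned long region)
{
	unsigned long vregion = region | 0x1;		/* virtual page table region */
	unsigned long offset;

	/* strip the region nibble, then index with one 8-byte PTE per page */
	offset = (ea & ((1UL << 60) - 1)) >> SKETCH_PAGE_SHIFT;
	offset <<= 3;

	return (vregion << 60) | offset;		/* EA of the PTE to load */
}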
-
-/*
- * This is the guts of the second-level TLB miss handler for direct
- * misses. We are entered with:
- *
- * r16 = virtual page table faulting address
- * r15 = region (top 4 bits of address)
- * r14 = crap (free to use)
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = crap (free to use)
- * r10 = crap (free to use)
- *
- * Note that this should only ever be called as a second level handler
- * with the current scheme when using SW load.
- * That means we can always get the original fault DEAR at
- * EX_TLB_DEAR-EX_TLB_SIZE(r12)
- *
- * It can be re-entered by the linear mapping miss handler. However, to
- * avoid too much complication, it will restart the whole fault at level
- * 0 so we don't care too much about clobbers
- *
- * XXX That code was written back when we couldn't clobber r14. We can now,
- * so we could probably optimize things a bit
- */
-virt_page_table_tlb_miss:
-       /* Are we hitting a kernel page table ? */
-       andi.   r10,r15,0x8
-
-       /* The cool thing now is that r10 contains 0 for user and 8 for kernel,
-        * and we happen to have the swapper_pg_dir at offset 8 from the user
-        * pgdir in the PACA :-).
-        */
-       add     r11,r10,r13
-
-       /* If kernel, we need to clear MAS1 TID */
-       beq     1f
-       /* XXX replace the RMW cycles with immediate loads + writes */
-       mfspr   r10,SPRN_MAS1
-       rlwinm  r10,r10,0,16,1                  /* Clear TID */
-       mtspr   SPRN_MAS1,r10
-1:
-BEGIN_MMU_FTR_SECTION
-       /* Search if we already have a TLB entry for that virtual address, and
-        * if we do, bail out.
-        */
-       PPC_TLBSRX_DOT(0,R16)
-       beq     virt_page_table_tlb_miss_done
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
-
-       /* Now, we need to walk the page tables. First check if we are in
-        * range.
-        */
-       rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
-       bne-    virt_page_table_tlb_miss_fault
-
-       /* Get the PGD pointer */
-       ld      r15,PACAPGD(r11)
-       cmpldi  cr0,r15,0
-       beq-    virt_page_table_tlb_miss_fault
-
-       /* Get to PGD entry */
-       rldicl  r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
-       clrrdi  r10,r11,3
-       ldx     r15,r10,r15
-       cmpdi   cr0,r15,0
-       bge     virt_page_table_tlb_miss_fault
-
-#ifndef CONFIG_PPC_64K_PAGES
-       /* Get to PUD entry */
-       rldicl  r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
-       clrrdi  r10,r11,3
-       ldx     r15,r10,r15
-       cmpdi   cr0,r15,0
-       bge     virt_page_table_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
-
-       /* Get to PMD entry */
-       rldicl  r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
-       clrrdi  r10,r11,3
-       ldx     r15,r10,r15
-       cmpdi   cr0,r15,0
-       bge     virt_page_table_tlb_miss_fault
-
-       /* Ok, we're all right, we can now create a kernel translation for
-        * a 4K or 64K page from r16 -> r15.
-        */
-       /* Now we build the MAS:
-        *
-        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
-        * MAS 1   :    Almost fully setup
-        *               - PID already updated by caller if necessary
-        *               - TSIZE for now is base page size always
-        * MAS 2   :    Use defaults
-        * MAS 3+7 :    Needs to be done
-        *
-        * So we only do MAS 2 and 3 for now...
-        */
-       clrldi  r11,r15,4               /* remove region ID from RPN */
-       ori     r10,r11,1               /* Or-in SR */
-
-BEGIN_MMU_FTR_SECTION
-       srdi    r16,r10,32
-       mtspr   SPRN_MAS3,r10
-       mtspr   SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
-       mtspr   SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
-
-       tlbwe
-
-BEGIN_MMU_FTR_SECTION
-virt_page_table_tlb_miss_done:
-
-       /* We have overridden MAS2:EPN but currently our primary TLB miss
-        * handler will always restore it, so that should not be an issue.
-        * If we ever optimize the primary handler to not write MAS2 in
-        * some cases, we'll have to restore MAS2:EPN here based on the
-        * original fault's DEAR. If we do that we have to modify the
-        * ITLB miss handler to also store SRR0 in the exception frame
-        * as DEAR.
-        *
-        * However, one nasty thing we did is that we (potentially)
-        * cleared the reservation. The trick here is that if we are not
-        * a level 0 exception (we interrupted the TLB miss), we offset
-        * the return address by -4 in order to replay the tlbsrx
-        * instruction there
-        */
-       subf    r10,r13,r12
-       cmpldi  cr0,r10,PACA_EXTLB+EX_TLB_SIZE
-       bne-    1f
-       ld      r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
-       addi    r10,r11,-4
-       std     r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
-1:
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
-       /* Return to caller, normal case */
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
-       TLB_MISS_EPILOG_SUCCESS
-       rfi
-
-virt_page_table_tlb_miss_fault:
-       /* If we fault here, things are a little bit tricky. We need to call
-        * either data or instruction store fault, and we need to retrieve
-        * the original fault address and ESR (for data).
-        *
-        * The thing is, we know that in normal circumstances, this is
-        * always called as a second level tlb miss for SW load or as a first
-        * level TLB miss for HW load, so we should be able to peek at the
-        * relevant information in the first exception frame in the PACA.
-        *
-        * However, we do need to double check that, because we may just hit
-        * a stray kernel pointer or a userland attack trying to hit those
-        * areas. If that is the case, we do a data fault. (We can't get here
-        * from an instruction tlb miss anyway).
-        *
-        * Note also that when going to a fault, we must unwind the previous
-        * level as well. Since we are doing that, we don't need to clear or
-        * restore the TLB reservation either.
-        */
-       subf    r10,r13,r12
-       cmpldi  cr0,r10,PACA_EXTLB+EX_TLB_SIZE
-       bne-    virt_page_table_tlb_miss_whacko_fault
-
-       /* We dig the original DEAR and ESR from slot 0 */
-       ld      r15,EX_TLB_DEAR+PACA_EXTLB(r13)
-       ld      r16,EX_TLB_ESR+PACA_EXTLB(r13)
-
-       /* We check for the "special" ESR value for instruction faults */
-       cmpdi   cr0,r16,-1
-       beq     1f
-       mtspr   SPRN_DEAR,r15
-       mtspr   SPRN_ESR,r16
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
-       TLB_MISS_EPILOG_ERROR
-       b       exc_data_storage_book3e
-1:     TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
-       TLB_MISS_EPILOG_ERROR
-       b       exc_instruction_storage_book3e
-
-virt_page_table_tlb_miss_whacko_fault:
-       /* The linear fault will restart everything, so ESR and DEAR will
-        * not have been clobbered; let's just fault with what we have
-        */
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
-       TLB_MISS_EPILOG_ERROR
-       b       exc_data_storage_book3e
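The PGD selection trick at the top of virt_page_table_tlb_miss relies on the kernel PGD pointer being stored 8 bytes after the user PGD pointer in the PACA, so the region's "kernel" bit (0x8) doubles as a byte offset. A hedged C sketch with made-up field names:

struct paca_sketch {			/* hypothetical layout mirroring PACAPGD / PACA_KERNELPGD */
	unsigned long *user_pgd;	/* PACAPGD */
	unsigned long *kernel_pgd;	/* 8 bytes later */
};

static unsigned long *pick_pgd(struct paca_sketch *paca, unsigned long region)
{
	unsigned long off = region & 0x8;	/* 0 for user space, 8 for kernel space */

	return *(unsigned long **)((char *)&paca->user_pgd + off);
}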
-
-
-/**************************************************************
- *                                                            *
- * TLB miss handling for Book3E with hw page table support    *
- *                                                            *
- **************************************************************/
-
-
-/* Data TLB miss */
-       START_EXCEPTION(data_tlb_miss_htw)
-       TLB_MISS_PROLOG
-
-       /* Now we handle the fault proper. We only save DEAR in the normal
-        * fault case since that's the only interesting value here.
-        * We could probably also optimize by not saving SRR0/1 in the
-        * linear mapping case, but I'll leave that for later
-        */
-       mfspr   r14,SPRN_ESR
-       mfspr   r16,SPRN_DEAR           /* get faulting address */
-       srdi    r11,r16,60              /* get region */
-       cmpldi  cr0,r11,0xc             /* linear mapping ? */
-       TLB_MISS_STATS_SAVE_INFO
-       beq     tlb_load_linear         /* yes -> go to linear map load */
-
-       /* We do the user/kernel test for the PID here along with the RW test
-        */
-       cmpldi  cr0,r11,0               /* Check for user region */
-       ld      r15,PACAPGD(r13)        /* Load user pgdir */
-       beq     htw_tlb_miss
-
-       /* XXX replace the RMW cycles with immediate loads + writes */
-1:     mfspr   r10,SPRN_MAS1
-       cmpldi  cr0,r11,8               /* Check for vmalloc region */
-       rlwinm  r10,r10,0,16,1          /* Clear TID */
-       mtspr   SPRN_MAS1,r10
-       ld      r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
-       beq+    htw_tlb_miss
-
-       /* We got a crappy address, just fault with whatever DEAR and ESR
-        * are here
-        */
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_data_storage_book3e
-
-/* Instruction TLB miss */
-       START_EXCEPTION(instruction_tlb_miss_htw)
-       TLB_MISS_PROLOG
-
-       /* If we take a recursive fault, the second level handler may need
-        * to know whether we are handling a data or instruction fault in
-        * order to get to the right store fault handler. We provide that
-        * info by keeping a crazy value for ESR in r14
-        */
-       li      r14,-1  /* store to exception frame is done later */
-
-       /* Now we handle the fault proper. We only save DEAR in the
-        * non-linear mapping case since we know the linear mapping case
-        * will not re-enter. We could indeed optimize and also not save
-        * SRR0/1 in the linear mapping case, but I'll leave that for later
-        *
-        * Faulting address is SRR0 which is already in r16
-        */
-       srdi    r11,r16,60              /* get region */
-       cmpldi  cr0,r11,0xc             /* linear mapping ? */
-       TLB_MISS_STATS_SAVE_INFO
-       beq     tlb_load_linear         /* yes -> go to linear map load */
-
-       /* We do the user/kernel test for the PID here along with the RW test
-        */
-       cmpldi  cr0,r11,0                       /* Check for user region */
-       ld      r15,PACAPGD(r13)                /* Load user pgdir */
-       beq     htw_tlb_miss
-
-       /* XXX replace the RMW cycles with immediate loads + writes */
-1:     mfspr   r10,SPRN_MAS1
-       cmpldi  cr0,r11,8                       /* Check for vmalloc region */
-       rlwinm  r10,r10,0,16,1                  /* Clear TID */
-       mtspr   SPRN_MAS1,r10
-       ld      r15,PACA_KERNELPGD(r13)         /* Load kernel pgdir */
-       beq+    htw_tlb_miss
-
-       /* We got a crappy address, just fault */
-       TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_instruction_storage_book3e
-
-
-/*
- * This is the guts of the first-level TLB miss handler used when hardware
- * tablewalk is enabled: it creates an indirect entry for the page table
- * page covering the faulting address. We are entered with:
- *
- * r16 = faulting address
- * r15 = PGD pointer
- * r14 = ESR (data) or -1 (instruction)
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = crap (free to use)
- * r10 = crap (free to use)
- *
- * It can be re-entered by the linear mapping miss handler. However, to
- * avoid too much complication, it will save/restore things for us
- */
-htw_tlb_miss:
-       /* Search if we already have a TLB entry for that virtual address, and
-        * if we do, bail out.
-        *
-        * MAS1:IND should be already set based on MAS4
-        */
-       PPC_TLBSRX_DOT(0,R16)
-       beq     htw_tlb_miss_done
-
-       /* Now, we need to walk the page tables. First check if we are in
-        * range.
-        */
-       rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
-       bne-    htw_tlb_miss_fault
-
-       /* Get the PGD pointer */
-       cmpldi  cr0,r15,0
-       beq-    htw_tlb_miss_fault
-
-       /* Get to PGD entry */
-       rldicl  r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
-       clrrdi  r10,r11,3
-       ldx     r15,r10,r15
-       cmpdi   cr0,r15,0
-       bge     htw_tlb_miss_fault
-
-#ifndef CONFIG_PPC_64K_PAGES
-       /* Get to PUD entry */
-       rldicl  r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
-       clrrdi  r10,r11,3
-       ldx     r15,r10,r15
-       cmpdi   cr0,r15,0
-       bge     htw_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
-
-       /* Get to PMD entry */
-       rldicl  r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
-       clrrdi  r10,r11,3
-       ldx     r15,r10,r15
-       cmpdi   cr0,r15,0
-       bge     htw_tlb_miss_fault
-
-       /* Ok, we're all right, we can now create an indirect entry for
-        * a 1M or 256M page.
-        *
-        * One last trick: because we use "half" pages for the HTW
-        * (a 1M IND is 2K and a 256M IND is 32K), we need to account
-        * for an added LSB bit in the RPN. For 64K pages there is no
-        * problem as we already use 32K arrays (half PTE pages), but for
-        * 4K pages we need to extract a bit from the virtual address and
-        * insert it into the "PA52" bit of the RPN.
-        */
-#ifndef CONFIG_PPC_64K_PAGES
-       rlwimi  r15,r16,32-9,20,20
-#endif
-       /* Now we build the MAS:
-        *
-        * MAS 0   :    Fully setup with defaults in MAS4 and TLBnCFG
-        * MAS 1   :    Almost fully setup
-        *               - PID already updated by caller if necessary
-        *               - TSIZE for now is base ind page size always
-        * MAS 2   :    Use defaults
-        * MAS 3+7 :    Needs to be done
-        */
-#ifdef CONFIG_PPC_64K_PAGES
-       ori     r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
-#else
-       ori     r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-#endif
-
-BEGIN_MMU_FTR_SECTION
-       srdi    r16,r10,32
-       mtspr   SPRN_MAS3,r10
-       mtspr   SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
-       mtspr   SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
-
-       tlbwe
-
-htw_tlb_miss_done:
-       /* We don't bother with restoring DEAR or ESR since we know we are
-        * level 0 and just going back to userland. They are only needed
-        * if you are going to take an access fault
-        */
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
-       TLB_MISS_EPILOG_SUCCESS
-       rfi
-
-htw_tlb_miss_fault:
-       /* We need to check if it was an instruction miss. We know this
-        * though because r14 would contain -1
-        */
-       cmpdi   cr0,r14,-1
-       beq     1f
-       mtspr   SPRN_DEAR,r16
-       mtspr   SPRN_ESR,r14
-       TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_data_storage_book3e
-1:     TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
-       TLB_MISS_EPILOG_ERROR
-       b       exc_instruction_storage_book3e
-
-/*
- * This is the guts of the "any level" TLB miss handler for kernel linear
- * mapping misses. We are entered with:
- *
- * r16 = faulting address
- * r15 = crap (free to use)
- * r14 = ESR (data) or -1 (instruction)
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = crap (free to use)
- * r10 = crap (free to use)
- *
- * In addition we know that we will not re-enter, so in theory we could
- * use a simpler epilog not restoring SRR0/1 etc., but we'll do that later.
- *
- * We also need to be careful about the MAS registers and the TLB
- * reservation here, as we know we'll have clobbered them if we interrupt
- * the main TLB miss handlers, in which case we probably want to do a full
- * restart at level 0 rather than saving/restoring the MAS.
- *
- * Note: If we care about performance of that core, we can easily shuffle
- *       a few things around
- */
-tlb_load_linear:
-       /* For now, we assume the linear mapping is contiguous and stops at
-        * linear_map_top. We also assume the size is a multiple of 1G, thus
-        * we only use 1G pages for now. That might have to be changed in a
-        * final implementation, especially when dealing with hypervisors
-        */
-       ld      r11,PACATOC(r13)
-       ld      r11,linear_map_top@got(r11)
-       ld      r10,0(r11)
-       tovirt(10,10)
-       cmpld   cr0,r16,r10
-       bge     tlb_load_linear_fault
-
-       /* MAS1 need whole new setup. */
-       li      r15,(BOOK3E_PAGESZ_1GB<<MAS1_TSIZE_SHIFT)
-       oris    r15,r15,MAS1_VALID@h    /* MAS1 needs V and TSIZE */
-       mtspr   SPRN_MAS1,r15
-
-       /* Already somebody there ? */
-       PPC_TLBSRX_DOT(0,R16)
-       beq     tlb_load_linear_done
-
-       /* Now we build the remaining MAS. MAS0 and 2 should be fine
-        * with their defaults, which leaves us with MAS 3 and 7. The
-        * mapping is linear, so we just take the address, clear the
-        * region bits, and or in the permission bits which are currently
-        * hard wired
-        */
-       clrrdi  r10,r16,30              /* 1G page index */
-       clrldi  r10,r10,4               /* clear region bits */
-       ori     r10,r10,MAS3_SR|MAS3_SW|MAS3_SX
-
-BEGIN_MMU_FTR_SECTION
-       srdi    r16,r10,32
-       mtspr   SPRN_MAS3,r10
-       mtspr   SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
-       mtspr   SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
-
-       tlbwe
-
-tlb_load_linear_done:
-       /* We use the "error" epilog for success as we do want to
-        * restore to the initial faulting context, whatever it was.
-        * We do that because we can't resume a fault within a TLB
-        * miss handler, due to MAS and TLB reservation being clobbered.
-        */
-       TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
-       TLB_MISS_EPILOG_ERROR
-       rfi
-
-tlb_load_linear_fault:
-       /* We keep the DEAR and ESR around; this shouldn't have happened */
-       cmpdi   cr0,r14,-1
-       beq     1f
-       TLB_MISS_EPILOG_ERROR_SPECIAL
-       b       exc_data_storage_book3e
-1:     TLB_MISS_EPILOG_ERROR_SPECIAL
-       b       exc_instruction_storage_book3e
-
-
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-.tlb_stat_inc:
-1:     ldarx   r8,0,r9
-       addi    r8,r8,1
-       stdcx.  r8,0,r9
-       bne-    1b
-       blr
-#endif
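The .tlb_stat_inc helper above is a load-reserve/store-conditional (ldarx/stdcx.) loop that atomically increments the 64-bit statistics counter whose address arrives in r9. A minimal C equivalent, sketched with compiler atomic builtins rather than the kernel's own atomic helpers:

static void tlb_stat_inc_sketch(unsigned long *counter)
{
	unsigned long old = __atomic_load_n(counter, __ATOMIC_RELAXED);

	/* retry whenever another CPU updates the counter between load and store */
	while (!__atomic_compare_exchange_n(counter, &old, old + 1, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		;	/* __atomic_compare_exchange_n refreshes 'old' on failure */
}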
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
deleted file mode 100644 (file)
index 704e613..0000000
+++ /dev/null
@@ -1,810 +0,0 @@
-/*
- * This file contains the routines for TLB flushing.
- * On machines where the MMU does not use a hash table to store virtual to
- * physical translations (ie, SW loaded TLBs or Book3E compilant processors,
- * this does -not- include 603 however which shares the implementation with
- * hash based processors)
- *
- *  -- BenH
- *
- * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
- *                     IBM Corp.
- *
- *  Derived from arch/ppc/mm/init.c:
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
- *    Copyright (C) 1996 Paul Mackerras
- *
- *  Derived from "arch/i386/mm/init.c"
- *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/preempt.h>
-#include <linux/spinlock.h>
-#include <linux/memblock.h>
-#include <linux/of_fdt.h>
-#include <linux/hugetlb.h>
-
-#include <asm/tlbflush.h>
-#include <asm/tlb.h>
-#include <asm/code-patching.h>
-#include <asm/cputhreads.h>
-#include <asm/hugetlb.h>
-#include <asm/paca.h>
-
-#include <mm/mmu_decl.h>
-
-/*
- * This array lists the sw-supported page sizes.  The hardware MMU may
- * support other sizes not listed here.  The .ind field is only used on
- * MMUs that have indirect page table entries.
- */
-#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
-       [MMU_PAGE_4K] = {
-               .shift  = 12,
-               .enc    = BOOK3E_PAGESZ_4K,
-       },
-       [MMU_PAGE_2M] = {
-               .shift  = 21,
-               .enc    = BOOK3E_PAGESZ_2M,
-       },
-       [MMU_PAGE_4M] = {
-               .shift  = 22,
-               .enc    = BOOK3E_PAGESZ_4M,
-       },
-       [MMU_PAGE_16M] = {
-               .shift  = 24,
-               .enc    = BOOK3E_PAGESZ_16M,
-       },
-       [MMU_PAGE_64M] = {
-               .shift  = 26,
-               .enc    = BOOK3E_PAGESZ_64M,
-       },
-       [MMU_PAGE_256M] = {
-               .shift  = 28,
-               .enc    = BOOK3E_PAGESZ_256M,
-       },
-       [MMU_PAGE_1G] = {
-               .shift  = 30,
-               .enc    = BOOK3E_PAGESZ_1GB,
-       },
-};
-#elif defined(CONFIG_PPC_8xx)
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
-       /* we only manage 4k and 16k pages as normal pages */
-#ifdef CONFIG_PPC_4K_PAGES
-       [MMU_PAGE_4K] = {
-               .shift  = 12,
-       },
-#else
-       [MMU_PAGE_16K] = {
-               .shift  = 14,
-       },
-#endif
-       [MMU_PAGE_512K] = {
-               .shift  = 19,
-       },
-       [MMU_PAGE_8M] = {
-               .shift  = 23,
-       },
-};
-#else
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
-       [MMU_PAGE_4K] = {
-               .shift  = 12,
-               .ind    = 20,
-               .enc    = BOOK3E_PAGESZ_4K,
-       },
-       [MMU_PAGE_16K] = {
-               .shift  = 14,
-               .enc    = BOOK3E_PAGESZ_16K,
-       },
-       [MMU_PAGE_64K] = {
-               .shift  = 16,
-               .ind    = 28,
-               .enc    = BOOK3E_PAGESZ_64K,
-       },
-       [MMU_PAGE_1M] = {
-               .shift  = 20,
-               .enc    = BOOK3E_PAGESZ_1M,
-       },
-       [MMU_PAGE_16M] = {
-               .shift  = 24,
-               .ind    = 36,
-               .enc    = BOOK3E_PAGESZ_16M,
-       },
-       [MMU_PAGE_256M] = {
-               .shift  = 28,
-               .enc    = BOOK3E_PAGESZ_256M,
-       },
-       [MMU_PAGE_1G] = {
-               .shift  = 30,
-               .enc    = BOOK3E_PAGESZ_1GB,
-       },
-};
-#endif /* CONFIG_PPC_FSL_BOOK3E */
-
-static inline int mmu_get_tsize(int psize)
-{
-       return mmu_psize_defs[psize].enc;
-}
-#else
-static inline int mmu_get_tsize(int psize)
-{
-       /* This isn't used on !Book3E for now */
-       return 0;
-}
-#endif /* CONFIG_PPC_BOOK3E_MMU || CONFIG_PPC_8xx */
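As a reading aid for the table above: when a page size has a non-zero .ind, one indirect TLB entry maps 2^ind bytes of address space through a single page of PTEs (e.g. ind = 28 for 64K base pages means one indirect entry covers 256M). A small sketch, not kernel code:

static unsigned long indirect_coverage_sketch(int psize)
{
	struct mmu_psize_def *def = &mmu_psize_defs[psize];

	if (!def->ind)
		return 0;		/* no indirect (IND) support for this size */

	return 1UL << def->ind;		/* e.g. MMU_PAGE_64K: 1UL << 28 = 256M */
}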
-
-/* The variables below are currently only used on 64-bit Book3E
- * though this will probably be made common with other nohash
- * implementations at some point
- */
-#ifdef CONFIG_PPC64
-
-int mmu_linear_psize;          /* Page size used for the linear mapping */
-int mmu_pte_psize;             /* Page size used for PTE pages */
-int mmu_vmemmap_psize;         /* Page size used for the virtual mem map */
-int book3e_htw_mode;           /* HW tablewalk?  Value is PPC_HTW_* */
-unsigned long linear_map_top;  /* Top of linear mapping */
-
-
-/*
- * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
- * exceptions.  This is used for bolted and e6500 TLB miss handlers which
- * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
- * this is set to zero.
- */
-int extlb_level_exc;
-
-#endif /* CONFIG_PPC64 */
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
-DEFINE_PER_CPU(int, next_tlbcam_idx);
-EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
-#endif
-
-/*
- * Base TLB flushing operations:
- *
- *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
- *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_kernel_range(start, end) flushes kernel pages
- *
- *  - local_* variants of page and mm only apply to the current
- *    processor
- */
-
-/*
- * These are the base non-SMP variants of page and mm flushing
- */
-void local_flush_tlb_mm(struct mm_struct *mm)
-{
-       unsigned int pid;
-
-       preempt_disable();
-       pid = mm->context.id;
-       if (pid != MMU_NO_CONTEXT)
-               _tlbil_pid(pid);
-       preempt_enable();
-}
-EXPORT_SYMBOL(local_flush_tlb_mm);
-
-void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-                           int tsize, int ind)
-{
-       unsigned int pid;
-
-       preempt_disable();
-       pid = mm ? mm->context.id : 0;
-       if (pid != MMU_NO_CONTEXT)
-               _tlbil_va(vmaddr, pid, tsize, ind);
-       preempt_enable();
-}
-
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
-       __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
-                              mmu_get_tsize(mmu_virtual_psize), 0);
-}
-EXPORT_SYMBOL(local_flush_tlb_page);
-
-/*
- * And here are the SMP non-local implementations
- */
-#ifdef CONFIG_SMP
-
-static DEFINE_RAW_SPINLOCK(tlbivax_lock);
-
-struct tlb_flush_param {
-       unsigned long addr;
-       unsigned int pid;
-       unsigned int tsize;
-       unsigned int ind;
-};
-
-static void do_flush_tlb_mm_ipi(void *param)
-{
-       struct tlb_flush_param *p = param;
-
-       _tlbil_pid(p ? p->pid : 0);
-}
-
-static void do_flush_tlb_page_ipi(void *param)
-{
-       struct tlb_flush_param *p = param;
-
-       _tlbil_va(p->addr, p->pid, p->tsize, p->ind);
-}
-
-
-/* Note on invalidations and PID:
- *
- * We snapshot the PID with preempt disabled. At this point, it can still
- * change either because:
- * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
- * - we are invalidating some target that isn't currently running here
- *   and is concurrently acquiring a new PID on another CPU
- * - some other CPU is re-acquiring a lost PID for this mm
- * etc...
- *
- * However, this shouldn't be a problem as we only guarantee
- * invalidation of TLB entries present prior to this call, so we
- * don't care about the PID changing, and invalidating a stale PID
- * is generally harmless.
- */
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-       unsigned int pid;
-
-       preempt_disable();
-       pid = mm->context.id;
-       if (unlikely(pid == MMU_NO_CONTEXT))
-               goto no_context;
-       if (!mm_is_core_local(mm)) {
-               struct tlb_flush_param p = { .pid = pid };
-               /* Ignores smp_processor_id() even if set. */
-               smp_call_function_many(mm_cpumask(mm),
-                                      do_flush_tlb_mm_ipi, &p, 1);
-       }
-       _tlbil_pid(pid);
- no_context:
-       preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_mm);
-
-void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-                     int tsize, int ind)
-{
-       struct cpumask *cpu_mask;
-       unsigned int pid;
-
-       /*
-        * This function as well as __local_flush_tlb_page() must only be called
-        * for user contexts.
-        */
-       if (WARN_ON(!mm))
-               return;
-
-       preempt_disable();
-       pid = mm->context.id;
-       if (unlikely(pid == MMU_NO_CONTEXT))
-               goto bail;
-       cpu_mask = mm_cpumask(mm);
-       if (!mm_is_core_local(mm)) {
-               /* If broadcast tlbivax is supported, use it */
-               if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
-                       int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
-                       if (lock)
-                               raw_spin_lock(&tlbivax_lock);
-                       _tlbivax_bcast(vmaddr, pid, tsize, ind);
-                       if (lock)
-                               raw_spin_unlock(&tlbivax_lock);
-                       goto bail;
-               } else {
-                       struct tlb_flush_param p = {
-                               .pid = pid,
-                               .addr = vmaddr,
-                               .tsize = tsize,
-                               .ind = ind,
-                       };
-                       /* Ignores smp_processor_id() even if set in cpu_mask */
-                       smp_call_function_many(cpu_mask,
-                                              do_flush_tlb_page_ipi, &p, 1);
-               }
-       }
-       _tlbil_va(vmaddr, pid, tsize, ind);
- bail:
-       preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
-#ifdef CONFIG_HUGETLB_PAGE
-       if (vma && is_vm_hugetlb_page(vma))
-               flush_hugetlb_page(vma, vmaddr);
-#endif
-
-       __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
-                        mmu_get_tsize(mmu_virtual_psize), 0);
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_PPC_47x
-void __init early_init_mmu_47x(void)
-{
-#ifdef CONFIG_SMP
-       unsigned long root = of_get_flat_dt_root();
-       if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
-               mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
-#endif /* CONFIG_SMP */
-}
-#endif /* CONFIG_PPC_47x */
-
-/*
- * Flush kernel TLB entries in the given range
- */
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-#ifdef CONFIG_SMP
-       preempt_disable();
-       smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
-       _tlbil_pid(0);
-       preempt_enable();
-#else
-       _tlbil_pid(0);
-#endif
-}
-EXPORT_SYMBOL(flush_tlb_kernel_range);
-
-/*
- * Currently, for range flushing, we just do a full mm flush. This should
- * be optimized based on a threshold on the size of the range, since
- * some implementations can stack multiple tlbivax before a tlbsync, but
- * for now we keep it that way
- */
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-                    unsigned long end)
-
-{
-       if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
-               flush_tlb_page(vma, start);
-       else
-               flush_tlb_mm(vma->vm_mm);
-}
-EXPORT_SYMBOL(flush_tlb_range);
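As the comment above notes, flush_tlb_range() currently just flushes the whole mm. One possible shape of the threshold optimization it mentions is sketched below; PAGE_FLUSH_THRESHOLD and the loop are hypothetical, not something this file implements.

#define PAGE_FLUSH_THRESHOLD	32	/* hypothetical tuning knob */

static void flush_tlb_range_sketch(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	unsigned long nr = (end - start) >> PAGE_SHIFT;
	unsigned long addr;

	if (nr > PAGE_FLUSH_THRESHOLD) {
		flush_tlb_mm(vma->vm_mm);	/* large range: flush the whole mm */
		return;
	}

	for (addr = start; addr < end; addr += PAGE_SIZE)
		flush_tlb_page(vma, addr);	/* small range: per-page invalidate */
}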
-
-void tlb_flush(struct mmu_gather *tlb)
-{
-       flush_tlb_mm(tlb->mm);
-}
-
-/*
- * Below are functions specific to the 64-bit variant of Book3E though that
- * may change in the future
- */
-
-#ifdef CONFIG_PPC64
-
-/*
- * Handling of virtual linear page tables or indirect TLB entries
- * flushing when PTE pages are freed
- */
-void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
-{
-       int tsize = mmu_psize_defs[mmu_pte_psize].enc;
-
-       if (book3e_htw_mode != PPC_HTW_NONE) {
-               unsigned long start = address & PMD_MASK;
-               unsigned long end = address + PMD_SIZE;
-               unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
-
-               /* This isn't the most optimal; ideally we would factor out the
-                * whole preempt & CPU mask mucking around, or even the IPI, but
-                * it will do for now
-                */
-               while (start < end) {
-                       __flush_tlb_page(tlb->mm, start, tsize, 1);
-                       start += size;
-               }
-       } else {
-               unsigned long rmask = 0xf000000000000000ul;
-               unsigned long rid = (address & rmask) | 0x1000000000000000ul;
-               unsigned long vpte = address & ~rmask;
-
-#ifdef CONFIG_PPC_64K_PAGES
-               vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
-#else
-               vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
-#endif
-               vpte |= rid;
-               __flush_tlb_page(tlb->mm, vpte, tsize, 0);
-       }
-}
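A worked example of the non-HTW branch above, assuming CONFIG_PPC_4K_PAGES; the address value is made up purely for illustration and the arithmetic simply mirrors the code.

static unsigned long vpte_to_flush_example(void)
{
	unsigned long address = 0x0000200040001000ul;	/* hypothetical user address */
	unsigned long rmask   = 0xf000000000000000ul;
	unsigned long rid     = (address & rmask) | 0x1000000000000000ul;
	unsigned long vpte    = address & ~rmask;

	/* one 8-byte PTE per 4K page: shift by PAGE_SHIFT - 3 = 9, then align to 4K */
	vpte = (vpte >> 9) & ~0xffful;

	return vpte | rid;	/* the single VPTE translation __flush_tlb_page() targets */
}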
-
-static void setup_page_sizes(void)
-{
-       unsigned int tlb0cfg;
-       unsigned int tlb0ps;
-       unsigned int eptcfg;
-       int i, psize;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       unsigned int mmucfg = mfspr(SPRN_MMUCFG);
-       int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
-
-       if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
-               unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
-               unsigned int min_pg, max_pg;
-
-               min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
-               max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
-
-               for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
-                       struct mmu_psize_def *def;
-                       unsigned int shift;
-
-                       def = &mmu_psize_defs[psize];
-                       shift = def->shift;
-
-                       if (shift == 0 || shift & 1)
-                               continue;
-
-                       /* adjust to be in terms of 4^shift Kb */
-                       shift = (shift - 10) >> 1;
-
-                       if ((shift >= min_pg) && (shift <= max_pg))
-                               def->flags |= MMU_PAGE_SIZE_DIRECT;
-               }
-
-               goto out;
-       }
-
-       if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
-               u32 tlb1cfg, tlb1ps;
-
-               tlb0cfg = mfspr(SPRN_TLB0CFG);
-               tlb1cfg = mfspr(SPRN_TLB1CFG);
-               tlb1ps = mfspr(SPRN_TLB1PS);
-               eptcfg = mfspr(SPRN_EPTCFG);
-
-               if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
-                       book3e_htw_mode = PPC_HTW_E6500;
-
-               /*
-                * We expect 4K subpage size and unrestricted indirect size.
-                * The lack of a restriction on indirect size is a Freescale
-                * extension, indicated by PSn = 0 but SPSn != 0.
-                */
-               if (eptcfg != 2)
-                       book3e_htw_mode = PPC_HTW_NONE;
-
-               for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
-                       struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
-                       if (!def->shift)
-                               continue;
-
-                       if (tlb1ps & (1U << (def->shift - 10))) {
-                               def->flags |= MMU_PAGE_SIZE_DIRECT;
-
-                               if (book3e_htw_mode && psize == MMU_PAGE_2M)
-                                       def->flags |= MMU_PAGE_SIZE_INDIRECT;
-                       }
-               }
-
-               goto out;
-       }
-#endif
-
-       tlb0cfg = mfspr(SPRN_TLB0CFG);
-       tlb0ps = mfspr(SPRN_TLB0PS);
-       eptcfg = mfspr(SPRN_EPTCFG);
-
-       /* Look for supported direct sizes */
-       for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
-               struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
-               if (tlb0ps & (1U << (def->shift - 10)))
-                       def->flags |= MMU_PAGE_SIZE_DIRECT;
-       }
-
-       /* Indirect page sizes supported ? */
-       if ((tlb0cfg & TLBnCFG_IND) == 0 ||
-           (tlb0cfg & TLBnCFG_PT) == 0)
-               goto out;
-
-       book3e_htw_mode = PPC_HTW_IBM;
-
-       /* Now, we only deal with one IND page size for each
-        * direct size. Hopefully all implementations today are
-        * unambiguous, but we might want to be careful in the
-        * future.
-        */
-       for (i = 0; i < 3; i++) {
-               unsigned int ps, sps;
-
-               sps = eptcfg & 0x1f;
-               eptcfg >>= 5;
-               ps = eptcfg & 0x1f;
-               eptcfg >>= 5;
-               if (!ps || !sps)
-                       continue;
-               for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
-                       struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
-                       if (ps == (def->shift - 10))
-                               def->flags |= MMU_PAGE_SIZE_INDIRECT;
-                       if (sps == (def->shift - 10))
-                               def->ind = ps + 10;
-               }
-       }
-
-out:
-       /* Cleanup array and print summary */
-       pr_info("MMU: Supported page sizes\n");
-       for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
-               struct mmu_psize_def *def = &mmu_psize_defs[psize];
-               const char *__page_type_names[] = {
-                       "unsupported",
-                       "direct",
-                       "indirect",
-                       "direct & indirect"
-               };
-               if (def->flags == 0) {
-                       def->shift = 0; 
-                       continue;
-               }
-               pr_info("  %8ld KB as %s\n", 1ul << (def->shift - 10),
-                       __page_type_names[def->flags & 0x3]);
-       }
-}
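A small sketch of the "(shift - 10) >> 1" conversion used in the MAV 1.0 branch above: TLBnCFG_MINSIZE/MAXSIZE express page sizes as powers of 4 KB, so a direct size of 2^shift bytes becomes 4^((shift - 10) / 2) KB.

static unsigned int bytes_shift_to_pow4_kb(unsigned int shift)
{
	/* e.g. 4K: shift = 12 -> 4^1 KB; 16M: shift = 24 -> 4^7 KB */
	return (shift - 10) >> 1;
}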
-
-static void setup_mmu_htw(void)
-{
-       /*
-        * If we want to use HW tablewalk, enable it by patching the TLB miss
-        * handlers to branch to the one dedicated to it.
-        */
-
-       switch (book3e_htw_mode) {
-       case PPC_HTW_IBM:
-               patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
-               patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
-               break;
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       case PPC_HTW_E6500:
-               extlb_level_exc = EX_TLB_SIZE;
-               patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
-               patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
-               break;
-#endif
-       }
-       pr_info("MMU: Book3E HW tablewalk %s\n",
-               book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
-}
-
-/*
- * Early initialization of the MMU TLB code
- */
-static void early_init_this_mmu(void)
-{
-       unsigned int mas4;
-
-       /* Set MAS4 based on page table setting */
-
-       mas4 = 0x4 << MAS4_WIMGED_SHIFT;
-       switch (book3e_htw_mode) {
-       case PPC_HTW_E6500:
-               mas4 |= MAS4_INDD;
-               mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
-               mas4 |= MAS4_TLBSELD(1);
-               mmu_pte_psize = MMU_PAGE_2M;
-               break;
-
-       case PPC_HTW_IBM:
-               mas4 |= MAS4_INDD;
-#ifdef CONFIG_PPC_64K_PAGES
-               mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
-               mmu_pte_psize = MMU_PAGE_256M;
-#else
-               mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
-               mmu_pte_psize = MMU_PAGE_1M;
-#endif
-               break;
-
-       case PPC_HTW_NONE:
-#ifdef CONFIG_PPC_64K_PAGES
-               mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
-#else
-               mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
-#endif
-               mmu_pte_psize = mmu_virtual_psize;
-               break;
-       }
-       mtspr(SPRN_MAS4, mas4);
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
-               unsigned int num_cams;
-               int __maybe_unused cpu = smp_processor_id();
-               bool map = true;
-
-               /* use a quarter of the TLBCAM for bolted linear map */
-               num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
-
-               /*
-                * Only do the mapping once per core, or else the
-                * transient mapping would cause problems.
-                */
-#ifdef CONFIG_SMP
-               if (hweight32(get_tensr()) > 1)
-                       map = false;
-#endif
-
-               if (map)
-                       linear_map_top = map_mem_in_cams(linear_map_top,
-                                                        num_cams, false);
-       }
-#endif
-
-       /* A sync won't hurt us after mucking around with
-        * the MMU configuration
-        */
-       mb();
-}
-
-static void __init early_init_mmu_global(void)
-{
-       /* XXX This will have to be decided at runtime, but right
-        * now our boot and TLB miss code hard wires it. Ideally
-        * we should find out a suitable page size and patch the
-        * TLB miss code (either that or use the PACA to store
-        * the value we want)
-        */
-       mmu_linear_psize = MMU_PAGE_1G;
-
-       /* XXX This should be decided at runtime based on supported
-        * page sizes in the TLB, but for now let's assume 16M is
-        * always there and a good fit (which it probably is)
-        *
-        * Freescale booke only supports 4K pages in TLB0, so use that.
-        */
-       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
-               mmu_vmemmap_psize = MMU_PAGE_4K;
-       else
-               mmu_vmemmap_psize = MMU_PAGE_16M;
-
-       /* XXX This code only checks for TLB 0 capabilities and doesn't
-        *     check what page size combos are supported by the HW. It
-        *     also doesn't handle the case where a separate array holds
-        *     the IND entries from the array loaded by the PT.
-        */
-       /* Look for supported page sizes */
-       setup_page_sizes();
-
-       /* Look for HW tablewalk support */
-       setup_mmu_htw();
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
-               if (book3e_htw_mode == PPC_HTW_NONE) {
-                       extlb_level_exc = EX_TLB_SIZE;
-                       patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
-                       patch_exception(0x1e0,
-                               exc_instruction_tlb_miss_bolted_book3e);
-               }
-       }
-#endif
-
-       /* Set the global containing the top of the linear mapping
-        * for use by the TLB miss code
-        */
-       linear_map_top = memblock_end_of_DRAM();
-}
-
-static void __init early_mmu_set_memory_limit(void)
-{
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
-               /*
-                * Limit memory so we don't have linear faults.
-                * Unlike memblock_set_current_limit, which limits
-                * memory available during early boot, this permanently
-                * reduces the memory available to Linux.  We need to
-                * do this because highmem is not supported on 64-bit.
-                */
-               memblock_enforce_memory_limit(linear_map_top);
-       }
-#endif
-
-       memblock_set_current_limit(linear_map_top);
-}
-
-/* boot cpu only */
-void __init early_init_mmu(void)
-{
-       early_init_mmu_global();
-       early_init_this_mmu();
-       early_mmu_set_memory_limit();
-}
-
-void early_init_mmu_secondary(void)
-{
-       early_init_this_mmu();
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
-                               phys_addr_t first_memblock_size)
-{
-       /* On non-FSL Embedded 64-bit, we adjust the RMA size to match
-        * the bolted TLB entry. We know for now that only 1G
-        * entries are supported though that may eventually
-        * change.
-        *
-        * On FSL Embedded 64-bit, usually all RAM is bolted, but with
-        * unusual memory sizes it's possible for some RAM to not be mapped
-        * (such RAM is not used at all by Linux, since we don't support
-        * highmem on 64-bit).  We limit ppc64_rma_size to what would be
-        * mappable if this memblock is the only one.  Additional memblocks
-        * can only increase, not decrease, the amount that ends up getting
-        * mapped.  We still limit max to 1G even if we'll eventually map
-        * more.  This is due to what the early init code is set up to do.
-        *
-        * We crop it to the size of the first MEMBLOCK to
-        * avoid going over total available memory just in case...
-        */
-#ifdef CONFIG_PPC_FSL_BOOK3E
-       if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
-               unsigned long linear_sz;
-               unsigned int num_cams;
-
-               /* use a quarter of the TLBCAM for bolted linear map */
-               num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
-
-               linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
-                                           true);
-
-               ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
-       } else
-#endif
-               ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
-
-       /* Finally limit subsequent allocations */
-       memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
-}
-#else /* ! CONFIG_PPC64 */
-void __init early_init_mmu(void)
-{
-#ifdef CONFIG_PPC_47x
-       early_init_mmu_47x();
-#endif
-
-#ifdef CONFIG_PPC_MM_SLICES
-#if defined(CONFIG_PPC_8xx)
-       init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#endif
-#endif
-}
-#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
deleted file mode 100644 (file)
index e066a65..0000000
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * This file contains low-level functions for performing various
- * types of TLB invalidations on various processors with no hash
- * table.
- *
- * This file implements the following functions for all no-hash
- * processors. Some aren't implemented for some variants. Some
- * are inline in tlbflush.h
- *
- *     - tlbil_va
- *     - tlbil_pid
- *     - tlbil_all
- *     - tlbivax_bcast
- *
- * Code mostly moved over from misc_32.S
- *
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Partially rewritten by Cort Dougan (cort@cs.nmt.edu)
- * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/reg.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/mmu.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/processor.h>
-#include <asm/bug.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
-
-#if defined(CONFIG_40x)
-
-/*
- * 40x implementation needs only tlbil_va
- */
-_GLOBAL(__tlbil_va)
-       /* We run the search with interrupts disabled because we have to change
-        * the PID and we don't want to be preempted while that happens.
-        */
-       mfmsr   r5
-       mfspr   r6,SPRN_PID
-       wrteei  0
-       mtspr   SPRN_PID,r4
-       tlbsx.  r3, 0, r3
-       mtspr   SPRN_PID,r6
-       wrtee   r5
-       bne     1f
-       sync
-       /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
-        * clear. Since 25 is the V bit in the TLB_TAG, loading this value
-        * will invalidate the TLB entry. */
-       tlbwe   r3, r3, TLB_TAG
-       isync
-1:     blr
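The invalidation trick described in the comment above is easy to check in isolation: with only 64 TLB entries the index returned by tlbsx. is below 64, and IBM bit 25 of a 32-bit word is mask 1 << (31 - 25) == 0x40, so the bare index always has the V bit clear when written back as the TAG word. A tiny host-side sketch (not kernel code):

#include <assert.h>

#define TAG_V_MASK	(1u << (31 - 25))	/* IBM bit 25 of a 32-bit word == 0x40 */

int main(void)
{
	unsigned int idx;

	for (idx = 0; idx < 64; idx++)
		assert((idx & TAG_V_MASK) == 0);	/* index written as TAG has V = 0 */
	return 0;
}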
-
-#elif defined(CONFIG_PPC_8xx)
-
-/*
- * Nothing to do for 8xx, everything is inline
- */
-
-#elif defined(CONFIG_44x) /* Includes 47x */
-
-/*
- * 440 implementation uses tlbsx/we for tlbil_va and a full sweep
- * of the TLB for everything else.
- */
-_GLOBAL(__tlbil_va)
-       mfspr   r5,SPRN_MMUCR
-       mfmsr   r10
-
-       /*
-        * We write 16 bits of STID since 47x supports that much; we
-        * should never be passed out-of-bounds values on 440 (hopefully).
-        */
-       rlwimi  r5,r4,0,16,31
-
-       /* We have to run the search with interrupts disabled, otherwise
-        * an interrupt which causes a TLB miss can clobber the MMUCR
-        * between the mtspr and the tlbsx.
-        *
-        * Critical and Machine Check interrupts take care of saving
-        * and restoring MMUCR, so only normal interrupts have to be
-        * taken care of.
-        */
-       wrteei  0
-       mtspr   SPRN_MMUCR,r5
-       tlbsx.  r6,0,r3
-       bne     10f
-       sync
-BEGIN_MMU_FTR_SECTION
-       b       2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
-       /* On 440 there are only 64 TLB entries, so r3 < 64, which means bit
-        * 22 is clear.  Since 22 is the V bit in the TLB_PAGEID, loading this
-        * value will invalidate the TLB entry.
-        */
-       tlbwe   r6,r6,PPC44x_TLB_PAGEID
-       isync
-10:    wrtee   r10
-       blr
-2:
-#ifdef CONFIG_PPC_47x
-       oris    r7,r6,0x8000    /* specify way explicitly */
-       clrrwi  r4,r3,12        /* get an EPN for the hashing with V = 0 */
-       ori     r4,r4,PPC47x_TLBE_SIZE
-       tlbwe   r4,r7,0         /* write it */
-       isync
-       wrtee   r10
-       blr
-#else /* CONFIG_PPC_47x */
-1:     trap
-       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
-#endif /* !CONFIG_PPC_47x */
-
-_GLOBAL(_tlbil_all)
-_GLOBAL(_tlbil_pid)
-BEGIN_MMU_FTR_SECTION
-       b       2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
-       li      r3,0
-       sync
-
-       /* Load high watermark */
-       lis     r4,tlb_44x_hwater@ha
-       lwz     r5,tlb_44x_hwater@l(r4)
-
-1:     tlbwe   r3,r3,PPC44x_TLB_PAGEID
-       addi    r3,r3,1
-       cmpw    0,r3,r5
-       ble     1b
-
-       isync
-       blr
-2:
-#ifdef CONFIG_PPC_47x
-       /* 476 variant. There's no simple way to do this; hopefully we'll
-        * manage to limit the number of such full invalidates.
-        */
-       mfmsr   r11             /* Interrupts off */
-       wrteei  0
-       li      r3,-1           /* Current set */
-       lis     r10,tlb_47x_boltmap@h
-       ori     r10,r10,tlb_47x_boltmap@l
-       lis     r7,0x8000       /* Specify way explicitly */
-
-       b       9f              /* For each set */
-
-1:     li      r9,4            /* Number of ways */
-       li      r4,0            /* Current way */
-       li      r6,0            /* Default entry value 0 */
-       andi.   r0,r8,1         /* Check if way 0 is bolted */
-       mtctr   r9              /* Load way counter */
-       bne-    3f              /* Bolted, skip loading it */
-
-2:     /* For each way */
-       or      r5,r3,r4        /* Make way|index for tlbre */
-       rlwimi  r5,r5,16,8,15   /* Copy index into position */
-       tlbre   r6,r5,0         /* Read entry */
-3:     addis   r4,r4,0x2000    /* Next way */
-       andi.   r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */
-       beq     4f              /* Nope, skip it */
-       rlwimi  r7,r5,0,1,2     /* Insert way number */
-       rlwinm  r6,r6,0,21,19   /* Clear V */
-       tlbwe   r6,r7,0         /* Write it */
-4:     bdnz    2b              /* Loop for each way */
-       srwi    r8,r8,1         /* Next boltmap bit */
-9:     cmpwi   cr1,r3,255      /* Last set done ? */
-       addi    r3,r3,1         /* Next set */
-       beq     cr1,1f          /* End of loop */
-       andi.   r0,r3,0x1f      /* Need to load a new boltmap word ? */
-       bne     1b              /* No, loop */
-       lwz     r8,0(r10)       /* Load boltmap entry */
-       addi    r10,r10,4       /* Next word */
-       b       1b              /* Then loop */
-1:     isync                   /* Sync shadows */
-       wrtee   r11
-#else /* CONFIG_PPC_47x */
-1:     trap
-       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
-#endif /* !CONFIG_PPC_47x */
-       blr
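To make the set/way walk above easier to follow, here is a hedged host-side model. The simulated TLB array, the TLB0_VALID mask value and the function name are stand-ins for the real tlbre/tlbwe instructions and kernel symbols, but the structure mirrors the assembly: 256 sets, four ways per set, a fresh 32-bit boltmap word every 32 sets, and way 0 of a bolted set is left alone.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TLB0_VALID	0x00000800u		/* stand-in for PPC47x_TLB0_VALID */

static uint32_t tlb[256][4];			/* simulated word 0 of each 47x entry */

static void tlbil_all_47x_model(const uint32_t *boltmap)
{
	uint32_t word = 0;

	for (unsigned int set = 0; set < 256; set++) {
		if ((set & 0x1f) == 0)		/* load a new boltmap word every 32 sets */
			word = *boltmap++;
		bool way0_bolted = word & 1;

		for (unsigned int way = 0; way < 4; way++) {
			if (way == 0 && way0_bolted)
				continue;	/* keep the bolted entry */
			if (tlb[set][way] & TLB0_VALID)
				tlb[set][way] &= ~TLB0_VALID;	/* clear V, keep the rest */
		}
		word >>= 1;			/* consume this set's bolt bit */
	}
}

int main(void)
{
	uint32_t boltmap[8] = { 0x1 };		/* only set 0, way 0 is bolted */

	tlb[0][0] = TLB0_VALID;			/* bolted kernel mapping */
	tlb[5][2] = TLB0_VALID;			/* some other valid entry */
	tlbil_all_47x_model(boltmap);
	printf("set0/way0 valid: %d, set5/way2 valid: %d\n",
	       !!(tlb[0][0] & TLB0_VALID), !!(tlb[5][2] & TLB0_VALID));
	return 0;
}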
-
-#ifdef CONFIG_PPC_47x
-
-/*
- * _tlbivax_bcast is only on 47x. We don't bother doing a runtime
- * check, though; it will blow up soon enough if we mistakenly try
- * to use it on a 440.
- */
-_GLOBAL(_tlbivax_bcast)
-       mfspr   r5,SPRN_MMUCR
-       mfmsr   r10
-       rlwimi  r5,r4,0,16,31
-       wrteei  0
-       mtspr   SPRN_MMUCR,r5
-       isync
-       PPC_TLBIVAX(0, R3)
-       isync
-       eieio
-       tlbsync
-BEGIN_FTR_SECTION
-       b       1f
-END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
-       sync
-       wrtee   r10
-       blr
-/*
- * DD2 HW could hang if an instruction fetch happens before the msync completes.
- * Touch enough instruction cache lines to ensure cache hits.
- */
-1:     mflr    r9
-       bl      2f
-2:     mflr    r6
-       li      r7,32
-       PPC_ICBT(0,R6,R7)               /* touch next cache line */
-       add     r6,r6,r7
-       PPC_ICBT(0,R6,R7)               /* touch next cache line */
-       add     r6,r6,r7
-       PPC_ICBT(0,R6,R7)               /* touch next cache line */
-       sync
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-       mtlr    r9
-       wrtee   r10
-       blr
-#endif /* CONFIG_PPC_47x */
-
-#elif defined(CONFIG_FSL_BOOKE)
-/*
- * FSL BookE implementations.
- *
- * Since feature sections use _SECTION_ELSE, we need to put
- * the larger code path before the _SECTION_ELSE.
- */
-
-/*
- * Flush MMU TLB on the local processor
- */
-_GLOBAL(_tlbil_all)
-BEGIN_MMU_FTR_SECTION
-       li      r3,(MMUCSR0_TLBFI)@l
-       mtspr   SPRN_MMUCSR0, r3
-1:
-       mfspr   r3,SPRN_MMUCSR0
-       andi.   r3,r3,MMUCSR0_TLBFI@l
-       bne     1b
-MMU_FTR_SECTION_ELSE
-       PPC_TLBILX_ALL(0,R0)
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
-       msync
-       isync
-       blr
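A hedged model of the non-TLBILX path above: write the flash-invalidate bit to MMUCSR0, then spin until hardware clears it. The register below is an ordinary variable standing in for SPRN_MMUCSR0, and the bit value is illustrative only; the real code uses mtspr/mfspr.

#include <stdint.h>

static volatile uint32_t mmucsr0;		/* stand-in for SPRN_MMUCSR0 */
#define MMUCSR0_TLBFI	0x00000004u		/* illustrative bit value only */

/* In this model the "hardware" completes the invalidate on the first read. */
static uint32_t read_mmucsr0(void)
{
	mmucsr0 &= ~MMUCSR0_TLBFI;
	return mmucsr0;
}

static void tlbil_all_model(void)
{
	mmucsr0 |= MMUCSR0_TLBFI;		/* request the flash invalidate */
	while (read_mmucsr0() & MMUCSR0_TLBFI)	/* spin until the bit drops */
		;
}

int main(void)
{
	tlbil_all_model();
	return 0;
}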
-
-_GLOBAL(_tlbil_pid)
-BEGIN_MMU_FTR_SECTION
-       slwi    r3,r3,16
-       mfmsr   r10
-       wrteei  0
-       mfspr   r4,SPRN_MAS6    /* save MAS6 */
-       mtspr   SPRN_MAS6,r3
-       PPC_TLBILX_PID(0,R0)
-       mtspr   SPRN_MAS6,r4    /* restore MAS6 */
-       wrtee   r10
-MMU_FTR_SECTION_ELSE
-       li      r3,(MMUCSR0_TLBFI)@l
-       mtspr   SPRN_MMUCSR0, r3
-1:
-       mfspr   r3,SPRN_MMUCSR0
-       andi.   r3,r3,MMUCSR0_TLBFI@l
-       bne     1b
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX)
-       msync
-       isync
-       blr
-
-/*
- * Flush MMU TLB for a particular address, but only on the local processor
- * (no broadcast)
- */
-_GLOBAL(__tlbil_va)
-       mfmsr   r10
-       wrteei  0
-       slwi    r4,r4,16
-       ori     r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l
-       mtspr   SPRN_MAS6,r4            /* assume AS=0 for now */
-BEGIN_MMU_FTR_SECTION
-       tlbsx   0,r3
-       mfspr   r4,SPRN_MAS1            /* check valid */
-       andis.  r3,r4,MAS1_VALID@h
-       beq     1f
-       rlwinm  r4,r4,0,1,31
-       mtspr   SPRN_MAS1,r4
-       tlbwe
-MMU_FTR_SECTION_ELSE
-       PPC_TLBILX_VA(0,R3)
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
-       msync
-       isync
-1:     wrtee   r10
-       blr
-#elif defined(CONFIG_PPC_BOOK3E)
-/*
- * New Book3E (>= 2.06) implementation
- *
- * Note: We may be able to get away without the interrupt masking stuff
- * if we save/restore MAS6 on exceptions that might modify it
- */
-_GLOBAL(_tlbil_pid)
-       slwi    r4,r3,MAS6_SPID_SHIFT
-       mfmsr   r10
-       wrteei  0
-       mtspr   SPRN_MAS6,r4
-       PPC_TLBILX_PID(0,R0)
-       wrtee   r10
-       msync
-       isync
-       blr
-
-_GLOBAL(_tlbil_pid_noind)
-       slwi    r4,r3,MAS6_SPID_SHIFT
-       mfmsr   r10
-       ori     r4,r4,MAS6_SIND
-       wrteei  0
-       mtspr   SPRN_MAS6,r4
-       PPC_TLBILX_PID(0,R0)
-       wrtee   r10
-       msync
-       isync
-       blr
-
-_GLOBAL(_tlbil_all)
-       PPC_TLBILX_ALL(0,R0)
-       msync
-       isync
-       blr
-
-_GLOBAL(_tlbil_va)
-       mfmsr   r10
-       wrteei  0
-       cmpwi   cr0,r6,0
-       slwi    r4,r4,MAS6_SPID_SHIFT
-       rlwimi  r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
-       beq     1f
-       rlwimi  r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
-1:     mtspr   SPRN_MAS6,r4            /* assume AS=0 for now */
-       PPC_TLBILX_VA(0,R3)
-       msync
-       isync
-       wrtee   r10
-       blr
-
-_GLOBAL(_tlbivax_bcast)
-       mfmsr   r10
-       wrteei  0
-       cmpwi   cr0,r6,0
-       slwi    r4,r4,MAS6_SPID_SHIFT
-       rlwimi  r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
-       beq     1f
-       rlwimi  r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
-1:     mtspr   SPRN_MAS6,r4            /* assume AS=0 for now */
-       PPC_TLBIVAX(0,R3)
-       eieio
-       tlbsync
-       sync
-       wrtee   r10
-       blr
-
-_GLOBAL(set_context)
-#ifdef CONFIG_BDI_SWITCH
-       /* Context switch the PTE pointer for the Abatron BDI2000.
-        * The PGDIR is the second parameter.
-        */
-       lis     r5, abatron_pteptrs@h
-       ori     r5, r5, abatron_pteptrs@l
-       stw     r4, 0x4(r5)
-#endif
-       mtspr   SPRN_PID,r3
-       isync                   /* Force context change */
-       blr
-#else
-#error Unsupported processor type!
-#endif
-
-#if defined(CONFIG_PPC_FSL_BOOK3E)
-/*
- * extern void loadcam_entry(unsigned int index)
- *
- * Load the TLBCAM[index] entry into the L2 CAM MMU.
- * Must preserve r7, r8, r9, and r10
- */
-_GLOBAL(loadcam_entry)
-       mflr    r5
-       LOAD_REG_ADDR_PIC(r4, TLBCAM)
-       mtlr    r5
-       mulli   r5,r3,TLBCAM_SIZE
-       add     r3,r5,r4
-       lwz     r4,TLBCAM_MAS0(r3)
-       mtspr   SPRN_MAS0,r4
-       lwz     r4,TLBCAM_MAS1(r3)
-       mtspr   SPRN_MAS1,r4
-       PPC_LL  r4,TLBCAM_MAS2(r3)
-       mtspr   SPRN_MAS2,r4
-       lwz     r4,TLBCAM_MAS3(r3)
-       mtspr   SPRN_MAS3,r4
-BEGIN_MMU_FTR_SECTION
-       lwz     r4,TLBCAM_MAS7(r3)
-       mtspr   SPRN_MAS7,r4
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
-       isync
-       tlbwe
-       isync
-       blr
-
-/*
- * Load multiple TLB entries at once, using an alternate-space
- * trampoline so that we don't have to care about whether the same
- * TLB entry maps us before and after.
- *
- * r3 = first entry to write
- * r4 = number of entries to write
- * r5 = temporary tlb entry
- */
-_GLOBAL(loadcam_multi)
-       mflr    r8
-
-       /*
-        * Set up a temporary TLB entry that is the same as what we're
-        * running from, but in AS=1.
-        */
-       bl      1f
-1:     mflr    r6
-       tlbsx   0,r8
-       mfspr   r6,SPRN_MAS1
-       ori     r6,r6,MAS1_TS
-       mtspr   SPRN_MAS1,r6
-       mfspr   r6,SPRN_MAS0
-       rlwimi  r6,r5,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
-       mr      r7,r5
-       mtspr   SPRN_MAS0,r6
-       isync
-       tlbwe
-       isync
-
-       /* Switch to AS=1 */
-       mfmsr   r6
-       ori     r6,r6,MSR_IS|MSR_DS
-       mtmsr   r6
-       isync
-
-       mr      r9,r3
-       add     r10,r3,r4
-2:     bl      loadcam_entry
-       addi    r9,r9,1
-       cmpw    r9,r10
-       mr      r3,r9
-       blt     2b
-
-       /* Return to AS=0 and clear the temporary entry */
-       mfmsr   r6
-       rlwinm. r6,r6,0,~(MSR_IS|MSR_DS)
-       mtmsr   r6
-       isync
-
-       li      r6,0
-       mtspr   SPRN_MAS1,r6
-       rlwinm  r6,r7,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
-       oris    r6,r6,MAS0_TLBSEL(1)@h
-       mtspr   SPRN_MAS0,r6
-       isync
-       tlbwe
-       isync
-
-       mtlr    r8
-       blr
-#endif
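To make the trampoline in loadcam_multi easier to follow, here is a hedged host-side outline with no-op stand-ins for the privileged steps. Only the ordering matters: install an AS=1 copy of the translation we are currently running from, switch to AS=1 so the AS=0 CAM entries can be rewritten safely, reload them one by one via loadcam_entry, then switch back and drop the temporary entry. All names and arguments below are illustrative, not kernel symbols.

#include <stdio.h>

static void write_temp_as1_entry(int slot)   { printf("tlbwe: AS=1 copy of current mapping in slot %d\n", slot); }
static void switch_address_space(int as)     { printf("mtmsr: IS/DS = %d\n", as); }
static void loadcam_entry_model(int idx)     { printf("loadcam_entry(%d)\n", idx); }
static void invalidate_temp_entry(int slot)  { printf("tlbwe: clear slot %d\n", slot); }

static void loadcam_multi_model(int first, int num, int tmp_slot)
{
	write_temp_as1_entry(tmp_slot);		/* same translation, but AS=1 */
	switch_address_space(1);		/* now safe to rewrite the AS=0 CAMs */
	for (int i = first; i < first + num; i++)
		loadcam_entry_model(i);
	switch_address_space(0);		/* back onto the freshly written CAMs */
	invalidate_temp_entry(tmp_slot);	/* temporary entry no longer needed */
}

int main(void)
{
	loadcam_multi_model(0, 4, 62);		/* illustrative arguments only */
	return 0;
}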