Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/power...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 10 May 2019 12:29:27 +0000 (05:29 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 10 May 2019 12:29:27 +0000 (05:29 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 May 2019 12:29:27 +0000 (05:29 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 May 2019 12:29:27 +0000 (05:29 -0700)
diff --cc Documentation/admin-guide/kernel-parameters.txt
Simple merge
diff --cc MAINTAINERS
Simple merge
diff --cc arch/powerpc/Kconfig
Simple merge
diff --cc arch/powerpc/configs/skiroot_defconfig
Simple merge
diff --cc arch/powerpc/include/asm/paca.h

index 134e912,245d11a..62f27e0
--- 1/arch/powerpc/include/asm/paca.h
--- 2/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@@ -173,8 -171,8 +173,7 @@@ struct paca_struct 
         u16 trap_save;                  /* Used when bad stack is encountered */
         u8 irq_soft_mask;               /* mask for irq soft masking */
         u8 irq_happened;                /* irq happened while soft-disabled */
- -      u8 io_sync;                     /* writel() needs spin_unlock sync */
         u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disable */
-       u8 nap_state_lost;              /* NV GPR values lost in power7_idle */
   #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
         u8 pmcregs_in_use;              /* pseries puts this in lppaca */
   #endif
diff --cc arch/powerpc/kernel/security.c
Simple merge
diff --cc arch/powerpc/kernel/setup_64.c
Simple merge
diff --cc arch/powerpc/kvm/book3s_hv.c
Simple merge
diff --cc arch/powerpc/mm/book3s32/mmu.c

index 0000000,615f78d..fc073cb

mode 000000,100644..100644
--- /dev/null
--- 2/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@@ -1,0 -1,432 +1,442 @@@
- -      unsigned int base_shift = (fls(base) - 1) & 31;
+ /*
+  * This file contains the routines for handling the MMU on those
+  * PowerPC implementations where the MMU substantially follows the
+  * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+  * and 8260 implementations but excludes the 8xx and 4xx.
+  *  -- paulus
+  *
+  *  Derived from arch/ppc/mm/init.c:
+  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+  *
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+  *
+  *  This program is free software; you can redistribute it and/or
+  *  modify it under the terms of the GNU General Public License
+  *  as published by the Free Software Foundation; either version
+  *  2 of the License, or (at your option) any later version.
+  *
+  */
+ 
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/init.h>
+ #include <linux/highmem.h>
+ #include <linux/memblock.h>
+ 
+ #include <asm/prom.h>
+ #include <asm/mmu.h>
+ #include <asm/machdep.h>
+ #include <asm/code-patching.h>
+ #include <asm/sections.h>
+ 
+ #include <mm/mmu_decl.h>
+ 
+ struct hash_pte *Hash;
+ static unsigned long Hash_size, Hash_mask;
+ unsigned long _SDR1;
+ static unsigned int hash_mb, hash_mb2;
+ 
+ struct ppc_bat BATS[8][2];    /* 8 pairs of IBAT, DBAT */
+ 
+ struct batrange {             /* stores address ranges mapped by BATs */
+       unsigned long start;
+       unsigned long limit;
+       phys_addr_t phys;
+ } bat_addrs[8];
+ 
+ /*
+  * Return PA for this VA if it is mapped by a BAT, or 0
+  */
+ phys_addr_t v_block_mapped(unsigned long va)
+ {
+       int b;
+       for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+               if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
+                       return bat_addrs[b].phys + (va - bat_addrs[b].start);
+       return 0;
+ }
+ 
+ /*
+  * Return VA for a given PA or 0 if not mapped
+  */
+ unsigned long p_block_mapped(phys_addr_t pa)
+ {
+       int b;
+       for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+               if (pa >= bat_addrs[b].phys
+                   && pa < (bat_addrs[b].limit-bat_addrs[b].start)
+                             +bat_addrs[b].phys)
+                       return bat_addrs[b].start+(pa-bat_addrs[b].phys);
+       return 0;
+ }
+ 
+ static int find_free_bat(void)
+ {
+       int b;
+ 
+       if (cpu_has_feature(CPU_FTR_601)) {
+               for (b = 0; b < 4; b++) {
+                       struct ppc_bat *bat = BATS[b];
+ 
+                       if (!(bat[0].batl & 0x40))
+                               return b;
+               }
+       } else {
+               int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ 
+               for (b = 0; b < n; b++) {
+                       struct ppc_bat *bat = BATS[b];
+ 
+                       if (!(bat[1].batu & 3))
+                               return b;
+               }
+       }
+       return -1;
+ }
+ 
++/*
++ * This function calculates the size of the larger block usable to map the
++ * beginning of an area based on the start address and size of that area:
++ * - max block size is 8M on 601 and 256 on other 6xx.
++ * - base address must be aligned to the block size. So the maximum block size
++ *   is identified by the lowest bit set to 1 in the base address (for instance
++ *   if base is 0x16000000, max size is 0x02000000).
++ * - block size has to be a power of two. This is calculated by finding the
++ *   highest bit set to 1.
++ */
+ static unsigned int block_size(unsigned long base, unsigned long top)
+ {
+       unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
- -      int done;
++      unsigned int base_shift = (ffs(base) - 1) & 31;
+       unsigned int block_shift = (fls(top - base) - 1) & 31;
+ 
+       return min3(max_size, 1U << base_shift, 1U << block_shift);
+ }
+ 
+ /*
+  * Set up one of the IBAT (block address translation) register pairs.
+  * The parameters are not checked; in particular size must be a power
+  * of 2 between 128k and 256M.
+  * Only for 603+ ...
+  */
+ static void setibat(int index, unsigned long virt, phys_addr_t phys,
+                   unsigned int size, pgprot_t prot)
+ {
+       unsigned int bl = (size >> 17) - 1;
+       int wimgxpp;
+       struct ppc_bat *bat = BATS[index];
+       unsigned long flags = pgprot_val(prot);
+ 
+       if (!cpu_has_feature(CPU_FTR_NEED_COHERENT))
+               flags &= ~_PAGE_COHERENT;
+ 
+       wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX);
+       bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+       bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+       if (flags & _PAGE_USER)
+               bat[0].batu |= 1;       /* Vp = 1 */
+ }
+ 
+ static void clearibat(int index)
+ {
+       struct ppc_bat *bat = BATS[index];
+ 
+       bat[0].batu = 0;
+       bat[0].batl = 0;
+ }
+ 
+ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top)
+ {
+       int idx;
+ 
+       while ((idx = find_free_bat()) != -1 && base != top) {
+               unsigned int size = block_size(base, top);
+ 
+               if (size < 128 << 10)
+                       break;
+               setbat(idx, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+               base += size;
+       }
+ 
+       return base;
+ }
+ 
+ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+ {
- -      if (done != border - base)
++      unsigned long done;
+       unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+ 
+       if (__map_without_bats) {
+               pr_debug("RAM mapped without BATs\n");
+               return base;
+       }
+ 
+       if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
+               return __mmu_mapin_ram(base, top);
+ 
+       done = __mmu_mapin_ram(base, border);
- -      return done + __mmu_mapin_ram(border, top);
++      if (done != border)
+               return done;
+ 
++      return __mmu_mapin_ram(border, top);
+ }
+ 
+ void mmu_mark_initmem_nx(void)
+ {
+       int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+       int i;
+       unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+       unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
+       unsigned long size;
+ 
+       if (cpu_has_feature(CPU_FTR_601))
+               return;
+ 
+       for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
+               size = block_size(base, top);
+               setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+               base += size;
+       }
+       if (base < top) {
+               size = block_size(base, top);
+               size = max(size, 128UL << 10);
+               if ((top - base) > size) {
+                       if (strict_kernel_rwx_enabled())
+                               pr_warn("Kernel _etext not properly aligned\n");
+                       size <<= 1;
+               }
+               setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+               base += size;
+       }
+       for (; i < nb; i++)
+               clearibat(i);
+ 
+       update_bats();
+ 
+       for (i = TASK_SIZE >> 28; i < 16; i++) {
+               /* Do not set NX on VM space for modules */
+               if (IS_ENABLED(CONFIG_MODULES) &&
+                   (VMALLOC_START & 0xf0000000) == i << 28)
+                       break;
+               mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
+       }
+ }
+ 
+ void mmu_mark_rodata_ro(void)
+ {
+       int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+       int i;
+ 
+       if (cpu_has_feature(CPU_FTR_601))
+               return;
+ 
+       for (i = 0; i < nb; i++) {
+               struct ppc_bat *bat = BATS[i];
+ 
+               if (bat_addrs[i].start < (unsigned long)__init_begin)
+                       bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
+       }
+ 
+       update_bats();
+ }
+ 
+ /*
+  * Set up one of the I/D BAT (block address translation) register pairs.
+  * The parameters are not checked; in particular size must be a power
+  * of 2 between 128k and 256M.
+  * On 603+, only set IBAT when _PAGE_EXEC is set
+  */
+ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
+                  unsigned int size, pgprot_t prot)
+ {
+       unsigned int bl;
+       int wimgxpp;
+       struct ppc_bat *bat = BATS[index];
+       unsigned long flags = pgprot_val(prot);
+ 
+       if ((flags & _PAGE_NO_CACHE) ||
+           (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
+               flags &= ~_PAGE_COHERENT;
+ 
+       bl = (size >> 17) - 1;
+       if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+               /* 603, 604, etc. */
+               /* Do DBAT first */
+               wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+                                  | _PAGE_COHERENT | _PAGE_GUARDED);
+               wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+               bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+               bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+               if (flags & _PAGE_USER)
+                       bat[1].batu |= 1;       /* Vp = 1 */
+               if (flags & _PAGE_GUARDED) {
+                       /* G bit must be zero in IBATs */
+                       flags &= ~_PAGE_EXEC;
+               }
+               if (flags & _PAGE_EXEC)
+                       bat[0] = bat[1];
+               else
+                       bat[0].batu = bat[0].batl = 0;
+       } else {
+               /* 601 cpu */
+               if (bl > BL_8M)
+                       bl = BL_8M;
+               wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+                                  | _PAGE_COHERENT);
+               wimgxpp |= (flags & _PAGE_RW)?
+                       ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
+               bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
+               bat->batl = phys | bl | 0x40;   /* V=1 */
+       }
+ 
+       bat_addrs[index].start = virt;
+       bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+       bat_addrs[index].phys = phys;
+ }
+ 
+ /*
+  * Preload a translation in the hash table
+  */
+ void hash_preload(struct mm_struct *mm, unsigned long ea,
+                 bool is_exec, unsigned long trap)
+ {
+       pmd_t *pmd;
+ 
+       if (!Hash)
+               return;
+       pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
+       if (!pmd_none(*pmd))
+               add_hash_page(mm->context.id, ea, pmd_val(*pmd));
+ }
+ 
+ /*
+  * Initialize the hash table and patch the instructions in hashtable.S.
+  */
+ void __init MMU_init_hw(void)
+ {
+       unsigned int n_hpteg, lg_n_hpteg;
+ 
+       if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+               return;
+ 
+       if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
+ 
+ #define LG_HPTEG_SIZE 6               /* 64 bytes per HPTEG */
+ #define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
+ #define MIN_N_HPTEG   1024            /* min 64kB hash table */
+ 
+       /*
+        * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
+        * This is less than the recommended amount, but then
+        * Linux ain't AIX.
+        */
+       n_hpteg = total_memory / (PAGE_SIZE * 8);
+       if (n_hpteg < MIN_N_HPTEG)
+               n_hpteg = MIN_N_HPTEG;
+       lg_n_hpteg = __ilog2(n_hpteg);
+       if (n_hpteg & (n_hpteg - 1)) {
+               ++lg_n_hpteg;           /* round up if not power of 2 */
+               n_hpteg = 1 << lg_n_hpteg;
+       }
+       Hash_size = n_hpteg << LG_HPTEG_SIZE;
+ 
+       /*
+        * Find some memory for the hash table.
+        */
+       if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
+       Hash = memblock_alloc(Hash_size, Hash_size);
+       if (!Hash)
+               panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+                     __func__, Hash_size, Hash_size);
+       _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+ 
+       pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+               (unsigned long long)(total_memory >> 20), Hash_size >> 10);
+ 
+ 
+       Hash_mask = n_hpteg - 1;
+       hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+       if (lg_n_hpteg > 16)
+               hash_mb2 = 16 - LG_HPTEG_SIZE;
+ }
+ 
+ void __init MMU_init_hw_patch(void)
+ {
+       unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+ 
+       if (ppc_md.progress)
+               ppc_md.progress("hash:patch", 0x345);
+       if (ppc_md.progress)
+               ppc_md.progress("hash:done", 0x205);
+ 
+       /* WARNING: Make sure nothing can trigger a KASAN check past this point */
+ 
+       /*
+        * Patch up the instructions in hashtable.S:create_hpte
+        */
+       modify_instruction_site(&patch__hash_page_A0, 0xffff,
+                               ((unsigned int)Hash - PAGE_OFFSET) >> 16);
+       modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+       modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
+       modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
+       modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
+ 
+       /*
+        * Patch up the instructions in hashtable.S:flush_hash_page
+        */
+       modify_instruction_site(&patch__flush_hash_A0, 0xffff,
+                               ((unsigned int)Hash - PAGE_OFFSET) >> 16);
+       modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+       modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
+       modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
+ }
+ 
+ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+                               phys_addr_t first_memblock_size)
+ {
+       /* We don't currently support the first MEMBLOCK not mapping 0
+        * physical on those processors
+        */
+       BUG_ON(first_memblock_base != 0);
+ 
+       /* 601 can only access 16MB at the moment */
+       if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+               memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
+       else /* Anything else has 256M mapped */
+               memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
+ }
+ 
+ void __init print_system_hash_info(void)
+ {
+       pr_info("Hash_size         = 0x%lx\n", Hash_size);
+       if (Hash_mask)
+               pr_info("Hash_mask         = 0x%lx\n", Hash_mask);
+ }
+ 
+ #ifdef CONFIG_PPC_KUEP
+ void __init setup_kuep(bool disabled)
+ {
+       pr_info("Activating Kernel Userspace Execution Prevention\n");
+ 
+       if (cpu_has_feature(CPU_FTR_601))
+               pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
+ 
+       if (disabled)
+               pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
+ }
+ #endif
+ 
+ #ifdef CONFIG_PPC_KUAP
+ void __init setup_kuap(bool disabled)
+ {
+       pr_info("Activating Kernel Userspace Access Protection\n");
+ 
+       if (disabled)
+               pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
+ }
+ #endif
diff --cc arch/powerpc/mm/book3s64/iommu_api.c

index 0000000,e7a9c4f..8330f13

mode 000000,100644..100644
--- /dev/null
--- 2/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@@ -1,0 -1,482 +1,501 @@@
- -      struct mm_iommu_table_group_mem_t *mem;
- -      long i, ret, locked_entries = 0;
+ /*
+  *  IOMMU helpers in MMU context.
+  *
+  *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+  *
+  *  This program is free software; you can redistribute it and/or
+  *  modify it under the terms of the GNU General Public License
+  *  as published by the Free Software Foundation; either version
+  *  2 of the License, or (at your option) any later version.
+  *
+  */
+ 
+ #include <linux/sched/signal.h>
+ #include <linux/slab.h>
+ #include <linux/rculist.h>
+ #include <linux/vmalloc.h>
+ #include <linux/mutex.h>
+ #include <linux/migrate.h>
+ #include <linux/hugetlb.h>
+ #include <linux/swap.h>
+ #include <linux/sizes.h>
+ #include <asm/mmu_context.h>
+ #include <asm/pte-walk.h>
+ #include <linux/mm_inline.h>
+ 
+ static DEFINE_MUTEX(mem_list_mutex);
+ 
+ #define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY       0x1
+ #define MM_IOMMU_TABLE_GROUP_PAGE_MASK        ~(SZ_4K - 1)
+ 
+ struct mm_iommu_table_group_mem_t {
+       struct list_head next;
+       struct rcu_head rcu;
+       unsigned long used;
+       atomic64_t mapped;
+       unsigned int pageshift;
+       u64 ua;                 /* userspace address */
+       u64 entries;            /* number of entries in hpas/hpages[] */
+       /*
+        * in mm_iommu_get we temporarily use this to store
+        * struct page address.
+        *
+        * We need to convert ua to hpa in real mode. Make it
+        * simpler by storing physical address.
+        */
+       union {
+               struct page **hpages;   /* vmalloc'ed */
+               phys_addr_t *hpas;
+       };
+ #define MM_IOMMU_TABLE_INVALID_HPA    ((uint64_t)-1)
+       u64 dev_hpa;            /* Device memory base address */
+ };
+ 
+ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
+               unsigned long npages, bool incr)
+ {
+       long ret = 0, locked, lock_limit;
+ 
+       if (!npages)
+               return 0;
+ 
+       down_write(&mm->mmap_sem);
+ 
+       if (incr) {
+               locked = mm->locked_vm + npages;
+               lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       ret = -ENOMEM;
+               else
+                       mm->locked_vm += npages;
+       } else {
+               if (WARN_ON_ONCE(npages > mm->locked_vm))
+                       npages = mm->locked_vm;
+               mm->locked_vm -= npages;
+       }
+ 
+       pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
+                       current ? current->pid : 0,
+                       incr ? '+' : '-',
+                       npages << PAGE_SHIFT,
+                       mm->locked_vm << PAGE_SHIFT,
+                       rlimit(RLIMIT_MEMLOCK));
+       up_write(&mm->mmap_sem);
+ 
+       return ret;
+ }
+ 
+ bool mm_iommu_preregistered(struct mm_struct *mm)
+ {
+       return !list_empty(&mm->context.iommu_group_mem_list);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+ 
+ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+                             unsigned long entries, unsigned long dev_hpa,
+                             struct mm_iommu_table_group_mem_t **pmem)
+ {
- -
- -      mutex_lock(&mem_list_mutex);
- -
- -      list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
- -                      next) {
- -              /* Overlap? */
- -              if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
- -                              (ua < (mem->ua +
- -                                     (mem->entries << PAGE_SHIFT)))) {
- -                      ret = -EINVAL;
- -                      goto unlock_exit;
- -              }
- -
- -      }
++      struct mm_iommu_table_group_mem_t *mem, *mem2;
++      long i, ret, locked_entries = 0, pinned = 0;
+       unsigned int pageshift;
- -                      goto unlock_exit;
++      unsigned long entry, chunk;
+ 
+       if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+               ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+               if (ret)
- -      ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
++                      return ret;
+ 
+               locked_entries = entries;
+       }
+ 
+       mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+       if (!mem) {
+               ret = -ENOMEM;
+               goto unlock_exit;
+       }
+ 
+       if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+               mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+               mem->dev_hpa = dev_hpa;
+               goto good_exit;
+       }
+       mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+ 
+       /*
+        * For a starting point for a maximum page size calculation
+        * we use @ua and @entries natural alignment to allow IOMMU pages
+        * smaller than huge pages but still bigger than PAGE_SIZE.
+        */
+       mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
+       mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
+       if (!mem->hpas) {
+               kfree(mem);
+               ret = -ENOMEM;
+               goto unlock_exit;
+       }
+ 
+       down_read(&mm->mmap_sem);
- -      if (ret != entries) {
- -              /* free the reference taken */
- -              for (i = 0; i < ret; i++)
- -                      put_page(mem->hpages[i]);
- -
- -              vfree(mem->hpas);
- -              kfree(mem);
- -              ret = -EFAULT;
- -              goto unlock_exit;
++      chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
++                      sizeof(struct vm_area_struct *);
++      chunk = min(chunk, entries);
++      for (entry = 0; entry < entries; entry += chunk) {
++              unsigned long n = min(entries - entry, chunk);
++
++              ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
++                              FOLL_WRITE, mem->hpages + entry, NULL);
++              if (ret == n) {
++                      pinned += n;
++                      continue;
++              }
++              if (ret > 0)
++                      pinned += ret;
++              break;
++      }
+       up_read(&mm->mmap_sem);
- -      ret = 0;
++      if (pinned != entries) {
++              if (!ret)
++                      ret = -EFAULT;
++              goto free_exit;
+       }
+ 
+       pageshift = PAGE_SHIFT;
+       for (i = 0; i < entries; ++i) {
+               struct page *page = mem->hpages[i];
+ 
+               /*
+                * Allow to use larger than 64k IOMMU pages. Only do that
+                * if we are backed by hugetlb.
+                */
+               if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
+                       struct page *head = compound_head(page);
+ 
+                       pageshift = compound_order(head) + PAGE_SHIFT;
+               }
+               mem->pageshift = min(mem->pageshift, pageshift);
+               /*
+                * We don't need struct page reference any more, switch
+                * to physical address.
+                */
+               mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+       }
+ 
+ good_exit:
- -      *pmem = mem;
+       atomic64_set(&mem->mapped, 1);
+       mem->used = 1;
+       mem->ua = ua;
+       mem->entries = entries;
- -      list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+ 
- -unlock_exit:
- -      if (locked_entries && ret)
- -              mm_iommu_adjust_locked_vm(mm, locked_entries, false);
++      mutex_lock(&mem_list_mutex);
+ 
- -      unsigned long entries, dev_hpa;
++      list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
++              /* Overlap? */
++              if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
++                              (ua < (mem2->ua +
++                                     (mem2->entries << PAGE_SHIFT)))) {
++                      ret = -EINVAL;
++                      mutex_unlock(&mem_list_mutex);
++                      goto free_exit;
++              }
++      }
++
++      list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+ 
+       mutex_unlock(&mem_list_mutex);
+ 
++      *pmem = mem;
++
++      return 0;
++
++free_exit:
++      /* free the reference taken */
++      for (i = 0; i < pinned; i++)
++              put_page(mem->hpages[i]);
++
++      vfree(mem->hpas);
++      kfree(mem);
++
++unlock_exit:
++      mm_iommu_adjust_locked_vm(mm, locked_entries, false);
++
+       return ret;
+ }
+ 
+ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+               struct mm_iommu_table_group_mem_t **pmem)
+ {
+       return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+                       pmem);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_new);
+ 
+ long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+               unsigned long entries, unsigned long dev_hpa,
+               struct mm_iommu_table_group_mem_t **pmem)
+ {
+       return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+ 
+ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+ {
+       long i;
+       struct page *page = NULL;
+ 
+       if (!mem->hpas)
+               return;
+ 
+       for (i = 0; i < mem->entries; ++i) {
+               if (!mem->hpas[i])
+                       continue;
+ 
+               page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
+               if (!page)
+                       continue;
+ 
+               if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
+                       SetPageDirty(page);
+ 
+               put_page(page);
+               mem->hpas[i] = 0;
+       }
+ }
+ 
+ static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
+ {
+ 
+       mm_iommu_unpin(mem);
+       vfree(mem->hpas);
+       kfree(mem);
+ }
+ 
+ static void mm_iommu_free(struct rcu_head *head)
+ {
+       struct mm_iommu_table_group_mem_t *mem = container_of(head,
+                       struct mm_iommu_table_group_mem_t, rcu);
+ 
+       mm_iommu_do_free(mem);
+ }
+ 
+ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
+ {
+       list_del_rcu(&mem->next);
+       call_rcu(&mem->rcu, mm_iommu_free);
+ }
+ 
+ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
+ {
+       long ret = 0;
- -      entries = mem->entries;
- -      dev_hpa = mem->dev_hpa;
++      unsigned long unlock_entries = 0;
+ 
+       mutex_lock(&mem_list_mutex);
+ 
+       if (mem->used == 0) {
+               ret = -ENOENT;
+               goto unlock_exit;
+       }
+ 
+       --mem->used;
+       /* There are still users, exit */
+       if (mem->used)
+               goto unlock_exit;
+ 
+       /* Are there still mappings? */
+       if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
+               ++mem->used;
+               ret = -EBUSY;
+               goto unlock_exit;
+       }
+ 
++      if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
++              unlock_entries = mem->entries;
++
+       /* @mapped became 0 so now mappings are disabled, release the region */
- -      if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
- -              mm_iommu_adjust_locked_vm(mm, entries, false);
- -
+       mm_iommu_release(mem);
+ 
+ unlock_exit:
+       mutex_unlock(&mem_list_mutex);
+ 
++      mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
++
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_put);
+ 
+ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+               unsigned long ua, unsigned long size)
+ {
+       struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+ 
+       list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+               if ((mem->ua <= ua) &&
+                               (ua + size <= mem->ua +
+                                (mem->entries << PAGE_SHIFT))) {
+                       ret = mem;
+                       break;
+               }
+       }
+ 
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+ 
+ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
+               unsigned long ua, unsigned long size)
+ {
+       struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+ 
+       list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
+                       next) {
+               if ((mem->ua <= ua) &&
+                               (ua + size <= mem->ua +
+                                (mem->entries << PAGE_SHIFT))) {
+                       ret = mem;
+                       break;
+               }
+       }
+ 
+       return ret;
+ }
+ 
+ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
+               unsigned long ua, unsigned long entries)
+ {
+       struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+ 
+       mutex_lock(&mem_list_mutex);
+ 
+       list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+               if ((mem->ua == ua) && (mem->entries == entries)) {
+                       ret = mem;
+                       ++mem->used;
+                       break;
+               }
+       }
+ 
+       mutex_unlock(&mem_list_mutex);
+ 
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_get);
+ 
+ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+               unsigned long ua, unsigned int pageshift, unsigned long *hpa)
+ {
+       const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+       u64 *va;
+ 
+       if (entry >= mem->entries)
+               return -EFAULT;
+ 
+       if (pageshift > mem->pageshift)
+               return -EFAULT;
+ 
+       if (!mem->hpas) {
+               *hpa = mem->dev_hpa + (ua - mem->ua);
+               return 0;
+       }
+ 
+       va = &mem->hpas[entry];
+       *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+ 
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+ 
+ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+               unsigned long ua, unsigned int pageshift, unsigned long *hpa)
+ {
+       const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+       unsigned long *pa;
+ 
+       if (entry >= mem->entries)
+               return -EFAULT;
+ 
+       if (pageshift > mem->pageshift)
+               return -EFAULT;
+ 
+       if (!mem->hpas) {
+               *hpa = mem->dev_hpa + (ua - mem->ua);
+               return 0;
+       }
+ 
+       pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
+       if (!pa)
+               return -EFAULT;
+ 
+       *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+ 
+       return 0;
+ }
+ 
+ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
+ {
+       struct mm_iommu_table_group_mem_t *mem;
+       long entry;
+       void *va;
+       unsigned long *pa;
+ 
+       mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
+       if (!mem)
+               return;
+ 
+       if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
+               return;
+ 
+       entry = (ua - mem->ua) >> PAGE_SHIFT;
+       va = &mem->hpas[entry];
+ 
+       pa = (void *) vmalloc_to_phys(va);
+       if (!pa)
+               return;
+ 
+       *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
+ }
+ 
+ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+               unsigned int pageshift, unsigned long *size)
+ {
+       struct mm_iommu_table_group_mem_t *mem;
+       unsigned long end;
+ 
+       list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+               if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+                       continue;
+ 
+               end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+               if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+                       /*
+                        * Since the IOMMU page size might be bigger than
+                        * PAGE_SIZE, the amount of preregistered memory
+                        * starting from @hpa might be smaller than 1<<pageshift
+                        * and the caller needs to distinguish this situation.
+                        */
+                       *size = min(1UL << pageshift, end - hpa);
+                       return true;
+               }
+       }
+ 
+       return false;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+ 
+ long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+ {
+       if (atomic64_inc_not_zero(&mem->mapped))
+               return 0;
+ 
+       /* Last mm_iommu_put() has been called, no more mappings allowed() */
+       return -ENXIO;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
+ 
+ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
+ {
+       atomic64_add_unless(&mem->mapped, -1, 1);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
+ 
+ void mm_iommu_init(struct mm_struct *mm)
+ {
+       INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
+ }
diff --cc arch/powerpc/platforms/Kconfig.cputype

index 50cd09b,fa6b032..d0e172d
--- 1/arch/powerpc/platforms/Kconfig.cputype
--- 2/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@@ -324,8 -330,10 +330,10 @@@ config ARCH_ENABLE_SPLIT_PMD_PTLOC
   
   config PPC_RADIX_MMU
         bool "Radix MMU Support"
- -      depends on PPC_BOOK3S_64
+ +      depends on PPC_BOOK3S_64 && HUGETLB_PAGE
         select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+       select PPC_HAVE_KUEP
+       select PPC_HAVE_KUAP
         default y
         help
           Enable support for the Power ISA 3.0 Radix style MMU. Currently this
diff --cc arch/powerpc/xmon/xmon.c

index 13c6a47,3e7be19..1b0149b
--- 1/arch/powerpc/xmon/xmon.c
--- 2/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@@ -2429,12 -2455,8 +2455,11 @@@ static void dump_one_paca(int cpu
         DUMP(p, trap_save, "%#-*x");
         DUMP(p, irq_soft_mask, "%#-*x");
         DUMP(p, irq_happened, "%#-*x");
- -      DUMP(p, io_sync, "%#-*x");
+ +#ifdef CONFIG_MMIOWB
+ +      DUMP(p, mmiowb_state.nesting_count, "%#-*x");
+ +      DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x");
+ +#endif
         DUMP(p, irq_work_pending, "%#-*x");
-       DUMP(p, nap_state_lost, "%#-*x");
         DUMP(p, sprg_vdso, "%#-*llx");
   
   #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
diff --cc include/linux/cpuhotplug.h
Simple merge
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 10 May 2019 12:29:27 +0000 (05:29 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 10 May 2019 12:29:27 +0000 (05:29 -0700)
		1	2
Documentation/admin-guide/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/configs/skiroot_defconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/include/asm/paca.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/security.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/setup_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kvm/book3s_hv.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/mm/book3s32/mmu.c	patch \|	\|	diff2 \|	blob \| history
arch/powerpc/mm/book3s64/iommu_api.c	patch \|	\|	diff2 \|	blob \| history
arch/powerpc/platforms/Kconfig.cputype	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/xmon/xmon.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/cpuhotplug.h	patch \|	diff1 \|	diff2 \|	blob \| history