mm: do_wp_page() simplification

[linux-2.6-microblaze.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index bcabbe0..590111e 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,7 +31,6 @@
  #include <linux/cma.h>
  
  #include <asm/page.h>
-#include <asm/pgtable.h>
  #include <asm/tlb.h>
  
  #include <linux/io.h>
@@ -46,7 +45,10 @@ int hugetlb_max_hstate __read_mostly;
  unsigned int default_hstate_idx;
  struct hstate hstates[HUGE_MAX_HSTATE];
  
+#ifdef CONFIG_CMA
  static struct cma *hugetlb_cma[MAX_NUMNODES];
+#endif
+static unsigned long hugetlb_cma_size __initdata;
  
  /*
   * Minimum page order among possible hugepage sizes, set to a proper value
@@ -59,8 +61,8 @@ __initdata LIST_HEAD(huge_boot_pages);
  /* for command line parsing */
  static struct hstate * __initdata parsed_hstate;
  static unsigned long __initdata default_hstate_max_huge_pages;
-static unsigned long __initdata default_hstate_size;
  static bool __initdata parsed_valid_hugepagesz = true;
+static bool __initdata parsed_default_hugepagesz;
  
  /*
   * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages,
@@ -85,7 +87,7 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
         spin_unlock(&spool->lock);
  
         /* If no pages are used, and no other handles to the subpool
-        * remain, give up any reservations mased on minimum size and
+        * remain, give up any reservations based on minimum size and
          * free the subpool */
         if (free) {
                 if (spool->min_hpages != -1)
@@ -133,7 +135,7 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
   * the request.  Otherwise, return the number of pages by which the
   * global pools must be adjusted (upward).  The returned value may
   * only be different than the passed value (delta) in the case where
- * a subpool minimum size must be manitained.
+ * a subpool minimum size must be maintained.
   */
  static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
                                       long delta)
@@ -473,7 +475,7 @@ out_of_memory:
   *
   * Return the number of new huge pages added to the map.  This number is greater
   * than or equal to zero.  If file_region entries needed to be allocated for
- * this operation and we were not able to allocate, it ruturns -ENOMEM.
+ * this operation and we were not able to allocate, it returns -ENOMEM.
   * region_add of regions of length 1 never allocate file_regions and cannot
   * fail; region_chg will always allocate at least 1 entry and a region_add for
   * 1 page will only require at most 1 entry.
@@ -988,7 +990,7 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
                  * We know VM_NORESERVE is not set.  Therefore, there SHOULD
                  * be a region map for all pages.  The only situation where
                  * there is no region map is if a hole was punched via
-                * fallocate.  In this case, there really are no reverves to
+                * fallocate.  In this case, there really are no reserves to
                  * use.  This situation is indicated if chg != 0.
                  */
                 if (chg)
@@ -1236,9 +1238,10 @@ static void free_gigantic_page(struct page *page, unsigned int order)
          * If the page isn't allocated using the cma allocator,
          * cma_release() returns false.
          */
-       if (IS_ENABLED(CONFIG_CMA) &&
-           cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+#ifdef CONFIG_CMA
+       if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
                 return;
+#endif
  
         free_contig_range(page_to_pfn(page), 1 << order);
  }
@@ -1249,7 +1252,8 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
  {
         unsigned long nr_pages = 1UL << huge_page_order(h);
  
-       if (IS_ENABLED(CONFIG_CMA)) {
+#ifdef CONFIG_CMA
+       {
                 struct page *page;
                 int node;
  
@@ -1263,6 +1267,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
                                 return page;
                 }
         }
+#endif
  
         return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
  }
@@ -1519,7 +1524,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order)
                  * For gigantic hugepages allocated through bootmem at
                  * boot, it's safer to be consistent with the not-gigantic
                  * hugepages and clear the PG_reserved bit from all tail pages
-                * too.  Otherwse drivers using get_user_pages() to access tail
+                * too.  Otherwise drivers using get_user_pages() to access tail
                  * pages may get the reference counting wrong if they see
                  * PG_reserved set on a tail page (despite the head page not
                  * having PG_reserved set).  Enforcing this consistency between
@@ -1594,7 +1599,7 @@ static struct address_space *_get_hugetlb_page_mapping(struct page *hpage)
  
         /* Use first found vma */
         pgoff_start = page_to_pgoff(hpage);
-       pgoff_end = pgoff_start + hpage_nr_pages(hpage) - 1;
+       pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1;
         anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
                                         pgoff_start, pgoff_end) {
                 struct vm_area_struct *vma = avc->vma;
@@ -2572,7 +2577,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
  
         for (i = 0; i < h->max_huge_pages; ++i) {
                 if (hstate_is_gigantic(h)) {
-                       if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) {
+                       if (hugetlb_cma_size) {
                                 pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
                                 break;
                         }
@@ -3060,7 +3065,7 @@ static void __init hugetlb_sysfs_init(void)
                 err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
                                          hstate_kobjs, &hstate_attr_group);
                 if (err)
-                       pr_err("Hugetlb: Unable to add hstate %s", h->name);
+                       pr_err("HugeTLB: Unable to add hstate %s", h->name);
         }
  }
  
@@ -3164,7 +3169,7 @@ static void hugetlb_register_node(struct node *node)
                                                 nhs->hstate_kobjs,
                                                 &per_node_hstate_attr_group);
                 if (err) {
-                       pr_err("Hugetlb: Unable to add hstate %s for node %d\n",
+                       pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
                                 h->name, node->dev.id);
                         hugetlb_unregister_node(node);
                         break;
@@ -3212,23 +3217,41 @@ static int __init hugetlb_init(void)
  {
         int i;
  
-       if (!hugepages_supported())
+       if (!hugepages_supported()) {
+               if (hugetlb_max_hstate || default_hstate_max_huge_pages)
+                       pr_warn("HugeTLB: huge pages not supported, ignoring associated command-line parameters\n");
                 return 0;
+       }
  
-       if (!size_to_hstate(default_hstate_size)) {
-               if (default_hstate_size != 0) {
-                       pr_err("HugeTLB: unsupported default_hugepagesz %lu. Reverting to %lu\n",
-                              default_hstate_size, HPAGE_SIZE);
+       /*
+        * Make sure HPAGE_SIZE (HUGETLB_PAGE_ORDER) hstate exists.  Some
+        * architectures depend on setup being done here.
+        */
+       hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+       if (!parsed_default_hugepagesz) {
+               /*
+                * If we did not parse a default huge page size, set
+                * default_hstate_idx to HPAGE_SIZE hstate. And, if the
+                * number of huge pages for this default size was implicitly
+                * specified, set that here as well.
+                * Note that the implicit setting will overwrite an explicit
+                * setting.  A warning will be printed in this case.
+                */
+               default_hstate_idx = hstate_index(size_to_hstate(HPAGE_SIZE));
+               if (default_hstate_max_huge_pages) {
+                       if (default_hstate.max_huge_pages) {
+                               char buf[32];
+
+                               string_get_size(huge_page_size(&default_hstate),
+                                       1, STRING_UNITS_2, buf, 32);
+                               pr_warn("HugeTLB: Ignoring hugepages=%lu associated with %s page size\n",
+                                       default_hstate.max_huge_pages, buf);
+                               pr_warn("HugeTLB: Using hugepages=%lu for number of default huge pages\n",
+                                       default_hstate_max_huge_pages);
+                       }
+                       default_hstate.max_huge_pages =
+                               default_hstate_max_huge_pages;
                 }
-
-               default_hstate_size = HPAGE_SIZE;
-               if (!size_to_hstate(default_hstate_size))
-                       hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-       }
-       default_hstate_idx = hstate_index(size_to_hstate(default_hstate_size));
-       if (default_hstate_max_huge_pages) {
-               if (!default_hstate.max_huge_pages)
-                       default_hstate.max_huge_pages = default_hstate_max_huge_pages;
         }
  
         hugetlb_cma_check();
@@ -3256,10 +3279,10 @@ static int __init hugetlb_init(void)
  }
  subsys_initcall(hugetlb_init);
  
-/* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_bad_size(void)
+/* Overwritten by architectures with more huge page sizes */
+bool __init __attribute((weak)) arch_hugetlb_valid_size(unsigned long size)
  {
-       parsed_valid_hugepagesz = false;
+       return size == HPAGE_SIZE;
  }
  
  void __init hugetlb_add_hstate(unsigned int order)
@@ -3268,7 +3291,6 @@ void __init hugetlb_add_hstate(unsigned int order)
         unsigned long i;
  
         if (size_to_hstate(PAGE_SIZE << order)) {
-               pr_warn("hugepagesz= specified twice, ignoring\n");
                 return;
         }
         BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
@@ -3289,20 +3311,29 @@ void __init hugetlb_add_hstate(unsigned int order)
         parsed_hstate = h;
  }
  
-static int __init hugetlb_nrpages_setup(char *s)
+/*
+ * hugepages command line processing
+ * hugepages normally follows a valid hugepagsz or default_hugepagsz
+ * specification.  If not, ignore the hugepages value.  hugepages can also
+ * be the first huge page command line  option in which case it implicitly
+ * specifies the number of huge pages for the default size.
+ */
+static int __init hugepages_setup(char *s)
  {
         unsigned long *mhp;
         static unsigned long *last_mhp;
  
         if (!parsed_valid_hugepagesz) {
-               pr_warn("hugepages = %s preceded by "
-                       "an unsupported hugepagesz, ignoring\n", s);
+               pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s);
                 parsed_valid_hugepagesz = true;
-               return 1;
+               return 0;
         }
+
         /*
-        * !hugetlb_max_hstate means we haven't parsed a hugepagesz= parameter yet,
-        * so this hugepages= parameter goes to the "default hstate".
+        * !hugetlb_max_hstate means we haven't parsed a hugepagesz= parameter
+        * yet, so this hugepages= parameter goes to the "default hstate".
+        * Otherwise, it goes with the previously parsed hugepagesz or
+        * default_hugepagesz.
          */
         else if (!hugetlb_max_hstate)
                 mhp = &default_hstate_max_huge_pages;
@@ -3310,8 +3341,8 @@ static int __init hugetlb_nrpages_setup(char *s)
                 mhp = &parsed_hstate->max_huge_pages;
  
         if (mhp == last_mhp) {
-               pr_warn("hugepages= specified twice without interleaving hugepagesz=, ignoring\n");
-               return 1;
+               pr_warn("HugeTLB: hugepages= specified twice without interleaving hugepagesz=, ignoring hugepages=%s\n", s);
+               return 0;
         }
  
         if (sscanf(s, "%lu", mhp) <= 0)
@@ -3329,14 +3360,102 @@ static int __init hugetlb_nrpages_setup(char *s)
  
         return 1;
  }
-__setup("hugepages=", hugetlb_nrpages_setup);
+__setup("hugepages=", hugepages_setup);
  
-static int __init hugetlb_default_setup(char *s)
+/*
+ * hugepagesz command line processing
+ * A specific huge page size can only be specified once with hugepagesz.
+ * hugepagesz is followed by hugepages on the command line.  The global
+ * variable 'parsed_valid_hugepagesz' is used to determine if prior
+ * hugepagesz argument was valid.
+ */
+static int __init hugepagesz_setup(char *s)
  {
-       default_hstate_size = memparse(s, &s);
+       unsigned long size;
+       struct hstate *h;
+
+       parsed_valid_hugepagesz = false;
+       size = (unsigned long)memparse(s, NULL);
+
+       if (!arch_hugetlb_valid_size(size)) {
+               pr_err("HugeTLB: unsupported hugepagesz=%s\n", s);
+               return 0;
+       }
+
+       h = size_to_hstate(size);
+       if (h) {
+               /*
+                * hstate for this size already exists.  This is normally
+                * an error, but is allowed if the existing hstate is the
+                * default hstate.  More specifically, it is only allowed if
+                * the number of huge pages for the default hstate was not
+                * previously specified.
+                */
+               if (!parsed_default_hugepagesz ||  h != &default_hstate ||
+                   default_hstate.max_huge_pages) {
+                       pr_warn("HugeTLB: hugepagesz=%s specified twice, ignoring\n", s);
+                       return 0;
+               }
+
+               /*
+                * No need to call hugetlb_add_hstate() as hstate already
+                * exists.  But, do set parsed_hstate so that a following
+                * hugepages= parameter will be applied to this hstate.
+                */
+               parsed_hstate = h;
+               parsed_valid_hugepagesz = true;
+               return 1;
+       }
+
+       hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
+       parsed_valid_hugepagesz = true;
         return 1;
  }
-__setup("default_hugepagesz=", hugetlb_default_setup);
+__setup("hugepagesz=", hugepagesz_setup);
+
+/*
+ * default_hugepagesz command line input
+ * Only one instance of default_hugepagesz allowed on command line.
+ */
+static int __init default_hugepagesz_setup(char *s)
+{
+       unsigned long size;
+
+       parsed_valid_hugepagesz = false;
+       if (parsed_default_hugepagesz) {
+               pr_err("HugeTLB: default_hugepagesz previously specified, ignoring %s\n", s);
+               return 0;
+       }
+
+       size = (unsigned long)memparse(s, NULL);
+
+       if (!arch_hugetlb_valid_size(size)) {
+               pr_err("HugeTLB: unsupported default_hugepagesz=%s\n", s);
+               return 0;
+       }
+
+       hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
+       parsed_valid_hugepagesz = true;
+       parsed_default_hugepagesz = true;
+       default_hstate_idx = hstate_index(size_to_hstate(size));
+
+       /*
+        * The number of default huge pages (for this size) could have been
+        * specified as the first hugetlb parameter: hugepages=X.  If so,
+        * then default_hstate_max_huge_pages is set.  If the default huge
+        * page size is gigantic (>= MAX_ORDER), then the pages must be
+        * allocated here from bootmem allocator.
+        */
+       if (default_hstate_max_huge_pages) {
+               default_hstate.max_huge_pages = default_hstate_max_huge_pages;
+               if (hstate_is_gigantic(&default_hstate))
+                       hugetlb_hstate_alloc_pages(&default_hstate);
+               default_hstate_max_huge_pages = 0;
+       }
+
+       return 1;
+}
+__setup("default_hugepagesz=", default_hugepagesz_setup);
  
  static unsigned int cpuset_mems_nr(unsigned int *array)
  {
@@ -3352,7 +3471,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
  #ifdef CONFIG_SYSCTL
  static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                          struct ctl_table *table, int write,
-                        void __user *buffer, size_t *length, loff_t *ppos)
+                        void *buffer, size_t *length, loff_t *ppos)
  {
         struct hstate *h = &default_hstate;
         unsigned long tmp = h->max_huge_pages;
@@ -3375,7 +3494,7 @@ out:
  }
  
  int hugetlb_sysctl_handler(struct ctl_table *table, int write,
-                         void __user *buffer, size_t *length, loff_t *ppos)
+                         void *buffer, size_t *length, loff_t *ppos)
  {
  
         return hugetlb_sysctl_handler_common(false, table, write,
@@ -3384,7 +3503,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
  
  #ifdef CONFIG_NUMA
  int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
-                         void __user *buffer, size_t *length, loff_t *ppos)
+                         void *buffer, size_t *length, loff_t *ppos)
  {
         return hugetlb_sysctl_handler_common(true, table, write,
                                                         buffer, length, ppos);
@@ -3392,8 +3511,7 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
  #endif /* CONFIG_NUMA */
  
  int hugetlb_overcommit_handler(struct ctl_table *table, int write,
-                       void __user *buffer,
-                       size_t *length, loff_t *ppos)
+               void *buffer, size_t *length, loff_t *ppos)
  {
         struct hstate *h = &default_hstate;
         unsigned long tmp;
@@ -4466,9 +4584,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         /*
          * entry could be a migration/hwpoison entry at this point, so this
          * check prevents the kernel from going below assuming that we have
-        * a active hugepage in pagecache. This goto expects the 2nd page fault,
-        * and is_hugetlb_entry_(migration|hwpoisoned) check will properly
-        * handle it.
+        * an active hugepage in pagecache. This goto expects the 2nd page
+        * fault, and is_hugetlb_entry_(migration|hwpoisoned) check will
+        * properly handle it.
          */
         if (!pte_present(entry))
                 goto out_mutex;
@@ -4583,7 +4701,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
                                                 (const void __user *) src_addr,
                                                 pages_per_huge_page(h), false);
  
-               /* fallback to copy_from_user outside mmap_sem */
+               /* fallback to copy_from_user outside mmap_lock */
                 if (unlikely(ret)) {
                         ret = -ENOENT;
                         *pagep = page;
@@ -5355,8 +5473,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
   * huge_pte_offset() - Walk the page table to resolve the hugepage
   * entry at address @addr
   *
- * Return: Pointer to page table or swap entry (PUD or PMD) for
- * address @addr, or NULL if a p*d_none() entry is encountered and the
+ * Return: Pointer to page table entry (PUD or PMD) for
+ * address @addr, or NULL if a !p*d_present() entry is encountered and the
   * size @sz doesn't match the hugepage size at this level of the page
   * table.
   */
@@ -5365,8 +5483,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
  {
         pgd_t *pgd;
         p4d_t *p4d;
-       pud_t *pud, pud_entry;
-       pmd_t *pmd, pmd_entry;
+       pud_t *pud;
+       pmd_t *pmd;
  
         pgd = pgd_offset(mm, addr);
         if (!pgd_present(*pgd))
@@ -5376,22 +5494,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
                 return NULL;
  
         pud = pud_offset(p4d, addr);
-       pud_entry = READ_ONCE(*pud);
-       if (sz != PUD_SIZE && pud_none(pud_entry))
-               return NULL;
-       /* hugepage or swap? */
-       if (pud_huge(pud_entry) || !pud_present(pud_entry))
+       if (sz == PUD_SIZE)
+               /* must be pud huge, non-present or none */
                 return (pte_t *)pud;
-
-       pmd = pmd_offset(pud, addr);
-       pmd_entry = READ_ONCE(*pmd);
-       if (sz != PMD_SIZE && pmd_none(pmd_entry))
+       if (!pud_present(*pud))
                 return NULL;
-       /* hugepage or swap? */
-       if (pmd_huge(pmd_entry) || !pmd_present(pmd_entry))
-               return (pte_t *)pmd;
+       /* must have a valid entry and size to go further */
  
-       return NULL;
+       pmd = pmd_offset(pud, addr);
+       /* must be pmd huge, non-present or none */
+       return (pte_t *)pmd;
  }
  
  #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
@@ -5548,7 +5660,6 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
  }
  
  #ifdef CONFIG_CMA
-static unsigned long hugetlb_cma_size __initdata;
  static bool cma_reserve_called __initdata;
  
  static int __init cmdline_parse_hugetlb_cma(char *p)