X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=mm%2Fhugetlb.c;h=a301c2d672bf5246f1ec4bf709374b3b78489189;hb=ba9086a6df1ea8bb2bbed7a92f14bfddc10fb8a7;hp=e52c878940bb06edd23803344b9e15e859112f26;hpb=049eb096da48db0421dd5e358b9b082a1a8a2025;p=linux-2.6-microblaze.git

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e52c878940bb..a301c2d672bf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -19,6 +19,7 @@
 #include <linux/memblock.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/sched/mm.h>
 #include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
@@ -133,7 +134,7 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
 /*
  * Subpool accounting for allocating and reserving pages.
  * Return -ENOMEM if there are not enough resources to satisfy the
- * the request. Otherwise, return the number of pages by which the
+ * request. Otherwise, return the number of pages by which the
  * global pools must be adjusted (upward). The returned value may
  * only be different than the passed value (delta) in the case where
  * a subpool minimum size must be maintained.
@@ -1040,10 +1041,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 {
 	struct page *page;
+	bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA);
+
+	list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+		if (nocma && is_migrate_cma_page(page))
+			continue;
 
-	list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
 		if (!PageHWPoison(page))
 			break;
+	}
+
 	/*
 	 * if 'non-isolated free hugepage' not found on the list,
 	 * the allocation fails.
@@ -1093,15 +1100,6 @@ retry_cpuset:
 	return NULL;
 }
 
-/* Movability of hugepages depends on migration support. */
-static inline gfp_t htlb_alloc_mask(struct hstate *h)
-{
-	if (hugepage_movable_supported(h))
-		return GFP_HIGHUSER_MOVABLE;
-	else
-		return GFP_HIGHUSER;
-}
-
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve,
@@ -1944,7 +1942,7 @@ out_unlock:
 	return page;
 }
 
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
 				     int nid, nodemask_t *nmask)
 {
 	struct page *page;
@@ -1985,32 +1983,10 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	return page;
 }
 
-/* page migration callback function */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
-{
-	gfp_t gfp_mask = htlb_alloc_mask(h);
-	struct page *page = NULL;
-
-	if (nid != NUMA_NO_NODE)
-		gfp_mask |= __GFP_THISNODE;
-
-	spin_lock(&hugetlb_lock);
-	if (h->free_huge_pages - h->resv_huge_pages > 0)
-		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
-	spin_unlock(&hugetlb_lock);
-
-	if (!page)
-		page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
-
-	return page;
-}
-
 /* page migration callback function */
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
-		nodemask_t *nmask)
+		nodemask_t *nmask, gfp_t gfp_mask)
 {
-	gfp_t gfp_mask = htlb_alloc_mask(h);
-
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0) {
 		struct page *page;
@@ -2038,7 +2014,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 
 	gfp_mask = htlb_alloc_mask(h);
 	node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	page = alloc_huge_page_nodemask(h, node, nodemask);
+	page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask);
 	mpol_cond_put(mpol);
 
 	return page;
@@ -2167,7 +2143,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * evenly across all nodes with memory. Iterate across these nodes
 	 * until we can no longer free unreserved surplus pages. This occurs
 	 * when the nodes with surplus pages have no free pages.
-	 * free_pool_huge_page() will balance the the freed pages across the
+	 * free_pool_huge_page() will balance the freed pages across the
 	 * on-line nodes with memory and will handle the hstate accounting.
 	 *
 	 * Note that we decrement resv_huge_pages as we free the pages. If
@@ -3458,13 +3434,21 @@ static int __init default_hugepagesz_setup(char *s)
 }
 __setup("default_hugepagesz=", default_hugepagesz_setup);
 
-static unsigned int cpuset_mems_nr(unsigned int *array)
+static unsigned int allowed_mems_nr(struct hstate *h)
 {
 	int node;
 	unsigned int nr = 0;
+	nodemask_t *mpol_allowed;
+	unsigned int *array = h->free_huge_pages_node;
+	gfp_t gfp_mask = htlb_alloc_mask(h);
 
-	for_each_node_mask(node, cpuset_current_mems_allowed)
-		nr += array[node];
+	mpol_allowed = policy_nodemask_current(gfp_mask);
+
+	for_each_node_mask(node, cpuset_current_mems_allowed) {
+		if (!mpol_allowed ||
+		    (mpol_allowed && node_isset(node, *mpol_allowed)))
+			nr += array[node];
+	}
 
 	return nr;
 }
@@ -3643,12 +3627,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
 	 * we fall back to check against current free page availability as
	 * a best attempt and hopefully to minimize the impact of changing
 	 * semantics that cpuset has.
+	 *
+	 * Apart from cpuset, we also have memory policy mechanism that
+	 * also determines from which node the kernel will allocate memory
+	 * in a NUMA system. So similar to cpuset, we also should consider
+	 * the memory policy of the current task. Similar to the description
+	 * above.
 	 */
 	if (delta > 0) {
 		if (gather_surplus_pages(h, delta) < 0)
 			goto out;
 
-		if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+		if (delta > allowed_mems_nr(h)) {
 			return_unused_surplus_pages(h, delta);
 			goto out;
 		}
@@ -3953,7 +3943,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			continue;
 
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			spin_unlock(ptl);
 			/*
 			 * We just unmapped a page of PMDs by clearing a PUD.
@@ -4540,10 +4530,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
-	} else {
-		ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
-		if (!ptep)
-			return VM_FAULT_OOM;
 	}
 
 	/*
@@ -5020,7 +5006,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		if (!ptep)
 			continue;
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			pages++;
 			spin_unlock(ptl);
 			shared_pmd = true;
@@ -5401,12 +5387,14 @@ out:
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
  */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long *addr, pte_t *ptep)
 {
 	pgd_t *pgd = pgd_offset(mm, *addr);
 	p4d_t *p4d = p4d_offset(pgd, *addr);
 	pud_t *pud = pud_offset(p4d, *addr);
 
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
 		return 0;
@@ -5424,7 +5412,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	return NULL;
 }
 
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
@@ -5694,12 +5683,14 @@ void __init hugetlb_cma_reserve(int order)
 	reserved = 0;
 	for_each_node_state(nid, N_ONLINE) {
 		int res;
+		char name[20];
 
 		size = min(per_node, hugetlb_cma_size - reserved);
 		size = round_up(size, PAGE_SIZE << order);
 
+		snprintf(name, 20, "hugetlb%d", nid);
 		res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order,
-						 0, false, "hugetlb",
+						 0, false, name,
 						 &hugetlb_cma[nid], nid);
 		if (res) {
 			pr_warn("hugetlb_cma: reservation failed: err %d, node %d",
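
A few notes on how the reworked paths above fit together, with short C sketches.

First, the callback unification: htlb_alloc_mask() moves out of mm/hugetlb.c into the hugetlb header, callers now compute the gfp mask themselves and pass it to alloc_huge_page_nodemask(), and dequeue_huge_page_node_exact() skips CMA-backed pages whenever the caller is inside a PF_MEMALLOC_NOCMA scope. A minimal sketch of a migration caller combining these pieces; alloc_hugetlb_migration_target() is a hypothetical illustration, while memalloc_nocma_save()/memalloc_nocma_restore() are the scope helpers from <linux/sched/mm.h> in this kernel series:

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>

/*
 * Hypothetical caller: allocate a hugetlb migration target that must
 * not come from a CMA region, e.g. ahead of long-term pinning.
 */
static struct page *alloc_hugetlb_migration_target(struct hstate *h,
						   int preferred_nid,
						   nodemask_t *nmask)
{
	gfp_t gfp_mask = htlb_alloc_mask(h);	/* movability-aware mask */
	unsigned int flags;
	struct page *page;

	flags = memalloc_nocma_save();	/* sets PF_MEMALLOC_NOCMA on current */
	page = alloc_huge_page_nodemask(h, preferred_nid, nmask, gfp_mask);
	memalloc_nocma_restore(flags);

	return page;
}

Passing gfp_mask explicitly lets this one callback serve every migration context, which is why the special-cased alloc_huge_page_node() can be deleted.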
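
Next, the huge_pmd_unshare() signature change: the function now takes the vma so it can assert, via i_mmap_assert_write_locked(), that the caller holds the mapping's i_mmap_rwsem in write mode; unsharing tears down a PMD page shared with other mappings of the same file, so the rmap lock must exclude concurrent users. A sketch of the locking pattern the assertion encodes, modeled on the __unmap_hugepage_range() hunk above; unshare_one_pmd() is a hypothetical wrapper, not part of the patch:

#include <linux/fs.h>
#include <linux/hugetlb.h>

/* Hypothetical wrapper showing the lock order huge_pmd_unshare() expects. */
static void unshare_one_pmd(struct vm_area_struct *vma, unsigned long addr,
			    pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	struct hstate *h = hstate_vma(vma);
	struct address_space *mapping = vma->vm_file->f_mapping;
	spinlock_t *ptl;

	i_mmap_lock_write(mapping);		/* satisfies the new assertion */
	ptl = huge_pte_lock(h, mm, ptep);	/* page table lock, as in callers */
	if (huge_pmd_unshare(mm, vma, &addr, ptep)) {
		/*
		 * A page of PMDs was unmapped by clearing the PUD; addr was
		 * adjusted to the start of the range covered by that PMD page.
		 */
	}
	spin_unlock(ptl);
	i_mmap_unlock_write(mapping);
}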
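
Finally, allowed_mems_nr() relies on policy_nodemask_current(), added to include/linux/mempolicy.h by the same series. Roughly, it resolves the current task's mempolicy into the nodemask that would constrain an allocation with the given gfp mask, or NULL when the policy imposes no node restriction; a sketch of its shape, built on the existing get_task_policy() and policy_nodemask() helpers (the exact header version may differ in detail):

#include <linux/mempolicy.h>

/*
 * Sketch: map the current task's memory policy to the nodemask that
 * would constrain an allocation with this gfp mask, or NULL if the
 * policy does not restrict nodes.
 */
static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
{
	struct mempolicy *mpol = get_task_policy(current);

	return policy_nodemask(gfp, mpol);
}

With that helper, hugetlb_acct_memory() only counts free hugepages on nodes permitted by both the cpuset and the task's mempolicy before deciding whether a reservation can be honored.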