diff --git a/mm/gup.c b/mm/gup.c
index ef7d2da9f03f..0697134b6a12 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -87,11 +87,12 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
 		int orig_refs = refs;
 
 		/*
-		 * Can't do FOLL_LONGTERM + FOLL_PIN with CMA in the gup fast
-		 * path, so fail and let the caller fall back to the slow path.
+		 * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+		 * right zone, so fail and let the caller fall back to the slow
+		 * path.
 		 */
-		if (unlikely(flags & FOLL_LONGTERM) &&
-		    is_migrate_cma_page(page))
+		if (unlikely((flags & FOLL_LONGTERM) &&
+			     !is_pinnable_page(page)))
 			return NULL;
 
 		/*
@@ -213,6 +214,58 @@ void unpin_user_page(struct page *page)
 }
 EXPORT_SYMBOL(unpin_user_page);
 
+static inline void compound_range_next(unsigned long i, unsigned long npages,
+				       struct page **list, struct page **head,
+				       unsigned int *ntails)
+{
+	struct page *next, *page;
+	unsigned int nr = 1;
+
+	if (i >= npages)
+		return;
+
+	next = *list + i;
+	page = compound_head(next);
+	if (PageCompound(page) && compound_order(page) >= 1)
+		nr = min_t(unsigned int,
+			   page + compound_nr(page) - next, npages - i);
+
+	*head = page;
+	*ntails = nr;
+}
+
+#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
+	for (__i = 0, \
+	     compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
+	     __i < __npages; __i += __ntails, \
+	     compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
+
+static inline void compound_next(unsigned long i, unsigned long npages,
+				 struct page **list, struct page **head,
+				 unsigned int *ntails)
+{
+	struct page *page;
+	unsigned int nr;
+
+	if (i >= npages)
+		return;
+
+	page = compound_head(list[i]);
+	for (nr = i + 1; nr < npages; nr++) {
+		if (compound_head(list[nr]) != page)
+			break;
+	}
+
+	*head = page;
+	*ntails = nr - i;
+}
+
+#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
+	for (__i = 0, \
+	     compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
+	     __i < __npages; __i += __ntails, \
+	     compound_next(__i, __npages, __list, &(__head), &(__ntails)))
+
 /**
  * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
  * @pages: array of pages to be maybe marked dirty, and definitely released.
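The two iterators added above batch an array (or a physically contiguous range) of pinned pages by compound head: each step yields one head page plus the count of consecutive entries (ntails) that share it. A minimal usage sketch, hypothetical and not part of this patch:

/*
 * Hypothetical example: count the distinct compound heads backing a
 * pinned page array. Each iteration covers @ntails consecutive
 * entries of @pages that share the same head page.
 */
static unsigned long count_compound_heads(struct page **pages,
					  unsigned long npages)
{
	unsigned long i, nheads = 0;
	struct page *head;
	unsigned int ntails;

	for_each_compound_head(i, pages, npages, head, ntails)
		nheads++;

	return nheads;
}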
@@ -239,20 +292,15 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
 				 bool make_dirty)
 {
 	unsigned long index;
-
-	/*
-	 * TODO: this can be optimized for huge pages: if a series of pages is
-	 * physically contiguous and part of the same compound page, then a
-	 * single operation to the head page should suffice.
-	 */
+	struct page *head;
+	unsigned int ntails;
 
 	if (!make_dirty) {
 		unpin_user_pages(pages, npages);
 		return;
 	}
 
-	for (index = 0; index < npages; index++) {
-		struct page *page = compound_head(pages[index]);
+	for_each_compound_head(index, pages, npages, head, ntails) {
 		/*
 		 * Checking PageDirty at this point may race with
 		 * clear_page_dirty_for_io(), but that's OK. Two key
@@ -273,13 +321,49 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
 		 * written back, so it gets written back again in the
 		 * next writeback cycle. This is harmless.
 		 */
-		if (!PageDirty(page))
-			set_page_dirty_lock(page);
-		unpin_user_page(page);
+		if (!PageDirty(head))
+			set_page_dirty_lock(head);
+		put_compound_head(head, ntails, FOLL_PIN);
 	}
 }
 EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 
+/**
+ * unpin_user_page_range_dirty_lock() - release and optionally dirty
+ * gup-pinned page range
+ *
+ * @page: the starting page of a range maybe marked dirty, and definitely released.
+ * @npages: number of consecutive pages to release.
+ * @make_dirty: whether to mark the pages dirty
+ *
+ * "gup-pinned page range" refers to a range of pages that has had one of the
+ * pin_user_pages() variants called on that page.
+ *
+ * For the page ranges defined by [page .. page+npages], make that range (or
+ * its head pages, if a compound page) dirty, if @make_dirty is true, and if the
+ * page range was previously listed as clean.
+ *
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), unpin_user_page().
+ *
+ */
+void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
+				      bool make_dirty)
+{
+	unsigned long index;
+	struct page *head;
+	unsigned int ntails;
+
+	for_each_compound_range(index, &page, npages, head, ntails) {
+		if (make_dirty && !PageDirty(head))
+			set_page_dirty_lock(head);
+		put_compound_head(head, ntails, FOLL_PIN);
+	}
+}
+EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
+
 /**
  * unpin_user_pages() - release an array of gup-pinned pages.
  * @pages: array of pages to be marked dirty and released.
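For callers that pinned one physically contiguous range (a single compound page, for instance), the new range variant drops every pin without carrying a page array at all. A hedged usage sketch; the driver helper below is hypothetical:

/*
 * Hypothetical example: release a pinned, physically contiguous
 * buffer a device has finished writing into. @first is the first
 * page of the range, @npages its length in pages.
 */
static void example_release_contig_buffer(struct page *first,
					  unsigned long npages,
					  bool device_wrote)
{
	/* Dirties head pages as needed, then drops the pins. */
	unpin_user_page_range_dirty_lock(first, npages, device_wrote);
}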
@@ -292,6 +376,8 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 void unpin_user_pages(struct page **pages, unsigned long npages)
 {
 	unsigned long index;
+	struct page *head;
+	unsigned int ntails;
 
 	/*
 	 * If this WARN_ON() fires, then the system *might* be leaking pages (by
@@ -300,13 +386,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
 	 */
 	if (WARN_ON(IS_ERR_VALUE(npages)))
 		return;
-	/*
-	 * TODO: this can be optimized for huge pages: if a series of pages is
-	 * physically contiguous and part of the same compound page, then a
-	 * single operation to the head page should suffice.
-	 */
-	for (index = 0; index < npages; index++)
-		unpin_user_page(pages[index]);
+
+	for_each_compound_head(index, pages, npages, head, ntails)
+		put_compound_head(head, ntails, FOLL_PIN);
 }
 EXPORT_SYMBOL(unpin_user_pages);
 
@@ -435,18 +517,6 @@ retry:
 		}
 	}
 
-	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
-		get_page(page);
-		pte_unmap_unlock(ptep, ptl);
-		lock_page(page);
-		ret = split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
-		if (ret)
-			return ERR_PTR(ret);
-		goto retry;
-	}
-
 	/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
 	if (unlikely(!try_grab_page(page, flags))) {
 		page = ERR_PTR(-ENOMEM);
 		goto out;
 	}
@@ -591,7 +661,7 @@ retry_locked:
 		spin_unlock(ptl);
 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}
-	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
+	if (flags & FOLL_SPLIT_PMD) {
 		int ret;
 		page = pmd_page(*pmd);
 		if (is_huge_zero_page(page)) {
@@ -600,19 +670,7 @@ retry_locked:
 			split_huge_pmd(vma, pmd, address);
 			if (pmd_trans_unstable(pmd))
 				ret = -EBUSY;
-		} else if (flags & FOLL_SPLIT) {
-			if (unlikely(!try_get_page(page))) {
-				spin_unlock(ptl);
-				return ERR_PTR(-ENOMEM);
-			}
-			spin_unlock(ptl);
-			lock_page(page);
-			ret = split_huge_page(page);
-			unlock_page(page);
-			put_page(page);
-			if (pmd_none(*pmd))
-				return no_page_table(vma, flags);
-		} else {	/* flags & FOLL_SPLIT_PMD */
+		} else {
 			spin_unlock(ptl);
 			split_huge_pmd(vma, pmd, address);
 			ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
@@ -1470,7 +1528,7 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
-	int i;
+	long i;
 
 	/* calculate required read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
@@ -1517,7 +1575,7 @@ finish_or_fault:
  * Returns NULL on any kind of failure - a hole must then be inserted into
  * the corefile, to preserve alignment with its headers; and also returns
  * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
+ * allowing a hole to be left in the corefile to save disk space.
  *
  * Called without mmap_lock (takes and releases the mmap_lock by itself).
  */
@@ -1543,112 +1601,92 @@ struct page *get_dump_page(unsigned long addr)
 }
 #endif /* CONFIG_ELF_CORE */
 
-#ifdef CONFIG_CMA
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
-					unsigned long start,
-					unsigned long nr_pages,
-					struct page **pages,
-					struct vm_area_struct **vmas,
-					unsigned int gup_flags)
+#ifdef CONFIG_MIGRATION
+/*
+ * Check whether all pages are pinnable, if so return number of pages. If some
+ * pages are not pinnable, migrate them, and unpin all pages. Return zero if
+ * pages were migrated, or if some pages were not successfully isolated.
+ * Return negative error if migration fails.
+ */
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+					    struct page **pages,
+					    unsigned int gup_flags)
 {
 	unsigned long i;
-	unsigned long step;
+	unsigned long isolation_error_count = 0;
 	bool drain_allow = true;
-	bool migrate_allow = true;
-	LIST_HEAD(cma_page_list);
-	long ret = nr_pages;
+	LIST_HEAD(movable_page_list);
+	long ret = 0;
+	struct page *prev_head = NULL;
+	struct page *head;
 	struct migration_target_control mtc = {
 		.nid = NUMA_NO_NODE,
-		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
+		.gfp_mask = GFP_USER | __GFP_NOWARN,
 	};
 
-check_again:
-	for (i = 0; i < nr_pages;) {
-
-		struct page *head = compound_head(pages[i]);
-
-		/*
-		 * gup may start from a tail page. Advance step by the left
-		 * part.
-		 */
-		step = compound_nr(head) - (pages[i] - head);
+	for (i = 0; i < nr_pages; i++) {
+		head = compound_head(pages[i]);
+		if (head == prev_head)
+			continue;
+		prev_head = head;
 		/*
-		 * If we get a page from the CMA zone, since we are going to
-		 * be pinning these entries, we might as well move them out
-		 * of the CMA zone if possible.
+		 * If we get a movable page, since we are going to be pinning
+		 * these entries, try to move them out if possible.
 		 */
-		if (is_migrate_cma_page(head)) {
-			if (PageHuge(head))
-				isolate_huge_page(head, &cma_page_list);
-			else {
+		if (!is_pinnable_page(head)) {
+			if (PageHuge(head)) {
+				if (!isolate_huge_page(head, &movable_page_list))
+					isolation_error_count++;
+			} else {
 				if (!PageLRU(head) && drain_allow) {
 					lru_add_drain_all();
 					drain_allow = false;
 				}
 
-				if (!isolate_lru_page(head)) {
-					list_add_tail(&head->lru, &cma_page_list);
-					mod_node_page_state(page_pgdat(head),
-							    NR_ISOLATED_ANON +
-							    page_is_file_lru(head),
-							    thp_nr_pages(head));
+				if (isolate_lru_page(head)) {
+					isolation_error_count++;
+					continue;
 				}
+				list_add_tail(&head->lru, &movable_page_list);
+				mod_node_page_state(page_pgdat(head),
+						    NR_ISOLATED_ANON +
+						    page_is_file_lru(head),
+						    thp_nr_pages(head));
 			}
 		}
-
-		i += step;
 	}
 
-	if (!list_empty(&cma_page_list)) {
-		/*
-		 * drop the above get_user_pages reference.
-		 */
-		if (gup_flags & FOLL_PIN)
-			unpin_user_pages(pages, nr_pages);
-		else
-			for (i = 0; i < nr_pages; i++)
-				put_page(pages[i]);
-
-		if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
-			(unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
-			/*
-			 * some of the pages failed migration. Do get_user_pages
-			 * without migration.
-			 */
-			migrate_allow = false;
+	/*
+	 * If list is empty, and no isolation errors, means that all pages are
+	 * in the correct zone.
+	 */
+	if (list_empty(&movable_page_list) && !isolation_error_count)
+		return nr_pages;
 
-			if (!list_empty(&cma_page_list))
-				putback_movable_pages(&cma_page_list);
-		}
-		/*
-		 * We did migrate all the pages, Try to get the page references
-		 * again migrating any new CMA pages which we failed to isolate
-		 * earlier.
-		 */
-		ret = __get_user_pages_locked(mm, start, nr_pages,
-					      pages, vmas, NULL,
-					      gup_flags);
-
-		if ((ret > 0) && migrate_allow) {
-			nr_pages = ret;
-			drain_allow = true;
-			goto check_again;
-		}
+	if (gup_flags & FOLL_PIN) {
+		unpin_user_pages(pages, nr_pages);
+	} else {
+		for (i = 0; i < nr_pages; i++)
+			put_page(pages[i]);
+	}
+	if (!list_empty(&movable_page_list)) {
+		ret = migrate_pages(&movable_page_list, alloc_migration_target,
+				    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
+				    MR_LONGTERM_PIN);
+		if (ret && !list_empty(&movable_page_list))
+			putback_movable_pages(&movable_page_list);
 	}
 
-	return ret;
+	return ret > 0 ? -ENOMEM : ret;
 }
 #else
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
-					unsigned long start,
-					unsigned long nr_pages,
-					struct page **pages,
-					struct vm_area_struct **vmas,
-					unsigned int gup_flags)
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+					    struct page **pages,
+					    unsigned int gup_flags)
 {
 	return nr_pages;
 }
-#endif /* CONFIG_CMA */
+#endif /* CONFIG_MIGRATION */
 
 /*
  * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
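All of this machinery serves FOLL_LONGTERM callers: when user memory is pinned for an indefinite time, GUP first migrates any page that sits in CMA or ZONE_MOVABLE. A sketch of such a caller, hypothetical and simplified (pin_user_pages() is the real entry point; the helper and its locking context are illustrative):

/*
 * Hypothetical example: an RDMA-style driver taking a long-term pin
 * on a user buffer. FOLL_LONGTERM makes GUP migrate unpinnable
 * (CMA/ZONE_MOVABLE) pages out of the way before pinning them.
 */
static long example_longterm_pin(unsigned long uaddr, unsigned long npages,
				 struct page **pages)
{
	long pinned;

	mmap_read_lock(current->mm);
	pinned = pin_user_pages(uaddr, npages,
				FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
	mmap_read_unlock(current->mm);

	return pinned;	/* pages pinned, or negative error */
}

Pins taken this way are later dropped with unpin_user_pages() or the dirty-lock variants added earlier in this patch.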
@@ -1661,21 +1699,22 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 				  struct vm_area_struct **vmas,
 				  unsigned int gup_flags)
 {
-	unsigned long flags = 0;
+	unsigned int flags;
 	long rc;
 
-	if (gup_flags & FOLL_LONGTERM)
-		flags = memalloc_nocma_save();
-
-	rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL,
-				     gup_flags);
+	if (!(gup_flags & FOLL_LONGTERM))
+		return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+					       NULL, gup_flags);
+	flags = memalloc_pin_save();
+	do {
+		rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+					     NULL, gup_flags);
+		if (rc <= 0)
+			break;
+		rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
+	} while (!rc);
+	memalloc_pin_restore(flags);
 
-	if (gup_flags & FOLL_LONGTERM) {
-		if (rc > 0)
-			rc = check_and_migrate_cma_pages(mm, start, rc, pages,
-							 vmas, gup_flags);
-		memalloc_nocma_restore(flags);
-	}
 	return rc;
 }
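The rewritten wrapper loops until check_and_migrate_movable_pages() confirms every page is pinnable (it returns nr_pages), retries on zero, and gives up on a negative error. The memalloc_pin_save()/memalloc_pin_restore() pair brackets the whole sequence so that pages faulted in during a retry are allocated outside CMA/ZONE_MOVABLE to begin with. A standalone sketch of that bracketing pattern, hypothetical and not part of this patch:

/*
 * Hypothetical example: code that faults pages in immediately before
 * long-term pinning them can reuse the bracketing pattern that
 * __gup_longterm_locked() applies above.
 */
static void example_pin_scope(void)
{
	unsigned int flags;

	flags = memalloc_pin_save();
	/* ... fault in or allocate the pages about to be pinned ... */
	memalloc_pin_restore(flags);
}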