fs/hugetlbfs/inode.c: fix bugs in hugetlb_vmtruncate_list()
[linux-2.6-microblaze.git] / mm / page_alloc.c
index fbff97d..63358d9 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/vmalloc.h>
 #include <linux/vmstat.h>
 #include <linux/mempolicy.h>
+#include <linux/memremap.h>
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
@@ -222,13 +223,15 @@ static char * const zone_names[MAX_NR_ZONES] = {
 #endif
 };
 
-static void free_compound_page(struct page *page);
 compound_page_dtor * const compound_page_dtors[] = {
        NULL,
        free_compound_page,
 #ifdef CONFIG_HUGETLB_PAGE
        free_huge_page,
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       free_transhuge_page,
+#endif
 };
 
 int min_free_kbytes = 1024;
@@ -450,7 +453,7 @@ out:
  * This usage means that zero-order pages may not be compound.
  */
 
-static void free_compound_page(struct page *page)
+void free_compound_page(struct page *page)
 {
        __free_pages_ok(page, compound_order(page));
 }
@@ -466,8 +469,10 @@ void prep_compound_page(struct page *page, unsigned int order)
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
                set_page_count(p, 0);
+               p->mapping = TAIL_MAPPING;
                set_compound_head(p, page);
        }
+       atomic_set(compound_mapcount_ptr(page), -1);
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -732,7 +737,7 @@ static inline int free_pages_check(struct page *page)
        const char *bad_reason = NULL;
        unsigned long bad_flags = 0;
 
-       if (unlikely(page_mapcount(page)))
+       if (unlikely(atomic_read(&page->_mapcount) != -1))
                bad_reason = "nonzero mapcount";
        if (unlikely(page->mapping != NULL))
                bad_reason = "non-NULL mapping";
@@ -805,7 +810,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                do {
                        int mt; /* migratetype of the to-be-freed page */
 
-                       page = list_entry(list->prev, struct page, lru);
+                       page = list_last_entry(list, struct page, lru);
                        /* must delete as __free_one_page list manipulates */
                        list_del(&page->lru);
 
@@ -856,6 +861,27 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
                ret = 0;
                goto out;
        }
+       switch (page - head_page) {
+       case 1:
+               /* the first tail page: ->mapping is compound_mapcount() */
+               if (unlikely(compound_mapcount(page))) {
+                       bad_page(page, "nonzero compound_mapcount", 0);
+                       goto out;
+               }
+               break;
+       case 2:
+               /*
+                * the second tail page: ->mapping is
+                * page_deferred_list().next -- ignore value.
+                */
+               break;
+       default:
+               if (page->mapping != TAIL_MAPPING) {
+                       bad_page(page, "corrupted mapping in tail page", 0);
+                       goto out;
+               }
+               break;
+       }
        if (unlikely(!PageTail(page))) {
                bad_page(page, "PageTail not set", 0);
                goto out;
@@ -866,6 +892,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
        }
        ret = 0;
 out:
+       page->mapping = NULL;
        clear_compound_head(page);
        return ret;
 }
@@ -1329,7 +1356,7 @@ static inline int check_new_page(struct page *page)
        const char *bad_reason = NULL;
        unsigned long bad_flags = 0;
 
-       if (unlikely(page_mapcount(page)))
+       if (unlikely(atomic_read(&page->_mapcount) != -1))
                bad_reason = "nonzero mapcount";
        if (unlikely(page->mapping != NULL))
                bad_reason = "non-NULL mapping";
@@ -1410,11 +1437,10 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
        /* Find a page of the appropriate size in the preferred list */
        for (current_order = order; current_order < MAX_ORDER; ++current_order) {
                area = &(zone->free_area[current_order]);
-               if (list_empty(&area->free_list[migratetype]))
-                       continue;
-
-               page = list_entry(area->free_list[migratetype].next,
+               page = list_first_entry_or_null(&area->free_list[migratetype],
                                                        struct page, lru);
+               if (!page)
+                       continue;
                list_del(&page->lru);
                rmv_page_order(page);
                area->nr_free--;
@@ -1693,12 +1719,12 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
                for (order = 0; order < MAX_ORDER; order++) {
                        struct free_area *area = &(zone->free_area[order]);
 
-                       if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+                       page = list_first_entry_or_null(
+                                       &area->free_list[MIGRATE_HIGHATOMIC],
+                                       struct page, lru);
+                       if (!page)
                                continue;
 
-                       page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
-                                               struct page, lru);
-
                        /*
                         * It should never happen but changes to locking could
                         * inadvertently allow a per-cpu drain to add pages
@@ -1746,7 +1772,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
                if (fallback_mt == -1)
                        continue;
 
-               page = list_entry(area->free_list[fallback_mt].next,
+               page = list_first_entry(&area->free_list[fallback_mt],
                                                struct page, lru);
                if (can_steal)
                        steal_suitable_fallback(zone, page, start_migratetype);
@@ -1981,7 +2007,7 @@ void mark_free_pages(struct zone *zone)
        unsigned long pfn, max_zone_pfn;
        unsigned long flags;
        unsigned int order, t;
-       struct list_head *curr;
+       struct page *page;
 
        if (zone_is_empty(zone))
                return;
@@ -1991,17 +2017,17 @@ void mark_free_pages(struct zone *zone)
        max_zone_pfn = zone_end_pfn(zone);
        for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                if (pfn_valid(pfn)) {
-                       struct page *page = pfn_to_page(pfn);
-
+                       page = pfn_to_page(pfn);
                        if (!swsusp_page_is_forbidden(page))
                                swsusp_unset_page_free(page);
                }
 
        for_each_migratetype_order(order, t) {
-               list_for_each(curr, &zone->free_area[order].free_list[t]) {
+               list_for_each_entry(page,
+                               &zone->free_area[order].free_list[t], lru) {
                        unsigned long i;
 
-                       pfn = page_to_pfn(list_entry(curr, struct page, lru));
+                       pfn = page_to_pfn(page);
                        for (i = 0; i < (1UL << order); i++)
                                swsusp_set_page_free(pfn_to_page(pfn + i));
                }
@@ -2205,9 +2231,9 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
                }
 
                if (cold)
-                       page = list_entry(list->prev, struct page, lru);
+                       page = list_last_entry(list, struct page, lru);
                else
-                       page = list_entry(list->next, struct page, lru);
+                       page = list_first_entry(list, struct page, lru);
 
                list_del(&page->lru);
                pcp->count--;
@@ -2733,8 +2759,21 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                        goto out;
        }
        /* Exhausted what can be done so it's blamo time */
-       if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+       if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
                *did_some_progress = 1;
+
+               if (gfp_mask & __GFP_NOFAIL) {
+                       page = get_page_from_freelist(gfp_mask, order,
+                                       ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
+                       /*
+                        * fall back to ignoring the cpuset restriction if our
+                        * nodes are depleted
+                        */
+                       if (!page)
+                               page = get_page_from_freelist(gfp_mask, order,
+                                       ALLOC_NO_WATERMARKS, ac);
+               }
+       }
 out:
        mutex_unlock(&oom_lock);
        return page;
@@ -4447,16 +4486,22 @@ static inline unsigned long wait_table_bits(unsigned long size)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                unsigned long start_pfn, enum memmap_context context)
 {
-       pg_data_t *pgdat = NODE_DATA(nid);
+       struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
        unsigned long end_pfn = start_pfn + size;
+       pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long pfn;
-       struct zone *z;
        unsigned long nr_initialised = 0;
 
        if (highest_memmap_pfn < end_pfn - 1)
                highest_memmap_pfn = end_pfn - 1;
 
-       z = &pgdat->node_zones[zone];
+       /*
+        * Honor reservation requested by the driver for this ZONE_DEVICE
+        * memory
+        */
+       if (altmap && start_pfn == altmap->base_pfn)
+               start_pfn += altmap->reserve;
+
        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                /*
                 * There can be holes in boot-time mem_map[]s