diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index be18ccd..b100255 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 #include "internal.h"
@@ -103,6 +104,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 };
 EXPORT_SYMBOL(node_states);
 
+/* Protect totalram_pages and zone->managed_pages */
+static DEFINE_SPINLOCK(managed_page_count_lock);
+
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 /*
@@ -200,6 +204,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 int min_free_kbytes = 1024;
+int user_min_free_kbytes;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
@@ -742,14 +747,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        local_irq_restore(flags);
 }
 
-/*
- * Read access to zone->managed_pages is safe because it's unsigned long,
- * but we still need to serialize writers. Currently all callers of
- * __free_pages_bootmem() except put_page_bootmem() should only be used
- * at boot time. So for shorter boot time, we shift the burden to
- * put_page_bootmem() to serialize writers.
- */
-void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
+void __init __free_pages_bootmem(struct page *page, unsigned int order)
 {
        unsigned int nr_pages = 1 << order;
        unsigned int loop;
@@ -784,11 +782,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
        set_page_refcounted(page);
        set_pageblock_migratetype(page, MIGRATE_CMA);
        __free_pages(page, pageblock_order);
-       totalram_pages += pageblock_nr_pages;
-#ifdef CONFIG_HIGHMEM
-       if (PageHighMem(page))
-               totalhigh_pages += pageblock_nr_pages;
-#endif
+       adjust_managed_page_count(page, pageblock_nr_pages);
 }
 #endif
 
@@ -1053,7 +1047,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                         * MIGRATE_CMA areas.
                         */
                        if (!is_migrate_cma(migratetype) &&
-                           (unlikely(current_order >= pageblock_order / 2) ||
+                           (current_order >= pageblock_order / 2 ||
                             start_migratetype == MIGRATE_RECLAIMABLE ||
                             page_group_by_mobility_disabled)) {
                                int pages;
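
The unlikely() annotation dropped above is only a branch-layout hint; removing it does not change behavior, presumably because this condition is not actually rare once fallback happens. A userspace sketch of what the hint expands to (the kernel's definitions live in include/linux/compiler.h):

    #include <stdio.h>

    /* Userspace stand-ins for the kernel's branch hints. */
    #define likely(x)   __builtin_expect(!!(x), 1)
    #define unlikely(x) __builtin_expect(!!(x), 0)

    int main(void)
    {
            int current_order = 5, pageblock_order = 9;

            /* The hint only steers code layout for the compiler; the
             * result of the test is identical with or without it. */
            if (unlikely(current_order >= pageblock_order / 2))
                    puts("steal the whole pageblock");
            else
                    puts("steal a single page");
            return 0;
    }
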
@@ -2845,7 +2839,7 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of pages which are beyond the
  * high watermark within all zones at or below a given zone index.  For each
  * zone, the number of pages is calculated as:
- *     present_pages - high_pages
+ *     managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
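
For context on present_pages vs. managed_pages: spanned_pages covers the zone's whole PFN range including holes, present_pages excludes the holes, and managed_pages further excludes pages the buddy allocator never sees (memmap, bootmem allocations, and similar reservations). Switching the comment to managed_pages means permanently reserved pages are no longer reported as allocatable. An illustrative calculation with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            /* Illustrative numbers only, not from any real machine. */
            unsigned long spanned    = 1UL << 20; /* PFN span, holes included */
            unsigned long absent     = 64 * 1024; /* holes                    */
            unsigned long reserved   = 16 * 1024; /* memmap, bootmem, ...     */
            unsigned long high_wmark = 2048;

            unsigned long present = spanned - absent;   /* 983040 */
            unsigned long managed = present - reserved; /* 966656 */

            /* What nr_free_zone_pages() contributes for this zone. */
            printf("beyond high watermark: %lu\n", managed - high_wmark);
            return 0;
    }
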
@@ -2912,9 +2906,13 @@ EXPORT_SYMBOL(si_meminfo);
 #ifdef CONFIG_NUMA
 void si_meminfo_node(struct sysinfo *val, int nid)
 {
+       int zone_type;          /* needs to be signed */
+       unsigned long managed_pages = 0;
        pg_data_t *pgdat = NODE_DATA(nid);
 
-       val->totalram = pgdat->node_present_pages;
+       for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+               managed_pages += pgdat->node_zones[zone_type].managed_pages;
+       val->totalram = managed_pages;
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
        val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3156,12 +3154,10 @@ static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
  * Add all populated zones of a node to the zonelist.
  */
 static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
-                               int nr_zones, enum zone_type zone_type)
+                               int nr_zones)
 {
        struct zone *zone;
-
-       BUG_ON(zone_type >= MAX_NR_ZONES);
-       zone_type++;
+       enum zone_type zone_type = MAX_NR_ZONES;
 
        do {
                zone_type--;
@@ -3171,8 +3167,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
                                &zonelist->_zonerefs[nr_zones++]);
                        check_highest_zone(zone_type);
                }
-
        } while (zone_type);
+
        return nr_zones;
 }
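
The refactor folds the starting zone into build_zonelists_node() itself: instead of each caller passing MAX_NR_ZONES - 1, the function now always walks from the top zone down to zone 0. A toy model of the loop shape, with an assumed three-zone layout:

    #include <stdio.h>

    enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

    /* Assumed toy topology: HighMem exists but is empty. */
    static const char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
    static const int populated[MAX_NR_ZONES]    = { 1, 1, 0 };

    int main(void)
    {
            enum zone_type zone_type = MAX_NR_ZONES;

            /* Pre-decrement from one past the top zone and stop after
             * zone 0: highest zones are emitted first, so the allocator
             * prefers them over scarce low zones. */
            do {
                    zone_type--;
                    if (populated[zone_type])
                            printf("zoneref: %s\n", zone_names[zone_type]);
            } while (zone_type);
            return 0;
    }
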
 
@@ -3366,8 +3362,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
        zonelist = &pgdat->node_zonelists[0];
        for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                ;
-       j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+       j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
 }
@@ -3381,7 +3376,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
        struct zonelist *zonelist;
 
        zonelist = &pgdat->node_zonelists[1];
-       j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+       j = build_zonelists_node(pgdat, zonelist, 0);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
 }
@@ -3438,8 +3433,8 @@ static int default_zonelist_order(void)
                        z = &NODE_DATA(nid)->node_zones[zone_type];
                        if (populated_zone(z)) {
                                if (zone_type < ZONE_NORMAL)
-                                       low_kmem_size += z->present_pages;
-                               total_size += z->present_pages;
+                                       low_kmem_size += z->managed_pages;
+                               total_size += z->managed_pages;
                        } else if (zone_type == ZONE_NORMAL) {
                                /*
                                 * If any node has only lowmem, then node order
@@ -3589,7 +3584,7 @@ static void build_zonelists(pg_data_t *pgdat)
        local_node = pgdat->node_id;
 
        zonelist = &pgdat->node_zonelists[0];
-       j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+       j = build_zonelists_node(pgdat, zonelist, 0);
 
        /*
         * Now we build the zonelist so that it contains the zones
@@ -3602,14 +3597,12 @@ static void build_zonelists(pg_data_t *pgdat)
        for (node = local_node + 1; node < MAX_NUMNODES; node++) {
                if (!node_online(node))
                        continue;
-               j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+               j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        }
        for (node = 0; node < local_node; node++) {
                if (!node_online(node))
                        continue;
-               j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+               j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        }
 
        zonelist->_zonerefs[j].zone = NULL;
@@ -4424,13 +4417,13 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
  */
 static unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *ignored)
 {
-       unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
 
-       /* Get the start and end of the node and zone */
-       get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+       /* Get the start and end of the zone */
        zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
        zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
        adjust_zone_range_for_zone_movable(nid, zone_type,
@@ -4485,14 +4478,14 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn,
 /* Return the number of page frames in holes in a zone on a node */
 static unsigned long __meminit zone_absent_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *ignored)
 {
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
-       unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
 
-       get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
        zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
        zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
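
The two clamp() calls intersect the node's PFN range with the zone's architectural range; if the ranges do not overlap, start and end clamp to the same boundary and the zone contributes nothing on this node. A quick check with a simplified stand-in for the kernel's type-checked clamp():

    #include <stdio.h>

    /* Simplified stand-in for clamp() from include/linux/kernel.h. */
    #define clamp(val, lo, hi) \
            ((val) < (lo) ? (lo) : (val) > (hi) ? (hi) : (val))

    int main(void)
    {
            /* Illustrative PFNs: node [1000, 9000), zone [0, 4096). */
            unsigned long node_start_pfn = 1000, node_end_pfn = 9000;
            unsigned long zone_low = 0, zone_high = 4096;

            unsigned long zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
            unsigned long zone_end_pfn   = clamp(node_end_pfn, zone_low, zone_high);

            /* Prints [1000, 4096): the slice of the zone on this node. */
            printf("[%lu, %lu)\n", zone_start_pfn, zone_end_pfn);
            return 0;
    }
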
 
@@ -4505,6 +4498,8 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *zones_size)
 {
        return zones_size[zone_type];
@@ -4512,6 +4507,8 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 
 static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
                                                unsigned long zone_type,
+                                               unsigned long node_start_pfn,
+                                               unsigned long node_end_pfn,
                                                unsigned long *zholes_size)
 {
        if (!zholes_size)
@@ -4523,21 +4520,27 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
-               unsigned long *zones_size, unsigned long *zholes_size)
+                                               unsigned long node_start_pfn,
+                                               unsigned long node_end_pfn,
+                                               unsigned long *zones_size,
+                                               unsigned long *zholes_size)
 {
        unsigned long realtotalpages, totalpages = 0;
        enum zone_type i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
                totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
-                                                               zones_size);
+                                                        node_start_pfn,
+                                                        node_end_pfn,
+                                                        zones_size);
        pgdat->node_spanned_pages = totalpages;
 
        realtotalpages = totalpages;
        for (i = 0; i < MAX_NR_ZONES; i++)
                realtotalpages -=
                        zone_absent_pages_in_node(pgdat->node_id, i,
-                                                               zholes_size);
+                                                 node_start_pfn, node_end_pfn,
+                                                 zholes_size);
        pgdat->node_present_pages = realtotalpages;
        printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
                                                        realtotalpages);
@@ -4646,6 +4649,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
  * NOTE: pgdat should get zeroed by caller.
  */
 static void __paginginit free_area_init_core(struct pglist_data *pgdat,
+               unsigned long node_start_pfn, unsigned long node_end_pfn,
                unsigned long *zones_size, unsigned long *zholes_size)
 {
        enum zone_type j;
@@ -4667,8 +4671,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, freesize, memmap_pages;
 
-               size = zone_spanned_pages_in_node(nid, j, zones_size);
+               size = zone_spanned_pages_in_node(nid, j, node_start_pfn,
+                                                 node_end_pfn, zones_size);
                realsize = freesize = size - zone_absent_pages_in_node(nid, j,
+                                                               node_start_pfn,
+                                                               node_end_pfn,
                                                                zholes_size);
 
                /*
@@ -4782,6 +4789,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                unsigned long node_start_pfn, unsigned long *zholes_size)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
+       unsigned long start_pfn = 0;
+       unsigned long end_pfn = 0;
 
        /* pg_data_t should be reset to zero when it's allocated */
        WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
@@ -4789,7 +4798,11 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
        init_zone_allows_reclaim(nid);
-       calculate_node_totalpages(pgdat, zones_size, zholes_size);
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+       get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+#endif
+       calculate_node_totalpages(pgdat, start_pfn, end_pfn,
+                                 zones_size, zholes_size);
 
        alloc_node_mem_map(pgdat);
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -4798,7 +4811,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                (unsigned long)pgdat->node_mem_map);
 #endif
 
-       free_area_init_core(pgdat, zones_size, zholes_size);
+       free_area_init_core(pgdat, start_pfn, end_pfn,
+                           zones_size, zholes_size);
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
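
The net effect of all this parameter plumbing: get_pfn_range_for_nid() is now called once per node in free_area_init_node() and the result threaded down, instead of being recomputed for every zone in both the spanned and absent calculations. When CONFIG_HAVE_MEMBLOCK_NODE_MAP is not set, start_pfn and end_pfn simply stay zero, which is harmless because the stub versions ignore them and read zones_size/zholes_size directly.
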
@@ -5206,6 +5220,19 @@ early_param("movablecore", cmdline_parse_movablecore);
 
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
+void adjust_managed_page_count(struct page *page, long count)
+{
+       spin_lock(&managed_page_count_lock);
+       page_zone(page)->managed_pages += count;
+       totalram_pages += count;
+#ifdef CONFIG_HIGHMEM
+       if (PageHighMem(page))
+               totalhigh_pages += count;
+#endif
+       spin_unlock(&managed_page_count_lock);
+}
+EXPORT_SYMBOL(adjust_managed_page_count);
+
 unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
 {
        void *pos;
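
The new adjust_managed_page_count() helper centralizes the counter updates that init_cma_reserved_pageblock() and friends used to open-code. Writers take managed_page_count_lock; readers of these word-sized counters stay lockless. A hedged userspace analogue of the pattern (a pthread spinlock standing in for the kernel spinlock; the names mirror the patch but the model is simplified):

    #include <pthread.h>
    #include <stdio.h>

    /* One lock serializes all writers; readers may load the word-sized
     * counters without it, since word-sized loads are atomic on the
     * architectures the kernel supports. */
    static pthread_spinlock_t managed_page_count_lock;
    static unsigned long totalram_pages;
    static unsigned long zone_managed_pages;

    static void adjust_managed_page_count(long count)
    {
            pthread_spin_lock(&managed_page_count_lock);
            zone_managed_pages += count;
            totalram_pages += count;
            pthread_spin_unlock(&managed_page_count_lock);
    }

    int main(void)
    {
            pthread_spin_init(&managed_page_count_lock, PTHREAD_PROCESS_PRIVATE);
            adjust_managed_page_count(512);  /* e.g. a balloon driver giving pages back */
            adjust_managed_page_count(-128); /* ...or taking them away again */
            printf("totalram_pages = %lu\n", totalram_pages);
            pthread_spin_destroy(&managed_page_count_lock);
            return 0;
    }

(Build with -pthread.)
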
@@ -5214,7 +5241,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
        start = (void *)PAGE_ALIGN((unsigned long)start);
        end = (void *)((unsigned long)end & PAGE_MASK);
        for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
-               if (poison)
+               if ((unsigned int)poison <= 0xFF)
                        memset(pos, poison, PAGE_SIZE);
                free_reserved_page(virt_to_page(pos));
        }
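
The rewritten test is the point of this hunk: the old "if (poison)" made it impossible to poison with the byte 0x00. Casting to unsigned int turns a negative "don't poison" sentinel such as -1 into a huge value that fails the <= 0xFF check, while every byte value 0..255, zero included, now triggers the memset. A small demonstration of the cast trick:

    #include <stdio.h>

    /* Same predicate as the patched free_reserved_area(). */
    static const char *poison_action(int poison)
    {
            return ((unsigned int)poison <= 0xFF) ? "memset" : "skip";
    }

    int main(void)
    {
            printf("%4d -> %s\n", -1, poison_action(-1));     /* skip   */
            printf("%4d -> %s\n", 0, poison_action(0));       /* memset */
            printf("%4d -> %s\n", 0xCC, poison_action(0xCC)); /* memset */
            return 0;
    }
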
@@ -5232,10 +5259,62 @@ void free_highmem_page(struct page *page)
 {
        __free_reserved_page(page);
        totalram_pages++;
+       page_zone(page)->managed_pages++;
        totalhigh_pages++;
 }
 #endif
 
+
+void __init mem_init_print_info(const char *str)
+{
+       unsigned long physpages, codesize, datasize, rosize, bss_size;
+       unsigned long init_code_size, init_data_size;
+
+       physpages = get_num_physpages();
+       codesize = _etext - _stext;
+       datasize = _edata - _sdata;
+       rosize = __end_rodata - __start_rodata;
+       bss_size = __bss_stop - __bss_start;
+       init_data_size = __init_end - __init_begin;
+       init_code_size = _einittext - _sinittext;
+
+       /*
+        * Detect special cases and adjust section sizes accordingly:
+        * 1) .init.* may be embedded into .data sections
+        * 2) .init.text.* may be out of [__init_begin, __init_end],
+        *    please refer to arch/tile/kernel/vmlinux.lds.S.
+        * 3) .rodata.* may be embedded into .text or .data sections.
+        */
+#define adj_init_size(start, end, size, pos, adj) \
+       if (start <= pos && pos < end && size > adj) \
+               size -= adj;
+
+       adj_init_size(__init_begin, __init_end, init_data_size,
+                    _sinittext, init_code_size);
+       adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
+       adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size);
+       adj_init_size(_stext, _etext, codesize, __start_rodata, rosize);
+       adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize);
+
+#undef adj_init_size
+
+       printk("Memory: %luK/%luK available "
+              "(%luK kernel code, %luK rwdata, %luK rodata, "
+              "%luK init, %luK bss, %luK reserved"
+#ifdef CONFIG_HIGHMEM
+              ", %luK highmem"
+#endif
+              "%s%s)\n",
+              nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
+              codesize >> 10, datasize >> 10, rosize >> 10,
+              (init_data_size + init_code_size) >> 10, bss_size >> 10,
+              (physpages - totalram_pages) << (PAGE_SHIFT-10),
+#ifdef CONFIG_HIGHMEM
+              totalhigh_pages << (PAGE_SHIFT-10),
+#endif
+              str ? ", " : "", str ? str : "");
+}
+
 /**
  * set_dma_reserve - set the specified number of pages reserved in the first zone
  * @new_dma_reserve: The number of pages to mark reserved
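
The adj_init_size() macro in mem_init_print_info() exists because linker sections can nest: if a region such as .init.text starts inside another region being measured (say [_stext, _etext)), its bytes would otherwise be counted twice. A worked example with invented addresses (the demo wraps the macro in do/while for standalone safety; the patch uses a bare if, which works because every use ends with a semicolon):

    #include <stdio.h>

    /* Mirrors adj_init_size(): if [pos, pos+adj) begins inside
     * [start, end), those bytes were double-counted, so subtract. */
    #define adj_init_size(start, end, size, pos, adj)                         \
            do {                                                              \
                    if ((start) <= (pos) && (pos) < (end) && (size) > (adj))  \
                            (size) -= (adj);                                  \
            } while (0)

    int main(void)
    {
            /* Invented layout: .init.text placed inside .text. */
            unsigned long _stext = 0x1000, _etext = 0x9000;
            unsigned long _sinittext = 0x8000;
            unsigned long codesize = _etext - _stext; /* includes init code */
            unsigned long init_code_size = 0x800;

            adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
            printf("codesize = %#lx\n", codesize); /* 0x7800 */
            return 0;
    }
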
@@ -5511,14 +5590,21 @@ static void __meminit setup_per_zone_inactive_ratio(void)
 int __meminit init_per_zone_wmark_min(void)
 {
        unsigned long lowmem_kbytes;
+       int new_min_free_kbytes;
 
        lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
-
-       min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
-       if (min_free_kbytes < 128)
-               min_free_kbytes = 128;
-       if (min_free_kbytes > 65536)
-               min_free_kbytes = 65536;
+       new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
+
+       if (new_min_free_kbytes > user_min_free_kbytes) {
+               min_free_kbytes = new_min_free_kbytes;
+               if (min_free_kbytes < 128)
+                       min_free_kbytes = 128;
+               if (min_free_kbytes > 65536)
+                       min_free_kbytes = 65536;
+       } else {
+               pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
+                               new_min_free_kbytes, user_min_free_kbytes);
+       }
        setup_per_zone_wmarks();
        refresh_zone_stat_thresholds();
        setup_per_zone_lowmem_reserve();
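
The heuristic itself is unchanged: min_free_kbytes is roughly sqrt(lowmem_kbytes * 16), clamped to [128, 65536]; what is new is that the result is only applied when it exceeds a user-supplied value. A worked calculation for a machine with about 4 GiB of lowmem (libm's sqrt() standing in for the kernel's integer int_sqrt()):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
            /* ~4 GiB of lowmem, expressed in KiB. */
            unsigned long lowmem_kbytes = 4UL * 1024 * 1024;
            long new_min = (long)sqrt((double)lowmem_kbytes * 16); /* 8192 */

            /* Same clamping as init_per_zone_wmark_min(). */
            if (new_min < 128)
                    new_min = 128;
            if (new_min > 65536)
                    new_min = 65536;
            printf("auto min_free_kbytes = %ld\n", new_min);
            return 0;
    }

(Build with -lm.)
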
@@ -5536,8 +5622,10 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, buffer, length, ppos);
-       if (write)
+       if (write) {
+               user_min_free_kbytes = min_free_kbytes;
                setup_per_zone_wmarks();
+       }
        return 0;
 }