Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
[linux-2.6-microblaze.git] / mm / vmscan.c
index e36d766..eb3dd37 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/oom.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
+#include <linux/dax.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -411,7 +412,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
        struct shrinker *shrinker;
        unsigned long freed = 0;
 
-       if (memcg && !memcg_kmem_is_active(memcg))
+       if (memcg && !memcg_kmem_online(memcg))
                return 0;
 
        if (nr_scanned == 0)
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * inode reclaim needs to empty out the radix tree or
                 * the nodes are lost.  Don't plant shadows behind its
                 * back.
+                *
+                * We also don't store shadows for DAX mappings because the
+                * only page cache pages found in these are zero pages
+                * covering holes, and because we don't want to mix DAX
+                * exceptional entries and shadow exceptional entries in the
+                * same page_tree.
                 */
                if (reclaimed && page_is_file_cache(page) &&
-                   !mapping_exiting(mapping))
+                   !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow, memcg);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -906,6 +913,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                int may_enter_fs;
                enum page_references references = PAGEREF_RECLAIM_CLEAN;
                bool dirty, writeback;
+               bool lazyfree = false;
+               int ret = SWAP_SUCCESS;
 
                cond_resched();
 
@@ -1049,6 +1058,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                goto keep_locked;
                        if (!add_to_swap(page, page_list))
                                goto activate_locked;
+                       lazyfree = true;
                        may_enter_fs = 1;
 
                        /* Adding to swap updated mapping */
@@ -1060,14 +1070,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * processes. Try to unmap it here.
                 */
                if (page_mapped(page) && mapping) {
-                       switch (try_to_unmap(page,
-                                       ttu_flags|TTU_BATCH_FLUSH)) {
+                       switch (ret = try_to_unmap(page, lazyfree ?
+                               (ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
+                               (ttu_flags | TTU_BATCH_FLUSH))) {
                        case SWAP_FAIL:
                                goto activate_locked;
                        case SWAP_AGAIN:
                                goto keep_locked;
                        case SWAP_MLOCK:
                                goto cull_mlocked;
+                       case SWAP_LZFREE:
+                               goto lazyfree;
                        case SWAP_SUCCESS:
                                ; /* try to free the page below */
                        }
@@ -1174,6 +1187,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        }
                }
 
+lazyfree:
                if (!mapping || !__remove_mapping(mapping, page, true))
                        goto keep_locked;
 
@@ -1184,8 +1198,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * we obviously don't have to worry about waking up a process
                 * waiting on the page lock, because there are no references.
                 */
-               __clear_page_locked(page);
+               __ClearPageLocked(page);
 free_it:
+               if (ret == SWAP_LZFREE)
+                       count_vm_event(PGLAZYFREED);
+
                nr_reclaimed++;
 
                /*
@@ -1204,7 +1221,7 @@ cull_mlocked:
 
 activate_locked:
                /* Not a candidate for swapping, so reclaim swap space. */
-               if (PageSwapCache(page) && vm_swap_full())
+               if (PageSwapCache(page) && mem_cgroup_swap_full(page))
                        try_to_free_swap(page);
                VM_BUG_ON_PAGE(PageActive(page), page);
                SetPageActive(page);
@@ -1426,6 +1443,7 @@ int isolate_lru_page(struct page *page)
        int ret = -EBUSY;
 
        VM_BUG_ON_PAGE(!page_count(page), page);
+       VM_BUG_ON_PAGE(PageTail(page), page);
 
        if (PageLRU(page)) {
                struct zone *zone = page_zone(page);
@@ -1955,10 +1973,11 @@ enum scan_balance {
  * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
-static void get_scan_count(struct lruvec *lruvec, int swappiness,
+static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
                           struct scan_control *sc, unsigned long *nr,
                           unsigned long *lru_pages)
 {
+       int swappiness = mem_cgroup_swappiness(memcg);
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        u64 fraction[2];
        u64 denominator = 0;    /* gcc */
@@ -1985,14 +2004,14 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness,
        if (current_is_kswapd()) {
                if (!zone_reclaimable(zone))
                        force_scan = true;
-               if (!mem_cgroup_lruvec_online(lruvec))
+               if (!mem_cgroup_online(memcg))
                        force_scan = true;
        }
        if (!global_reclaim(sc))
                force_scan = true;
 
        /* If we have no swap space, do not bother scanning anon pages. */
-       if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
+       if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) {
                scan_balance = SCAN_FILE;
                goto out;
        }
@@ -2182,9 +2201,10 @@ static inline void init_tlb_ubc(void)
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
-                         struct scan_control *sc, unsigned long *lru_pages)
+static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
+                             struct scan_control *sc, unsigned long *lru_pages)
 {
+       struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
        unsigned long nr[NR_LRU_LISTS];
        unsigned long targets[NR_LRU_LISTS];
        unsigned long nr_to_scan;
@@ -2194,7 +2214,7 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
        struct blk_plug plug;
        bool scan_adjusted;
 
-       get_scan_count(lruvec, swappiness, sc, nr, lru_pages);
+       get_scan_count(lruvec, memcg, sc, nr, lru_pages);
 
        /* Record the original scan target for proportional adjustments later */
        memcpy(targets, nr, sizeof(nr));
@@ -2396,9 +2416,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                memcg = mem_cgroup_iter(root, NULL, &reclaim);
                do {
                        unsigned long lru_pages;
+                       unsigned long reclaimed;
                        unsigned long scanned;
-                       struct lruvec *lruvec;
-                       int swappiness;
 
                        if (mem_cgroup_low(root, memcg)) {
                                if (!sc->may_thrash)
@@ -2406,11 +2425,10 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                                mem_cgroup_events(memcg, MEMCG_LOW, 1);
                        }
 
-                       lruvec = mem_cgroup_zone_lruvec(zone, memcg);
-                       swappiness = mem_cgroup_swappiness(memcg);
+                       reclaimed = sc->nr_reclaimed;
                        scanned = sc->nr_scanned;
 
-                       shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
+                       shrink_zone_memcg(zone, memcg, sc, &lru_pages);
                        zone_lru_pages += lru_pages;
 
                        if (memcg && is_classzone)
@@ -2418,6 +2436,11 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                                            memcg, sc->nr_scanned - scanned,
                                            lru_pages);
 
+                       /* Record the group's reclaim efficiency */
+                       vmpressure(sc->gfp_mask, memcg, false,
+                                  sc->nr_scanned - scanned,
+                                  sc->nr_reclaimed - reclaimed);
+
                        /*
                         * Direct reclaim and kswapd have to scan all memory
                         * cgroups to fulfill the overall scan target for the
@@ -2449,7 +2472,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                        reclaim_state->reclaimed_slab = 0;
                }
 
-               vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
+               /* Record the subtree's reclaim efficiency */
+               vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
                           sc->nr_scanned - nr_scanned,
                           sc->nr_reclaimed - nr_reclaimed);
 
@@ -2874,8 +2898,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                .may_unmap = 1,
                .may_swap = !noswap,
        };
-       struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
-       int swappiness = mem_cgroup_swappiness(memcg);
        unsigned long lru_pages;
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2892,7 +2914,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         * will pick up pages from other mem cgroup's as well. We hack
         * the priority and make it zero.
         */
-       shrink_lruvec(lruvec, swappiness, &sc, &lru_pages);
+       shrink_zone_memcg(zone, memcg, &sc, &lru_pages);
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);