remap_file_pages: Use vma_lookup() instead of find_vma()
linux-2.6-microblaze.git: mm/vmscan.c

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4620df6..6c401b4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -100,9 +100,12 @@ struct scan_control {
        unsigned int may_swap:1;
 
        /*
-        * Cgroups are not reclaimed below their configured memory.low,
-        * unless we threaten to OOM. If any cgroups are skipped due to
-        * memory.low and nothing was reclaimed, go back for memory.low.
+        * Cgroup memory below memory.low is protected as long as we
+        * don't threaten to OOM. If any cgroup is reclaimed at
+        * reduced force or passed over entirely due to its memory.low
+        * setting (memcg_low_skipped), and nothing is reclaimed as a
+        * result, then go back for one more cycle that reclaims the protected
+        * memory (memcg_low_reclaim) to avert OOM.
         */
        unsigned int memcg_low_reclaim:1;
        unsigned int memcg_low_skipped:1;
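Note: the two bits above work as a pair. shrink_node_memcgs() sets memcg_low_skipped whenever a cgroup is throttled or passed over because of memory.low, and the top-level loop in do_try_to_free_pages() converts that into one retry pass with memcg_low_reclaim set before OOM is considered. A sketch of that retry site (surrounding priority handling elided):

	/* Untapped cgroup reserves?  Don't OOM, retry. */
	if (sc->memcg_low_skipped) {
		sc->priority = initial_priority;
		sc->memcg_low_reclaim = 1;
		sc->memcg_low_skipped = 0;
		goto retry;
	}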
@@ -1049,14 +1052,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 static int __remove_mapping(struct address_space *mapping, struct page *page,
                            bool reclaimed, struct mem_cgroup *target_memcg)
 {
-       unsigned long flags;
        int refcount;
        void *shadow = NULL;
 
        BUG_ON(!PageLocked(page));
        BUG_ON(mapping != page_mapping(page));
 
-       xa_lock_irqsave(&mapping->i_pages, flags);
+       xa_lock_irq(&mapping->i_pages);
        /*
         * The non racy check for a busy page.
         *
@@ -1097,7 +1099,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                if (reclaimed && !mapping_exiting(mapping))
                        shadow = workingset_eviction(page, target_memcg);
                __delete_from_swap_cache(page, swap, shadow);
-               xa_unlock_irqrestore(&mapping->i_pages, flags);
+               xa_unlock_irq(&mapping->i_pages);
                put_swap_page(page, swap);
        } else {
                void (*freepage)(struct page *);
@@ -1123,7 +1125,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                    !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(page, target_memcg);
                __delete_from_page_cache(page, shadow);
-               xa_unlock_irqrestore(&mapping->i_pages, flags);
+               xa_unlock_irq(&mapping->i_pages);
 
                if (freepage != NULL)
                        freepage(page);
@@ -1132,7 +1134,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
        return 1;
 
 cannot_free:
-       xa_unlock_irqrestore(&mapping->i_pages, flags);
+       xa_unlock_irq(&mapping->i_pages);
        return 0;
 }
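Note: all four conversions in this function are the same change. __remove_mapping() is only entered from reclaim context with interrupts enabled, so saving and restoring the caller's IRQ state is wasted work, and the plain _irq variants make that assumption explicit. The two idioms side by side, as a pattern sketch rather than mm code:

	unsigned long flags;

	xa_lock_irqsave(&xa, flags);		/* safe in any context */
	xa_unlock_irqrestore(&xa, flags);	/* restores caller's IRQ state */

	xa_lock_irq(&xa);			/* requires IRQs enabled on entry */
	xa_unlock_irq(&xa);			/* unconditionally re-enables IRQs */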
 
@@ -2537,15 +2539,14 @@ out:
        for_each_evictable_lru(lru) {
                int file = is_file_lru(lru);
                unsigned long lruvec_size;
+               unsigned long low, min;
                unsigned long scan;
-               unsigned long protection;
 
                lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-               protection = mem_cgroup_protection(sc->target_mem_cgroup,
-                                                  memcg,
-                                                  sc->memcg_low_reclaim);
+               mem_cgroup_protection(sc->target_mem_cgroup, memcg,
+                                     &min, &low);
 
-               if (protection) {
+               if (min || low) {
                        /*
                         * Scale a cgroup's reclaim pressure by proportioning
                         * its current usage to its memory.low or memory.min
@@ -2576,6 +2577,15 @@ out:
                         * hard protection.
                         */
                        unsigned long cgroup_size = mem_cgroup_size(memcg);
+                       unsigned long protection;
+
+                       /* memory.low scaling, make sure we retry before OOM */
+                       if (!sc->memcg_low_reclaim && low > min) {
+                               protection = low;
+                               sc->memcg_low_skipped = 1;
+                       } else {
+                               protection = min;
+                       }
 
                        /* Avoid TOCTOU with earlier protection check */
                        cgroup_size = max(cgroup_size, protection);
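Note: mem_cgroup_protection() now returns memory.min and memory.low separately so the policy can live at the call site. On a normal pass the larger memory.low bound is used and memcg_low_skipped is recorded (guaranteeing the retry described earlier); on the memcg_low_reclaim retry only the hard memory.min floor is honored. The chosen protection then scales the scan target so that only usage in excess of it is reclaimed proportionally; the remainder of the hunk, not shown here, computes roughly:

	scan = lruvec_size - lruvec_size * protection /
					(cgroup_size + 1);

The max() clamp above feeds into this: if usage fell below the protection after the earlier skip check (the TOCTOU window the comment refers to), clamping cgroup_size keeps protection / (cgroup_size + 1) below one and the scan target non-negative.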
@@ -2887,6 +2897,12 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
        target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
 again:
+       /*
+        * Flush the memory cgroup stats, so that we read accurate per-memcg
+        * lruvec stats for heuristics.
+        */
+       mem_cgroup_flush_stats();
+
        memset(&sc->nr, 0, sizeof(sc->nr));
 
        nr_reclaimed = sc->nr_reclaimed;
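Note: memcg stats are accumulated in per-CPU batches and only folded into the aggregate counters periodically, so without a flush the heuristics below can act on values that trail reality by many updates. The main consumer is the refault detection that follows in shrink_node(), which reads per-memcg lruvec counters along these lines (a simplified sketch, not the exact code):

	refaults = lruvec_page_state(target_lruvec,
				     WORKINGSET_ACTIVATE_ANON);
	if (refaults != target_lruvec->refaults[0])
		sc->may_deactivate |= DEACTIVATE_ANON;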
@@ -3801,7 +3817,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 
        set_task_reclaim_state(current, &sc.reclaim_state);
        psi_memstall_enter(&pflags);
-       __fs_reclaim_acquire();
+       __fs_reclaim_acquire(_THIS_IP_);
 
        count_vm_event(PAGEOUTRUN);
 
@@ -3927,9 +3943,9 @@ restart:
                        wake_up_all(&pgdat->pfmemalloc_wait);
 
                /* Check if kswapd should be suspending */
-               __fs_reclaim_release();
+               __fs_reclaim_release(_THIS_IP_);
                ret = try_to_freeze();
-               __fs_reclaim_acquire();
+               __fs_reclaim_acquire(_THIS_IP_);
                if (ret || kthread_should_stop())
                        break;
 
@@ -3981,7 +3997,7 @@ out:
        }
 
        snapshot_refaults(NULL, pgdat);
-       __fs_reclaim_release();
+       __fs_reclaim_release(_THIS_IP_);
        psi_memstall_leave(&pflags);
        set_task_reclaim_state(current, NULL);
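Note: these wrappers grew an instruction-pointer argument so that lockdep splats blame the actual reclaim call site rather than the wrapper function; every caller now passes _THIS_IP_. The macro captures the address of the current code location with a local label (as defined in the kernel headers):

	#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })

That value is forwarded to lockdep's lock_acquire()/lock_release() in the same ip slot a direct spin_lock() call would fill with its own return address.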
 
@@ -4413,11 +4429,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
                .may_swap = 1,
                .reclaim_idx = gfp_zone(gfp_mask),
        };
+       unsigned long pflags;
 
        trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
                                           sc.gfp_mask);
 
        cond_resched();
+       psi_memstall_enter(&pflags);
        fs_reclaim_acquire(sc.gfp_mask);
        /*
         * We need to be able to allocate from the reserves for RECLAIM_UNMAP
@@ -4442,6 +4460,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        current->flags &= ~PF_SWAPWRITE;
        memalloc_noreclaim_restore(noreclaim_flag);
        fs_reclaim_release(sc.gfp_mask);
+       psi_memstall_leave(&pflags);
 
        trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
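Note: direct reclaim through __node_reclaim() was previously invisible to PSI, so time stalled here never showed up in /proc/pressure/memory; bracketing the work in a memstall section counts it like the other reclaim paths. The pairing pattern (pflags stashes the task's prior PF_MEMSTALL state, so sections nest safely):

	unsigned long pflags;

	psi_memstall_enter(&pflags);	/* mark task as stalled on memory */
	/* ... reclaim work ... */
	psi_memstall_leave(&pflags);	/* restore previous memstall state */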