mm: memcontrol: clean up memory.events counting function
[linux-2.6-microblaze.git] / mm / vmscan.c
index cfd2651..fbec74a 100644 (file)
@@ -972,7 +972,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                int may_enter_fs;
                enum page_references references = PAGEREF_RECLAIM_CLEAN;
                bool dirty, writeback;
-               int ret = SWAP_SUCCESS;
 
                cond_resched();
 
@@ -1145,15 +1144,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * processes. Try to unmap it here.
                 */
                if (page_mapped(page)) {
-                       switch (ret = try_to_unmap(page,
-                               ttu_flags | TTU_BATCH_FLUSH)) {
-                       case SWAP_FAIL:
+                       if (!try_to_unmap(page, ttu_flags | TTU_BATCH_FLUSH)) {
                                nr_unmap_fail++;
                                goto activate_locked;
-                       case SWAP_AGAIN:
-                               goto keep_locked;
-                       case SWAP_SUCCESS:
-                               ; /* try to free the page below */
                        }
                }
 
@@ -2013,6 +2006,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
  * Both inactive lists should also be large enough that each inactive
  * page has a chance to be referenced again before it is reclaimed.
  *
+ * If that fails and refaulting is observed, the inactive list grows.
+ *
  * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages
  * on this LRU, maintained by the pageout code. A zone->inactive_ratio
  * of 3 means 3:1 or 25% of the pages are kept on the inactive list.
@@ -2029,12 +2024,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
  *   10TB     320        32GB
  */
 static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
-                                               struct scan_control *sc, bool trace)
+                                struct mem_cgroup *memcg,
+                                struct scan_control *sc, bool actual_reclaim)
 {
-       unsigned long inactive_ratio;
-       unsigned long inactive, active;
-       enum lru_list inactive_lru = file * LRU_FILE;
        enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
+       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+       enum lru_list inactive_lru = file * LRU_FILE;
+       unsigned long inactive, active;
+       unsigned long inactive_ratio;
+       unsigned long refaults;
        unsigned long gb;
 
        /*
@@ -2047,27 +2045,43 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
        inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
        active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
 
-       gb = (inactive + active) >> (30 - PAGE_SHIFT);
-       if (gb)
-               inactive_ratio = int_sqrt(10 * gb);
+       if (memcg)
+               refaults = mem_cgroup_read_stat(memcg,
+                                               MEMCG_WORKINGSET_ACTIVATE);
        else
-               inactive_ratio = 1;
+               refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
+
+       /*
+        * When refaults are being observed, it means a new workingset
+        * is being established. Disable active list protection to get
+        * rid of the stale workingset quickly.
+        */
+       if (file && actual_reclaim && lruvec->refaults != refaults) {
+               inactive_ratio = 0;
+       } else {
+               gb = (inactive + active) >> (30 - PAGE_SHIFT);
+               if (gb)
+                       inactive_ratio = int_sqrt(10 * gb);
+               else
+                       inactive_ratio = 1;
+       }
 
-       if (trace)
-               trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
-                               sc->reclaim_idx,
-                               lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
-                               lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
-                               inactive_ratio, file);
+       if (actual_reclaim)
+               trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
+                       lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+                       lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+                       inactive_ratio, file);
 
        return inactive * inactive_ratio < active;
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-                                struct lruvec *lruvec, struct scan_control *sc)
+                                struct lruvec *lruvec, struct mem_cgroup *memcg,
+                                struct scan_control *sc)
 {
        if (is_active_lru(lru)) {
-               if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
+               if (inactive_list_is_low(lruvec, is_file_lru(lru),
+                                        memcg, sc, true))
                        shrink_active_list(nr_to_scan, lruvec, sc, lru);
                return 0;
        }
@@ -2176,7 +2190,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
         * lruvec even if it has plenty of old anonymous pages unless the
         * system is under heavy pressure.
         */
-       if (!inactive_list_is_low(lruvec, true, sc, false) &&
+       if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
            lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
                scan_balance = SCAN_FILE;
                goto out;
@@ -2327,7 +2341,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
                                nr[lru] -= nr_to_scan;
 
                                nr_reclaimed += shrink_list(lru, nr_to_scan,
-                                                           lruvec, sc);
+                                                           lruvec, memcg, sc);
                        }
                }
 
@@ -2394,7 +2408,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
         * Even if we did not try to evict anon pages at all, we want to
         * rebalance the anon lru active/inactive ratio.
         */
-       if (inactive_list_is_low(lruvec, false, sc, true))
+       if (inactive_list_is_low(lruvec, false, memcg, sc, true))
                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                   sc, LRU_ACTIVE_ANON);
 }
@@ -2512,7 +2526,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                        sc->memcg_low_skipped = 1;
                                        continue;
                                }
-                               mem_cgroup_events(memcg, MEMCG_LOW, 1);
+                               mem_cgroup_event(memcg, MEMCG_LOW);
                        }
 
                        reclaimed = sc->nr_reclaimed;
@@ -2710,6 +2724,26 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        sc->gfp_mask = orig_mask;
 }
 
+static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = mem_cgroup_iter(root_memcg, NULL, NULL);
+       do {
+               unsigned long refaults;
+               struct lruvec *lruvec;
+
+               if (memcg)
+                       refaults = mem_cgroup_read_stat(memcg,
+                                               MEMCG_WORKINGSET_ACTIVATE);
+               else
+                       refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
+
+               lruvec = mem_cgroup_lruvec(pgdat, memcg);
+               lruvec->refaults = refaults;
+       } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
+}
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -2730,6 +2764,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                          struct scan_control *sc)
 {
        int initial_priority = sc->priority;
+       pg_data_t *last_pgdat;
+       struct zoneref *z;
+       struct zone *zone;
 retry:
        delayacct_freepages_start();
 
@@ -2756,6 +2793,15 @@ retry:
                        sc->may_writepage = 1;
        } while (--sc->priority >= 0);
 
+       last_pgdat = NULL;
+       for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
+                                       sc->nodemask) {
+               if (zone->zone_pgdat == last_pgdat)
+                       continue;
+               last_pgdat = zone->zone_pgdat;
+               snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+       }
+
        delayacct_freepages_end();
 
        if (sc->nr_reclaimed)
@@ -3040,7 +3086,7 @@ static void age_active_anon(struct pglist_data *pgdat,
        do {
                struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 
-               if (inactive_list_is_low(lruvec, false, sc, true))
+               if (inactive_list_is_low(lruvec, false, memcg, sc, true))
                        shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                           sc, LRU_ACTIVE_ANON);
 
@@ -3287,6 +3333,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
                pgdat->kswapd_failures++;
 
 out:
+       snapshot_refaults(NULL, pgdat);
        /*
         * Return the order kswapd stopped reclaiming at as
         * prepare_kswapd_sleep() takes it into account. If another caller