Merge branch 'ptp-virtual-clocks-and-timestamping'
[linux-2.6-microblaze.git] / mm / vmstat.c
index cccee36..b0534e0 100644 (file)
@@ -31,8 +31,6 @@
 
 #include "internal.h"
 
-#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
-
 #ifdef CONFIG_NUMA
 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
 
@@ -41,11 +39,12 @@ static void zero_zone_numa_counters(struct zone *zone)
 {
        int item, cpu;
 
-       for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
-               atomic_long_set(&zone->vm_numa_stat[item], 0);
-               for_each_online_cpu(cpu)
-                       per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
+       for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
+               atomic_long_set(&zone->vm_numa_event[item], 0);
+               for_each_online_cpu(cpu) {
+                       per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
                                                = 0;
+               }
        }
 }
 
@@ -63,8 +62,8 @@ static void zero_global_numa_counters(void)
 {
        int item;
 
-       for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
-               atomic_long_set(&vm_numa_stat[item], 0);
+       for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+               atomic_long_set(&vm_numa_event[item], 0);
 }
 
 static void invalid_numa_statistics(void)
@@ -161,10 +160,9 @@ void vm_events_fold_cpu(int cpu)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
-atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
-EXPORT_SYMBOL(vm_numa_stat);
 EXPORT_SYMBOL(vm_node_stat);
 
 #ifdef CONFIG_SMP
@@ -266,7 +264,7 @@ void refresh_zone_stat_thresholds(void)
                for_each_online_cpu(cpu) {
                        int pgdat_threshold;
 
-                       per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+                       per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
                                                        = threshold;
 
                        /* Base nodestat threshold on the largest populated zone. */
@@ -303,7 +301,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
 
                threshold = (*calculate_pressure)(zone);
                for_each_online_cpu(cpu)
-                       per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+                       per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
                                                        = threshold;
        }
 }
@@ -316,7 +314,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                           long delta)
 {
-       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
        s8 __percpu *p = pcp->vm_stat_diff + item;
        long x;
        long t;
@@ -389,7 +387,7 @@ EXPORT_SYMBOL(__mod_node_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
        s8 __percpu *p = pcp->vm_stat_diff + item;
        s8 v, t;
 
@@ -435,7 +433,7 @@ EXPORT_SYMBOL(__inc_node_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
        s8 __percpu *p = pcp->vm_stat_diff + item;
        s8 v, t;
 
@@ -495,7 +493,7 @@ EXPORT_SYMBOL(__dec_node_page_state);
 static inline void mod_zone_state(struct zone *zone,
        enum zone_stat_item item, long delta, int overstep_mode)
 {
-       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
        s8 __percpu *p = pcp->vm_stat_diff + item;
        long o, n, t, z;
 
@@ -706,8 +704,7 @@ EXPORT_SYMBOL(dec_node_page_state);
  * Fold a differential into the global counters.
  * Returns the number of counters updated.
  */
-#ifdef CONFIG_NUMA
-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+static int fold_diff(int *zone_diff, int *node_diff)
 {
        int i;
        int changes = 0;
@@ -718,12 +715,6 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
                        changes++;
        }
 
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-               if (numa_diff[i]) {
-                       atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
-                       changes++;
-       }
-
        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
                if (node_diff[i]) {
                        atomic_long_add(node_diff[i], &vm_node_stat[i]);
@@ -731,26 +722,34 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
        }
        return changes;
 }
-#else
-static int fold_diff(int *zone_diff, int *node_diff)
+
+#ifdef CONFIG_NUMA
+static void fold_vm_zone_numa_events(struct zone *zone)
 {
-       int i;
-       int changes = 0;
+       unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
+       int cpu;
+       enum numa_stat_item item;
 
-       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (zone_diff[i]) {
-                       atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
-                       changes++;
-       }
+       for_each_online_cpu(cpu) {
+               struct per_cpu_zonestat *pzstats;
 
-       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-               if (node_diff[i]) {
-                       atomic_long_add(node_diff[i], &vm_node_stat[i]);
-                       changes++;
+               pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+               for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+                       zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
        }
-       return changes;
+
+       for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+               zone_numa_event_add(zone_numa_events[item], zone, item);
+}
+
+void fold_vm_numa_events(void)
+{
+       struct zone *zone;
+
+       for_each_populated_zone(zone)
+               fold_vm_zone_numa_events(zone);
 }
-#endif /* CONFIG_NUMA */
+#endif
 
 /*
  * Update the zone counters for the current cpu.
@@ -774,41 +773,30 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
        struct zone *zone;
        int i;
        int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
-       int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
        int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
        int changes = 0;
 
        for_each_populated_zone(zone) {
-               struct per_cpu_pageset __percpu *p = zone->pageset;
+               struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+#ifdef CONFIG_NUMA
+               struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
+#endif
 
                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
                        int v;
 
-                       v = this_cpu_xchg(p->vm_stat_diff[i], 0);
+                       v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
                        if (v) {
 
                                atomic_long_add(v, &zone->vm_stat[i]);
                                global_zone_diff[i] += v;
 #ifdef CONFIG_NUMA
                                /* 3 seconds idle till flush */
-                               __this_cpu_write(p->expire, 3);
+                               __this_cpu_write(pcp->expire, 3);
 #endif
                        }
                }
 #ifdef CONFIG_NUMA
-               for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
-                       int v;
-
-                       v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
-                       if (v) {
-
-                               atomic_long_add(v, &zone->vm_numa_stat[i]);
-                               global_numa_diff[i] += v;
-                               __this_cpu_write(p->expire, 3);
-                       }
-               }
 
                if (do_pagesets) {
                        cond_resched();
@@ -819,23 +807,23 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
                         * Check if there are pages remaining in this pageset
                         * if not then there is nothing to expire.
                         */
-                       if (!__this_cpu_read(p->expire) ||
-                              !__this_cpu_read(p->pcp.count))
+                       if (!__this_cpu_read(pcp->expire) ||
+                              !__this_cpu_read(pcp->count))
                                continue;
 
                        /*
                         * We never drain zones local to this processor.
                         */
                        if (zone_to_nid(zone) == numa_node_id()) {
-                               __this_cpu_write(p->expire, 0);
+                               __this_cpu_write(pcp->expire, 0);
                                continue;
                        }
 
-                       if (__this_cpu_dec_return(p->expire))
+                       if (__this_cpu_dec_return(pcp->expire))
                                continue;
 
-                       if (__this_cpu_read(p->pcp.count)) {
-                               drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+                       if (__this_cpu_read(pcp->count)) {
+                               drain_zone_pages(zone, this_cpu_ptr(pcp));
                                changes++;
                        }
                }
@@ -856,12 +844,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
                }
        }
 
-#ifdef CONFIG_NUMA
-       changes += fold_diff(global_zone_diff, global_numa_diff,
-                            global_node_diff);
-#else
        changes += fold_diff(global_zone_diff, global_node_diff);
-#endif
        return changes;
 }
 
@@ -876,36 +859,33 @@ void cpu_vm_stats_fold(int cpu)
        struct zone *zone;
        int i;
        int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
-       int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
        int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
        for_each_populated_zone(zone) {
-               struct per_cpu_pageset *p;
+               struct per_cpu_zonestat *pzstats;
 
-               p = per_cpu_ptr(zone->pageset, cpu);
+               pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
 
-               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-                       if (p->vm_stat_diff[i]) {
+               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+                       if (pzstats->vm_stat_diff[i]) {
                                int v;
 
-                               v = p->vm_stat_diff[i];
-                               p->vm_stat_diff[i] = 0;
+                               v = pzstats->vm_stat_diff[i];
+                               pzstats->vm_stat_diff[i] = 0;
                                atomic_long_add(v, &zone->vm_stat[i]);
                                global_zone_diff[i] += v;
                        }
-
+               }
 #ifdef CONFIG_NUMA
-               for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-                       if (p->vm_numa_stat_diff[i]) {
-                               int v;
+               for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+                       if (pzstats->vm_numa_event[i]) {
+                               unsigned long v;
 
-                               v = p->vm_numa_stat_diff[i];
-                               p->vm_numa_stat_diff[i] = 0;
-                               atomic_long_add(v, &zone->vm_numa_stat[i]);
-                               global_numa_diff[i] += v;
+                               v = pzstats->vm_numa_event[i];
+                               pzstats->vm_numa_event[i] = 0;
+                               zone_numa_event_add(v, zone, i);
                        }
+               }
 #endif
        }
 
@@ -925,58 +905,39 @@ void cpu_vm_stats_fold(int cpu)
                        }
        }
 
-#ifdef CONFIG_NUMA
-       fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
-#else
        fold_diff(global_zone_diff, global_node_diff);
-#endif
 }
 
 /*
  * this is only called if !populated_zone(zone), which implies no other users of
  * pset->vm_stat_diff[] exist.
  */
-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
+void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
 {
+       unsigned long v;
        int i;
 
-       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (pset->vm_stat_diff[i]) {
-                       int v = pset->vm_stat_diff[i];
-                       pset->vm_stat_diff[i] = 0;
-                       atomic_long_add(v, &zone->vm_stat[i]);
-                       atomic_long_add(v, &vm_zone_stat[i]);
+       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+               if (pzstats->vm_stat_diff[i]) {
+                       v = pzstats->vm_stat_diff[i];
+                       pzstats->vm_stat_diff[i] = 0;
+                       zone_page_state_add(v, zone, i);
                }
+       }
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-               if (pset->vm_numa_stat_diff[i]) {
-                       int v = pset->vm_numa_stat_diff[i];
-
-                       pset->vm_numa_stat_diff[i] = 0;
-                       atomic_long_add(v, &zone->vm_numa_stat[i]);
-                       atomic_long_add(v, &vm_numa_stat[i]);
+       for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+               if (pzstats->vm_numa_event[i]) {
+                       v = pzstats->vm_numa_event[i];
+                       pzstats->vm_numa_event[i] = 0;
+                       zone_numa_event_add(v, zone, i);
                }
+       }
 #endif
 }
 #endif
 
 #ifdef CONFIG_NUMA
-void __inc_numa_state(struct zone *zone,
-                                enum numa_stat_item item)
-{
-       struct per_cpu_pageset __percpu *pcp = zone->pageset;
-       u16 __percpu *p = pcp->vm_numa_stat_diff + item;
-       u16 v;
-
-       v = __this_cpu_inc_return(*p);
-
-       if (unlikely(v > NUMA_STATS_THRESHOLD)) {
-               zone_numa_state_add(v, zone, item);
-               __this_cpu_write(*p, 0);
-       }
-}
-
 /*
  * Determine the per node value of a stat item. This function
  * is called frequently in a NUMA machine, so try to be as
@@ -995,19 +956,16 @@ unsigned long sum_zone_node_page_state(int node,
        return count;
 }
 
-/*
- * Determine the per node value of a numa stat item. To avoid deviation,
- * the per cpu stat number in vm_numa_stat_diff[] is also included.
- */
-unsigned long sum_zone_numa_state(int node,
+/* Determine the per node value of a numa stat item. */
+unsigned long sum_zone_numa_event_state(int node,
                                 enum numa_stat_item item)
 {
        struct zone *zones = NODE_DATA(node)->node_zones;
-       int i;
        unsigned long count = 0;
+       int i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
-               count += zone_numa_state_snapshot(zones + i, item);
+               count += zone_numa_event_state(zones + i, item);
 
        return count;
 }
@@ -1686,28 +1644,30 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                           zone_page_state(zone, i));
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+       for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
                seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
-                          zone_numa_state_snapshot(zone, i));
+                          zone_numa_event_state(zone, i));
 #endif
 
        seq_printf(m, "\n  pagesets");
        for_each_online_cpu(i) {
-               struct per_cpu_pageset *pageset;
+               struct per_cpu_pages *pcp;
+               struct per_cpu_zonestat __maybe_unused *pzstats;
 
-               pageset = per_cpu_ptr(zone->pageset, i);
+               pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
                seq_printf(m,
                           "\n    cpu: %i"
                           "\n              count: %i"
                           "\n              high:  %i"
                           "\n              batch: %i",
                           i,
-                          pageset->pcp.count,
-                          pageset->pcp.high,
-                          pageset->pcp.batch);
+                          pcp->count,
+                          pcp->high,
+                          pcp->batch);
 #ifdef CONFIG_SMP
+               pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
                seq_printf(m, "\n  vm stats threshold: %d",
-                               pageset->stat_threshold);
+                               pzstats->stat_threshold);
 #endif
        }
        seq_printf(m,
@@ -1740,7 +1700,7 @@ static const struct seq_operations zoneinfo_op = {
 };
 
 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
-                        NR_VM_NUMA_STAT_ITEMS + \
+                        NR_VM_NUMA_EVENT_ITEMS + \
                         NR_VM_NODE_STAT_ITEMS + \
                         NR_VM_WRITEBACK_STAT_ITEMS + \
                         (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
@@ -1755,6 +1715,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
                return NULL;
 
        BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+       fold_vm_numa_events();
        v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
        m->private = v;
        if (!v)
@@ -1764,9 +1725,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
        v += NR_VM_ZONE_STAT_ITEMS;
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-               v[i] = global_numa_state(i);
-       v += NR_VM_NUMA_STAT_ITEMS;
+       for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
+               v[i] = global_numa_event_state(i);
+       v += NR_VM_NUMA_EVENT_ITEMS;
 #endif
 
        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
@@ -1927,19 +1888,16 @@ static bool need_update(int cpu)
        struct zone *zone;
 
        for_each_populated_zone(zone) {
-               struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+               struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
                struct per_cpu_nodestat *n;
+
                /*
                 * The fast way of checking if there are any vmstat diffs.
                 */
-               if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
-                              sizeof(p->vm_stat_diff[0])))
-                       return true;
-#ifdef CONFIG_NUMA
-               if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
-                              sizeof(p->vm_numa_stat_diff[0])))
+               if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
+                              sizeof(pzstats->vm_stat_diff[0])))
                        return true;
-#endif
+
                if (last_pgdat == zone->zone_pgdat)
                        continue;
                last_pgdat = zone->zone_pgdat;