[PATCH] Light weight event counters

author Christoph Lameter <clameter@sgi.com>

Fri, 30 Jun 2006 08:55:45 +0000 (01:55 -0700)

committer Linus Torvalds <torvalds@g5.osdl.org>

Fri, 30 Jun 2006 18:25:36 +0000 (11:25 -0700)
author Christoph Lameter <clameter@sgi.com>
Fri, 30 Jun 2006 08:55:45 +0000 (01:55 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Fri, 30 Jun 2006 18:25:36 +0000 (11:25 -0700)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c

index 61bc446..2476ca7 100644 (file)
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -766,7 +766,6 @@ unsigned long nr_iowait(void)
  #endif /* MODULE */
  EXPORT_SYMBOL_GPL(si_swapinfo);
  EXPORT_SYMBOL_GPL(nr_threads);
-EXPORT_SYMBOL_GPL(get_full_page_state);
  EXPORT_SYMBOL_GPL(nr_running);
  EXPORT_SYMBOL_GPL(nr_iowait);
  //EXPORT_SYMBOL_GPL(nr_context_switches);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c

index 180ba79..4811e2d 100644 (file)
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -107,21 +107,21 @@ static void appldata_get_mem_data(void *data)
          * serialized through the appldata_ops_lock and can use static
          */
         static struct sysinfo val;
-       static struct page_state ps;
+       unsigned long ev[NR_VM_EVENT_ITEMS];
         struct appldata_mem_data *mem_data;
  
         mem_data = data;
         mem_data->sync_count_1++;
  
-       get_full_page_state(&ps);
-       mem_data->pgpgin     = ps.pgpgin >> 1;
-       mem_data->pgpgout    = ps.pgpgout >> 1;
-       mem_data->pswpin     = ps.pswpin;
-       mem_data->pswpout    = ps.pswpout;
-       mem_data->pgalloc    = ps.pgalloc_high + ps.pgalloc_normal +
-                              ps.pgalloc_dma;
-       mem_data->pgfault    = ps.pgfault;
-       mem_data->pgmajfault = ps.pgmajfault;
+       all_vm_events(ev);
+       mem_data->pgpgin     = ev[PGPGIN] >> 1;
+       mem_data->pgpgout    = ev[PGPGOUT] >> 1;
+       mem_data->pswpin     = ev[PSWPIN];
+       mem_data->pswpout    = ev[PSWPOUT];
+       mem_data->pgalloc    = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] +
+                              ev[PGALLOC_DMA];
+       mem_data->pgfault    = ev[PGFAULT];
+       mem_data->pgmajfault = ev[PGMAJFAULT];
  
         si_meminfo(&val);
         mem_data->sharedram = val.sharedram;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c

index eee03a3..fb83547 100644 (file)
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3117,9 +3117,9 @@ void submit_bio(int rw, struct bio *bio)
         BIO_BUG_ON(!bio->bi_io_vec);
         bio->bi_rw |= rw;
         if (rw & WRITE)
-               mod_page_state(pgpgout, count);
+               count_vm_events(PGPGOUT, count);
         else
-               mod_page_state(pgpgin, count);
+               count_vm_events(PGPGIN, count);
  
         if (unlikely(block_dump)) {
                 char b[BDEVNAME_SIZE];
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c

index 298f2dd..d7024c7 100644 (file)
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -411,16 +411,17 @@ static __inline__ int led_get_net_activity(void)
  static __inline__ int led_get_diskio_activity(void)
  {      
         static unsigned long last_pgpgin, last_pgpgout;
-       struct page_state pgstat;
+       unsigned long events[NR_VM_EVENT_ITEMS];
         int changed;
  
-       get_full_page_state(&pgstat); /* get no of sectors in & out */
+       all_vm_events(events);
  
         /* Just use a very simple calculation here. Do not care about overflow,
            since we only want to know if there was activity or not. */
-       changed = (pgstat.pgpgin != last_pgpgin) || (pgstat.pgpgout != last_pgpgout);
-       last_pgpgin  = pgstat.pgpgin;
-       last_pgpgout = pgstat.pgpgout;
+       changed = (events[PGPGIN] != last_pgpgin) ||
+                 (events[PGPGOUT] != last_pgpgout);
+       last_pgpgin  = events[PGPGIN];
+       last_pgpgout = events[PGPGOUT];
  
         return (changed ? LED_DISK_IO : 0);
  }
diff --git a/fs/inode.c b/fs/inode.c

index f42961e..14a6c41 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -452,15 +452,14 @@ static void prune_icache(int nr_to_scan)
                 nr_pruned++;
         }
         inodes_stat.nr_unused -= nr_pruned;
+       if (current_is_kswapd())
+               __count_vm_events(KSWAPD_INODESTEAL, reap);
+       else
+               __count_vm_events(PGINODESTEAL, reap);
         spin_unlock(&inode_lock);
  
         dispose_list(&freeable);
         mutex_unlock(&iprune_mutex);
-
-       if (current_is_kswapd())
-               mod_page_state(kswapd_inodesteal, reap);
-       else
-               mod_page_state(pginodesteal, reap);
  }
  
  /*
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c

index 52d60c3..e7d5a30 100644 (file)
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -93,7 +93,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
          */
         if (type)
                 *type = VM_FAULT_MAJOR;
-       inc_page_state(pgmajfault);
+       count_vm_event(PGMAJFAULT);
         return page;
  }
  
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h

index 16173b6..3e0daf5 100644 (file)
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -7,115 +7,77 @@
  #include <linux/mmzone.h>
  #include <asm/atomic.h>
  
+#ifdef CONFIG_VM_EVENT_COUNTERS
  /*
- * Global page accounting.  One instance per CPU.  Only unsigned longs are
- * allowed.
+ * Light weight per cpu counter implementation.
   *
- * - Fields can be modified with xxx_page_state and xxx_page_state_zone at
- * any time safely (which protects the instance from modification by
- * interrupt.
- * - The __xxx_page_state variants can be used safely when interrupts are
- * disabled.
- * - The __xxx_page_state variants can be used if the field is only
- * modified from process context and protected from preemption, or only
- * modified from interrupt context.  In this case, the field should be
- * commented here.
+ * Counters should only be incremented and no critical kernel component
+ * should rely on the counter values.
+ *
+ * Counters are handled completely inline. On many platforms the code
+ * generated will simply be the increment of a global address.
   */
-struct page_state {
-       unsigned long pgpgin;           /* Disk reads */
-       unsigned long pgpgout;          /* Disk writes */
-       unsigned long pswpin;           /* swap reads */
-       unsigned long pswpout;          /* swap writes */
-
-       unsigned long pgalloc_high;     /* page allocations */
-       unsigned long pgalloc_normal;
-       unsigned long pgalloc_dma32;
-       unsigned long pgalloc_dma;
-
-       unsigned long pgfree;           /* page freeings */
-       unsigned long pgactivate;       /* pages moved inactive->active */
-       unsigned long pgdeactivate;     /* pages moved active->inactive */
-
-       unsigned long pgfault;          /* faults (major+minor) */
-       unsigned long pgmajfault;       /* faults (major only) */
-
-       unsigned long pgrefill_high;    /* inspected in refill_inactive_zone */
-       unsigned long pgrefill_normal;
-       unsigned long pgrefill_dma32;
-       unsigned long pgrefill_dma;
-
-       unsigned long pgsteal_high;     /* total highmem pages reclaimed */
-       unsigned long pgsteal_normal;
-       unsigned long pgsteal_dma32;
-       unsigned long pgsteal_dma;
-
-       unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
-       unsigned long pgscan_kswapd_normal;
-       unsigned long pgscan_kswapd_dma32;
-       unsigned long pgscan_kswapd_dma;
-
-       unsigned long pgscan_direct_high;/* total highmem pages scanned */
-       unsigned long pgscan_direct_normal;
-       unsigned long pgscan_direct_dma32;
-       unsigned long pgscan_direct_dma;
-
-       unsigned long pginodesteal;     /* pages reclaimed via inode freeing */
-       unsigned long slabs_scanned;    /* slab objects scanned */
-       unsigned long kswapd_steal;     /* pages reclaimed by kswapd */
-       unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
-       unsigned long pageoutrun;       /* kswapd's calls to page reclaim */
-       unsigned long allocstall;       /* direct reclaim calls */
-
-       unsigned long pgrotated;        /* pages rotated to tail of the LRU */
+
+#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+
+enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
+               FOR_ALL_ZONES(PGALLOC),
+               PGFREE, PGACTIVATE, PGDEACTIVATE,
+               PGFAULT, PGMAJFAULT,
+               FOR_ALL_ZONES(PGREFILL),
+               FOR_ALL_ZONES(PGSTEAL),
+               FOR_ALL_ZONES(PGSCAN_KSWAPD),
+               FOR_ALL_ZONES(PGSCAN_DIRECT),
+               PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+               PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+               NR_VM_EVENT_ITEMS
+};
+
+struct vm_event_state {
+       unsigned long event[NR_VM_EVENT_ITEMS];
  };
  
-extern void get_full_page_state(struct page_state *ret);
-extern void mod_page_state_offset(unsigned long offset, unsigned long delta);
-extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
-
-#define mod_page_state(member, delta)  \
-       mod_page_state_offset(offsetof(struct page_state, member), (delta))
-
-#define __mod_page_state(member, delta)        \
-       __mod_page_state_offset(offsetof(struct page_state, member), (delta))
-
-#define inc_page_state(member)         mod_page_state(member, 1UL)
-#define dec_page_state(member)         mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta)   mod_page_state(member, (delta))
-#define sub_page_state(member,delta)   mod_page_state(member, 0UL - (delta))
-
-#define __inc_page_state(member)       __mod_page_state(member, 1UL)
-#define __dec_page_state(member)       __mod_page_state(member, 0UL - 1)
-#define __add_page_state(member,delta) __mod_page_state(member, (delta))
-#define __sub_page_state(member,delta) __mod_page_state(member, 0UL - (delta))
-
-#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
-
-#define state_zone_offset(zone, member)                                        \
-({                                                                     \
-       unsigned offset;                                                \
-       if (is_highmem(zone))                                           \
-               offset = offsetof(struct page_state, member##_high);    \
-       else if (is_normal(zone))                                       \
-               offset = offsetof(struct page_state, member##_normal);  \
-       else if (is_dma32(zone))                                        \
-               offset = offsetof(struct page_state, member##_dma32);   \
-       else                                                            \
-               offset = offsetof(struct page_state, member##_dma);     \
-       offset;                                                         \
-})
-
-#define __mod_page_state_zone(zone, member, delta)                     \
- do {                                                                  \
-       __mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
- } while (0)
-
-#define mod_page_state_zone(zone, member, delta)                       \
- do {                                                                  \
-       mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
- } while (0)
-
-DECLARE_PER_CPU(struct page_state, page_states);
+DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+
+static inline void __count_vm_event(enum vm_event_item item)
+{
+       __get_cpu_var(vm_event_states.event[item])++;
+}
+
+static inline void count_vm_event(enum vm_event_item item)
+{
+       get_cpu_var(vm_event_states.event[item])++;
+       put_cpu();
+}
+
+static inline void __count_vm_events(enum vm_event_item item, long delta)
+{
+       __get_cpu_var(vm_event_states.event[item]) += delta;
+}
+
+static inline void count_vm_events(enum vm_event_item item, long delta)
+{
+       get_cpu_var(vm_event_states.event[item]) += delta; /* add all of delta */
+       put_cpu();
+}
+
+extern void all_vm_events(unsigned long *);
+extern void vm_events_fold_cpu(int cpu);
+
+#else
+
+/* Disable counters */
+#define get_cpu_vm_events(e)   0L
+#define count_vm_event(e)      do { } while (0)
+#define count_vm_events(e,d)   do { } while (0)
+#define __count_vm_event(e)    do { } while (0)
+#define __count_vm_events(e,d) do { } while (0)
+#define vm_events_fold_cpu(x)  do { } while (0)
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
+#define __count_zone_vm_events(item, zone, delta) \
+                       __count_vm_events(item##_DMA + zone_idx(zone), delta)
  
  /*
   * Zone based page accounting with per cpu differentials.
diff --git a/init/Kconfig b/init/Kconfig

index f70f2fd..f515948 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -379,6 +379,15 @@ config SLAB
           SLOB is more space efficient but does not scale well and is
           more susceptible to fragmentation.
  
+config VM_EVENT_COUNTERS
+       default y
+       bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
+       help
+         VM event counters are only needed for event counts to be
+         shown. They have no function for the kernel itself. This
+         option allows the disabling of the VM event counters.
+         /proc/vmstat will only show page counts.
+
  endmenu                # General setup
  
  config TINY_SHMEM
diff --git a/mm/filemap.c b/mm/filemap.c

index 87d62c4..796a547 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1416,7 +1416,7 @@ retry_find:
                  */
                 if (!did_readaround) {
                         majmin = VM_FAULT_MAJOR;
-                       inc_page_state(pgmajfault);
+                       count_vm_event(PGMAJFAULT);
                 }
                 did_readaround = 1;
                 ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1487,7 +1487,7 @@ no_cached_page:
  page_not_uptodate:
         if (!did_readaround) {
                 majmin = VM_FAULT_MAJOR;
-               inc_page_state(pgmajfault);
+               count_vm_event(PGMAJFAULT);
         }
         lock_page(page);
  
diff --git a/mm/memory.c b/mm/memory.c

index 1a78791..7e2a4b1 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1951,7 +1951,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
  
                 /* Had to read the page from swap area: Major fault */
                 ret = VM_FAULT_MAJOR;
-               inc_page_state(pgmajfault);
+               count_vm_event(PGMAJFAULT);
                 grab_swap_token();
         }
  
@@ -2324,7 +2324,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  
         __set_current_state(TASK_RUNNING);
  
-       inc_page_state(pgfault);
+       count_vm_event(PGFAULT);
  
         if (unlikely(is_vm_hugetlb_page(vma)))
                 return hugetlb_fault(mm, vma, address, write_access);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index d616712..30b0b97 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -456,7 +456,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
  
         kernel_map_pages(page, 1 << order, 0);
         local_irq_save(flags);
-       __mod_page_state(pgfree, 1 << order);
+       __count_vm_events(PGFREE, 1 << order);
         free_one_page(page_zone(page), page, order);
         local_irq_restore(flags);
  }
@@ -729,7 +729,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
  
         pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
         local_irq_save(flags);
-       __inc_page_state(pgfree);
+       __count_vm_event(PGFREE);
         list_add(&page->lru, &pcp->list);
         pcp->count++;
         if (pcp->count >= pcp->high) {
@@ -805,7 +805,7 @@ again:
                         goto failed;
         }
  
-       __mod_page_state_zone(zone, pgalloc, 1 << order);
+       __count_zone_vm_events(PGALLOC, zone, 1 << order);
         zone_statistics(zonelist, zone);
         local_irq_restore(flags);
         put_cpu();
@@ -2101,24 +2101,11 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
                                  unsigned long action, void *hcpu)
  {
         int cpu = (unsigned long)hcpu;
-       unsigned long *src, *dest;
  
         if (action == CPU_DEAD) {
-               int i;
-
                 local_irq_disable();
                 __drain_pages(cpu);
-
-               /* Add dead cpu's page_states to our own. */
-               dest = (unsigned long *)&__get_cpu_var(page_states);
-               src = (unsigned long *)&per_cpu(page_states, cpu);
-
-               for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
-                               i++) {
-                       dest[i] += src[i];
-                       src[i] = 0;
-               }
-
+               vm_events_fold_cpu(cpu);
                 local_irq_enable();
                 refresh_cpu_vm_stats(cpu);
         }
diff --git a/mm/page_io.c b/mm/page_io.c

index bb2b0d5..8802994 100644 (file)
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -101,7 +101,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
         }
         if (wbc->sync_mode == WB_SYNC_ALL)
                 rw |= (1 << BIO_RW_SYNC);
-       inc_page_state(pswpout);
+       count_vm_event(PSWPOUT);
         set_page_writeback(page);
         unlock_page(page);
         submit_bio(rw, bio);
@@ -123,7 +123,7 @@ int swap_readpage(struct file *file, struct page *page)
                 ret = -ENOMEM;
                 goto out;
         }
-       inc_page_state(pswpin);
+       count_vm_event(PSWPIN);
         submit_bio(READ, bio);
  out:
         return ret;
diff --git a/mm/shmem.c b/mm/shmem.c

index b14ff81..a9c09e0 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1045,12 +1045,12 @@ repeat:
                 swappage = lookup_swap_cache(swap);
                 if (!swappage) {
                         shmem_swp_unmap(entry);
-                       spin_unlock(&info->lock);
                         /* here we actually do the io */
                         if (type && *type == VM_FAULT_MINOR) {
-                               inc_page_state(pgmajfault);
+                               __count_vm_event(PGMAJFAULT);
                                 *type = VM_FAULT_MAJOR;
                         }
+                       spin_unlock(&info->lock);
                         swappage = shmem_swapin(info, swap, idx);
                         if (!swappage) {
                                 spin_lock(&info->lock);
diff --git a/mm/swap.c b/mm/swap.c

index 990868a..8fd095c 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -87,7 +87,7 @@ int rotate_reclaimable_page(struct page *page)
         spin_lock_irqsave(&zone->lru_lock, flags);
         if (PageLRU(page) && !PageActive(page)) {
                 list_move_tail(&page->lru, &zone->inactive_list);
-               inc_page_state(pgrotated);
+               __count_vm_event(PGROTATED);
         }
         if (!test_clear_page_writeback(page))
                 BUG();
@@ -107,7 +107,7 @@ void fastcall activate_page(struct page *page)
                 del_page_from_inactive_list(zone, page);
                 SetPageActive(page);
                 add_page_to_active_list(zone, page);
-               inc_page_state(pgactivate);
+               __count_vm_event(PGACTIVATE);
         }
         spin_unlock_irq(&zone->lru_lock);
  }
diff --git a/mm/vmscan.c b/mm/vmscan.c

index d694243..ff2ebe9 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -215,7 +215,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                                 break;
                         if (shrink_ret < nr_before)
                                 ret += nr_before - shrink_ret;
-                       mod_page_state(slabs_scanned, this_scan);
+                       count_vm_events(SLABS_SCANNED, this_scan);
                         total_scan -= this_scan;
  
                         cond_resched();
@@ -569,7 +569,7 @@ keep:
         list_splice(&ret_pages, page_list);
         if (pagevec_count(&freed_pvec))
                 __pagevec_release_nonlru(&freed_pvec);
-       mod_page_state(pgactivate, pgactivate);
+       count_vm_events(PGACTIVATE, pgactivate);
         return nr_reclaimed;
  }
  
@@ -659,11 +659,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                 nr_reclaimed += nr_freed;
                 local_irq_disable();
                 if (current_is_kswapd()) {
-                       __mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-                       __mod_page_state(kswapd_steal, nr_freed);
+                       __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+                       __count_vm_events(KSWAPD_STEAL, nr_freed);
                 } else
-                       __mod_page_state_zone(zone, pgscan_direct, nr_scan);
-               __mod_page_state_zone(zone, pgsteal, nr_freed);
+                       __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
+               __count_zone_vm_events(PGSTEAL, zone, nr_freed);
  
                 if (nr_taken == 0)
                         goto done;
@@ -841,11 +841,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                 }
         }
         zone->nr_active += pgmoved;
-       spin_unlock(&zone->lru_lock);
  
-       __mod_page_state_zone(zone, pgrefill, pgscanned);
-       __mod_page_state(pgdeactivate, pgdeactivate);
-       local_irq_enable();
+       __count_zone_vm_events(PGREFILL, zone, pgscanned);
+       __count_vm_events(PGDEACTIVATE, pgdeactivate);
+       spin_unlock_irq(&zone->lru_lock);
  
         pagevec_release(&pvec);
  }
@@ -977,7 +976,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
                 .swappiness = vm_swappiness,
         };
  
-       inc_page_state(allocstall);
+       count_vm_event(ALLOCSTALL);
  
         for (i = 0; zones[i] != NULL; i++) {
                 struct zone *zone = zones[i];
@@ -1074,7 +1073,7 @@ loop_again:
         total_scanned = 0;
         nr_reclaimed = 0;
         sc.may_writepage = !laptop_mode;
-       inc_page_state(pageoutrun);
+       count_vm_event(PAGEOUTRUN);
  
         for (i = 0; i < pgdat->nr_zones; i++) {
                 struct zone *zone = pgdat->node_zones + i;
diff --git a/mm/vmstat.c b/mm/vmstat.c

index ee7f896..73b83d6 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,66 +13,6 @@
  #include <linux/mm.h>
  #include <linux/module.h>
  
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-DEFINE_PER_CPU(struct page_state, page_states) = {0};
-
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
-{
-       unsigned cpu;
-
-       memset(ret, 0, nr * sizeof(unsigned long));
-       cpus_and(*cpumask, *cpumask, cpu_online_map);
-
-       for_each_cpu_mask(cpu, *cpumask) {
-               unsigned long *in;
-               unsigned long *out;
-               unsigned off;
-               unsigned next_cpu;
-
-               in = (unsigned long *)&per_cpu(page_states, cpu);
-
-               next_cpu = next_cpu(cpu, *cpumask);
-               if (likely(next_cpu < NR_CPUS))
-                       prefetch(&per_cpu(page_states, next_cpu));
-
-               out = (unsigned long *)ret;
-               for (off = 0; off < nr; off++)
-                       *out++ += *in++;
-       }
-}
-
-void get_full_page_state(struct page_state *ret)
-{
-       cpumask_t mask = CPU_MASK_ALL;
-
-       __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
-}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-       void *ptr;
-
-       ptr = &__get_cpu_var(page_states);
-       *(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-       unsigned long flags;
-       void *ptr;
-
-       local_irq_save(flags);
-       ptr = &__get_cpu_var(page_states);
-       *(unsigned long *)(ptr + offset) += delta;
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
-
  void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                         unsigned long *free, struct pglist_data *pgdat)
  {
@@ -106,6 +46,63 @@ void get_zone_counts(unsigned long *active,
         }
  }
  
+#ifdef CONFIG_VM_EVENT_COUNTERS
+DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
+EXPORT_PER_CPU_SYMBOL(vm_event_states);
+
+static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+{
+       int cpu = 0;
+       int i;
+
+       memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
+
+       cpu = first_cpu(*cpumask);
+       while (cpu < NR_CPUS) {
+               struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
+
+               cpu = next_cpu(cpu, *cpumask);
+
+               if (cpu < NR_CPUS)
+                       prefetch(&per_cpu(vm_event_states, cpu));
+
+
+               for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                       ret[i] += this->event[i];
+       }
+}
+
+/*
+ * Accumulate the vm event counters across all CPUs.
+ * The result is unavoidably approximate - it can change
+ * during and after execution of this function.
+*/
+void all_vm_events(unsigned long *ret)
+{
+       sum_vm_events(ret, &cpu_online_map);
+}
+
+#ifdef CONFIG_HOTPLUG
+/*
+ * Fold the foreign cpu events into our own.
+ *
+ * This is adding to the events on one processor
+ * but keeps the global counts constant.
+ */
+void vm_events_fold_cpu(int cpu)
+{
+       struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
+       int i;
+
+       for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+               count_vm_events(i, fold_state->event[i]);
+               fold_state->event[i] = 0;
+       }
+}
+#endif /* CONFIG_HOTPLUG */
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
  /*
   * Manage combined zone based / global counters
   *
@@ -405,16 +402,16 @@ static char *vmstat_text[] = {
         "numa_other",
  #endif
  
-       /* Event counters */
+#ifdef CONFIG_VM_EVENT_COUNTERS
         "pgpgin",
         "pgpgout",
         "pswpin",
         "pswpout",
  
-       "pgalloc_high",
-       "pgalloc_normal",
-       "pgalloc_dma32",
         "pgalloc_dma",
+       "pgalloc_dma32",
+       "pgalloc_normal",
+       "pgalloc_high",
  
         "pgfree",
         "pgactivate",
@@ -423,25 +420,25 @@ static char *vmstat_text[] = {
         "pgfault",
         "pgmajfault",
  
-       "pgrefill_high",
-       "pgrefill_normal",
-       "pgrefill_dma32",
         "pgrefill_dma",
+       "pgrefill_dma32",
+       "pgrefill_normal",
+       "pgrefill_high",
  
-       "pgsteal_high",
-       "pgsteal_normal",
-       "pgsteal_dma32",
         "pgsteal_dma",
+       "pgsteal_dma32",
+       "pgsteal_normal",
+       "pgsteal_high",
  
-       "pgscan_kswapd_high",
-       "pgscan_kswapd_normal",
-       "pgscan_kswapd_dma32",
         "pgscan_kswapd_dma",
+       "pgscan_kswapd_dma32",
+       "pgscan_kswapd_normal",
+       "pgscan_kswapd_high",
  
-       "pgscan_direct_high",
-       "pgscan_direct_normal",
-       "pgscan_direct_dma32",
         "pgscan_direct_dma",
+       "pgscan_direct_dma32",
+       "pgscan_direct_normal",
+       "pgscan_direct_high",
  
         "pginodesteal",
         "slabs_scanned",
@@ -451,6 +448,7 @@ static char *vmstat_text[] = {
         "allocstall",
  
         "pgrotated",
+#endif
  };
  
  /*
@@ -553,23 +551,32 @@ struct seq_operations zoneinfo_op = {
  static void *vmstat_start(struct seq_file *m, loff_t *pos)
  {
         unsigned long *v;
-       struct page_state *ps;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+       unsigned long *e;
+#endif
         int i;
  
         if (*pos >= ARRAY_SIZE(vmstat_text))
                 return NULL;
  
+#ifdef CONFIG_VM_EVENT_COUNTERS
         v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
-                       + sizeof(*ps), GFP_KERNEL);
+                       + sizeof(struct vm_event_state), GFP_KERNEL);
+#else
+       v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
+                       GFP_KERNEL);
+#endif
         m->private = v;
         if (!v)
                 return ERR_PTR(-ENOMEM);
         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                 v[i] = global_page_state(i);
-       ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
-       get_full_page_state(ps);
-       ps->pgpgin /= 2;                /* sectors -> kbytes */
-       ps->pgpgout /= 2;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+       e = v + NR_VM_ZONE_STAT_ITEMS;
+       all_vm_events(e);
+       e[PGPGIN] /= 2;         /* sectors -> kbytes */
+       e[PGPGOUT] /= 2;
+#endif
         return v + *pos;
  }
author	Christoph Lameter <clameter@sgi.com>
	Fri, 30 Jun 2006 08:55:45 +0000 (01:55 -0700)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Fri, 30 Jun 2006 18:25:36 +0000 (11:25 -0700)
arch/s390/appldata/appldata_base.c		patch \| blob \| history
arch/s390/appldata/appldata_mem.c		patch \| blob \| history
block/ll_rw_blk.c		patch \| blob \| history
drivers/parisc/led.c		patch \| blob \| history
fs/inode.c		patch \| blob \| history
fs/ncpfs/mmap.c		patch \| blob \| history
include/linux/vmstat.h		patch \| blob \| history
init/Kconfig		patch \| blob \| history
mm/filemap.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/page_io.c		patch \| blob \| history
mm/shmem.c		patch \| blob \| history
mm/swap.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history
mm/vmstat.c		patch \| blob \| history