core_initcall(init_zero_pfn);
+#if defined(SPLIT_RSS_COUNTING)
+
+void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+{
+ int i;
+
+ for (i = 0; i < NR_MM_COUNTERS; i++) {
+ if (task->rss_stat.count[i]) {
+ add_mm_counter(mm, i, task->rss_stat.count[i]);
+ task->rss_stat.count[i] = 0;
+ }
+ }
+ task->rss_stat.events = 0;
+}
+
+static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
+{
+ struct task_struct *task = current;
+
+ if (likely(task->mm == mm))
+ task->rss_stat.count[member] += val;
+ else
+ add_mm_counter(mm, member, val);
+}
+#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
+#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
+
+/* sync counter once per 64 page faults */
+#define TASK_RSS_EVENTS_THRESH (64)
+static void check_sync_rss_stat(struct task_struct *task)
+{
+ if (unlikely(task != current))
+ return;
+ if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
+ __sync_task_rss_stat(task, task->mm);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+ long val = 0;
+
+ /*
+ * Don't use task->mm here...for avoiding to use task_get_mm()..
+ * The caller must guarantee task->mm is not invalid.
+ */
+ val = atomic_long_read(&mm->rss_stat.count[member]);
+ /*
+ * counter is updated in asynchronous manner and may go to minus.
+ * But it's never be expected number for users.
+ */
+ if (val < 0)
+ return 0;
+ return (unsigned long)val;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+ __sync_task_rss_stat(task, mm);
+}
+#else
+
+#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
+#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
+
+static void check_sync_rss_stat(struct task_struct *task)
+{
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+}
+#endif
+
/*
* If a p?d_bad entry is found while walking page tables, report
* the error, before resetting entry to p?d_none. Usually (but
{
int i;
+ if (current->mm == mm)
+ sync_mm_rss(current, mm);
for (i = 0; i < NR_MM_COUNTERS; i++)
if (rss[i])
add_mm_counter(mm, i, rss[i]);
/* Ok, finally just insert the thing.. */
get_page(page);
- inc_mm_counter(mm, MM_FILEPAGES);
+ inc_mm_counter_fast(mm, MM_FILEPAGES);
page_add_file_rmap(page);
set_pte_at(mm, addr, pte, mk_pte(page, prot));
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
if (!PageAnon(old_page)) {
- dec_mm_counter(mm, MM_FILEPAGES);
- inc_mm_counter(mm, MM_ANONPAGES);
+ dec_mm_counter_fast(mm, MM_FILEPAGES);
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
}
} else
- inc_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
flush_cache_page(vma, address, pte_pfn(orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
* discarded at swap_free().
*/
- inc_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
pte = mk_pte(page, vma->vm_page_prot);
if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
if (!pte_none(*page_table))
goto release;
- inc_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
setpte:
set_pte_at(mm, address, page_table, entry);
if (flags & FAULT_FLAG_WRITE)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (anon) {
- inc_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
} else {
- inc_mm_counter(mm, MM_FILEPAGES);
+ inc_mm_counter_fast(mm, MM_FILEPAGES);
page_add_file_rmap(page);
if (flags & FAULT_FLAG_WRITE) {
dirty_page = page;
count_vm_event(PGFAULT);
+ /* do counter updates before entering really critical section. */
+ check_sync_rss_stat(current);
+
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, flags);