free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
} while (pgd++, addr = next, addr != end);
- if (!tlb_is_full_mm(*tlb))
+ if (!(*tlb)->fullmm)
flush_tlb_pgtables((*tlb)->mm, start, end);
}
return pte_offset_kernel(pmd, address);
}
+static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+{
+ if (file_rss)
+ add_mm_counter(mm, file_rss, file_rss);
+ if (anon_rss)
+ add_mm_counter(mm, anon_rss, anon_rss);
+}
+
+#define NO_RSS 2 /* Increment neither file_rss nor anon_rss */
+
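A note on the helpers above: add_mm_counter() takes the counter *name* as its second argument and token-pastes it onto the mm_struct member, so passing a local variable that happens to share that name (file_rss, anon_rss) is deliberate, not a typo. copy_pte_range() below then tallies pages into rss[0] (file) and rss[1] (anon, via anon = !!PageAnon(page)), with rss[NO_RSS] acting as a throwaway slot, and flushes both totals once per pte page through add_mm_rss(). A minimal userspace sketch of the token-pasting pattern follows; the struct and macro bodies are stand-ins assumed for illustration, not the kernel's exact definitions:

#include <stdio.h>

/* Stand-ins for the mm counter helpers: the second argument is a
 * member *name*, pasted with "##", not an ordinary variable. */
struct mm_counters { long _file_rss, _anon_rss; };
#define add_mm_counter(mm, member, value)  ((mm)->_##member += (value))
#define get_mm_counter(mm, member)         ((mm)->_##member)

int main(void)
{
        struct mm_counters mm = { 0, 0 };
        int file_rss = 3, anon_rss = 2; /* per-range tallies, as in add_mm_rss() */

        /* Same shape as add_mm_rss(): member name and local variable coincide. */
        if (file_rss)
                add_mm_counter(&mm, file_rss, file_rss);
        if (anon_rss)
                add_mm_counter(&mm, anon_rss, anon_rss);

        printf("file=%ld anon=%ld total=%ld\n",
               get_mm_counter(&mm, file_rss),
               get_mm_counter(&mm, anon_rss),
               get_mm_counter(&mm, file_rss) + get_mm_counter(&mm, anon_rss));
        return 0;
}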
+/*
+ * This function is called to print an error when a pte in a
+ * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * is an error).
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+ printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+ "vm_flags = %lx, vaddr = %lx\n",
+ (long long)pte_val(pte),
+ (vma->vm_mm == current->mm ? current->comm : "???"),
+ vma->vm_flags, vaddr);
+ dump_stack();
+}
+
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
* but may be dropped within p[mg]d_alloc() and pte_alloc_map().
*/
-static inline void
+static inline int
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
unsigned long addr)
{
+ unsigned long vm_flags = vma->vm_flags;
pte_t pte = *src_pte;
struct page *page;
unsigned long pfn;
+ int anon = NO_RSS;
/* pte contains position in swap or file, so copy. */
if (unlikely(!pte_present(pte))) {
spin_unlock(&mmlist_lock);
}
}
- set_pte_at(dst_mm, addr, dst_pte, pte);
- return;
+ goto out_set_pte;
}
- pfn = pte_pfn(pte);
- /* the pte points outside of valid memory, the
- * mapping is assumed to be good, meaningful
- * and not mapped via rmap - duplicate the
- * mapping as is.
+ /* If the region is VM_RESERVED, its pages are not tracked
+ * via rmap - duplicate the pte as is.
*/
- page = NULL;
- if (pfn_valid(pfn))
- page = pfn_to_page(pfn);
+ if (vm_flags & VM_RESERVED)
+ goto out_set_pte;
- if (!page || PageReserved(page)) {
- set_pte_at(dst_mm, addr, dst_pte, pte);
- return;
+ pfn = pte_pfn(pte);
+ /* If the pte points outside of valid memory but
+ * the region is not VM_RESERVED, we have a problem.
+ */
+ if (unlikely(!pfn_valid(pfn))) {
+ print_bad_pte(vma, pte, addr);
+ goto out_set_pte; /* try to do something sane */
}
+ page = pfn_to_page(pfn);
+
/*
* If it's a COW mapping, write protect it both
* in the parent and the child
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
get_page(page);
- inc_mm_counter(dst_mm, rss);
- if (PageAnon(page))
- inc_mm_counter(dst_mm, anon_rss);
- set_pte_at(dst_mm, addr, dst_pte, pte);
page_dup_rmap(page);
+ anon = !!PageAnon(page);
+
+out_set_pte:
+ set_pte_at(dst_mm, addr, dst_pte, pte);
+ return anon;
}
static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
unsigned long addr, unsigned long end)
{
pte_t *src_pte, *dst_pte;
- unsigned long vm_flags = vma->vm_flags;
- int progress;
+ int progress = 0;
+ int rss[NO_RSS+1], anon;
again:
+ rss[1] = rss[0] = 0;
dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
if (!dst_pte)
return -ENOMEM;
src_pte = pte_offset_map_nested(src_pmd, addr);
- progress = 0;
spin_lock(&src_mm->page_table_lock);
do {
/*
* We are holding two locks at this point - either of them
* could generate latencies in another task on another CPU.
*/
- if (progress >= 32 && (need_resched() ||
- need_lockbreak(&src_mm->page_table_lock) ||
- need_lockbreak(&dst_mm->page_table_lock)))
- break;
+ if (progress >= 32) {
+ progress = 0;
+ if (need_resched() ||
+ need_lockbreak(&src_mm->page_table_lock) ||
+ need_lockbreak(&dst_mm->page_table_lock))
+ break;
+ }
if (pte_none(*src_pte)) {
progress++;
continue;
}
- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+ anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr);
+ rss[anon]++;
progress += 8;
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
spin_unlock(&src_mm->page_table_lock);
pte_unmap_nested(src_pte - 1);
pte_unmap(dst_pte - 1);
+ add_mm_rss(dst_mm, rss[0], rss[1]);
cond_resched_lock(&dst_mm->page_table_lock);
if (addr != end)
goto again;
return 0;
}
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static void zap_pte_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
struct zap_details *details)
{
+ struct mm_struct *mm = tlb->mm;
pte_t *pte;
+ int file_rss = 0;
+ int anon_rss = 0;
pte = pte_offset_map(pmd, addr);
do {
continue;
if (pte_present(ptent)) {
struct page *page = NULL;
- unsigned long pfn = pte_pfn(ptent);
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (PageReserved(page))
- page = NULL;
+ if (!(vma->vm_flags & VM_RESERVED)) {
+ unsigned long pfn = pte_pfn(ptent);
+ if (unlikely(!pfn_valid(pfn)))
+ print_bad_pte(vma, ptent, addr);
+ else
+ page = pfn_to_page(pfn);
}
if (unlikely(details) && page) {
/*
page->index > details->last_index))
continue;
}
- ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+ ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
addr) != page->index)
- set_pte_at(tlb->mm, addr, pte,
+ set_pte_at(mm, addr, pte,
pgoff_to_pte(page->index));
- if (pte_dirty(ptent))
- set_page_dirty(page);
if (PageAnon(page))
- dec_mm_counter(tlb->mm, anon_rss);
- else if (pte_young(ptent))
- mark_page_accessed(page);
- tlb->freed++;
+ anon_rss++;
+ else {
+ if (pte_dirty(ptent))
+ set_page_dirty(page);
+ if (pte_young(ptent))
+ mark_page_accessed(page);
+ file_rss++;
+ }
page_remove_rmap(page);
tlb_remove_page(tlb, page);
continue;
continue;
if (!pte_file(ptent))
free_swap_and_cache(pte_to_swp_entry(ptent));
- pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
+ pte_clear_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, addr != end);
+
+ add_mm_rss(mm, -file_rss, -anon_rss);
pte_unmap(pte - 1);
}
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline void zap_pmd_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
struct zap_details *details)
{
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
- zap_pte_range(tlb, pmd, addr, next, details);
+ zap_pte_range(tlb, vma, pmd, addr, next, details);
} while (pmd++, addr = next, addr != end);
}
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void zap_pud_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
struct zap_details *details)
{
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- zap_pmd_range(tlb, pud, addr, next, details);
+ zap_pmd_range(tlb, vma, pud, addr, next, details);
} while (pud++, addr = next, addr != end);
}
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- zap_pud_range(tlb, pgd, addr, next, details);
+ zap_pud_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
}
int tlb_start_valid = 0;
unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
- int fullmm = tlb_is_full_mm(*tlbp);
+ int fullmm = (*tlbp)->fullmm;
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
unsigned long end;
continue;
}
- if (!vma || (vma->vm_flags & VM_IO)
+ if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
|| !(flags & vma->vm_flags))
return i ? : -EFAULT;
if (pages) {
pages[i] = page;
flush_dcache_page(page);
- if (!PageReserved(page))
- page_cache_get(page);
+ page_cache_get(page);
}
if (vmas)
vmas[i] = vma;
if (!pte)
return -ENOMEM;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+ struct page *page = ZERO_PAGE(addr);
+ pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+ page_cache_get(page);
+ page_add_file_rmap(page);
+ inc_mm_counter(mm, file_rss);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, zero_pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
- if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
- set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+ set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap(pte - 1);
* rest of the world about it:
* VM_IO tells people not to look at these pages
* (accesses can have side effects).
- * VM_RESERVED tells swapout not to try to touch
- * this region.
+ * VM_RESERVED tells the core MM not to "manage" these pages
+ * (e.g. refcount, mapcount, try to swap them out).
*/
vma->vm_flags |= VM_IO | VM_RESERVED;
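For context on these flags: they are what a driver's vma ends up with after remapping device memory, which is exactly the case the comment describes. A hedged sketch of a typical mmap handler of this era; the driver name and MYDRV_PHYS_BASE are hypothetical, only remap_pfn_range() itself is the real interface:

/* Illustrative only: mydrv_mmap and MYDRV_PHYS_BASE are made up. */
static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long pfn = MYDRV_PHYS_BASE >> PAGE_SHIFT;

        /* remap_pfn_range() marks the vma VM_IO | VM_RESERVED as above, so
         * the core MM neither rmap-tracks these ptes nor tries to swap them. */
        return remap_pfn_range(vma, vma->vm_start, pfn, size,
                               vma->vm_page_prot);
}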
return pte;
}
-/*
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
- */
-static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address,
- pte_t *page_table)
-{
- pte_t entry;
-
- entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
- vma);
- ptep_establish(vma, address, page_table, entry);
- update_mmu_cache(vma, address, entry);
- lazy_mmu_prot_update(entry);
-}
-
/*
* This routine handles present pages, when users try to write
* to a shared page. It is done by copying the page to a new address
* and decrementing the shared-page counter for the old page.
*
- * Goto-purists beware: the only reason for goto's here is that it results
- * in better assembly code.. The "default" path will see no jumps at all.
- *
* Note that this routine assumes that the protection checks have been
* done by the caller (the low-level page fault routine in most cases).
* Thus we can safely just mark it writable once we've done any necessary
* We hold the mm semaphore and the page_table_lock on entry and exit
* with the page_table_lock released.
*/
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ pte_t orig_pte)
{
struct page *old_page, *new_page;
- unsigned long pfn = pte_pfn(pte);
+ unsigned long pfn = pte_pfn(orig_pte);
pte_t entry;
- int ret;
+ int ret = VM_FAULT_MINOR;
+
+ BUG_ON(vma->vm_flags & VM_RESERVED);
if (unlikely(!pfn_valid(pfn))) {
/*
- * This should really halt the system so it can be debugged or
- * at least the kernel stops what it's doing before it corrupts
- * data, but for the moment just pretend this is OOM.
+ * Page table corrupted: show pte and kill process.
*/
- pte_unmap(page_table);
- printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
- address);
- spin_unlock(&mm->page_table_lock);
- return VM_FAULT_OOM;
+ print_bad_pte(vma, orig_pte, address);
+ ret = VM_FAULT_OOM;
+ goto unlock;
}
old_page = pfn_to_page(pfn);
unlock_page(old_page);
if (reuse) {
flush_cache_page(vma, address, pfn);
- entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
- vma);
+ entry = pte_mkyoung(orig_pte);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
ptep_set_access_flags(vma, address, page_table, entry, 1);
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
- return VM_FAULT_MINOR|VM_FAULT_WRITE;
+ ret |= VM_FAULT_WRITE;
+ goto unlock;
}
}
- pte_unmap(page_table);
/*
* Ok, we need to copy. Oh, well..
*/
- if (!PageReserved(old_page))
- page_cache_get(old_page);
+ page_cache_get(old_page);
+ pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
if (unlikely(anon_vma_prepare(vma)))
- goto no_new_page;
+ goto oom;
if (old_page == ZERO_PAGE(address)) {
new_page = alloc_zeroed_user_highpage(vma, address);
if (!new_page)
- goto no_new_page;
+ goto oom;
} else {
new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
if (!new_page)
- goto no_new_page;
+ goto oom;
copy_user_highpage(new_page, old_page, address);
}
+
/*
* Re-check the pte - we dropped the lock
*/
- ret = VM_FAULT_MINOR;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
- if (likely(pte_same(*page_table, pte))) {
- if (PageAnon(old_page))
- dec_mm_counter(mm, anon_rss);
- if (PageReserved(old_page))
- inc_mm_counter(mm, rss);
- else
- page_remove_rmap(old_page);
+ if (likely(pte_same(*page_table, orig_pte))) {
+ page_remove_rmap(old_page);
+ if (!PageAnon(old_page)) {
+ inc_mm_counter(mm, anon_rss);
+ dec_mm_counter(mm, file_rss);
+ }
flush_cache_page(vma, address, pfn);
- break_cow(vma, new_page, address, page_table);
+ entry = mk_pte(new_page, vma->vm_page_prot);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ ptep_establish(vma, address, page_table, entry);
+ update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
+
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
new_page = old_page;
ret |= VM_FAULT_WRITE;
}
- pte_unmap(page_table);
page_cache_release(new_page);
page_cache_release(old_page);
+unlock:
+ pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return ret;
-
-no_new_page:
+oom:
page_cache_release(old_page);
return VM_FAULT_OOM;
}
* We hold the mm semaphore and the page_table_lock on entry and
* should release the pagetable lock on exit..
*/
-static int do_swap_page(struct mm_struct * mm,
- struct vm_area_struct * vma, unsigned long address,
- pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
+static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access, pte_t orig_pte)
{
struct page *page;
- swp_entry_t entry = pte_to_swp_entry(orig_pte);
+ swp_entry_t entry;
pte_t pte;
int ret = VM_FAULT_MINOR;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
+
+ entry = pte_to_swp_entry(orig_pte);
page = lookup_swap_cache(entry);
if (!page) {
swapin_readahead(entry, address, vma);
page_table = pte_offset_map(pmd, address);
if (likely(pte_same(*page_table, orig_pte)))
ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_MINOR;
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
- goto out;
+ goto unlock;
}
/* Had to read the page from swap area: Major fault */
/* The page isn't present yet, go ahead with the fault. */
- inc_mm_counter(mm, rss);
+ inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
lazy_mmu_prot_update(pte);
+unlock:
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
out:
spin_unlock(&mm->page_table_lock);
unlock_page(page);
page_cache_release(page);
- goto out;
+ return ret;
}
/*
* spinlock held to protect against concurrent faults in
* multithreaded programs.
*/
-static int
-do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
- pte_t *page_table, pmd_t *pmd, int write_access,
- unsigned long addr)
+static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access)
{
+ struct page *page = ZERO_PAGE(address);
pte_t entry;
- struct page * page = ZERO_PAGE(addr);
- /* Read-only mapping of ZERO_PAGE. */
- entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+ /* Mapping of ZERO_PAGE - vm_page_prot is readonly */
+ entry = mk_pte(page, vma->vm_page_prot);
- /* ..except if it's a write access */
if (write_access) {
/* Allocate our own private page. */
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
if (unlikely(anon_vma_prepare(vma)))
- goto no_mem;
- page = alloc_zeroed_user_highpage(vma, addr);
+ goto oom;
+ page = alloc_zeroed_user_highpage(vma, address);
if (!page)
- goto no_mem;
+ goto oom;
spin_lock(&mm->page_table_lock);
- page_table = pte_offset_map(pmd, addr);
+ page_table = pte_offset_map(pmd, address);
if (!pte_none(*page_table)) {
- pte_unmap(page_table);
page_cache_release(page);
- spin_unlock(&mm->page_table_lock);
- goto out;
+ goto unlock;
}
- inc_mm_counter(mm, rss);
- entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
- vma->vm_page_prot)),
- vma);
+ inc_mm_counter(mm, anon_rss);
+ entry = mk_pte(page, vma->vm_page_prot);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
lru_cache_add_active(page);
SetPageReferenced(page);
- page_add_anon_rmap(page, vma, addr);
+ page_add_anon_rmap(page, vma, address);
+ } else {
+ inc_mm_counter(mm, file_rss);
+ page_add_file_rmap(page);
+ page_cache_get(page);
}
- set_pte_at(mm, addr, page_table, entry);
- pte_unmap(page_table);
+ set_pte_at(mm, address, page_table, entry);
/* No need to invalidate - it was non-present before */
- update_mmu_cache(vma, addr, entry);
+ update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
+unlock:
+ pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
-out:
return VM_FAULT_MINOR;
-no_mem:
+oom:
return VM_FAULT_OOM;
}
* This is called with the MM semaphore held and the page table
* spinlock held. Exit with the spinlock released.
*/
-static int
-do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
+static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access)
{
- struct page * new_page;
+ struct page *new_page;
struct address_space *mapping = NULL;
pte_t entry;
unsigned int sequence = 0;
int ret = VM_FAULT_MINOR;
int anon = 0;
- if (!vma->vm_ops || !vma->vm_ops->nopage)
- return do_anonymous_page(mm, vma, page_table,
- pmd, write_access, address);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
smp_rmb(); /* serializes i_size against truncate_count */
}
retry:
- cond_resched();
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
/*
* No smp_rmb is needed here as long as there's a full
* retry getting the page.
*/
if (mapping && unlikely(sequence != mapping->truncate_count)) {
- sequence = mapping->truncate_count;
spin_unlock(&mm->page_table_lock);
page_cache_release(new_page);
+ cond_resched();
+ sequence = mapping->truncate_count;
+ smp_rmb();
goto retry;
}
page_table = pte_offset_map(pmd, address);
*/
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
- if (!PageReserved(new_page))
- inc_mm_counter(mm, rss);
-
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
+ inc_mm_counter(mm, anon_rss);
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
- } else
+ } else if (!(vma->vm_flags & VM_RESERVED)) {
+ inc_mm_counter(mm, file_rss);
page_add_file_rmap(new_page);
- pte_unmap(page_table);
+ }
} else {
/* One of our sibling threads was faster, back out. */
- pte_unmap(page_table);
page_cache_release(new_page);
- spin_unlock(&mm->page_table_lock);
- goto out;
+ goto unlock;
}
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
+unlock:
+ pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
-out:
return ret;
oom:
page_cache_release(new_page);
- ret = VM_FAULT_OOM;
- goto out;
+ return VM_FAULT_OOM;
}
/*
* from the encoded file_pte if possible. This enables swappable
* nonlinear vmas.
*/
-static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
- unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
+static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access, pte_t orig_pte)
{
- unsigned long pgoff;
+ pgoff_t pgoff;
int err;
- BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
- /*
- * Fall back to the linear mapping if the fs does not support
- * ->populate:
- */
- if (!vma->vm_ops->populate ||
- (write_access && !(vma->vm_flags & VM_SHARED))) {
- pte_clear(mm, address, pte);
- return do_no_page(mm, vma, address, write_access, pte, pmd);
- }
-
- pgoff = pte_to_pgoff(*pte);
-
- pte_unmap(pte);
+ pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
- err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
+ if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+ /*
+ * Page table corrupted: show pte and kill process.
+ */
+ print_bad_pte(vma, orig_pte, address);
+ return VM_FAULT_OOM;
+ }
+ /* We can then assume vma->vm_ops && vma->vm_ops->populate */
+
+ pgoff = pte_to_pgoff(orig_pte);
+ err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+ vma->vm_page_prot, pgoff, 0);
if (err == -ENOMEM)
return VM_FAULT_OOM;
if (err)
* release it when done.
*/
static inline int handle_pte_fault(struct mm_struct *mm,
- struct vm_area_struct * vma, unsigned long address,
- int write_access, pte_t *pte, pmd_t *pmd)
+ struct vm_area_struct *vma, unsigned long address,
+ pte_t *pte, pmd_t *pmd, int write_access)
{
pte_t entry;
entry = *pte;
if (!pte_present(entry)) {
- /*
- * If it truly wasn't present, we know that kswapd
- * and the PTE updates will not touch it later. So
- * drop the lock.
- */
- if (pte_none(entry))
- return do_no_page(mm, vma, address, write_access, pte, pmd);
+ if (pte_none(entry)) {
+ if (!vma->vm_ops || !vma->vm_ops->nopage)
+ return do_anonymous_page(mm, vma, address,
+ pte, pmd, write_access);
+ return do_no_page(mm, vma, address,
+ pte, pmd, write_access);
+ }
if (pte_file(entry))
- return do_file_page(mm, vma, address, write_access, pte, pmd);
- return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
+ return do_file_page(mm, vma, address,
+ pte, pmd, write_access, entry);
+ return do_swap_page(mm, vma, address,
+ pte, pmd, write_access, entry);
}
if (write_access) {
/*
* By the time we get here, we already hold the mm semaphore
*/
-int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access)
{
pgd_t *pgd;
if (!pte)
goto oom;
- return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+ return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
oom:
spin_unlock(&mm->page_table_lock);
void update_mem_hiwater(struct task_struct *tsk)
{
if (tsk->mm) {
- unsigned long rss = get_mm_counter(tsk->mm, rss);
+ unsigned long rss = get_mm_rss(tsk->mm);
if (tsk->mm->hiwater_rss < rss)
tsk->mm->hiwater_rss = rss;
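With rss split into file_rss and anon_rss, update_mem_hiwater() reads the total through get_mm_rss(). Its definition lives in the headers rather than in this hunk, so the line below is only an assumption about what this series presumably adds there, spelled out to make the arithmetic explicit:

/* Assumed header definition: total rss is the sum of the two counters. */
#define get_mm_rss(mm) \
        (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))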
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_page_prot = PAGE_READONLY;
- gate_vma.vm_flags = 0;
+ gate_vma.vm_flags = VM_RESERVED;
return 0;
}
__initcall(gate_vma_init);