mm/sparse.c: fix typo in online_mem_sections
[linux-2.6-microblaze.git] / mm / shmem.c
index fbcb3c9..ace53a5 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/swap.h>
 #include <linux/uio.h>
 #include <linux/khugepaged.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
 
@@ -188,6 +189,38 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
                vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
+static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
+{      /* Charge @pages against @inode; true if charged, false if refused. */
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+       if (shmem_acct_block(info->flags, pages)) /* non-zero: nothing charged yet */
+               return false;
+
+       if (sbinfo->max_blocks) { /* limit check is skipped when max_blocks is 0 */
+               if (percpu_counter_compare(&sbinfo->used_blocks,
+                                          sbinfo->max_blocks - pages) > 0)
+                       goto unacct; /* used_blocks + pages would exceed max_blocks */
+               percpu_counter_add(&sbinfo->used_blocks, pages);
+       }
+
+       return true;
+
+unacct: /* undo the shmem_acct_block() charge taken above */
+       shmem_unacct_blocks(info->flags, pages);
+       return false;
+}
+
+static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
+{      /* Release @pages charged to @inode: used_blocks first, then shmem_unacct_blocks(). */
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+       if (sbinfo->max_blocks) /* used_blocks is only touched when max_blocks is non-zero */
+               percpu_counter_sub(&sbinfo->used_blocks, pages);
+       shmem_unacct_blocks(info->flags, pages);
+}
+
 static const struct super_operations shmem_ops;
 static const struct address_space_operations shmem_aops;
 static const struct file_operations shmem_file_operations;
@@ -249,23 +282,20 @@ static void shmem_recalc_inode(struct inode *inode)
 
        freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
        if (freed > 0) {
-               struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-               if (sbinfo->max_blocks)
-                       percpu_counter_add(&sbinfo->used_blocks, -freed);
                info->alloced -= freed;
                inode->i_blocks -= freed * BLOCKS_PER_PAGE;
-               shmem_unacct_blocks(info->flags, freed);
+               shmem_inode_unacct_blocks(inode, freed);
        }
 }
 
 bool shmem_charge(struct inode *inode, long pages)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
-       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        unsigned long flags;
 
-       if (shmem_acct_block(info->flags, pages))
+       if (!shmem_inode_acct_block(inode, pages))
                return false;
+
        spin_lock_irqsave(&info->lock, flags);
        info->alloced += pages;
        inode->i_blocks += pages * BLOCKS_PER_PAGE;
@@ -273,26 +303,12 @@ bool shmem_charge(struct inode *inode, long pages)
        spin_unlock_irqrestore(&info->lock, flags);
        inode->i_mapping->nrpages += pages;
 
-       if (!sbinfo->max_blocks)
-               return true;
-       if (percpu_counter_compare(&sbinfo->used_blocks,
-                               sbinfo->max_blocks - pages) > 0) {
-               inode->i_mapping->nrpages -= pages;
-               spin_lock_irqsave(&info->lock, flags);
-               info->alloced -= pages;
-               shmem_recalc_inode(inode);
-               spin_unlock_irqrestore(&info->lock, flags);
-               shmem_unacct_blocks(info->flags, pages);
-               return false;
-       }
-       percpu_counter_add(&sbinfo->used_blocks, pages);
        return true;
 }
 
 void shmem_uncharge(struct inode *inode, long pages)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
-       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        unsigned long flags;
 
        spin_lock_irqsave(&info->lock, flags);
@@ -301,9 +317,7 @@ void shmem_uncharge(struct inode *inode, long pages)
        shmem_recalc_inode(inode);
        spin_unlock_irqrestore(&info->lock, flags);
 
-       if (sbinfo->max_blocks)
-               percpu_counter_sub(&sbinfo->used_blocks, pages);
-       shmem_unacct_blocks(info->flags, pages);
+       shmem_inode_unacct_blocks(inode, pages);
 }
 
 /*
@@ -1452,9 +1466,10 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 }
 
 static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
-               struct shmem_inode_info *info, struct shmem_sb_info *sbinfo,
+               struct inode *inode,
                pgoff_t index, bool huge)
 {
+       struct shmem_inode_info *info = SHMEM_I(inode);
        struct page *page;
        int nr;
        int err = -ENOSPC;
@@ -1463,14 +1478,8 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
                huge = false;
        nr = huge ? HPAGE_PMD_NR : 1;
 
-       if (shmem_acct_block(info->flags, nr))
+       if (!shmem_inode_acct_block(inode, nr))
                goto failed;
-       if (sbinfo->max_blocks) {
-               if (percpu_counter_compare(&sbinfo->used_blocks,
-                                       sbinfo->max_blocks - nr) > 0)
-                       goto unacct;
-               percpu_counter_add(&sbinfo->used_blocks, nr);
-       }
 
        if (huge)
                page = shmem_alloc_hugepage(gfp, info, index);
@@ -1483,10 +1492,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
        }
 
        err = -ENOMEM;
-       if (sbinfo->max_blocks)
-               percpu_counter_add(&sbinfo->used_blocks, -nr);
-unacct:
-       shmem_unacct_blocks(info->flags, nr);
+       shmem_inode_unacct_blocks(inode, nr);
 failed:
        return ERR_PTR(err);
 }
@@ -1644,7 +1650,7 @@ repeat:
 
        if (swap.val) {
                /* Look it up and read it in.. */
-               page = lookup_swap_cache(swap);
+               page = lookup_swap_cache(swap, NULL, 0);
                if (!page) {
                        /* Or update major stats only when swapin succeeds?? */
                        if (fault_type) {
@@ -1751,10 +1757,9 @@ repeat:
                }
 
 alloc_huge:
-               page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
-                               index, true);
+               page = shmem_alloc_and_acct_page(gfp, inode, index, true);
                if (IS_ERR(page)) {
-alloc_nohuge:          page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
+alloc_nohuge:          page = shmem_alloc_and_acct_page(gfp, inode,
                                        index, false);
                }
                if (IS_ERR(page)) {
@@ -1876,10 +1881,7 @@ clear:
         * Error recovery.
         */
 unacct:
-       if (sbinfo->max_blocks)
-               percpu_counter_sub(&sbinfo->used_blocks,
-                               1 << compound_order(page));
-       shmem_unacct_blocks(info->flags, 1 << compound_order(page));
+       shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
 
        if (PageTransHuge(page)) {
                unlock_page(page);
@@ -2206,16 +2208,16 @@ bool shmem_mapping(struct address_space *mapping)
        return mapping->a_ops == &shmem_aops;
 }
 
-int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
-                          pmd_t *dst_pmd,
-                          struct vm_area_struct *dst_vma,
-                          unsigned long dst_addr,
-                          unsigned long src_addr,
-                          struct page **pagep)
+static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
+                                 pmd_t *dst_pmd,
+                                 struct vm_area_struct *dst_vma,
+                                 unsigned long dst_addr,
+                                 unsigned long src_addr,
+                                 bool zeropage,
+                                 struct page **pagep)
 {
        struct inode *inode = file_inode(dst_vma->vm_file);
        struct shmem_inode_info *info = SHMEM_I(inode);
-       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        struct address_space *mapping = inode->i_mapping;
        gfp_t gfp = mapping_gfp_mask(mapping);
        pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
@@ -2227,33 +2229,30 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
        int ret;
 
        ret = -ENOMEM;
-       if (shmem_acct_block(info->flags, 1))
+       if (!shmem_inode_acct_block(inode, 1))
                goto out;
-       if (sbinfo->max_blocks) {
-               if (percpu_counter_compare(&sbinfo->used_blocks,
-                                          sbinfo->max_blocks) >= 0)
-                       goto out_unacct_blocks;
-               percpu_counter_inc(&sbinfo->used_blocks);
-       }
 
        if (!*pagep) {
                page = shmem_alloc_page(gfp, info, pgoff);
                if (!page)
-                       goto out_dec_used_blocks;
-
-               page_kaddr = kmap_atomic(page);
-               ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
-                                    PAGE_SIZE);
-               kunmap_atomic(page_kaddr);
-
-               /* fallback to copy_from_user outside mmap_sem */
-               if (unlikely(ret)) {
-                       *pagep = page;
-                       if (sbinfo->max_blocks)
-                               percpu_counter_add(&sbinfo->used_blocks, -1);
-                       shmem_unacct_blocks(info->flags, 1);
-                       /* don't free the page */
-                       return -EFAULT;
+                       goto out_unacct_blocks;
+
+               if (!zeropage) {        /* mcopy_atomic */
+                       page_kaddr = kmap_atomic(page);
+                       ret = copy_from_user(page_kaddr,
+                                            (const void __user *)src_addr,
+                                            PAGE_SIZE);
+                       kunmap_atomic(page_kaddr);
+
+                       /* fallback to copy_from_user outside mmap_sem */
+                       if (unlikely(ret)) {
+                               *pagep = page;
+                               shmem_inode_unacct_blocks(inode, 1);
+                               /* don't free the page */
+                               return -EFAULT;
+                       }
+               } else {                /* mfill_zeropage_atomic */
+                       clear_highpage(page);
                }
        } else {
                page = *pagep;
@@ -2314,14 +2313,33 @@ out_release_uncharge:
 out_release:
        unlock_page(page);
        put_page(page);
-out_dec_used_blocks:
-       if (sbinfo->max_blocks)
-               percpu_counter_add(&sbinfo->used_blocks, -1);
 out_unacct_blocks:
-       shmem_unacct_blocks(info->flags, 1);
+       shmem_inode_unacct_blocks(inode, 1);
        goto out;
 }
 
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+                          pmd_t *dst_pmd,
+                          struct vm_area_struct *dst_vma,
+                          unsigned long dst_addr,
+                          unsigned long src_addr,
+                          struct page **pagep)
+{      /* Copy variant: forwards to shmem_mfill_atomic_pte() with zeropage == false. */
+       return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
+                                     dst_addr, src_addr, false, pagep);
+}
+
+int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
+                            pmd_t *dst_pmd,
+                            struct vm_area_struct *dst_vma,
+                            unsigned long dst_addr)
+{      /* Zero-fill variant: forwards to shmem_mfill_atomic_pte() with zeropage == true. */
+       struct page *page = NULL; /* NULL *pagep makes the helper allocate a fresh page */
+
+       return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
+                                     dst_addr, 0, true, &page); /* src_addr is unused when zeropage is set */
+}
+
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_short_symlink_operations;
@@ -3635,7 +3653,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
 
-#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
 
 SYSCALL_DEFINE2(memfd_create,
                const char __user *, uname,
@@ -3647,8 +3665,18 @@ SYSCALL_DEFINE2(memfd_create,
        char *name;
        long len;
 
-       if (flags & ~(unsigned int)MFD_ALL_FLAGS)
-               return -EINVAL;
+       if (!(flags & MFD_HUGETLB)) {
+               if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+                       return -EINVAL;
+       } else {
+               /* Sealing not supported in hugetlbfs (MFD_HUGETLB) */
+               if (flags & MFD_ALLOW_SEALING)
+                       return -EINVAL;
+               /* Allow huge page size encoding in flags. */
+               if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
+                               (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
+                       return -EINVAL;
+       }
 
        /* length includes terminating zero */
        len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
@@ -3679,16 +3707,30 @@ SYSCALL_DEFINE2(memfd_create,
                goto err_name;
        }
 
-       file = shmem_file_setup(name, 0, VM_NORESERVE);
+       if (flags & MFD_HUGETLB) {
+               struct user_struct *user = NULL;
+
+               file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
+                                       HUGETLB_ANONHUGE_INODE,
+                                       (flags >> MFD_HUGE_SHIFT) &
+                                       MFD_HUGE_MASK);
+       } else
+               file = shmem_file_setup(name, 0, VM_NORESERVE);
        if (IS_ERR(file)) {
                error = PTR_ERR(file);
                goto err_fd;
        }
-       info = SHMEM_I(file_inode(file));
        file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
        file->f_flags |= O_RDWR | O_LARGEFILE;
-       if (flags & MFD_ALLOW_SEALING)
+
+       if (flags & MFD_ALLOW_SEALING) {
+               /*
+                * flags check at beginning of function ensures
+                * this is not a hugetlbfs (MFD_HUGETLB) file.
+                */
+               info = SHMEM_I(file_inode(file));
                info->seals &= ~F_SEAL_SEAL;
+       }
 
        fd_install(fd, file);
        kfree(name);