mm,shmem,thp: limit shmem THP allocations to requested zones
[linux-2.6-microblaze.git] / mm / shmem.c
index 1b254fb..b2db4ed 100644 (file)
@@ -713,7 +713,7 @@ next:
                }
                if (PageTransHuge(page)) {
                        count_vm_event(THP_FILE_ALLOC);
-                       __inc_lruvec_page_state(page, NR_SHMEM_THPS);
+                       __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
                }
                mapping->nrpages += nr;
                __mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
@@ -842,7 +842,6 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma)
 void shmem_unlock_mapping(struct address_space *mapping)
 {
        struct pagevec pvec;
-       pgoff_t indices[PAGEVEC_SIZE];
        pgoff_t index = 0;
 
        pagevec_init(&pvec);
@@ -850,16 +849,8 @@ void shmem_unlock_mapping(struct address_space *mapping)
         * Minor point, but we might as well stop if someone else SHM_LOCKs it.
         */
        while (!mapping_unevictable(mapping)) {
-               /*
-                * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
-                * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
-                */
-               pvec.nr = find_get_entries(mapping, index,
-                                          PAGEVEC_SIZE, pvec.pages, indices);
-               if (!pvec.nr)
+               if (!pagevec_lookup(&pvec, mapping, &index))
                        break;
-               index = indices[pvec.nr - 1] + 1;
-               pagevec_remove_exceptionals(&pvec);
                check_move_unevictable_pages(&pvec);
                pagevec_release(&pvec);
                cond_resched();
@@ -916,18 +907,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
        pagevec_init(&pvec);
        index = start;
-       while (index < end) {
-               pvec.nr = find_get_entries(mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE),
-                       pvec.pages, indices);
-               if (!pvec.nr)
-                       break;
+       while (index < end && find_lock_entries(mapping, index, end - 1,
+                       &pvec, indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        index = indices[i];
-                       if (index >= end)
-                               break;
 
                        if (xa_is_value(page)) {
                                if (unfalloc)
@@ -936,18 +921,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                                                                index, page);
                                continue;
                        }
+                       index += thp_nr_pages(page) - 1;
 
-                       VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
-
-                       if (!trylock_page(page))
-                               continue;
-
-                       if ((!unfalloc || !PageUptodate(page)) &&
-                           page_mapping(page) == mapping) {
-                               VM_BUG_ON_PAGE(PageWriteback(page), page);
-                               if (shmem_punch_compound(page, start, end))
-                                       truncate_inode_page(mapping, page);
-                       }
+                       if (!unfalloc || !PageUptodate(page))
+                               truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_remove_exceptionals(&pvec);
@@ -988,10 +965,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
        while (index < end) {
                cond_resched();
 
-               pvec.nr = find_get_entries(mapping, index,
-                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
-                               pvec.pages, indices);
-               if (!pvec.nr) {
+               if (!find_get_entries(mapping, index, end - 1, &pvec,
+                               indices)) {
                        /* If all gone or hole-punch or unfalloc, we're done */
                        if (index == start || end != -1)
                                break;
@@ -1003,9 +978,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        struct page *page = pvec.pages[i];
 
                        index = indices[i];
-                       if (index >= end)
-                               break;
-
                        if (xa_is_value(page)) {
                                if (unfalloc)
                                        continue;
@@ -1060,7 +1032,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
-static int shmem_getattr(const struct path *path, struct kstat *stat,
+static int shmem_getattr(struct user_namespace *mnt_userns,
+                        const struct path *path, struct kstat *stat,
                         u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = path->dentry->d_inode;
@@ -1072,7 +1045,7 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
                shmem_recalc_inode(inode);
                spin_unlock_irq(&info->lock);
        }
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        if (is_huge_enabled(sb_info))
                stat->blksize = HPAGE_PMD_SIZE;
@@ -1080,14 +1053,15 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
        return 0;
 }
 
-static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
+static int shmem_setattr(struct user_namespace *mnt_userns,
+                        struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -1141,9 +1115,9 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
                }
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        if (attr->ia_valid & ATTR_MODE)
-               error = posix_acl_chmod(inode, inode->i_mode);
+               error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
        return error;
 }
 
@@ -1531,6 +1505,30 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
        return page;
 }
 
+/*
+ * Return huge_gfp restricted by limit_gfp: zone bits come from limit_gfp,
+ * allow flags must be set in both, deny flags set in either are kept.
+ */
+static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
+{
+       gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
+       gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
+       gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
+       gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);
+
+       /* Allow allocations only from the originally specified zones. */
+       result |= zoneflags;
+
+       /*
+        * Minimize the result gfp by taking the union with the deny flags,
+        * and the intersection of the allow flags.
+        */
+       result |= (limit_gfp & denyflags);
+       result |= (huge_gfp & limit_gfp) & allowflags;
+
+       return result;
+}
+
 static struct page *shmem_alloc_hugepage(gfp_t gfp,
                struct shmem_inode_info *info, pgoff_t index)
 {
@@ -1545,8 +1543,8 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
                return NULL;
 
        shmem_pseudo_vma_init(&pvma, info, hindex);
-       page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
+                              true);
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);
@@ -1802,6 +1800,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
        struct page *page;
        enum sgp_type sgp_huge = sgp;
        pgoff_t hindex = index;
+       gfp_t huge_gfp;
        int error;
        int once = 0;
        int alloced = 0;
@@ -1819,7 +1818,8 @@ repeat:
        sbinfo = SHMEM_SB(inode->i_sb);
        charge_mm = vma ? vma->vm_mm : current->mm;
 
-       page = find_lock_entry(mapping, index);
+       page = pagecache_get_page(mapping, index,
+                                       FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
        if (xa_is_value(page)) {
                error = shmem_swapin_page(inode, index, &page,
                                          sgp, gfp, vma, fault_type);
@@ -1887,7 +1887,9 @@ repeat:
        }
 
 alloc_huge:
-       page = shmem_alloc_and_acct_page(gfp, inode, index, true);
+       huge_gfp = vma_thp_gfp_mask(vma);
+       huge_gfp = limit_gfp_mask(huge_gfp, gfp);
+       page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
        if (IS_ERR(page)) {
 alloc_nohuge:
                page = shmem_alloc_and_acct_page(gfp, inode,
@@ -2303,7 +2305,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
        inode = new_inode(sb);
        if (inode) {
                inode->i_ino = ino;
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(&init_user_ns, inode, dir, mode);
                inode->i_blocks = 0;
                inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
                inode->i_generation = prandom_u32();
@@ -2674,86 +2676,20 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return retval ? retval : error;
 }
 
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the page cache.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-                                   pgoff_t index, pgoff_t end, int whence)
-{
-       struct page *page;
-       struct pagevec pvec;
-       pgoff_t indices[PAGEVEC_SIZE];
-       bool done = false;
-       int i;
-
-       pagevec_init(&pvec);
-       pvec.nr = 1;            /* start small: we may be there already */
-       while (!done) {
-               pvec.nr = find_get_entries(mapping, index,
-                                       pvec.nr, pvec.pages, indices);
-               if (!pvec.nr) {
-                       if (whence == SEEK_DATA)
-                               index = end;
-                       break;
-               }
-               for (i = 0; i < pvec.nr; i++, index++) {
-                       if (index < indices[i]) {
-                               if (whence == SEEK_HOLE) {
-                                       done = true;
-                                       break;
-                               }
-                               index = indices[i];
-                       }
-                       page = pvec.pages[i];
-                       if (page && !xa_is_value(page)) {
-                               if (!PageUptodate(page))
-                                       page = NULL;
-                       }
-                       if (index >= end ||
-                           (page && whence == SEEK_DATA) ||
-                           (!page && whence == SEEK_HOLE)) {
-                               done = true;
-                               break;
-                       }
-               }
-               pagevec_remove_exceptionals(&pvec);
-               pagevec_release(&pvec);
-               pvec.nr = PAGEVEC_SIZE;
-               cond_resched();
-       }
-       return index;
-}
-
 static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
-       pgoff_t start, end;
-       loff_t new_offset;
 
        if (whence != SEEK_DATA && whence != SEEK_HOLE)
                return generic_file_llseek_size(file, offset, whence,
                                        MAX_LFS_FILESIZE, i_size_read(inode));
+       if (offset < 0)
+               return -ENXIO;
+
        inode_lock(inode);
        /* We're holding i_mutex so we can access i_size directly */
-
-       if (offset < 0 || offset >= inode->i_size)
-               offset = -ENXIO;
-       else {
-               start = offset >> PAGE_SHIFT;
-               end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               new_offset = shmem_seek_hole_data(mapping, start, end, whence);
-               new_offset <<= PAGE_SHIFT;
-               if (new_offset > offset) {
-                       if (new_offset < inode->i_size)
-                               offset = new_offset;
-                       else if (whence == SEEK_DATA)
-                               offset = -ENXIO;
-                       else
-                               offset = inode->i_size;
-               }
-       }
-
+       offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
        if (offset >= 0)
                offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
        inode_unlock(inode);
@@ -2917,7 +2853,8 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
  * File creation. Allocate an inode, and we're done..
  */
 static int
-shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+shmem_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+           struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode *inode;
        int error = -ENOSPC;
@@ -2946,7 +2883,8 @@ out_iput:
 }
 
 static int
-shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+shmem_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+             struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        int error = -ENOSPC;
@@ -2969,20 +2907,22 @@ out_iput:
        return error;
 }
 
-static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int shmem_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        int error;
 
-       if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
+       if ((error = shmem_mknod(&init_user_ns, dir, dentry,
+                                mode | S_IFDIR, 0)))
                return error;
        inc_nlink(dir);
        return 0;
 }
 
-static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool excl)
+static int shmem_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
-       return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
+       return shmem_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
 }
 
 /*
@@ -3062,7 +3002,8 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
        return 0;
 }
 
-static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
+static int shmem_whiteout(struct user_namespace *mnt_userns,
+                         struct inode *old_dir, struct dentry *old_dentry)
 {
        struct dentry *whiteout;
        int error;
@@ -3071,7 +3012,7 @@ static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
        if (!whiteout)
                return -ENOMEM;
 
-       error = shmem_mknod(old_dir, whiteout,
+       error = shmem_mknod(&init_user_ns, old_dir, whiteout,
                            S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
        dput(whiteout);
        if (error)
@@ -3094,7 +3035,10 @@ static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
  * it exists so that the VFS layer correctly free's it when it
  * gets overwritten.
  */
-static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
+static int shmem_rename2(struct user_namespace *mnt_userns,
+                        struct inode *old_dir, struct dentry *old_dentry,
+                        struct inode *new_dir, struct dentry *new_dentry,
+                        unsigned int flags)
 {
        struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = S_ISDIR(inode->i_mode);
@@ -3111,7 +3055,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
        if (flags & RENAME_WHITEOUT) {
                int error;
 
-               error = shmem_whiteout(old_dir, old_dentry);
+               error = shmem_whiteout(&init_user_ns, old_dir, old_dentry);
                if (error)
                        return error;
        }
@@ -3135,7 +3079,8 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
        return 0;
 }
 
-static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        int error;
        int len;
@@ -3273,6 +3218,7 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
 }
 
 static int shmem_xattr_handler_set(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *unused, struct inode *inode,
                                   const char *name, const void *value,
                                   size_t size, int flags)