tmpfs: per-superblock inode numbers and 64-bit inum (inode32/inode64) support
[linux-2.6-microblaze.git] / mm / shmem.c
index b2abca3..c5c2818 100644 (file)
@@ -114,11 +114,13 @@ struct shmem_options {
        kuid_t uid;
        kgid_t gid;
        umode_t mode;
+       bool full_inums;
        int huge;
        int seen;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
+#define SHMEM_SEEN_INUMS 8
 };
 
 #ifdef CONFIG_TMPFS
@@ -260,18 +262,76 @@ bool vma_is_shmem(struct vm_area_struct *vma)
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
-static int shmem_reserve_inode(struct super_block *sb)
+/*
+ * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
+ * produces a novel ino for the newly allocated inode.
+ *
+ * It may also be called when making a hard link to permit the space needed by
+ * each dentry. However, in that case, no new inode number is needed since that
+ * internally draws from another pool of inode numbers (currently global
+ * get_next_ino()). This case is indicated by passing NULL as inop.
+ */
+#define SHMEM_INO_BATCH 1024
+static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
-       if (sbinfo->max_inodes) {
+       ino_t ino;
+
+       if (!(sb->s_flags & SB_KERNMOUNT)) {
                spin_lock(&sbinfo->stat_lock);
                if (!sbinfo->free_inodes) {
                        spin_unlock(&sbinfo->stat_lock);
                        return -ENOSPC;
                }
                sbinfo->free_inodes--;
+               if (inop) {
+                       ino = sbinfo->next_ino++;
+                       if (unlikely(is_zero_ino(ino)))
+                               ino = sbinfo->next_ino++;
+                       if (unlikely(!sbinfo->full_inums &&
+                                    ino > UINT_MAX)) {
+                               /*
+                                * Emulate get_next_ino uint wraparound for
+                                * compatibility
+                                */
+                               if (IS_ENABLED(CONFIG_64BIT))
+                                       pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n",
+                                               __func__, MINOR(sb->s_dev));
+                               sbinfo->next_ino = 1;
+                               ino = sbinfo->next_ino++;
+                       }
+                       *inop = ino;
+               }
                spin_unlock(&sbinfo->stat_lock);
+       } else if (inop) {
+               /*
+                * __shmem_file_setup, one of our callers, is lock-free: it
+                * doesn't hold stat_lock in shmem_reserve_inode since
+                * max_inodes is always 0, and is called from potentially
+                * unknown contexts. As such, use a per-cpu batched allocator
+                * which doesn't require the per-sb stat_lock unless we are at
+                * the batch boundary.
+                *
+                * We don't need to worry about inode{32,64} since SB_KERNMOUNT
+                * shmem mounts are not exposed to userspace, so we don't need
+                * to worry about things like glibc compatibility.
+                */
+               ino_t *next_ino;
+               next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
+               ino = *next_ino;
+               if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
+                       spin_lock(&sbinfo->stat_lock);
+                       ino = sbinfo->next_ino;
+                       sbinfo->next_ino += SHMEM_INO_BATCH;
+                       spin_unlock(&sbinfo->stat_lock);
+                       if (unlikely(is_zero_ino(ino)))
+                               ino++;
+               }
+               *inop = ino;
+               *next_ino = ++ino;
+               put_cpu();
        }
+
        return 0;
 }
 
@@ -2222,13 +2282,14 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
        struct inode *inode;
        struct shmem_inode_info *info;
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+       ino_t ino;
 
-       if (shmem_reserve_inode(sb))
+       if (shmem_reserve_inode(sb, &ino))
                return NULL;
 
        inode = new_inode(sb);
        if (inode) {
-               inode->i_ino = get_next_ino();
+               inode->i_ino = ino;
                inode_init_owner(inode, dir, mode);
                inode->i_blocks = 0;
                inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -2932,7 +2993,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
         * first link must skip that, to get the accounting right.
         */
        if (inode->i_nlink) {
-               ret = shmem_reserve_inode(inode->i_sb);
+               ret = shmem_reserve_inode(inode->i_sb, NULL);
                if (ret)
                        goto out;
        }
@@ -3347,6 +3408,8 @@ enum shmem_param {
        Opt_nr_inodes,
        Opt_size,
        Opt_uid,
+       Opt_inode32,
+       Opt_inode64,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -3366,6 +3429,8 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
        fsparam_string("nr_inodes",     Opt_nr_inodes),
        fsparam_string("size",          Opt_size),
        fsparam_u32   ("uid",           Opt_uid),
+       fsparam_flag  ("inode32",       Opt_inode32),
+       fsparam_flag  ("inode64",       Opt_inode64),
        {}
 };
 
@@ -3437,6 +3502,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                        break;
                }
                goto unsupported_parameter;
+       case Opt_inode32:
+               ctx->full_inums = false;
+               ctx->seen |= SHMEM_SEEN_INUMS;
+               break;
+       case Opt_inode64:
+               if (sizeof(ino_t) < 8) {
+                       return invalfc(fc,
+                                      "Cannot use inode64 with <64bit inums in kernel\n");
+               }
+               ctx->full_inums = true;
+               ctx->seen |= SHMEM_SEEN_INUMS;
+               break;
        }
        return 0;
 
@@ -3528,8 +3605,16 @@ static int shmem_reconfigure(struct fs_context *fc)
                }
        }
 
+       if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums &&
+           sbinfo->next_ino > UINT_MAX) {
+               err = "Current inum too high to switch to 32-bit inums";
+               goto out;
+       }
+
        if (ctx->seen & SHMEM_SEEN_HUGE)
                sbinfo->huge = ctx->huge;
+       if (ctx->seen & SHMEM_SEEN_INUMS)
+               sbinfo->full_inums = ctx->full_inums;
        if (ctx->seen & SHMEM_SEEN_BLOCKS)
                sbinfo->max_blocks  = ctx->blocks;
        if (ctx->seen & SHMEM_SEEN_INODES) {
@@ -3569,6 +3654,29 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
        if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
                seq_printf(seq, ",gid=%u",
                                from_kgid_munged(&init_user_ns, sbinfo->gid));
+
+       /*
+        * Showing inode{64,32} might be useful even if it's the system default,
+        * since then people don't have to resort to checking both here and
+        * /proc/config.gz to confirm 64-bit inums were successfully applied
+        * (which may not even exist if IKCONFIG_PROC isn't enabled).
+        *
+        * We hide it when inode64 isn't the default and we are using 32-bit
+        * inodes, since that probably just means the feature isn't even under
+        * consideration.
+        *
+        * As such:
+        *
+        *                     +-----------------+-----------------+
+        *                     | TMPFS_INODE64=y | TMPFS_INODE64=n |
+        *  +------------------+-----------------+-----------------+
+        *  | full_inums=true  | show            | show            |
+        *  | full_inums=false | show            | hide            |
+        *  +------------------+-----------------+-----------------+
+        *
+        */
+       if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums)
+               seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32));
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
        if (sbinfo->huge)
@@ -3584,6 +3692,7 @@ static void shmem_put_super(struct super_block *sb)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+       free_percpu(sbinfo->ino_batch);
        percpu_counter_destroy(&sbinfo->used_blocks);
        mpol_put(sbinfo->mpol);
        kfree(sbinfo);
@@ -3616,6 +3725,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
                        ctx->blocks = shmem_default_max_blocks();
                if (!(ctx->seen & SHMEM_SEEN_INODES))
                        ctx->inodes = shmem_default_max_inodes();
+               if (!(ctx->seen & SHMEM_SEEN_INUMS))
+                       ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64);
        } else {
                sb->s_flags |= SB_NOUSER;
        }
@@ -3626,8 +3737,14 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 #endif
        sbinfo->max_blocks = ctx->blocks;
        sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+       if (sb->s_flags & SB_KERNMOUNT) {
+               sbinfo->ino_batch = alloc_percpu(ino_t);
+               if (!sbinfo->ino_batch)
+                       goto failed;
+       }
        sbinfo->uid = ctx->uid;
        sbinfo->gid = ctx->gid;
+       sbinfo->full_inums = ctx->full_inums;
        sbinfo->mode = ctx->mode;
        sbinfo->huge = ctx->huge;
        sbinfo->mpol = ctx->mpol;