shmem: use raw_spinlock_t for ->stat_lock
author	Sebastian Andrzej Siewior <bigeasy@linutronix.de>	Thu, 2 Sep 2021 21:54:03 +0000 (14:54 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>	Fri, 3 Sep 2021 16:58:11 +0000 (09:58 -0700)
Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is
per-CPU.  Access here is serialized by disabling preemption.  If the pool
is empty, it gets reloaded from `->next_ino'.  Access here is serialized
by ->stat_lock, which is a spinlock_t and so becomes a sleeping lock on
PREEMPT_RT; a sleeping lock cannot be acquired with preemption disabled.
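
For illustration, the pre-patch fast path looks roughly like this
(simplified from shmem_reserve_inode(); the full context is in the diff
below):

	next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); /* disables preemption */
	ino = *next_ino;
	if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
		/* batch exhausted: refill from the shared counter */
		spin_lock(&sbinfo->stat_lock);	/* sleeps on PREEMPT_RT -> invalid here */
		ino = sbinfo->next_ino;
		sbinfo->next_ino += SHMEM_INO_BATCH;
		spin_unlock(&sbinfo->stat_lock);
	}
	*next_ino = ino + 1;
	put_cpu();				/* re-enables preemption */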

One way around this would be to turn the per-CPU ino_batch into a struct
containing the inode number and a local_lock_t.
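
That alternative would look roughly like this (a hypothetical sketch, not
something this patch introduces; struct shmem_ino_batch is an illustrative
name, and ->ino_batch would have to change type accordingly):

	struct shmem_ino_batch {
		local_lock_t lock;	/* stays preemptible on PREEMPT_RT */
		ino_t ino;		/* next inode number in this CPU's batch */
	};

	local_lock(&sbinfo->ino_batch->lock);	/* instead of get_cpu() */
	batch = this_cpu_ptr(sbinfo->ino_batch);
	ino = batch->ino;
	/* ... refill: taking the spinlock_t ->stat_lock is now valid on RT ... */
	local_unlock(&sbinfo->ino_batch->lock);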

Another solution is to promote ->stat_lock to a raw_spinlock_t.  The
critical sections are short.  The mpol_put() in shmem_reconfigure() must
be moved outside the critical section to avoid invoking the destructor
with preemption disabled.
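
The resulting pattern (visible in the shmem_reconfigure() hunk below)
takes over the old policy under the lock and drops the reference only
after unlocking:

	struct mempolicy *mpol = NULL;

	raw_spin_lock(&sbinfo->stat_lock);
	if (ctx->mpol) {
		mpol = sbinfo->mpol;		/* remember the old policy */
		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
		ctx->mpol = NULL;
	}
	raw_spin_unlock(&sbinfo->stat_lock);
	mpol_put(mpol);	/* mpol_put(NULL) is a no-op; destructor runs preemptibly */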

Link: https://lkml.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/shmem_fs.h
mm/shmem.c

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 8e775ce..0a8499f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -31,7 +31,7 @@ struct shmem_sb_info {
        struct percpu_counter used_blocks;  /* How many are allocated */
        unsigned long max_inodes;   /* How many inodes are allowed */
        unsigned long free_inodes;  /* How many are left for allocation */
-       spinlock_t stat_lock;       /* Serialize shmem_sb_info changes */
+       raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
        umode_t mode;               /* Mount mode for root directory */
        unsigned char huge;         /* Whether to try for hugepages */
        kuid_t uid;                 /* Mount uid for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index dacda74..7eec13e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
        ino_t ino;
 
        if (!(sb->s_flags & SB_KERNMOUNT)) {
-               spin_lock(&sbinfo->stat_lock);
+               raw_spin_lock(&sbinfo->stat_lock);
                if (sbinfo->max_inodes) {
                        if (!sbinfo->free_inodes) {
-                               spin_unlock(&sbinfo->stat_lock);
+                               raw_spin_unlock(&sbinfo->stat_lock);
                                return -ENOSPC;
                        }
                        sbinfo->free_inodes--;
@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
                        }
                        *inop = ino;
                }
-               spin_unlock(&sbinfo->stat_lock);
+               raw_spin_unlock(&sbinfo->stat_lock);
        } else if (inop) {
                /*
                 * __shmem_file_setup, one of our callers, is lock-free: it
@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
                 * to worry about things like glibc compatibility.
                 */
                ino_t *next_ino;
+
                next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
                ino = *next_ino;
                if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
-                       spin_lock(&sbinfo->stat_lock);
+                       raw_spin_lock(&sbinfo->stat_lock);
                        ino = sbinfo->next_ino;
                        sbinfo->next_ino += SHMEM_INO_BATCH;
-                       spin_unlock(&sbinfo->stat_lock);
+                       raw_spin_unlock(&sbinfo->stat_lock);
                        if (unlikely(is_zero_ino(ino)))
                                ino++;
                }
@@ -341,9 +342,9 @@ static void shmem_free_inode(struct super_block *sb)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
        if (sbinfo->max_inodes) {
-               spin_lock(&sbinfo->stat_lock);
+               raw_spin_lock(&sbinfo->stat_lock);
                sbinfo->free_inodes++;
-               spin_unlock(&sbinfo->stat_lock);
+               raw_spin_unlock(&sbinfo->stat_lock);
        }
 }
 
@@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 {
        struct mempolicy *mpol = NULL;
        if (sbinfo->mpol) {
-               spin_lock(&sbinfo->stat_lock);  /* prevent replace/use races */
+               raw_spin_lock(&sbinfo->stat_lock);      /* prevent replace/use races */
                mpol = sbinfo->mpol;
                mpol_get(mpol);
-               spin_unlock(&sbinfo->stat_lock);
+               raw_spin_unlock(&sbinfo->stat_lock);
        }
        return mpol;
 }
@@ -3488,9 +3489,10 @@ static int shmem_reconfigure(struct fs_context *fc)
        struct shmem_options *ctx = fc->fs_private;
        struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
        unsigned long inodes;
+       struct mempolicy *mpol = NULL;
        const char *err;
 
-       spin_lock(&sbinfo->stat_lock);
+       raw_spin_lock(&sbinfo->stat_lock);
        inodes = sbinfo->max_inodes - sbinfo->free_inodes;
        if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
                if (!sbinfo->max_blocks) {
@@ -3535,14 +3537,15 @@ static int shmem_reconfigure(struct fs_context *fc)
         * Preserve previous mempolicy unless mpol remount option was specified.
         */
        if (ctx->mpol) {
-               mpol_put(sbinfo->mpol);
+               mpol = sbinfo->mpol;
                sbinfo->mpol = ctx->mpol;       /* transfers initial ref */
                ctx->mpol = NULL;
        }
-       spin_unlock(&sbinfo->stat_lock);
+       raw_spin_unlock(&sbinfo->stat_lock);
+       mpol_put(mpol);
        return 0;
 out:
-       spin_unlock(&sbinfo->stat_lock);
+       raw_spin_unlock(&sbinfo->stat_lock);
        return invalfc(fc, "%s", err);
 }
 
@@ -3659,7 +3662,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
        sbinfo->mpol = ctx->mpol;
        ctx->mpol = NULL;
 
-       spin_lock_init(&sbinfo->stat_lock);
+       raw_spin_lock_init(&sbinfo->stat_lock);
        if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
                goto failed;
        spin_lock_init(&sbinfo->shrinklist_lock);