mm/swapfile.c: move inode_lock out of claim_swapfile
authorNaohiro Aota <naohiro.aota@wdc.com>
Sun, 29 Mar 2020 02:17:15 +0000 (19:17 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 29 Mar 2020 16:47:05 +0000 (09:47 -0700)
claim_swapfile() currently keeps the inode locked when it is successful,
or the file is already swapfile (with -EBUSY).  And, on the other error
cases, it does not lock the inode.

This inconsistency of the lock state and return value is quite confusing
and actually causing a bad unlock balance as below in the "bad_swap"
section of __do_sys_swapon().

This commit fixes this issue by moving the inode_lock() and IS_SWAPFILE
check out of claim_swapfile().  The inode is unlocked in
"bad_swap_unlock_inode" section, so that the inode is ensured to be
unlocked at "bad_swap".  Thus, error handling codes after the locking now
jumps to "bad_swap_unlock_inode" instead of "bad_swap".

    =====================================
    WARNING: bad unlock balance detected!
    5.5.0-rc7+ #176 Not tainted
    -------------------------------------
    swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: __do_sys_swapon+0x94b/0x3550
    but there are no more locks to release!

    other info that might help us debug this:
    no locks held by swapon/4294.

    stack backtrace:
    CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-BTRFS-ZNS+ #176
    Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014
    Call Trace:
     dump_stack+0xa1/0xea
     print_unlock_imbalance_bug.cold+0x114/0x123
     lock_release+0x562/0xed0
     up_write+0x2d/0x490
     __do_sys_swapon+0x94b/0x3550
     __x64_sys_swapon+0x54/0x80
     do_syscall_64+0xa4/0x4b0
     entry_SYSCALL_64_after_hwframe+0x49/0xbe
    RIP: 0033:0x7f15da0a0dc7

Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Qais Youef <qais.yousef@arm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/swapfile.c

index b2a2e45..be33e61 100644 (file)
@@ -2899,10 +2899,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                p->bdev = inode->i_sb->s_bdev;
        }
 
-       inode_lock(inode);
-       if (IS_SWAPFILE(inode))
-               return -EBUSY;
-
        return 0;
 }
 
@@ -3157,36 +3153,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        mapping = swap_file->f_mapping;
        inode = mapping->host;
 
-       /* will take i_rwsem; */
        error = claim_swapfile(p, inode);
        if (unlikely(error))
                goto bad_swap;
 
+       inode_lock(inode);
+       if (IS_SWAPFILE(inode)) {
+               error = -EBUSY;
+               goto bad_swap_unlock_inode;
+       }
+
        /*
         * Read the swap header.
         */
        if (!mapping->a_ops->readpage) {
                error = -EINVAL;
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
        }
        page = read_mapping_page(mapping, 0, swap_file);
        if (IS_ERR(page)) {
                error = PTR_ERR(page);
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
        }
        swap_header = kmap(page);
 
        maxpages = read_swap_header(p, swap_header, inode);
        if (unlikely(!maxpages)) {
                error = -EINVAL;
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
        }
 
        /* OK, set up the swap map and apply the bad block list */
        swap_map = vzalloc(maxpages);
        if (!swap_map) {
                error = -ENOMEM;
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
        }
 
        if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
@@ -3211,7 +3212,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                                        GFP_KERNEL);
                if (!cluster_info) {
                        error = -ENOMEM;
-                       goto bad_swap;
+                       goto bad_swap_unlock_inode;
                }
 
                for (ci = 0; ci < nr_cluster; ci++)
@@ -3220,7 +3221,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                p->percpu_cluster = alloc_percpu(struct percpu_cluster);
                if (!p->percpu_cluster) {
                        error = -ENOMEM;
-                       goto bad_swap;
+                       goto bad_swap_unlock_inode;
                }
                for_each_possible_cpu(cpu) {
                        struct percpu_cluster *cluster;
@@ -3234,13 +3235,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
        error = swap_cgroup_swapon(p->type, maxpages);
        if (error)
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
 
        nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
                cluster_info, maxpages, &span);
        if (unlikely(nr_extents < 0)) {
                error = nr_extents;
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
        }
        /* frontswap enabled? set up bit-per-page map for frontswap */
        if (IS_ENABLED(CONFIG_FRONTSWAP))
@@ -3280,7 +3281,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
        error = init_swap_address_space(p->type, maxpages);
        if (error)
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
 
        /*
         * Flush any pending IO and dirty mappings before we start using this
@@ -3290,7 +3291,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        error = inode_drain_writes(inode);
        if (error) {
                inode->i_flags &= ~S_SWAPFILE;
-               goto bad_swap;
+               goto bad_swap_unlock_inode;
        }
 
        mutex_lock(&swapon_mutex);
@@ -3315,6 +3316,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
        error = 0;
        goto out;
+bad_swap_unlock_inode:
+       inode_unlock(inode);
 bad_swap:
        free_percpu(p->percpu_cluster);
        p->percpu_cluster = NULL;
@@ -3322,6 +3325,7 @@ bad_swap:
                set_blocksize(p->bdev, p->old_block_size);
                blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
        }
+       inode = NULL;
        destroy_swap_extents(p);
        swap_cgroup_swapoff(p->type);
        spin_lock(&swap_lock);
@@ -3333,13 +3337,8 @@ bad_swap:
        kvfree(frontswap_map);
        if (inced_nr_rotate_swap)
                atomic_dec(&nr_rotate_swap);
-       if (swap_file) {
-               if (inode) {
-                       inode_unlock(inode);
-                       inode = NULL;
-               }
+       if (swap_file)
                filp_close(swap_file, NULL);
-       }
 out:
        if (page && !IS_ERR(page)) {
                kunmap(page);