Merge tag 'for-5.11-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jan 2021 21:54:40 +0000 (13:54 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jan 2021 21:54:40 +0000 (13:54 -0800)
Pull btrfs fixes from David Sterba:
 "A few more fixes for a late rc:

   - fix lockdep complaint on 32bit arches and also remove an unsafe
     memory use due to device vs filesystem lifetime

   - two fixes for free space tree:

      * race during log replay and cache rebuild, now more likely to
        happen due to changes in this dev cycle

      * possible free space tree corruption with online conversion
        during initial tree population"

* tag 'for-5.11-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix log replay failure due to race with space cache rebuild
  btrfs: fix lockdep warning due to seqcount_mutex on 32bit arch
  btrfs: fix possible free space tree corruption with online conversion

fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/free-space-tree.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 0886e81..48ebc10 100644
@@ -673,7 +673,15 @@ static noinline void caching_thread(struct btrfs_work *work)
                wake_up(&caching_ctl->wait);
        }
 
-       if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+       /*
+        * If we are in the transaction that populated the free space tree we
+        * can't actually cache from the free space tree as our commit root and
+        * real root are the same, so we could change the contents of the blocks
+        * while caching.  Instead do the slow caching in this case, and after
+        * the transaction has committed we will be safe.
+        */
+       if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+           !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
                ret = load_free_space_tree(caching_ctl);
        else
                ret = load_extent_tree_free(caching_ctl);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6e3759..4debdbd 100644
@@ -563,6 +563,9 @@ enum {
 
        /* Indicate that we need to cleanup space cache v1 */
        BTRFS_FS_CLEANUP_SPACE_CACHE_V1,
+
+       /* Indicate that we can't trust the free space tree for caching yet */
+       BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,
 };
 
 /*
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 30b1a63..0c335da 100644
@@ -2602,8 +2602,6 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
        struct btrfs_block_group *cache;
        int ret;
 
-       btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes);
-
        cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
        if (!cache)
                return -EINVAL;
@@ -2615,11 +2613,19 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
         * the pinned extents.
         */
        btrfs_cache_block_group(cache, 1);
+       /*
+        * Make sure we wait until the cache is completely built in case it is
+        * missing or is invalid and therefore needs to be rebuilt.
+        */
+       ret = btrfs_wait_block_group_cache_done(cache);
+       if (ret)
+               goto out;
 
        pin_down_extent(trans, cache, bytenr, num_bytes, 0);
 
        /* remove us from the free space cache (if we're there at all) */
        ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
+out:
        btrfs_put_block_group(cache);
        return ret;
 }
@@ -2629,50 +2635,22 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
 {
        int ret;
        struct btrfs_block_group *block_group;
-       struct btrfs_caching_control *caching_ctl;
 
        block_group = btrfs_lookup_block_group(fs_info, start);
        if (!block_group)
                return -EINVAL;
 
-       btrfs_cache_block_group(block_group, 0);
-       caching_ctl = btrfs_get_caching_control(block_group);
-
-       if (!caching_ctl) {
-               /* Logic error */
-               BUG_ON(!btrfs_block_group_done(block_group));
-               ret = btrfs_remove_free_space(block_group, start, num_bytes);
-       } else {
-               /*
-                * We must wait for v1 caching to finish, otherwise we may not
-                * remove our space.
-                */
-               btrfs_wait_space_cache_v1_finished(block_group, caching_ctl);
-               mutex_lock(&caching_ctl->mutex);
-
-               if (start >= caching_ctl->progress) {
-                       ret = btrfs_add_excluded_extent(fs_info, start,
-                                                       num_bytes);
-               } else if (start + num_bytes <= caching_ctl->progress) {
-                       ret = btrfs_remove_free_space(block_group,
-                                                     start, num_bytes);
-               } else {
-                       num_bytes = caching_ctl->progress - start;
-                       ret = btrfs_remove_free_space(block_group,
-                                                     start, num_bytes);
-                       if (ret)
-                               goto out_lock;
+       btrfs_cache_block_group(block_group, 1);
+       /*
+        * Make sure we wait until the cache is completely built in case it is
+        * missing or is invalid and therefore needs to be rebuilt.
+        */
+       ret = btrfs_wait_block_group_cache_done(block_group);
+       if (ret)
+               goto out;
 
-                       num_bytes = (start + num_bytes) -
-                               caching_ctl->progress;
-                       start = caching_ctl->progress;
-                       ret = btrfs_add_excluded_extent(fs_info, start,
-                                                       num_bytes);
-               }
-out_lock:
-               mutex_unlock(&caching_ctl->mutex);
-               btrfs_put_caching_control(caching_ctl);
-       }
+       ret = btrfs_remove_free_space(block_group, start, num_bytes);
+out:
        btrfs_put_block_group(block_group);
        return ret;
 }
@@ -2863,9 +2841,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        break;
                }
-               if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
-                       clear_extent_bits(&fs_info->excluded_extents, start,
-                                         end, EXTENT_UPTODATE);
 
                if (btrfs_test_opt(fs_info, DISCARD_SYNC))
                        ret = btrfs_discard_extent(fs_info, start,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index e33a65b..a33bca9 100644
@@ -1150,6 +1150,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
                return PTR_ERR(trans);
 
        set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+       set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
        free_space_root = btrfs_create_tree(trans,
                                            BTRFS_FREE_SPACE_TREE_OBJECTID);
        if (IS_ERR(free_space_root)) {
@@ -1171,11 +1172,18 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
        btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
        btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
        clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+       ret = btrfs_commit_transaction(trans);
 
-       return btrfs_commit_transaction(trans);
+       /*
+        * Now that we've committed the transaction any reading of our commit
+        * root will be safe, so we can cache from the free space tree now.
+        */
+       clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+       return ret;
 
 abort:
        clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+       clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
        btrfs_abort_transaction(trans, ret);
        btrfs_end_transaction(trans);
        return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0a6de85..d6c24c8 100644
@@ -433,7 +433,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
 
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
-       btrfs_device_data_ordered_init(dev, fs_info);
+       btrfs_device_data_ordered_init(dev);
        INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        extent_io_tree_init(fs_info, &dev->alloc_state,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1997a46..c43663d 100644
@@ -39,10 +39,10 @@ struct btrfs_io_geometry {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 #include <linux/seqlock.h>
 #define __BTRFS_NEED_DEVICE_DATA_ORDERED
-#define btrfs_device_data_ordered_init(device, info)                           \
-       seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex)
+#define btrfs_device_data_ordered_init(device) \
+       seqcount_init(&device->data_seqcount)
 #else
-#define btrfs_device_data_ordered_init(device, info) do { } while (0)
+#define btrfs_device_data_ordered_init(device) do { } while (0)
 #endif
 
 #define BTRFS_DEV_STATE_WRITEABLE      (0)
@@ -76,8 +76,7 @@ struct btrfs_device {
        blk_status_t last_flush_error;
 
 #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
-       /* A seqcount_t with associated chunk_mutex (for lockdep) */
-       seqcount_mutex_t data_seqcount;
+       seqcount_t data_seqcount;
 #endif
 
        /* the internal btrfs device id */
@@ -168,9 +167,11 @@ btrfs_device_get_##name(const struct btrfs_device *dev)                    \
 static inline void                                                     \
 btrfs_device_set_##name(struct btrfs_device *dev, u64 size)            \
 {                                                                      \
+       preempt_disable();                                              \
        write_seqcount_begin(&dev->data_seqcount);                      \
        dev->name = size;                                               \
        write_seqcount_end(&dev->data_seqcount);                        \
+       preempt_enable();                                               \
 }
 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
 #define BTRFS_DEVICE_GETSET_FUNCS(name)                                        \