btrfs: zoned: reorder log node allocation on zoned filesystem
author Naohiro Aota <naohiro.aota@wdc.com>
Thu, 4 Feb 2021 10:22:20 +0000 (19:22 +0900)
committer David Sterba <dsterba@suse.com>
Tue, 9 Feb 2021 01:48:41 +0000 (02:48 +0100)
This is patch 3/3 to enable tree-log on zoned filesystems.

The nodes of "fs_info->log_root_tree" and the nodes of "root->log_root"
are not allocated in the same order in which they are written out. On a
zoned filesystem, where writes must be sequential, writing them out
therefore causes unaligned write errors.

Reorder the allocation by delaying allocation of the root node of
"fs_info->log_root_tree", so that the node buffers can go out sequentially
to devices.

Cc: Filipe Manana <fdmanana@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/disk-io.c
fs/btrfs/tree-log.c

index 84c6650..c2576c5 100644 (file)
@@ -1298,16 +1298,18 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *log_root;
-       int ret;
 
        log_root = alloc_log_tree(trans, fs_info);
        if (IS_ERR(log_root))
                return PTR_ERR(log_root);
 
-       ret = btrfs_alloc_log_tree_node(trans, log_root);
-       if (ret) {
-               btrfs_put_root(log_root);
-               return ret;
+       if (!btrfs_is_zoned(fs_info)) {
+               int ret = btrfs_alloc_log_tree_node(trans, log_root);
+
+               if (ret) {
+                       btrfs_put_root(log_root);
+                       return ret;
+               }
        }
 
        WARN_ON(fs_info->log_root_tree);
index 4e72794..fc04625 100644 (file)
@@ -3162,6 +3162,19 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
        root_log_ctx.log_transid = log_root_tree->log_transid;
 
+       if (btrfs_is_zoned(fs_info)) {
+               mutex_lock(&fs_info->tree_root->log_mutex);
+               if (!log_root_tree->node) {
+                       ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
+                       if (ret) {
+                               mutex_unlock(&fs_info->tree_log_mutex);
+                               mutex_unlock(&log_root_tree->log_mutex);
+                               goto out;
+                       }
+               }
+               mutex_unlock(&fs_info->tree_root->log_mutex);
+       }
+
        /*
         * Now we are safe to update the log_root_tree because we're under the
         * log_mutex, and we're a current writer so we're holding the commit
@@ -3320,12 +3333,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
                .process_func = process_one_buffer
        };
 
-       ret = walk_log_tree(trans, log, &wc);
-       if (ret) {
-               if (trans)
-                       btrfs_abort_transaction(trans, ret);
-               else
-                       btrfs_handle_fs_error(log->fs_info, ret, NULL);
+       if (log->node) {
+               ret = walk_log_tree(trans, log, &wc);
+               if (ret) {
+                       if (trans)
+                               btrfs_abort_transaction(trans, ret);
+                       else
+                               btrfs_handle_fs_error(log->fs_info, ret, NULL);
+               }
        }
 
        clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,