Merge tag 'for-linus-20190524' of git://git.kernel.dk/linux-block
[linux-2.6-microblaze.git] / fs / btrfs / tree-log.c
index 561884f..6c47f6e 100644 (file)
@@ -139,7 +139,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
        mutex_lock(&root->log_mutex);
 
        if (root->log_root) {
-               if (btrfs_need_log_full_commit(fs_info, trans)) {
+               if (btrfs_need_log_full_commit(trans)) {
                        ret = -EAGAIN;
                        goto out;
                }
@@ -225,6 +225,17 @@ void btrfs_end_log_trans(struct btrfs_root *root)
        }
 }
 
+/*
+ * Write out the page cache range that backs @buf.
+ *
+ * The byte range [buf->start, buf->start + buf->len - 1] of the address
+ * space owning the buffer's first page is passed to
+ * filemap_fdatawrite_range(); its return value is handed back unchanged.
+ */
+static int btrfs_write_tree_block(struct extent_buffer *buf)
+{
+       struct address_space *mapping = buf->pages[0]->mapping;
+
+       return filemap_fdatawrite_range(mapping, buf->start,
+                                       buf->start + buf->len - 1);
+}
+
+/*
+ * Wait on the page cache range that backs @buf.
+ *
+ * Covers the byte range [buf->start, buf->start + buf->len - 1] of the
+ * address space owning the buffer's first page. The status returned by
+ * filemap_fdatawait_range() is not propagated to the caller.
+ */
+static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
+{
+       struct address_space *mapping = buf->pages[0]->mapping;
+
+       filemap_fdatawait_range(mapping, buf->start,
+                               buf->start + buf->len - 1);
+}
 
 /*
  * the walk control struct is used to pass state down the chain when
@@ -304,7 +315,7 @@ static int process_one_buffer(struct btrfs_root *log,
 
        if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
                if (wc->pin && btrfs_header_level(eb) == 0)
-                       ret = btrfs_exclude_logged_extents(fs_info, eb);
+                       ret = btrfs_exclude_logged_extents(eb);
                if (wc->write)
                        btrfs_write_tree_block(eb);
                if (wc->wait)
@@ -333,7 +344,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
                                   struct extent_buffer *eb, int slot,
                                   struct btrfs_key *key)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
        u32 item_size;
        u64 saved_i_size = 0;
@@ -454,10 +464,9 @@ insert:
                found_size = btrfs_item_size_nr(path->nodes[0],
                                                path->slots[0]);
                if (found_size > item_size)
-                       btrfs_truncate_item(fs_info, path, item_size, 1);
+                       btrfs_truncate_item(path, item_size, 1);
                else if (found_size < item_size)
-                       btrfs_extend_item(fs_info, path,
-                                         item_size - found_size);
+                       btrfs_extend_item(path, item_size - found_size);
        } else if (ret) {
                return ret;
        }
@@ -694,9 +703,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                        goto out;
 
                if (ins.objectid > 0) {
+                       struct btrfs_ref ref = { 0 };
                        u64 csum_start;
                        u64 csum_end;
                        LIST_HEAD(ordered_sums);
+
                        /*
                         * is this extent already allocated in the extent
                         * allocation tree?  If so, just add a reference
@@ -704,10 +715,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                        ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
                                                ins.offset);
                        if (ret == 0) {
-                               ret = btrfs_inc_extent_ref(trans, root,
-                                               ins.objectid, ins.offset,
-                                               0, root->root_key.objectid,
+                               btrfs_init_generic_ref(&ref,
+                                               BTRFS_ADD_DELAYED_REF,
+                                               ins.objectid, ins.offset, 0);
+                               btrfs_init_data_ref(&ref,
+                                               root->root_key.objectid,
                                                key->objectid, offset);
+                               ret = btrfs_inc_extent_ref(trans, &ref);
                                if (ret)
                                        goto out;
                        } else {
@@ -2725,7 +2739,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking_write(next);
-                                       clean_tree_block(fs_info, next);
+                                       btrfs_clean_tree_block(next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                } else {
@@ -2809,7 +2823,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking_write(next);
-                                       clean_tree_block(fs_info, next);
+                                       btrfs_clean_tree_block(next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                } else {
@@ -2891,7 +2905,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                        if (trans) {
                                btrfs_tree_lock(next);
                                btrfs_set_lock_blocking_write(next);
-                               clean_tree_block(fs_info, next);
+                               btrfs_clean_tree_block(next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
                        } else {
@@ -3066,7 +3080,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        }
 
        /* bail out if we need to do a full commit */
-       if (btrfs_need_log_full_commit(fs_info, trans)) {
+       if (btrfs_need_log_full_commit(trans)) {
                ret = -EAGAIN;
                mutex_unlock(&root->log_mutex);
                goto out;
@@ -3085,7 +3099,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        if (ret) {
                blk_finish_plug(&plug);
                btrfs_abort_transaction(trans, ret);
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                mutex_unlock(&root->log_mutex);
                goto out;
        }
@@ -3127,7 +3141,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                        list_del_init(&root_log_ctx.list);
 
                blk_finish_plug(&plug);
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
 
                if (ret != -ENOSPC) {
                        btrfs_abort_transaction(trans, ret);
@@ -3173,7 +3187,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         * now that we've moved on to the tree of log tree roots,
         * check the full commit flag again
         */
-       if (btrfs_need_log_full_commit(fs_info, trans)) {
+       if (btrfs_need_log_full_commit(trans)) {
                blk_finish_plug(&plug);
                btrfs_wait_tree_log_extents(log, mark);
                mutex_unlock(&log_root_tree->log_mutex);
@@ -3186,7 +3200,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                                         EXTENT_DIRTY | EXTENT_NEW);
        blk_finish_plug(&plug);
        if (ret) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                btrfs_abort_transaction(trans, ret);
                mutex_unlock(&log_root_tree->log_mutex);
                goto out_wake_log_root;
@@ -3196,7 +3210,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                ret = btrfs_wait_tree_log_extents(log_root_tree,
                                                  EXTENT_NEW | EXTENT_DIRTY);
        if (ret) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                mutex_unlock(&log_root_tree->log_mutex);
                goto out_wake_log_root;
        }
@@ -3218,7 +3232,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         */
        ret = write_all_supers(fs_info, 1);
        if (ret) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                btrfs_abort_transaction(trans, ret);
                goto out_wake_log_root;
        }
@@ -3422,7 +3436,7 @@ fail:
 out_unlock:
        mutex_unlock(&dir->log_mutex);
        if (ret == -ENOSPC) {
-               btrfs_set_log_full_commit(root->fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = 0;
        } else if (ret < 0)
                btrfs_abort_transaction(trans, ret);
@@ -3438,7 +3452,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
                               const char *name, int name_len,
                               struct btrfs_inode *inode, u64 dirid)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *log;
        u64 index;
        int ret;
@@ -3456,7 +3469,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
                                  dirid, &index);
        mutex_unlock(&inode->log_mutex);
        if (ret == -ENOSPC) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = 0;
        } else if (ret < 0 && ret != -ENOENT)
                btrfs_abort_transaction(trans, ret);
@@ -4169,6 +4182,7 @@ fill_holes:
                                                               *last_extent, 0,
                                                               0, len, 0, len,
                                                               0, 0, 0);
+                               *last_extent += len;
                        }
                }
        }
@@ -5442,7 +5456,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
                 * Make sure any commits to the log are forced to be full
                 * commits.
                 */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = true;
        }
        mutex_unlock(&inode->log_mutex);
@@ -5819,6 +5833,190 @@ out:
        return ret;
 }
 
+/*
+ * Log the chain of ancestor directories reachable from one inode ref item.
+ *
+ * On entry @path is expected to point at an inode ref item of @root. The
+ * offset of that item's key is used as the inode number of the parent
+ * directory. Every ancestor whose generation is newer than the last
+ * committed transaction is logged in LOG_INODE_EXISTS mode. The walk then
+ * moves on through the first inode ref item found for that ancestor and
+ * stops after the inode BTRFS_FIRST_FREE_OBJECTID has been handled.
+ *
+ * The path is released at the start of every iteration, before the inode
+ * lookup. It is not released on the error returns that follow a tree search.
+ *
+ * Returns 0 on success, -ENOENT when no inode ref item is found for an
+ * ancestor, or the error from btrfs_iget(), btrfs_log_inode(),
+ * btrfs_search_slot() or btrfs_next_leaf().
+ */
+static int log_new_ancestors(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_path *path,
+                            struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_key ref_key;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &ref_key, path->slots[0]);
+
+       for (;;) {
+               struct btrfs_fs_info *fs_info = root->fs_info;
+               const u64 last_committed = fs_info->last_trans_committed;
+               struct btrfs_key key;
+               struct inode *ancestor;
+               int ret = 0;
+
+               btrfs_release_path(path);
+
+               /* The ref key's offset names the parent directory inode. */
+               key.objectid = ref_key.offset;
+               key.type = BTRFS_INODE_ITEM_KEY;
+               key.offset = 0;
+               ancestor = btrfs_iget(fs_info->sb, &key, root, NULL);
+               if (IS_ERR(ancestor))
+                       return PTR_ERR(ancestor);
+
+               if (BTRFS_I(ancestor)->generation > last_committed)
+                       ret = btrfs_log_inode(trans, root, BTRFS_I(ancestor),
+                                             LOG_INODE_EXISTS,
+                                             0, LLONG_MAX, ctx);
+               iput(ancestor);
+               if (ret)
+                       return ret;
+
+               /* Nothing above this inode is looked up. */
+               if (key.objectid == BTRFS_FIRST_FREE_OBJECTID)
+                       return 0;
+
+               /* Find an inode ref of this ancestor to continue upwards. */
+               key.type = BTRFS_INODE_REF_KEY;
+               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+               if (ret < 0)
+                       return ret;
+
+               if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       if (ret > 0)
+                               return -ENOENT;
+               }
+
+               btrfs_item_key_to_cpu(path->nodes[0], &ref_key, path->slots[0]);
+               if (ref_key.objectid != key.objectid ||
+                   ref_key.type != BTRFS_INODE_REF_KEY)
+                       return -ENOENT;
+       }
+}
+
+/*
+ * Log new ancestor directories by walking up the dentry hierarchy.
+ *
+ * Starting at @parent, each directory whose generation is newer than the
+ * last committed transaction is logged in LOG_INODE_EXISTS mode. The walk
+ * ends at the first dentry that is missing, negative, on another super
+ * block, in a different root than @inode, or a root dentry; it also ends on
+ * the first logging error.
+ *
+ * Only dentries obtained from dget_parent() are dput() here; the @parent
+ * passed in by the caller is never dropped by this function.
+ *
+ * Returns 0 on success or the error returned by btrfs_log_inode().
+ */
+static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
+                                 struct btrfs_inode *inode,
+                                 struct dentry *parent,
+                                 struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct super_block *sb = inode->vfs_inode.i_sb;
+       struct dentry *held = NULL;
+       int ret = 0;
+
+       while (parent && !d_really_is_negative(parent) && parent->d_sb == sb) {
+               inode = BTRFS_I(d_inode(parent));
+               if (inode->root != root)
+                       break;
+
+               if (inode->generation > fs_info->last_trans_committed) {
+                       ret = btrfs_log_inode(trans, root, inode,
+                                             LOG_INODE_EXISTS, 0, LLONG_MAX,
+                                             ctx);
+                       if (ret)
+                               break;
+               }
+               if (IS_ROOT(parent))
+                       break;
+
+               /* Step up one level, dropping the dentry held before it. */
+               parent = dget_parent(parent);
+               dput(held);
+               held = parent;
+       }
+       dput(held);
+
+       return ret;
+}
+
+static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
+                                struct btrfs_inode *inode,
+                                struct dentry *parent,
+                                struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_root *root = inode->root;
+       const u64 ino = btrfs_ino(inode);
+       struct btrfs_path *path;
+       struct btrfs_key search_key;
+       int ret;
+
+       /*
+        * Log the new ancestor directories of @inode, for every hard link it
+        * has. Returns 0 on success, -ENOMEM if no path could be allocated,
+        * -EMLINK if an extended reference item is found, or the error
+        * returned by a tree search or by log_new_ancestors().
+        *
+        * For a single hard link case, go through a fast path that does not
+        * need to iterate the fs/subvolume tree.
+        */
+       if (inode->vfs_inode.i_nlink < 2)
+               return log_new_ancestors_fast(trans, inode, parent, ctx);
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       search_key.objectid = ino;
+       search_key.type = BTRFS_INODE_REF_KEY;
+       search_key.offset = 0;
+again:
+       ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       if (ret == 0)
+               path->slots[0]++;
+
+       while (true) {
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+               struct btrfs_key found_key;
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       else if (ret > 0)
+                               break; /* presumably no further leaf; verify */
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+               if (found_key.objectid != ino ||
+                   found_key.type > BTRFS_INODE_EXTREF_KEY)
+                       break; /* past the ref/extref keys of this inode */
+
+               /*
+                * Don't deal with extended references because they are rare
+                * cases and too complex to deal with (we would need to keep
+                * track of which subitem we are processing for each item in
+                * this loop, etc). So just return some error to fallback to
+                * a transaction commit.
+                */
+               if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
+                       ret = -EMLINK;
+                       goto out;
+               }
+
+               /*
+                * Logging ancestors needs to do more searches on the fs/subvol
+                * tree, so it releases the path as needed to avoid deadlocks.
+                * Keep track of the last inode ref key and resume from that key
+                * after logging all new ancestors for the current hard link.
+                *
+                * When the search is restarted at the 'again' label with the
+                * saved key, a return value of 0 from btrfs_search_slot()
+                * (presumably an exact match on that key) makes the code
+                * there advance the slot, so the item just processed is not
+                * handled a second time.
+                */
+               memcpy(&search_key, &found_key, sizeof(search_key));
+
+               ret = log_new_ancestors(trans, root, path, ctx);
+               if (ret)
+                       goto out;
+               btrfs_release_path(path);
+               goto again;
+       }
+       ret = 0; /* leaving the loop through a break is not an error */
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
 /*
  * helper function around btrfs_log_inode to make sure newly created
  * parent directories also end up in the log.  A minimal inode and backref
@@ -5836,11 +6034,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct super_block *sb;
-       struct dentry *old_parent = NULL;
        int ret = 0;
        u64 last_committed = fs_info->last_trans_committed;
        bool log_dentries = false;
-       struct btrfs_inode *orig_inode = inode;
 
        sb = inode->vfs_inode.i_sb;
 
@@ -5946,56 +6142,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
         * and has a link count of 2.
         */
        if (inode->last_unlink_trans > last_committed) {
-               ret = btrfs_log_all_parents(trans, orig_inode, ctx);
+               ret = btrfs_log_all_parents(trans, inode, ctx);
                if (ret)
                        goto end_trans;
        }
 
-       /*
-        * If a new hard link was added to the inode in the current transaction
-        * and its link count is now greater than 1, we need to fallback to a
-        * transaction commit, otherwise we can end up not logging all its new
-        * parents for all the hard links. Here just from the dentry used to
-        * fsync, we can not visit the ancestor inodes for all the other hard
-        * links to figure out if any is new, so we fallback to a transaction
-        * commit (instead of adding a lot of complexity of scanning a btree,
-        * since this scenario is not a common use case).
-        */
-       if (inode->vfs_inode.i_nlink > 1 &&
-           inode->last_link_trans > last_committed) {
-               ret = -EMLINK;
+       ret = log_all_new_ancestors(trans, inode, parent, ctx);
+       if (ret)
                goto end_trans;
-       }
 
-       while (1) {
-               if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
-                       break;
-
-               inode = BTRFS_I(d_inode(parent));
-               if (root != inode->root)
-                       break;
-
-               if (inode->generation > last_committed) {
-                       ret = btrfs_log_inode(trans, root, inode,
-                                       LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
-                       if (ret)
-                               goto end_trans;
-               }
-               if (IS_ROOT(parent))
-                       break;
-
-               parent = dget_parent(parent);
-               dput(old_parent);
-               old_parent = parent;
-       }
        if (log_dentries)
-               ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+               ret = log_new_dir_dentries(trans, root, inode, ctx);
        else
                ret = 0;
 end_trans:
-       dput(old_parent);
        if (ret < 0) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = 1;
        }