goto err;
}
- btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
- dir_ino);
- btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index);
+ /*
+ * If we are in a rename context, we don't need to update anything in the
+ * log. That will be done later during the rename by btrfs_log_new_name().
+ * Besides that, doing it here would only cause extra unncessary btree
+ * operations on the log tree, increasing latency for applications.
+ */
+ if (!rename_ctx) {
+ btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+ dir_ino);
+ btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+ index);
+ }
/*
* If we have a pending delayed iput we could end up with the final iput
u64 new_idx = 0;
int ret;
int ret2;
- bool root_log_pinned = false;
- bool dest_log_pinned = false;
bool need_abort = false;
/*
BTRFS_I(new_inode), 1);
}
- /*
- * Now pin the logs of the roots. We do it to ensure that no other task
- * can sync the logs while we are in progress with the rename, because
- * that could result in an inconsistency in case any of the inodes that
- * are part of this rename operation were logged before.
- *
- * We pin the logs even if at this precise moment none of the inodes was
- * logged before. This is because right after we checked for that, some
- * other task fsyncing some other inode not involved with this rename
- * operation could log that one of our inodes exists.
- *
- * We don't need to pin the logs before the above calls to
- * btrfs_insert_inode_ref(), since those don't ever need to change a log.
- */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_pin_log_trans(root);
- root_log_pinned = true;
- }
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_pin_log_trans(dest);
- dest_log_pinned = true;
- }
-
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
if (new_inode->i_nlink == 1)
BTRFS_I(new_inode)->dir_index = new_idx;
- if (root_log_pinned) {
+ /*
+ * Now pin the logs of the roots. We do it to ensure that no other task
+ * can sync the logs while we are in progress with the rename, because
+ * that could result in an inconsistency in case any of the inodes that
+ * are part of this rename operation were logged before.
+ */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_pin_log_trans(root);
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_pin_log_trans(dest);
+
+ /* Do the log updates for all inodes. */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
old_rename_ctx.index, new_dentry->d_parent);
- btrfs_end_log_trans(root);
- root_log_pinned = false;
- }
- if (dest_log_pinned) {
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
new_rename_ctx.index, old_dentry->d_parent);
+
+ /* Now unpin the logs. */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_end_log_trans(root);
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_end_log_trans(dest);
- dest_log_pinned = false;
- }
out_fail:
- /*
- * If we have pinned a log and an error happened, we unpin tasks
- * trying to sync the log and force them to fallback to a transaction
- * commit if the log currently contains any of the inodes involved in
- * this rename operation (to ensure we do not persist a log with an
- * inconsistent state for any of these inodes or leading to any
- * inconsistencies when replayed). If the transaction was aborted, the
- * abortion reason is propagated to userspace when attempting to commit
- * the transaction. If the log does not contain any of these inodes, we
- * allow the tasks to sync it.
- */
- if (ret && (root_log_pinned || dest_log_pinned)) {
- if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))
- btrfs_set_log_full_commit(trans);
-
- if (root_log_pinned) {
- btrfs_end_log_trans(root);
- root_log_pinned = false;
- }
- if (dest_log_pinned) {
- btrfs_end_log_trans(dest);
- dest_log_pinned = false;
- }
- }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
int ret;
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
- bool log_pinned = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
- /*
- * Now pin the log. We do it to ensure that no other task can
- * sync the log while we are in progress with the rename, as
- * that could result in an inconsistency in case any of the
- * inodes that are part of this rename operation were logged
- * before.
- *
- * We pin the log even if at this precise moment none of the
- * inodes was logged before. This is because right after we
- * checked for that, some other task fsyncing some other inode
- * not involved with this rename operation could log that one of
- * our inodes exists.
- *
- * We don't need to pin the logs before the above call to
- * btrfs_insert_inode_ref(), since that does not need to change
- * a log.
- */
- btrfs_pin_log_trans(root);
- log_pinned = true;
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
- if (log_pinned) {
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
rename_ctx.index, new_dentry->d_parent);
- btrfs_end_log_trans(root);
- log_pinned = false;
- }
if (flags & RENAME_WHITEOUT) {
ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
}
}
out_fail:
- /*
- * If we have pinned the log and an error happened, we unpin tasks
- * trying to sync the log and force them to fallback to a transaction
- * commit if the log currently contains any of the inodes involved in
- * this rename operation (to ensure we do not persist a log with an
- * inconsistent state for any of these inodes or leading to any
- * inconsistencies when replayed). If the transaction was aborted, the
- * abortion reason is propagated to userspace when attempting to commit
- * the transaction. If the log does not contain any of these inodes, we
- * allow the tasks to sync it.
- */
- if (ret && log_pinned) {
- if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
- (new_inode &&
- btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
- btrfs_set_log_full_commit(trans);
-
- btrfs_end_log_trans(root);
- log_pinned = false;
- }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
u64 old_dir_index, struct dentry *parent)
{
struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
+ struct btrfs_root *root = inode->root;
struct btrfs_log_ctx ctx;
+ bool log_pinned = false;
+ int ret = 0;
/*
* this will force the logging code to walk the dentry chain
if (old_dir && old_dir->logged_trans == trans->transid) {
struct btrfs_root *log = old_dir->root->log_root;
struct btrfs_path *path;
- int ret;
ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+ /*
+ * We have two inodes to update in the log, the old directory and
+ * the inode that got renamed, so we must pin the log to prevent
+ * anyone from syncing the log until we have updated both inodes
+ * in the log.
+ */
+ log_pinned = true;
+ btrfs_pin_log_trans(root);
+
path = btrfs_alloc_path();
if (!path) {
- btrfs_set_log_full_commit(trans);
- return;
+ ret = -ENOMEM;
+ goto out;
}
/*
mutex_unlock(&old_dir->log_mutex);
btrfs_free_path(path);
- if (ret < 0) {
- btrfs_set_log_full_commit(trans);
- return;
- }
+ if (ret < 0)
+ goto out;
}
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
* inconsistent state after a rename operation.
*/
btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
+out:
+ if (log_pinned) {
+ /*
+ * If an error happened mark the log for a full commit because
+ * it's not consistent and up to date. Do it before unpinning the
+ * log, to avoid any races with someone else trying to commit it.
+ */
+ if (ret < 0)
+ btrfs_set_log_full_commit(trans);
+ btrfs_end_log_trans(root);
+ }
}