btrfs: fix lockdep splat when enabling and disabling qgroups
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c0f350c..87bd37b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
+#include <linux/sched/mm.h>
 
 #include "ctree.h"
 #include "transaction.h"
@@ -497,13 +498,13 @@ next2:
                        break;
        }
 out:
+       btrfs_free_path(path);
        fs_info->qgroup_flags |= flags;
        if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
                clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
        else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
                 ret >= 0)
                ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
-       btrfs_free_path(path);
 
        if (ret < 0) {
                ulist_free(fs_info->qgroup_ulist);
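
The hunk above moves btrfs_free_path() in front of qgroup_rescan_init(). Freeing
the path drops the read locks held on quota tree leaves, so qgroup_rescan_lock
(taken inside qgroup_rescan_init()) is never acquired while leaf locks are held;
the rescan machinery takes them in the opposite order, which appears to be the
inversion lockdep complained about. In outline (illustrative, not the full
function body):

	btrfs_free_path(path);	/* drop quota tree leaf locks first */
	fs_info->qgroup_flags |= flags;
	/* ... */
	/* Safe now: takes fs_info->qgroup_rescan_lock with no leaf locks held. */
	ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
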
@@ -936,6 +937,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        struct btrfs_key found_key;
        struct btrfs_qgroup *qgroup = NULL;
        struct btrfs_trans_handle *trans = NULL;
+       struct ulist *ulist = NULL;
        int ret = 0;
        int slot;
 
@@ -943,8 +945,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        if (fs_info->quota_root)
                goto out;
 
-       fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
-       if (!fs_info->qgroup_ulist) {
+       ulist = ulist_alloc(GFP_KERNEL);
+       if (!ulist) {
                ret = -ENOMEM;
                goto out;
        }
@@ -952,6 +954,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        ret = btrfs_sysfs_add_qgroups(fs_info);
        if (ret < 0)
                goto out;
+
+       /*
+        * Unlock qgroup_ioctl_lock before starting the transaction. This is to
+        * avoid lock acquisition inversion problems (reported by lockdep) between
+        * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
+        * start a transaction.
+        * After starting the transaction, lock qgroup_ioctl_lock again and
+        * check if someone else created the quota root in the meantime. If so,
+        * just return success and release the transaction handle.
+        *
+        * Also we don't need to worry about getting an error if someone else
+        * calls btrfs_sysfs_add_qgroups() after we unlock, because that
+        * function returns 0 (success) when the sysfs entries already exist.
+        */
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
        /*
         * 1 for quota root item
         * 1 for BTRFS_QGROUP_STATUS item
@@ -961,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
         * would be a lot of overkill.
         */
        trans = btrfs_start_transaction(tree_root, 2);
+
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                trans = NULL;
                goto out;
        }
 
+       if (fs_info->quota_root)
+               goto out;
+
+       fs_info->qgroup_ulist = ulist;
+       ulist = NULL;
+
        /*
         * initially create the quota tree
         */
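
This is the heart of the fix, matching the comment above: btrfs_start_transaction()
takes the VFS freeze protection (sb_start_intwrite()), while other paths, for
example btrfs_qgroup_inherit() during snapshot creation, already hold a
transaction handle when they take qgroup_ioctl_lock. Starting the transaction
with the mutex held therefore creates the inverted lock order that lockdep
reports. The distilled pattern (a sketch; names match the kernel code, bodies
elided):

	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	/* May block on the freeze semaphores; the mutex must not be held. */
	trans = btrfs_start_transaction(tree_root, 2);

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	/* Re-check state that may have changed while the mutex was dropped. */
	if (fs_info->quota_root)
		goto out;	/* raced with another task enabling quotas */
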
@@ -1026,6 +1052,10 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
                if (found_key.type == BTRFS_ROOT_REF_KEY) {
+
+                       /* Release locks on tree_root before we access quota_root */
+                       btrfs_release_path(path);
+
                        ret = add_qgroup_item(trans, quota_root,
                                              found_key.offset);
                        if (ret) {
@@ -1044,6 +1074,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
                                btrfs_abort_transaction(trans, ret);
                                goto out_free_path;
                        }
+                       ret = btrfs_search_slot_for_read(tree_root, &found_key,
+                                                        path, 1, 0);
+                       if (ret < 0) {
+                               btrfs_abort_transaction(trans, ret);
+                               goto out_free_path;
+                       }
+                       if (ret > 0) {
+                               /*
+                                * Shouldn't happen, but in case it does we
+                                * don't need to call btrfs_next_item(), just
+                                * continue.
+                                */
+                               continue;
+                       }
                }
                ret = btrfs_next_item(tree_root, path);
                if (ret < 0) {
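
A related ordering issue is handled here for btree locks: `path` holds read
locks on tree_root leaves, and inserting items into quota_root while they are
held would add a tree_root -> quota_root lock dependency. So the path is
released before quota_root is touched, and the iteration position on tree_root
is re-established afterwards. In outline (error handling trimmed):

	/* Release locks on tree_root before we access quota_root. */
	btrfs_release_path(path);

	ret = add_qgroup_item(trans, quota_root, found_key.offset);
	/* ... set up the in-memory qgroup ... */

	/* Re-position on tree_root where the iteration left off. */
	ret = btrfs_search_slot_for_read(tree_root, &found_key, path, 1, 0);
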
@@ -1106,11 +1150,14 @@ out:
        if (ret) {
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
-               if (trans)
-                       btrfs_end_transaction(trans);
                btrfs_sysfs_del_qgroups(fs_info);
        }
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       if (ret && trans)
+               btrfs_end_transaction(trans);
+       else if (trans)
+               ret = btrfs_end_transaction(trans);
+       ulist_free(ulist);
        return ret;
 }
 
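
The exit path follows the same rule: the transaction must be ended without
qgroup_ioctl_lock held, so btrfs_end_transaction() moves below mutex_unlock().
The two branches preserve error semantics: on failure the earlier error in ret
is kept, and on success, which includes the case where another task won the
race and we merely release our handle, the return value of
btrfs_end_transaction() is propagated. Condensed:

	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	if (ret && trans)
		btrfs_end_transaction(trans);		/* keep the earlier error */
	else if (trans)
		ret = btrfs_end_transaction(trans);	/* surface end errors */
	ulist_free(ulist);	/* NULL if ownership moved to fs_info */
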
@@ -1123,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_root)
                goto out;
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
        /*
         * 1 For the root item
         *
         * We should also reserve enough items for the quota tree deletion in
         * btrfs_clean_quota_tree but this is not done.
+        *
+        * Also, we must always start a transaction without holding the mutex
+        * qgroup_ioctl_lock, see btrfs_quota_enable().
         */
        trans = btrfs_start_transaction(fs_info->tree_root, 1);
+
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
+               trans = NULL;
                goto out;
        }
 
+       if (!fs_info->quota_root)
+               goto out;
+
        clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
        btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
@@ -1149,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        ret = btrfs_clean_quota_tree(trans, quota_root);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
-               goto end_trans;
+               goto out;
        }
 
        ret = btrfs_del_root(trans, &quota_root->root_key);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
-               goto end_trans;
+               goto out;
        }
 
        list_del(&quota_root->dirty_list);
@@ -1167,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
 
        btrfs_put_root(quota_root);
 
-end_trans:
-       ret = btrfs_end_transaction(trans);
 out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       if (ret && trans)
+               btrfs_end_transaction(trans);
+       else if (trans)
+               ret = btrfs_end_transaction(trans);
+
        return ret;
 }
 
@@ -1306,13 +1366,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
+       unsigned int nofs_flag;
        int ret = 0;
 
        /* Check the level of src and dst first */
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;
 
+       /* We hold a transaction handle open, must do a NOFS allocation. */
+       nofs_flag = memalloc_nofs_save();
        tmp = ulist_alloc(GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
        if (!tmp)
                return -ENOMEM;
 
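
This hunk and the matching one in __del_qgroup_relation() below wrap
ulist_alloc() in a scoped NOFS section. With a transaction handle open, a plain
GFP_KERNEL allocation may enter direct reclaim and recurse back into the
filesystem, which can deadlock. The memalloc_nofs_save()/restore() scope is the
form recommended by Documentation/core-api/gfp_mask-from-fs-io.rst: the call
site keeps GFP_KERNEL and the scope downgrades it, also covering any
allocations ulist_alloc() performs internally. The idiom in isolation:

	unsigned int nofs_flag;

	/* All allocations in this scope implicitly behave as GFP_NOFS. */
	nofs_flag = memalloc_nofs_save();
	tmp = ulist_alloc(GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	if (!tmp)
		return -ENOMEM;
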
@@ -1369,10 +1433,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
        bool found = false;
+       unsigned int nofs_flag;
        int ret = 0;
        int ret2;
 
+       /* We hold a transaction handle open, must do a NOFS allocation. */
+       nofs_flag = memalloc_nofs_save();
        tmp = ulist_alloc(GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
        if (!tmp)
                return -ENOMEM;
 
@@ -2315,7 +2383,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
  * Update qgroup rfer/excl counters.
  * Rfer update is easy, codes can explain themselves.
  *
- * Excl update is tricky, the update is split into 2 part.
+ * Excl update is tricky, the update is split into 2 parts.
  * Part 1: Possible exclusive <-> sharing detect:
  *     |       A       |       !A      |
  *  -------------------------------------
@@ -3417,24 +3485,20 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
 {
        struct rb_node *node;
        struct rb_node *next;
-       struct ulist_node *entry = NULL;
+       struct ulist_node *entry;
        int ret = 0;
 
        node = reserved->range_changed.root.rb_node;
+       if (!node)
+               return 0;
        while (node) {
                entry = rb_entry(node, struct ulist_node, rb_node);
                if (entry->val < start)
                        node = node->rb_right;
-               else if (entry)
-                       node = node->rb_left;
                else
-                       break;
+                       node = node->rb_left;
        }
 
-       /* Empty changeset */
-       if (!entry)
-               return 0;
-
        if (entry->val > start && rb_prev(&entry->rb_node))
                entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
                                 rb_node);
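
The old loop could never reach its break: entry is assigned on every iteration,
so `else if (entry)` was always true once the tree was non-empty, and the
trailing `if (!entry)` check only caught the empty-tree case indirectly. The
rewrite checks for an empty tree up front and lets the descent run to
completion, leaving entry at the last node visited, a neighbor of where start
would be inserted, which the rb_prev() adjustment that follows relies on.
Condensed, the fixed search is:

	node = reserved->range_changed.root.rb_node;
	if (!node)
		return 0;	/* empty changeset */
	while (node) {
		entry = rb_entry(node, struct ulist_node, rb_node);
		/* Descend toward 'start'; entry ends on a neighboring node. */
		node = entry->val < start ? node->rb_right : node->rb_left;
	}
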
@@ -3498,6 +3562,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
 {
        struct btrfs_trans_handle *trans;
        int ret;
+       bool can_commit = true;
 
        /*
         * We don't want to run flush again and again, so if there is a running
@@ -3509,6 +3574,20 @@ static int try_flush_qgroup(struct btrfs_root *root)
                return 0;
        }
 
+       /*
+        * If the current process holds a transaction, we shouldn't flush, as we
+        * assume all space reservation happens before a transaction handle is
+        * held.
+        *
+        * But there are cases like btrfs_delayed_item_reserve_metadata() where
+        * we try to reserve space with one transaction handle already held.
+        * In that case we can't commit the transaction, but at least try to
+        * end it and hope the started data writes can free some space.
+        */
+       if (current->journal_info &&
+           current->journal_info != BTRFS_SEND_TRANS_STUB)
+               can_commit = false;
+
        ret = btrfs_start_delalloc_snapshot(root);
        if (ret < 0)
                goto out;
@@ -3520,7 +3599,10 @@ static int try_flush_qgroup(struct btrfs_root *root)
                goto out;
        }
 
-       ret = btrfs_commit_transaction(trans);
+       if (can_commit)
+               ret = btrfs_commit_transaction(trans);
+       else
+               ret = btrfs_end_transaction(trans);
 out:
        clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
        wake_up(&root->qgroup_flush_wait);
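
btrfs stores the transaction handle a task currently holds in
current->journal_info (BTRFS_SEND_TRANS_STUB is a placeholder used by send, not
a real handle). Committing requires waiting for all open handles to be
released, so committing while holding one would deadlock on our own handle,
hence the weaker fallback. The reduced decision:

	/* Holding a real handle? Then only end the transaction, else commit. */
	if (current->journal_info &&
	    current->journal_info != BTRFS_SEND_TRANS_STUB)
		ret = btrfs_end_transaction(trans);
	else
		ret = btrfs_commit_transaction(trans);
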