When a block group becomes unused and the cleaner kthread is currently
running, we can end up getting the current transaction aborted with error
-ENOENT when we try to commit the transaction, leading to the following
trace:
  [59779.258768] WARNING: CPU: 3 PID: 5990 at fs/btrfs/extent-tree.c:3740 btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs]()
  [59779.272594] BTRFS: Transaction aborted (error -2)
  (...)
  [59779.291137] Call Trace:
  [59779.291621]  [<
ffffffff812566f4>] dump_stack+0x4e/0x79
  [59779.292543]  [<
ffffffff8104d0a6>] warn_slowpath_common+0x9f/0xb8
  [59779.293435]  [<
ffffffffa04cb81f>] ? btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs]
  [59779.295000]  [<
ffffffff8104d107>] warn_slowpath_fmt+0x48/0x50
  [59779.296138]  [<
ffffffffa04c2721>] ? write_one_cache_group.isra.32+0x77/0x82 [btrfs]
  [59779.297663]  [<
ffffffffa04cb81f>] btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs]
  [59779.299141]  [<
ffffffffa0549b0d>] commit_cowonly_roots+0x1de/0x261 [btrfs]
  [59779.300359]  [<
ffffffffa04dd5b6>] btrfs_commit_transaction+0x4c4/0x99c [btrfs]
  [59779.301805]  [<
ffffffffa04b5df4>] btrfs_sync_fs+0x145/0x1ad [btrfs]
  [59779.302893]  [<
ffffffff81196634>] sync_filesystem+0x7f/0x93
  (...)
  [59779.318186] ---[ end trace 
577e2daff90da33a ]---
The following diagram illustrates a sequence of steps leading to this
problem:
       CPU 1                                             CPU 2
                           <at transaction N>
                                                        adds bg A to list
                                                        fs_info->unused_bgs
                                                        adds bg B to list
                                                        fs_info->unused_bgs
                           <transaction kthread
                            commits transaction N
                            and wakes up the
                            cleaner kthread>
  cleaner kthread
    delete_unused_bgs()
      sees bg A in list
      fs_info->unused_bgs
      btrfs_start_transaction()
                           <transaction N + 1 starts>
      deletes bg A
                                                        update_block_group(bg C)
                                                          --> adds bg C to list
                                                              fs_info->unused_bgs
      deletes bg B
      sees bg C in the list
      fs_info->unused_bgs
      btrfs_remove_chunk(bg C)
        btrfs_remove_block_group(bg C)
          --> checks if the block group
              is in a dirty list, and
              because it isn't now, it
              does nothing
          --> the block group item
              is deleted from the
              extent tree
                                                          --> adds bg C to list
                                                              transaction->dirty_bgs
                                                         some task calls
                                                         btrfs_commit_transaction(t N + 1)
                                                           commit_cowonly_roots()
                                                             btrfs_write_dirty_block_groups()
                                                               --> sees bg C in cur_trans->dirty_bgs
                                                               --> calls write_one_cache_group()
                                                                   which returns -ENOENT because
                                                                   it did not find the block group
                                                                   item in the extent tree
                                                               --> transaction aborte with -ENOENT
                                                                   because write_one_cache_group()
                                                                   returned that error
So fix this by adding a block group to the list of dirty block groups
before adding it to the list of unused block groups.
This happened on a stress test using fsstress plus concurrent calls to
fallocate 20G and truncate (releasing part of the space allocated with
fallocate).
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
                        set_extent_dirty(info->pinned_extents,
                                         bytenr, bytenr + num_bytes - 1,
                                         GFP_NOFS | __GFP_NOFAIL);
-                       /*
-                        * No longer have used bytes in this block group, queue
-                        * it for deletion.
-                        */
-                       if (old_val == 0) {
-                               spin_lock(&info->unused_bgs_lock);
-                               if (list_empty(&cache->bg_list)) {
-                                       btrfs_get_block_group(cache);
-                                       list_add_tail(&cache->bg_list,
-                                                     &info->unused_bgs);
-                               }
-                               spin_unlock(&info->unused_bgs_lock);
-                       }
                }
 
                spin_lock(&trans->transaction->dirty_bgs_lock);
                }
                spin_unlock(&trans->transaction->dirty_bgs_lock);
 
+               /*
+                * No longer have used bytes in this block group, queue it for
+                * deletion. We do this after adding the block group to the
+                * dirty list to avoid races between cleaner kthread and space
+                * cache writeout.
+                */
+               if (!alloc && old_val == 0) {
+                       spin_lock(&info->unused_bgs_lock);
+                       if (list_empty(&cache->bg_list)) {
+                               btrfs_get_block_group(cache);
+                               list_add_tail(&cache->bg_list,
+                                             &info->unused_bgs);
+                       }
+                       spin_unlock(&info->unused_bgs_lock);
+               }
+
                btrfs_put_block_group(cache);
                total -= num_bytes;
                bytenr += num_bytes;