Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 May 2019 18:34:19 +0000 (11:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 May 2019 18:34:19 +0000 (11:34 -0700)
Pull btrfs updates from David Sterba:
 "This time the majority of changes are cleanups, though there's still a
  number of changes of user interest.

  User visible changes:

   - better read-time and write-time checks to catch errors early and
     before writing data to disk (to catch potential memory corruption
     in data that gets checksummed)

   - qgroups + metadata relocation: last speed-up patch in the series
     to address the slowness, there should be no overhead when comparing
     balance with and without qgroups

   - FIEMAP ioctl does not start a transaction unnecessarily, this can
     result in a speed up and less blocking due to IO

   - LOGICAL_INO (v1, v2) does not start a transaction unnecessarily,
     this can speed up the mentioned ioctl and scrub as well

   - fsync on files with many (but not too many) hardlinks is faster,
     with a finer decision on whether the links should be fsynced
     individually or completely

   - send tries harder to find ranges to clone

   - trim/discard will skip unallocated chunks that haven't been touched
     since the last mount

  Fixes:

   - send flushes delayed allocation before start, otherwise it could
     miss some changes in case of a very recent rw->ro switch of a
     subvolume

   - fix fallocate with qgroups that could lead to space accounting
     underflow, reported as a warning

   - trim/discard ioctl honours the requested range

   - starting send and dedupe on a subvolume at the same time will let
     only one of them succeed, this is to prevent changes that send
     could miss due to dedupe; both operations are restartable

  Core changes:

   - more tree-checker validations, errors reported by fuzzing tools:
      - device item
      - inode item
      - block group profiles

   - tracepoints for extent buffer locking

   - async cow preallocates memory to avoid errors happening too deep in
     the call chain

   - metadata reservations for delalloc reworked to better adapt in
     many-writers/low-space scenarios

   - improved space flushing logic for intense DIO vs buffered workloads

   - lots of cleanups
      - removed unused struct members
      - redundant argument removal
      - properties and xattrs
      - extent buffer locking
      - selftests
      - use common file type conversions
      - many-argument functions reduction"

* tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits)
  btrfs: Use kvmalloc for allocating compressed path context
  btrfs: Factor out common extent locking code in submit_compressed_extents
  btrfs: Set io_tree only once in submit_compressed_extents
  btrfs: Replace clear_extent_bit with unlock_extent
  btrfs: Make compress_file_range take only struct async_chunk
  btrfs: Remove fs_info from struct async_chunk
  btrfs: Rename async_cow to async_chunk
  btrfs: Preallocate chunks in cow_file_range_async
  btrfs: reserve delalloc metadata differently
  btrfs: track DIO bytes in flight
  btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop
  btrfs: delete unused function btrfs_set_prop_trans
  btrfs: start transaction in xattr_handler_set_prop
  btrfs: drop local copy of inode i_mode
  btrfs: drop old_fsflags in btrfs_ioctl_setflags
  btrfs: modify local copy of btrfs_inode flags
  btrfs: drop useless inode i_flags copy and restore
  btrfs: start transaction in btrfs_ioctl_setflags()
  btrfs: export btrfs_set_prop
  btrfs: refactor btrfs_set_props to validate externally
  ...

64 files changed:
fs/btrfs/acl.c
fs/btrfs/backref.c
fs/btrfs/btrfs_inode.h
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/dev-replace.c
fs/btrfs/dev-replace.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/free-space-tree.c
fs/btrfs/free-space-tree.h
fs/btrfs/inode-item.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/locking.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/print-tree.c
fs/btrfs/props.c
fs/btrfs/props.h
fs/btrfs/qgroup.c
fs/btrfs/ref-verify.c
fs/btrfs/ref-verify.h
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/btrfs-tests.h
fs/btrfs/tests/extent-buffer-tests.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/extent-map-tests.c
fs/btrfs/tests/free-space-tests.c
fs/btrfs/tests/free-space-tree-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-checker.c
fs/btrfs/tree-checker.h
fs/btrfs/tree-log.c
fs/btrfs/tree-log.h
fs/btrfs/uuid-tree.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/xattr.c
fs/btrfs/xattr.h
fs/btrfs/zstd.c
include/trace/events/btrfs.h
include/uapi/linux/btrfs_tree.h

index 5810463..a0af1b9 100644 (file)
@@ -93,7 +93,11 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
                        goto out;
        }
 
-       ret = btrfs_setxattr(trans, inode, name, value, size, 0);
+       if (trans)
+               ret = btrfs_setxattr(trans, inode, name, value, size, 0);
+       else
+               ret = btrfs_setxattr_trans(inode, name, value, size, 0);
+
 out:
        kfree(value);
 
index 11459fe..982152d 100644 (file)
@@ -791,7 +791,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
                        count = node->ref_mod * -1;
                        break;
                default:
-                       BUG_ON(1);
+                       BUG();
                }
                *total_refs += count;
                switch (node->type) {
@@ -1460,8 +1460,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
  * callers (such as fiemap) which want to know whether the extent is
  * shared but do not need a ref count.
  *
- * This attempts to allocate a transaction in order to account for
- * delayed refs, but continues on even when the alloc fails.
+ * This attempts to attach to the running transaction in order to account for
+ * delayed refs, but continues on even when no running transaction exists.
  *
  * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
  */
@@ -1484,13 +1484,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
        tmp = ulist_alloc(GFP_NOFS);
        roots = ulist_alloc(GFP_NOFS);
        if (!tmp || !roots) {
-               ulist_free(tmp);
-               ulist_free(roots);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out;
        }
 
-       trans = btrfs_join_transaction(root);
+       trans = btrfs_attach_transaction(root);
        if (IS_ERR(trans)) {
+               if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
+                       ret = PTR_ERR(trans);
+                       goto out;
+               }
                trans = NULL;
                down_read(&fs_info->commit_root_sem);
        } else {
@@ -1523,6 +1526,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
        } else {
                up_read(&fs_info->commit_root_sem);
        }
+out:
        ulist_free(tmp);
        ulist_free(roots);
        return ret;
@@ -1747,7 +1751,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
                else if (flags & BTRFS_EXTENT_FLAG_DATA)
                        *flags_ret = BTRFS_EXTENT_FLAG_DATA;
                else
-                       BUG_ON(1);
+                       BUG();
                return 0;
        }
 
@@ -1912,13 +1916,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
                        extent_item_objectid);
 
        if (!search_commit_root) {
-               trans = btrfs_join_transaction(fs_info->extent_root);
-               if (IS_ERR(trans))
-                       return PTR_ERR(trans);
+               trans = btrfs_attach_transaction(fs_info->extent_root);
+               if (IS_ERR(trans)) {
+                       if (PTR_ERR(trans) != -ENOENT &&
+                           PTR_ERR(trans) != -EROFS)
+                               return PTR_ERR(trans);
+                       trans = NULL;
+               }
+       }
+
+       if (trans)
                btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
-       } else {
+       else
                down_read(&fs_info->commit_root_sem);
-       }
 
        ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
                                   tree_mod_seq_elem.seq, &refs,
@@ -1951,7 +1961,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
        free_leaf_list(refs);
 out:
-       if (!search_commit_root) {
+       if (trans) {
                btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
                btrfs_end_transaction(trans);
        } else {
index 6f5d074..d5b4387 100644 (file)
@@ -147,12 +147,6 @@ struct btrfs_inode {
         */
        u64 last_unlink_trans;
 
-       /*
-        * Track the transaction id of the last transaction used to create a
-        * hard link for the inode. This is used by the log tree (fsync).
-        */
-       u64 last_link_trans;
-
        /*
         * Number of bytes outstanding that are going to need csums.  This is
         * used in ENOSPC accounting.
@@ -203,8 +197,6 @@ struct btrfs_inode {
        struct inode vfs_inode;
 };
 
-extern unsigned char btrfs_filetype_table[];
-
 static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
 {
        return container_of(inode, struct btrfs_inode, vfs_inode);
index 4f2a8ae..1463e14 100644 (file)
@@ -251,7 +251,7 @@ static void end_compressed_bio_write(struct bio *bio)
        cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
        btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
                        cb->start, cb->start + cb->len - 1,
-                       bio->bi_status ? BLK_STS_OK : BLK_STS_NOTSUPP);
+                       bio->bi_status == BLK_STS_OK);
        cb->compressed_pages[0]->mapping = NULL;
 
        end_compressed_writeback(inode, cb);
index 324df36..5df76c1 100644 (file)
@@ -21,11 +21,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                      const struct btrfs_key *ins_key, struct btrfs_path *path,
                      int data_size, int extend);
 static int push_node_left(struct btrfs_trans_handle *trans,
-                         struct btrfs_fs_info *fs_info,
                          struct extent_buffer *dst,
                          struct extent_buffer *src, int empty);
 static int balance_node_right(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *fs_info,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
@@ -726,11 +724,11 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
        return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static noinline int
-tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
                     struct extent_buffer *src, unsigned long dst_offset,
                     unsigned long src_offset, int nr_items)
 {
+       struct btrfs_fs_info *fs_info = dst->fs_info;
        int ret = 0;
        struct tree_mod_elem **tm_list = NULL;
        struct tree_mod_elem **tm_list_add, **tm_list_rem;
@@ -950,7 +948,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                if (new_flags != 0) {
                        int level = btrfs_header_level(buf);
 
-                       ret = btrfs_set_disk_extent_flags(trans, fs_info,
+                       ret = btrfs_set_disk_extent_flags(trans,
                                                          buf->start,
                                                          buf->len,
                                                          new_flags, level, 0);
@@ -970,7 +968,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        if (ret)
                                return ret;
                }
-               clean_tree_block(fs_info, buf);
+               btrfs_clean_tree_block(buf);
                *last_ref = 1;
        }
        return 0;
@@ -1792,9 +1790,8 @@ static void root_sub_used(struct btrfs_root *root, u32 size)
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  */
-static noinline struct extent_buffer *
-read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
-              int slot)
+static noinline struct extent_buffer *read_node_slot(
+                               struct extent_buffer *parent, int slot)
 {
        int level = btrfs_header_level(parent);
        struct extent_buffer *eb;
@@ -1806,7 +1803,7 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
        BUG_ON(level == 0);
 
        btrfs_node_key_to_cpu(parent, &first_key, slot);
-       eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
+       eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
                             btrfs_node_ptr_generation(parent, slot),
                             level - 1, &first_key);
        if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
@@ -1863,7 +1860,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        return 0;
 
                /* promote the child to a root */
-               child = read_node_slot(fs_info, mid, 0);
+               child = read_node_slot(mid, 0);
                if (IS_ERR(child)) {
                        ret = PTR_ERR(child);
                        btrfs_handle_fs_error(fs_info, ret, NULL);
@@ -1888,7 +1885,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
                path->locks[level] = 0;
                path->nodes[level] = NULL;
-               clean_tree_block(fs_info, mid);
+               btrfs_clean_tree_block(mid);
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);
@@ -1903,7 +1900,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
                return 0;
 
-       left = read_node_slot(fs_info, parent, pslot - 1);
+       left = read_node_slot(parent, pslot - 1);
        if (IS_ERR(left))
                left = NULL;
 
@@ -1918,7 +1915,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                }
        }
 
-       right = read_node_slot(fs_info, parent, pslot + 1);
+       right = read_node_slot(parent, pslot + 1);
        if (IS_ERR(right))
                right = NULL;
 
@@ -1936,7 +1933,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        /* first, try to make some room in the middle buffer */
        if (left) {
                orig_slot += btrfs_header_nritems(left);
-               wret = push_node_left(trans, fs_info, left, mid, 1);
+               wret = push_node_left(trans, left, mid, 1);
                if (wret < 0)
                        ret = wret;
        }
@@ -1945,11 +1942,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
         * then try to empty the right most buffer into the middle
         */
        if (right) {
-               wret = push_node_left(trans, fs_info, mid, right, 1);
+               wret = push_node_left(trans, mid, right, 1);
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
-                       clean_tree_block(fs_info, right);
+                       btrfs_clean_tree_block(right);
                        btrfs_tree_unlock(right);
                        del_ptr(root, path, level + 1, pslot + 1);
                        root_sub_used(root, right->len);
@@ -1981,20 +1978,20 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        btrfs_handle_fs_error(fs_info, ret, NULL);
                        goto enospc;
                }
-               wret = balance_node_right(trans, fs_info, mid, left);
+               wret = balance_node_right(trans, mid, left);
                if (wret < 0) {
                        ret = wret;
                        goto enospc;
                }
                if (wret == 1) {
-                       wret = push_node_left(trans, fs_info, left, mid, 1);
+                       wret = push_node_left(trans, left, mid, 1);
                        if (wret < 0)
                                ret = wret;
                }
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
-               clean_tree_block(fs_info, mid);
+               btrfs_clean_tree_block(mid);
                btrfs_tree_unlock(mid);
                del_ptr(root, path, level + 1, pslot);
                root_sub_used(root, mid->len);
@@ -2078,7 +2075,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
        if (!parent)
                return 1;
 
-       left = read_node_slot(fs_info, parent, pslot - 1);
+       left = read_node_slot(parent, pslot - 1);
        if (IS_ERR(left))
                left = NULL;
 
@@ -2098,8 +2095,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                        if (ret)
                                wret = 1;
                        else {
-                               wret = push_node_left(trans, fs_info,
-                                                     left, mid, 0);
+                               wret = push_node_left(trans, left, mid, 0);
                        }
                }
                if (wret < 0)
@@ -2131,7 +2127,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                btrfs_tree_unlock(left);
                free_extent_buffer(left);
        }
-       right = read_node_slot(fs_info, parent, pslot + 1);
+       right = read_node_slot(parent, pslot + 1);
        if (IS_ERR(right))
                right = NULL;
 
@@ -2154,8 +2150,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                        if (ret)
                                wret = 1;
                        else {
-                               wret = balance_node_right(trans, fs_info,
-                                                         right, mid);
+                               wret = balance_node_right(trans, right, mid);
                        }
                }
                if (wret < 0)
@@ -2416,6 +2411,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
        if (tmp) {
                /* first we do an atomic uptodate check */
                if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
+                       /*
+                        * Do extra check for first_key, eb can be stale due to
+                        * being cached, read from scrub, or have multiple
+                        * parents (shared tree blocks).
+                        */
+                       if (btrfs_verify_level_key(tmp,
+                                       parent_level - 1, &first_key, gen)) {
+                               free_extent_buffer(tmp);
+                               return -EUCLEAN;
+                       }
                        *eb_ret = tmp;
                        return 0;
                }
@@ -2706,7 +2711,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                      const struct btrfs_key *key, struct btrfs_path *p,
                      int ins_len, int cow)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *b;
        int slot;
        int ret;
@@ -2904,7 +2908,7 @@ cow_done:
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
-                           btrfs_leaf_free_space(fs_info, b) < ins_len) {
+                           btrfs_leaf_free_space(b) < ins_len) {
                                if (write_lock_level < 1) {
                                        write_lock_level = 1;
                                        btrfs_release_path(p);
@@ -3181,11 +3185,31 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
        slot = path->slots[0];
        if (slot > 0) {
                btrfs_item_key(eb, &disk_key, slot - 1);
-               BUG_ON(comp_keys(&disk_key, new_key) >= 0);
+               if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
+                       btrfs_crit(fs_info,
+               "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+                                  slot, btrfs_disk_key_objectid(&disk_key),
+                                  btrfs_disk_key_type(&disk_key),
+                                  btrfs_disk_key_offset(&disk_key),
+                                  new_key->objectid, new_key->type,
+                                  new_key->offset);
+                       btrfs_print_leaf(eb);
+                       BUG();
+               }
        }
        if (slot < btrfs_header_nritems(eb) - 1) {
                btrfs_item_key(eb, &disk_key, slot + 1);
-               BUG_ON(comp_keys(&disk_key, new_key) <= 0);
+               if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
+                       btrfs_crit(fs_info,
+               "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+                                  slot, btrfs_disk_key_objectid(&disk_key),
+                                  btrfs_disk_key_type(&disk_key),
+                                  btrfs_disk_key_offset(&disk_key),
+                                  new_key->objectid, new_key->type,
+                                  new_key->offset);
+                       btrfs_print_leaf(eb);
+                       BUG();
+               }
        }
 
        btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3203,10 +3227,10 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
  * error, and > 0 if there was no room in the left hand block.
  */
 static int push_node_left(struct btrfs_trans_handle *trans,
-                         struct btrfs_fs_info *fs_info,
                          struct extent_buffer *dst,
                          struct extent_buffer *src, int empty)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int push_items = 0;
        int src_nritems;
        int dst_nritems;
@@ -3239,8 +3263,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
        } else
                push_items = min(src_nritems - 8, push_items);
 
-       ret = tree_mod_log_eb_copy(fs_info, dst, src, dst_nritems, 0,
-                                  push_items);
+       ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                return ret;
@@ -3278,10 +3301,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
  * this will  only push up to 1/2 the contents of the left node over
  */
 static int balance_node_right(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *fs_info,
                              struct extent_buffer *dst,
                              struct extent_buffer *src)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int push_items = 0;
        int max_push;
        int src_nritems;
@@ -3315,8 +3338,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                                      (dst_nritems) *
                                      sizeof(struct btrfs_key_ptr));
 
-       ret = tree_mod_log_eb_copy(fs_info, dst, src, 0,
-                                  src_nritems - push_items, push_items);
+       ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
+                                  push_items);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                return ret;
@@ -3404,7 +3427,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
  * blocknr is the block the key points to.
  */
 static void insert_ptr(struct btrfs_trans_handle *trans,
-                      struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+                      struct btrfs_path *path,
                       struct btrfs_disk_key *key, u64 bytenr,
                       int slot, int level)
 {
@@ -3417,7 +3440,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
        lower = path->nodes[level];
        nritems = btrfs_header_nritems(lower);
        BUG_ON(slot > nritems);
-       BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info));
+       BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
        if (slot != nritems) {
                if (level) {
                        ret = tree_mod_log_insert_move(lower, slot + 1, slot,
@@ -3501,7 +3524,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        root_add_used(root, fs_info->nodesize);
        ASSERT(btrfs_header_level(c) == level);
 
-       ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
+       ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                return ret;
@@ -3517,7 +3540,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(c);
        btrfs_mark_buffer_dirty(split);
 
-       insert_ptr(trans, fs_info, path, &disk_key, split->start,
+       insert_ptr(trans, path, &disk_key, split->start,
                   path->slots[level + 1] + 1, level + 1);
 
        if (path->slots[level] >= mid) {
@@ -3565,9 +3588,9 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
  * the start of the leaf data.  IOW, how much room
  * the leaf has left for both items and data
  */
-noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
-                                  struct extent_buffer *leaf)
+noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        int nritems = btrfs_header_nritems(leaf);
        int ret;
 
@@ -3586,13 +3609,13 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
  * min slot controls the lowest index we're willing to push to the
  * right.  We'll push up to and including min_slot, but no lower
  */
-static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_path *path,
+static noinline int __push_leaf_right(struct btrfs_path *path,
                                      int data_size, int empty,
                                      struct extent_buffer *right,
                                      int free_space, u32 left_nritems,
                                      u32 min_slot)
 {
+       struct btrfs_fs_info *fs_info = right->fs_info;
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *upper = path->nodes[1];
        struct btrfs_map_token token;
@@ -3626,7 +3649,8 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
                        if (path->slots[0] > i)
                                break;
                        if (path->slots[0] == i) {
-                               int space = btrfs_leaf_free_space(fs_info, left);
+                               int space = btrfs_leaf_free_space(left);
+
                                if (space + push_space * 2 > free_space)
                                        break;
                        }
@@ -3655,10 +3679,10 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
        right_nritems = btrfs_header_nritems(right);
 
        push_space = btrfs_item_end_nr(left, left_nritems - push_items);
-       push_space -= leaf_data_end(fs_info, left);
+       push_space -= leaf_data_end(left);
 
        /* make room in the right data area */
-       data_end = leaf_data_end(fs_info, right);
+       data_end = leaf_data_end(right);
        memmove_extent_buffer(right,
                              BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
                              BTRFS_LEAF_DATA_OFFSET + data_end,
@@ -3667,7 +3691,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
        /* copy from the left data area */
        copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
                     BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
-                    BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left),
+                    BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
                     push_space);
 
        memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
@@ -3695,7 +3719,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
        if (left_nritems)
                btrfs_mark_buffer_dirty(left);
        else
-               clean_tree_block(fs_info, left);
+               btrfs_clean_tree_block(left);
 
        btrfs_mark_buffer_dirty(right);
 
@@ -3707,7 +3731,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
        if (path->slots[0] >= left_nritems) {
                path->slots[0] -= left_nritems;
                if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(fs_info, path->nodes[0]);
+                       btrfs_clean_tree_block(path->nodes[0]);
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
                path->nodes[0] = right;
@@ -3739,7 +3763,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
                           int min_data_size, int data_size,
                           int empty, u32 min_slot)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *right;
        struct extent_buffer *upper;
@@ -3758,7 +3781,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 
        btrfs_assert_tree_locked(path->nodes[1]);
 
-       right = read_node_slot(fs_info, upper, slot + 1);
+       right = read_node_slot(upper, slot + 1);
        /*
         * slot + 1 is not valid or we fail to read the right node,
         * no big deal, just return.
@@ -3769,7 +3792,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        btrfs_tree_lock(right);
        btrfs_set_lock_blocking_write(right);
 
-       free_space = btrfs_leaf_free_space(fs_info, right);
+       free_space = btrfs_leaf_free_space(right);
        if (free_space < data_size)
                goto out_unlock;
 
@@ -3779,7 +3802,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        if (ret)
                goto out_unlock;
 
-       free_space = btrfs_leaf_free_space(fs_info, right);
+       free_space = btrfs_leaf_free_space(right);
        if (free_space < data_size)
                goto out_unlock;
 
@@ -3800,7 +3823,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
                return 0;
        }
 
-       return __push_leaf_right(fs_info, path, min_data_size, empty,
+       return __push_leaf_right(path, min_data_size, empty,
                                right, free_space, left_nritems, min_slot);
 out_unlock:
        btrfs_tree_unlock(right);
@@ -3816,12 +3839,12 @@ out_unlock:
  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
  * items
  */
-static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_path *path, int data_size,
+static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
                                     int empty, struct extent_buffer *left,
                                     int free_space, u32 right_nritems,
                                     u32 max_slot)
 {
+       struct btrfs_fs_info *fs_info = left->fs_info;
        struct btrfs_disk_key disk_key;
        struct extent_buffer *right = path->nodes[0];
        int i;
@@ -3849,7 +3872,8 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
                        if (path->slots[0] < i)
                                break;
                        if (path->slots[0] == i) {
-                               int space = btrfs_leaf_free_space(fs_info, right);
+                               int space = btrfs_leaf_free_space(right);
+
                                if (space + push_space * 2 > free_space)
                                        break;
                        }
@@ -3882,7 +3906,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
                     btrfs_item_offset_nr(right, push_items - 1);
 
        copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
-                    leaf_data_end(fs_info, left) - push_space,
+                    leaf_data_end(left) - push_space,
                     BTRFS_LEAF_DATA_OFFSET +
                     btrfs_item_offset_nr(right, push_items - 1),
                     push_space);
@@ -3909,11 +3933,11 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
 
        if (push_items < right_nritems) {
                push_space = btrfs_item_offset_nr(right, push_items - 1) -
-                                                 leaf_data_end(fs_info, right);
+                                                 leaf_data_end(right);
                memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
                                      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
                                      BTRFS_LEAF_DATA_OFFSET +
-                                     leaf_data_end(fs_info, right), push_space);
+                                     leaf_data_end(right), push_space);
 
                memmove_extent_buffer(right, btrfs_item_nr_offset(0),
                              btrfs_item_nr_offset(push_items),
@@ -3935,7 +3959,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
        else
-               clean_tree_block(fs_info, right);
+               btrfs_clean_tree_block(right);
 
        btrfs_item_key(right, &disk_key, 0);
        fixup_low_keys(path, &disk_key, 1);
@@ -3972,7 +3996,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
                          *root, struct btrfs_path *path, int min_data_size,
                          int data_size, int empty, u32 max_slot)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *right = path->nodes[0];
        struct extent_buffer *left;
        int slot;
@@ -3992,7 +4015,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 
        btrfs_assert_tree_locked(path->nodes[1]);
 
-       left = read_node_slot(fs_info, path->nodes[1], slot - 1);
+       left = read_node_slot(path->nodes[1], slot - 1);
        /*
         * slot - 1 is not valid or we fail to read the left node,
         * no big deal, just return.
@@ -4003,7 +4026,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
        btrfs_tree_lock(left);
        btrfs_set_lock_blocking_write(left);
 
-       free_space = btrfs_leaf_free_space(fs_info, left);
+       free_space = btrfs_leaf_free_space(left);
        if (free_space < data_size) {
                ret = 1;
                goto out;
@@ -4019,13 +4042,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
                goto out;
        }
 
-       free_space = btrfs_leaf_free_space(fs_info, left);
+       free_space = btrfs_leaf_free_space(left);
        if (free_space < data_size) {
                ret = 1;
                goto out;
        }
 
-       return __push_leaf_left(fs_info, path, min_data_size,
+       return __push_leaf_left(path, min_data_size,
                               empty, left, free_space, right_nritems,
                               max_slot);
 out:
@@ -4039,12 +4062,12 @@ out:
  * available for the resulting leaf level of the path.
  */
 static noinline void copy_for_split(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info,
                                    struct btrfs_path *path,
                                    struct extent_buffer *l,
                                    struct extent_buffer *right,
                                    int slot, int mid, int nritems)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int data_copy_size;
        int rt_data_off;
        int i;
@@ -4055,7 +4078,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 
        nritems = nritems - mid;
        btrfs_set_header_nritems(right, nritems);
-       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(fs_info, l);
+       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
 
        copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
                           btrfs_item_nr_offset(mid),
@@ -4064,7 +4087,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
        copy_extent_buffer(right, l,
                     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
                     data_copy_size, BTRFS_LEAF_DATA_OFFSET +
-                    leaf_data_end(fs_info, l), data_copy_size);
+                    leaf_data_end(l), data_copy_size);
 
        rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
 
@@ -4079,8 +4102,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 
        btrfs_set_header_nritems(l, mid);
        btrfs_item_key(right, &disk_key, 0);
-       insert_ptr(trans, fs_info, path, &disk_key, right->start,
-                  path->slots[1] + 1, 1);
+       insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
 
        btrfs_mark_buffer_dirty(right);
        btrfs_mark_buffer_dirty(l);
@@ -4115,7 +4137,6 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
                                          struct btrfs_path *path,
                                          int data_size)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
        int progress = 0;
        int slot;
@@ -4124,7 +4145,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
 
        slot = path->slots[0];
        if (slot < btrfs_header_nritems(path->nodes[0]))
-               space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
+               space_needed -= btrfs_leaf_free_space(path->nodes[0]);
 
        /*
         * try to push all the items after our slot into the
@@ -4145,14 +4166,14 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
        if (path->slots[0] == 0 || path->slots[0] == nritems)
                return 0;
 
-       if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size)
+       if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
                return 0;
 
        /* try to push all the items before our slot into the next leaf */
        slot = path->slots[0];
        space_needed = data_size;
        if (slot > 0)
-               space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
+               space_needed -= btrfs_leaf_free_space(path->nodes[0]);
        ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
        if (ret < 0)
                return ret;
@@ -4201,7 +4222,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                int space_needed = data_size;
 
                if (slot < btrfs_header_nritems(l))
-                       space_needed -= btrfs_leaf_free_space(fs_info, l);
+                       space_needed -= btrfs_leaf_free_space(l);
 
                wret = push_leaf_right(trans, root, path, space_needed,
                                       space_needed, 0, 0);
@@ -4210,8 +4231,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                if (wret) {
                        space_needed = data_size;
                        if (slot > 0)
-                               space_needed -= btrfs_leaf_free_space(fs_info,
-                                                                     l);
+                               space_needed -= btrfs_leaf_free_space(l);
                        wret = push_leaf_left(trans, root, path, space_needed,
                                              space_needed, 0, (u32)-1);
                        if (wret < 0)
@@ -4220,7 +4240,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                l = path->nodes[0];
 
                /* did the pushes work? */
-               if (btrfs_leaf_free_space(fs_info, l) >= data_size)
+               if (btrfs_leaf_free_space(l) >= data_size)
                        return 0;
        }
 
@@ -4288,7 +4308,7 @@ again:
        if (split == 0) {
                if (mid <= slot) {
                        btrfs_set_header_nritems(right, 0);
-                       insert_ptr(trans, fs_info, path, &disk_key,
+                       insert_ptr(trans, path, &disk_key,
                                   right->start, path->slots[1] + 1, 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
@@ -4297,7 +4317,7 @@ again:
                        path->slots[1] += 1;
                } else {
                        btrfs_set_header_nritems(right, 0);
-                       insert_ptr(trans, fs_info, path, &disk_key,
+                       insert_ptr(trans, path, &disk_key,
                                   right->start, path->slots[1], 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
@@ -4314,7 +4334,7 @@ again:
                return ret;
        }
 
-       copy_for_split(trans, fs_info, path, l, right, slot, mid, nritems);
+       copy_for_split(trans, path, l, right, slot, mid, nritems);
 
        if (split == 2) {
                BUG_ON(num_doubles != 0);
@@ -4327,7 +4347,7 @@ again:
 push_for_double:
        push_for_double_split(trans, root, path, data_size);
        tried_avoid_double = 1;
-       if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size)
+       if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
                return 0;
        goto again;
 }
@@ -4336,7 +4356,6 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root,
                                         struct btrfs_path *path, int ins_len)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
@@ -4350,7 +4369,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
        BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
               key.type != BTRFS_EXTENT_CSUM_KEY);
 
-       if (btrfs_leaf_free_space(fs_info, leaf) >= ins_len)
+       if (btrfs_leaf_free_space(leaf) >= ins_len)
                return 0;
 
        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -4377,7 +4396,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
                goto err;
 
        /* the leaf has  changed, it now has room.  return now */
-       if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= ins_len)
+       if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
                goto err;
 
        if (key.type == BTRFS_EXTENT_DATA_KEY) {
@@ -4400,8 +4419,7 @@ err:
        return ret;
 }
 
-static noinline int split_item(struct btrfs_fs_info *fs_info,
-                              struct btrfs_path *path,
+static noinline int split_item(struct btrfs_path *path,
                               const struct btrfs_key *new_key,
                               unsigned long split_offset)
 {
@@ -4416,7 +4434,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info,
        struct btrfs_disk_key disk_key;
 
        leaf = path->nodes[0];
-       BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < sizeof(struct btrfs_item));
+       BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
 
        btrfs_set_path_blocking(path);
 
@@ -4465,7 +4483,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info,
                            item_size - split_offset);
        btrfs_mark_buffer_dirty(leaf);
 
-       BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < 0);
+       BUG_ON(btrfs_leaf_free_space(leaf) < 0);
        kfree(buf);
        return 0;
 }
@@ -4497,7 +4515,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       ret = split_item(root->fs_info, path, new_key, split_offset);
+       ret = split_item(path, new_key, split_offset);
        return ret;
 }
 
@@ -4543,8 +4561,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
  * off the end of the item or if we shift the item to chop bytes off
  * the front.
  */
-void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
-                        struct btrfs_path *path, u32 new_size, int from_end)
+void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
 {
        int slot;
        struct extent_buffer *leaf;
@@ -4567,7 +4584,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
                return;
 
        nritems = btrfs_header_nritems(leaf);
-       data_end = leaf_data_end(fs_info, leaf);
+       data_end = leaf_data_end(leaf);
 
        old_data_start = btrfs_item_offset_nr(leaf, slot);
 
@@ -4633,7 +4650,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
        btrfs_set_item_size(leaf, item, new_size);
        btrfs_mark_buffer_dirty(leaf);
 
-       if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
+       if (btrfs_leaf_free_space(leaf) < 0) {
                btrfs_print_leaf(leaf);
                BUG();
        }
@@ -4642,8 +4659,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
 /*
  * make the item pointed to by the path bigger, data_size is the added size.
  */
-void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-                      u32 data_size)
+void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
 {
        int slot;
        struct extent_buffer *leaf;
@@ -4660,9 +4676,9 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
        leaf = path->nodes[0];
 
        nritems = btrfs_header_nritems(leaf);
-       data_end = leaf_data_end(fs_info, leaf);
+       data_end = leaf_data_end(leaf);
 
-       if (btrfs_leaf_free_space(fs_info, leaf) < data_size) {
+       if (btrfs_leaf_free_space(leaf) < data_size) {
                btrfs_print_leaf(leaf);
                BUG();
        }
@@ -4672,9 +4688,9 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
        BUG_ON(slot < 0);
        if (slot >= nritems) {
                btrfs_print_leaf(leaf);
-               btrfs_crit(fs_info, "slot %d too large, nritems %d",
+               btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
                           slot, nritems);
-               BUG_ON(1);
+               BUG();
        }
 
        /*
@@ -4701,7 +4717,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
        btrfs_set_item_size(leaf, item, old_size + data_size);
        btrfs_mark_buffer_dirty(leaf);
 
-       if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
+       if (btrfs_leaf_free_space(leaf) < 0) {
                btrfs_print_leaf(leaf);
                BUG();
        }
@@ -4738,12 +4754,12 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
        slot = path->slots[0];
 
        nritems = btrfs_header_nritems(leaf);
-       data_end = leaf_data_end(fs_info, leaf);
+       data_end = leaf_data_end(leaf);
 
-       if (btrfs_leaf_free_space(fs_info, leaf) < total_size) {
+       if (btrfs_leaf_free_space(leaf) < total_size) {
                btrfs_print_leaf(leaf);
                btrfs_crit(fs_info, "not enough freespace need %u have %d",
-                          total_size, btrfs_leaf_free_space(fs_info, leaf));
+                          total_size, btrfs_leaf_free_space(leaf));
                BUG();
        }
 
@@ -4754,7 +4770,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
                        btrfs_print_leaf(leaf);
                        btrfs_crit(fs_info, "slot %d old_data %d data_end %d",
                                   slot, old_data, data_end);
-                       BUG_ON(1);
+                       BUG();
                }
                /*
                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
@@ -4794,7 +4810,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
        btrfs_set_header_nritems(leaf, nritems + nr);
        btrfs_mark_buffer_dirty(leaf);
 
-       if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
+       if (btrfs_leaf_free_space(leaf) < 0) {
                btrfs_print_leaf(leaf);
                BUG();
        }
@@ -4966,7 +4982,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        nritems = btrfs_header_nritems(leaf);
 
        if (slot + nr != nritems) {
-               int data_end = leaf_data_end(fs_info, leaf);
+               int data_end = leaf_data_end(leaf);
 
                memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
                              data_end + dsize,
@@ -4996,7 +5012,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        btrfs_set_header_level(leaf, 0);
                } else {
                        btrfs_set_path_blocking(path);
-                       clean_tree_block(fs_info, leaf);
+                       btrfs_clean_tree_block(leaf);
                        btrfs_del_leaf(trans, root, path, leaf);
                }
        } else {
@@ -5126,7 +5142,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
                         struct btrfs_path *path,
                         u64 min_trans)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *cur;
        struct btrfs_key found_key;
        int slot;
@@ -5207,7 +5222,7 @@ find_next_key:
                        goto out;
                }
                btrfs_set_path_blocking(path);
-               cur = read_node_slot(fs_info, cur, slot);
+               cur = read_node_slot(cur, slot);
                if (IS_ERR(cur)) {
                        ret = PTR_ERR(cur);
                        goto out;
@@ -5229,14 +5244,12 @@ out:
        return ret;
 }
 
-static int tree_move_down(struct btrfs_fs_info *fs_info,
-                          struct btrfs_path *path,
-                          int *level)
+static int tree_move_down(struct btrfs_path *path, int *level)
 {
        struct extent_buffer *eb;
 
        BUG_ON(*level == 0);
-       eb = read_node_slot(fs_info, path->nodes[*level], path->slots[*level]);
+       eb = read_node_slot(path->nodes[*level], path->slots[*level]);
        if (IS_ERR(eb))
                return PTR_ERR(eb);
 
@@ -5276,8 +5289,7 @@ static int tree_move_next_or_upnext(struct btrfs_path *path,
  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
  * or down.
  */
-static int tree_advance(struct btrfs_fs_info *fs_info,
-                       struct btrfs_path *path,
+static int tree_advance(struct btrfs_path *path,
                        int *level, int root_level,
                        int allow_down,
                        struct btrfs_key *key)
@@ -5287,7 +5299,7 @@ static int tree_advance(struct btrfs_fs_info *fs_info,
        if (*level == 0 || !allow_down) {
                ret = tree_move_next_or_upnext(path, level, root_level);
        } else {
-               ret = tree_move_down(fs_info, path, level);
+               ret = tree_move_down(path, level);
        }
        if (ret >= 0) {
                if (*level == 0)
@@ -5464,7 +5476,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
        while (1) {
                if (advance_left && !left_end_reached) {
-                       ret = tree_advance(fs_info, left_path, &left_level,
+                       ret = tree_advance(left_path, &left_level,
                                        left_root_level,
                                        advance_left != ADVANCE_ONLY_NEXT,
                                        &left_key);
@@ -5475,7 +5487,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
                        advance_left = 0;
                }
                if (advance_right && !right_end_reached) {
-                       ret = tree_advance(fs_info, right_path, &right_level,
+                       ret = tree_advance(right_path, &right_level,
                                        right_root_level,
                                        advance_right != ADVANCE_ONLY_NEXT,
                                        &right_key);
index 5260a92..0a61dff 100644 (file)
@@ -41,6 +41,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
 extern struct kmem_cache *btrfs_free_space_cachep;
 struct btrfs_ordered_sum;
+struct btrfs_ref;
 
 #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
 
@@ -1015,6 +1016,7 @@ struct btrfs_fs_info {
        /* used to keep from writing metadata until there is a nice batch */
        struct percpu_counter dirty_metadata_bytes;
        struct percpu_counter delalloc_bytes;
+       struct percpu_counter dio_bytes;
        s32 dirty_metadata_batch;
        s32 delalloc_batch;
 
@@ -1092,10 +1094,7 @@ struct btrfs_fs_info {
 
        /* holds configuration and tracking. Protected by qgroup_lock */
        struct rb_root qgroup_tree;
-       struct rb_root qgroup_op_tree;
        spinlock_t qgroup_lock;
-       spinlock_t qgroup_op_lock;
-       atomic_t qgroup_op_seq;
 
        /*
         * used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1152,12 +1151,6 @@ struct btrfs_fs_info {
        struct mutex unused_bg_unpin_mutex;
        struct mutex delete_unused_bgs_mutex;
 
-       /*
-        * Chunks that can't be freed yet (under a trim/discard operation)
-        * and will be latter freed. Protected by fs_info->chunk_mutex.
-        */
-       struct list_head pinned_chunks;
-
        /* Cached block sizes */
        u32 nodesize;
        u32 sectorsize;
@@ -1348,6 +1341,12 @@ struct btrfs_root {
         * manipulation with the read-only status via SUBVOL_SETFLAGS
         */
        int send_in_progress;
+       /*
+        * Number of currently running deduplication operations that have a
+        * destination inode belonging to this root. Protected by the lock
+        * root_item_lock.
+        */
+       int dedupe_in_progress;
        struct btrfs_subvolume_writers *subv_writers;
        atomic_t will_be_snapshotted;
        atomic_t snapshot_force_cow;
@@ -1540,6 +1539,21 @@ do {                                                                   \
 
 #define BTRFS_INODE_ROOT_ITEM_INIT     (1 << 31)
 
+#define BTRFS_INODE_FLAG_MASK                                          \
+       (BTRFS_INODE_NODATASUM |                                        \
+        BTRFS_INODE_NODATACOW |                                        \
+        BTRFS_INODE_READONLY |                                         \
+        BTRFS_INODE_NOCOMPRESS |                                       \
+        BTRFS_INODE_PREALLOC |                                         \
+        BTRFS_INODE_SYNC |                                             \
+        BTRFS_INODE_IMMUTABLE |                                        \
+        BTRFS_INODE_APPEND |                                           \
+        BTRFS_INODE_NODUMP |                                           \
+        BTRFS_INODE_NOATIME |                                          \
+        BTRFS_INODE_DIRSYNC |                                          \
+        BTRFS_INODE_COMPRESS |                                         \
+        BTRFS_INODE_ROOT_ITEM_INIT)
+
 struct btrfs_map_token {
        const struct extent_buffer *eb;
        char *kaddr;
@@ -2163,18 +2177,16 @@ static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
        return (btrfs_header_flags(eb) & flag) == flag;
 }
 
-static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
 {
        u64 flags = btrfs_header_flags(eb);
        btrfs_set_header_flags(eb, flags | flag);
-       return (flags & flag) == flag;
 }
 
-static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
 {
        u64 flags = btrfs_header_flags(eb);
        btrfs_set_header_flags(eb, flags & ~flag);
-       return (flags & flag) == flag;
 }
 
 static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
@@ -2445,13 +2457,12 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
  * this returns the address of the start of the last item,
  * which is the stop of the leaf data stack
  */
-static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info,
-                                        const struct extent_buffer *leaf)
+static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
 {
        u32 nr = btrfs_header_nritems(leaf);
 
        if (nr == 0)
-               return BTRFS_LEAF_DATA_SIZE(fs_info);
+               return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
        return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
@@ -2698,8 +2709,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           unsigned long count);
-int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
-                                unsigned long count, u64 transid, int wait);
 void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
                                  struct btrfs_delayed_ref_root *delayed_refs,
                                  struct btrfs_delayed_ref_head *head);
@@ -2711,8 +2720,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
                     u64 bytenr, u64 num, int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
                                    u64 bytenr, u64 num_bytes);
-int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
-                                struct extent_buffer *eb);
+int btrfs_exclude_logged_extents(struct extent_buffer *eb);
 int btrfs_cross_ref_exist(struct btrfs_root *root,
                          u64 objectid, u64 offset, u64 bytenr);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
@@ -2745,13 +2753,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct extent_buffer *buf, int full_backref);
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info,
                                u64 bytenr, u64 num_bytes, u64 flags,
                                int level, int is_data);
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root,
-                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-                     u64 owner, u64 offset);
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
 
 int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
                               u64 start, u64 len, int delalloc);
@@ -2760,15 +2764,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
 void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root,
-                        u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset);
+                        struct btrfs_ref *generic_ref);
 
 int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
-                                  struct btrfs_fs_info *fs_info);
-int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
 int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_fs_info *info);
@@ -2936,10 +2936,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
                      struct extent_buffer **cow_ret, u64 new_root_objectid);
 int btrfs_block_can_be_shared(struct btrfs_root *root,
                              struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-                      u32 data_size);
-void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
-                        struct btrfs_path *path, u32 new_size, int from_end);
+void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
+void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
 int btrfs_split_item(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root,
                     struct btrfs_path *path,
@@ -3015,8 +3013,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
 {
        return btrfs_next_old_item(root, p, 0);
 }
-int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *leaf);
+int btrfs_leaf_free_space(struct extent_buffer *leaf);
 int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
                                     struct btrfs_block_rsv *block_rsv,
                                     int update_ref, int for_reloc);
@@ -3756,8 +3753,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
 void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
 int btrfs_scrub_cancel(struct btrfs_fs_info *info);
-int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
-                          struct btrfs_device *dev);
+int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
 int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
                         struct btrfs_scrub_progress *progress);
 static inline void btrfs_init_full_stripe_locks_tree(
@@ -3806,6 +3802,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
        return signal_pending(current);
 }
 
+#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+
 /* Sanity test specific functions */
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 void btrfs_test_inode_set_ops(struct inode *inode);
index c669f25..43fdb29 100644 (file)
@@ -691,7 +691,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
                                    struct btrfs_path *path,
                                    struct btrfs_delayed_item *item)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_delayed_item *curr, *next;
        int free_space;
        int total_data_size = 0, total_size = 0;
@@ -708,7 +707,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
        BUG_ON(!path->nodes[0]);
 
        leaf = path->nodes[0];
-       free_space = btrfs_leaf_free_space(fs_info, leaf);
+       free_space = btrfs_leaf_free_space(leaf);
        INIT_LIST_HEAD(&head);
 
        next = item;
@@ -1692,7 +1691,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
                name = (char *)(di + 1);
                name_len = btrfs_stack_dir_name_len(di);
 
-               d_type = btrfs_filetype_table[di->type];
+               d_type = fs_ftype_to_dtype(di->type);
                btrfs_disk_key_to_cpu(&location, &di->location);
 
                over = !dir_emit(ctx, name, name_len,
index 7d2a413..a73fc23 100644 (file)
@@ -735,8 +735,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
  * transaction commits.
  */
 int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
-                              u64 bytenr, u64 num_bytes, u64 parent,
-                              u64 ref_root,  int level, int action,
+                              struct btrfs_ref *generic_ref,
                               struct btrfs_delayed_extent_op *extent_op,
                               int *old_ref_mod, int *new_ref_mod)
 {
@@ -746,10 +745,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;
-       bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+       bool is_system;
+       int action = generic_ref->action;
+       int level = generic_ref->tree_ref.level;
        int ret;
+       u64 bytenr = generic_ref->bytenr;
+       u64 num_bytes = generic_ref->len;
+       u64 parent = generic_ref->parent;
        u8 ref_type;
 
+       is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
+
+       ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
@@ -762,7 +769,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
        }
 
        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-           is_fstree(ref_root)) {
+           is_fstree(generic_ref->real_root) &&
+           is_fstree(generic_ref->tree_ref.root) &&
+           !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
@@ -777,13 +786,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                ref_type = BTRFS_TREE_BLOCK_REF_KEY;
 
        init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
-                               ref_root, action, ref_type);
-       ref->root = ref_root;
+                               generic_ref->tree_ref.root, action, ref_type);
+       ref->root = generic_ref->tree_ref.root;
        ref->parent = parent;
        ref->level = level;
 
        init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
-                             ref_root, 0, action, false, is_system);
+                             generic_ref->tree_ref.root, 0, action, false,
+                             is_system);
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -822,10 +832,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
  * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
  */
 int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
-                              u64 bytenr, u64 num_bytes,
-                              u64 parent, u64 ref_root,
-                              u64 owner, u64 offset, u64 reserved, int action,
-                              int *old_ref_mod, int *new_ref_mod)
+                              struct btrfs_ref *generic_ref,
+                              u64 reserved, int *old_ref_mod,
+                              int *new_ref_mod)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_data_ref *ref;
@@ -833,9 +842,17 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;
+       int action = generic_ref->action;
        int ret;
+       u64 bytenr = generic_ref->bytenr;
+       u64 num_bytes = generic_ref->len;
+       u64 parent = generic_ref->parent;
+       u64 ref_root = generic_ref->data_ref.ref_root;
+       u64 owner = generic_ref->data_ref.ino;
+       u64 offset = generic_ref->data_ref.offset;
        u8 ref_type;
 
+       ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;
@@ -859,7 +876,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
        }
 
        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-           is_fstree(ref_root)) {
+           is_fstree(ref_root) &&
+           is_fstree(generic_ref->real_root) &&
+           !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -905,8 +924,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
-                               struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
 {
index 70606da..c18f93e 100644 (file)
@@ -176,6 +176,83 @@ struct btrfs_delayed_ref_root {
        u64 qgroup_to_skip;
 };
 
+enum btrfs_ref_type {
+       BTRFS_REF_NOT_SET,
+       BTRFS_REF_DATA,
+       BTRFS_REF_METADATA,
+       BTRFS_REF_LAST,
+};
+
+struct btrfs_data_ref {
+       /* For EXTENT_DATA_REF */
+
+       /* Root which refers to this data extent */
+       u64 ref_root;
+
+       /* Inode which refers to this data extent */
+       u64 ino;
+
+       /*
+        * file_offset - extent_offset
+        *
+        * file_offset is the key.offset of the EXTENT_DATA key.
+        * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
+        */
+       u64 offset;
+};
+
+struct btrfs_tree_ref {
+       /*
+        * Level of this tree block
+        *
+        * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
+        */
+       int level;
+
+       /*
+        * Root which refers to this tree block.
+        *
+        * For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
+        */
+       u64 root;
+
+       /* For non-skinny metadata, no special member needed */
+};
+
+struct btrfs_ref {
+       enum btrfs_ref_type type;
+       int action;
+
+       /*
+        * Whether qgroup accounting should be skipped for this extent.
+        *
+        * Normally false, but for certain cases like delayed subtree scan,
+        * setting this flag can hugely reduce qgroup overhead.
+        */
+       bool skip_qgroup;
+
+       /*
+        * Optional. For which root is this modification.
+        * Mostly used for qgroup optimization.
+        *
+        * When unset, data/tree ref init code will populate it.
+        * In certain cases, we're modifying reference for a different root.
+        * E.g. COW fs tree blocks for balance.
+        * In that case, tree_ref::root will be fs tree, but we're doing this
+        * for reloc tree, then we should set @real_root to reloc tree.
+        */
+       u64 real_root;
+       u64 bytenr;
+       u64 len;
+
+       /* Bytenr of the parent tree block */
+       u64 parent;
+       union {
+               struct btrfs_data_ref data_ref;
+               struct btrfs_tree_ref tree_ref;
+       };
+};
+
 extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
 extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
 extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
@@ -184,6 +261,38 @@ extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
 int __init btrfs_delayed_ref_init(void);
 void __cold btrfs_delayed_ref_exit(void);
 
+static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
+                               int action, u64 bytenr, u64 len, u64 parent)
+{
+       generic_ref->action = action;
+       generic_ref->bytenr = bytenr;
+       generic_ref->len = len;
+       generic_ref->parent = parent;
+}
+
+static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
+                               int level, u64 root)
+{
+       /* If @real_root not set, use @root as fallback */
+       if (!generic_ref->real_root)
+               generic_ref->real_root = root;
+       generic_ref->tree_ref.level = level;
+       generic_ref->tree_ref.root = root;
+       generic_ref->type = BTRFS_REF_METADATA;
+}
+
+static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
+                               u64 ref_root, u64 ino, u64 offset)
+{
+       /* If @real_root not set, use @ref_root as fallback */
+       if (!generic_ref->real_root)
+               generic_ref->real_root = ref_root;
+       generic_ref->data_ref.ref_root = ref_root;
+       generic_ref->data_ref.ino = ino;
+       generic_ref->data_ref.offset = offset;
+       generic_ref->type = BTRFS_REF_DATA;
+}
+
 static inline struct btrfs_delayed_extent_op *
 btrfs_alloc_delayed_extent_op(void)
 {
@@ -224,17 +333,14 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
 }
 
 int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
-                              u64 bytenr, u64 num_bytes, u64 parent,
-                              u64 ref_root, int level, int action,
+                              struct btrfs_ref *generic_ref,
                               struct btrfs_delayed_extent_op *extent_op,
                               int *old_ref_mod, int *new_ref_mod);
 int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
-                              u64 bytenr, u64 num_bytes,
-                              u64 parent, u64 ref_root,
-                              u64 owner, u64 offset, u64 reserved, int action,
-                              int *old_ref_mod, int *new_ref_mod);
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
-                               struct btrfs_trans_handle *trans,
+                              struct btrfs_ref *generic_ref,
+                              u64 reserved, int *old_ref_mod,
+                              int *new_ref_mod);
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op);
 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
index ee193c5..55c15f3 100644 (file)
@@ -273,9 +273,9 @@ error:
  * called from commit_transaction. Writes changed device replace state to
  * disk.
  */
-int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
-                         struct btrfs_fs_info *fs_info)
+int btrfs_run_dev_replace(struct btrfs_trans_handle *trans)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int ret;
        struct btrfs_root *dev_root = fs_info->dev_root;
        struct btrfs_path *path;
@@ -662,7 +662,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        btrfs_device_set_disk_total_bytes(tgt_device,
                                          src_device->disk_total_bytes);
        btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
-       ASSERT(list_empty(&src_device->resized_list));
+       ASSERT(list_empty(&src_device->post_commit_list));
        tgt_device->commit_total_bytes = src_device->commit_total_bytes;
        tgt_device->commit_bytes_used = src_device->bytes_used;
 
@@ -696,7 +696,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 
        /* replace the sysfs entry */
        btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
-       btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
+       btrfs_rm_dev_replace_free_srcdev(src_device);
 
        /* write back the superblocks */
        trans = btrfs_start_transaction(root, 0);
index 4aa40ba..78c5d8f 100644 (file)
@@ -9,8 +9,7 @@
 struct btrfs_ioctl_dev_replace_args;
 
 int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
-int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
-                         struct btrfs_fs_info *fs_info);
+int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
 int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
                            struct btrfs_ioctl_dev_replace_args *args);
 void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
index 8de74d8..863367c 100644 (file)
@@ -36,7 +36,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
                di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
                if (di)
                        return ERR_PTR(-EEXIST);
-               btrfs_extend_item(fs_info, path, data_size);
+               btrfs_extend_item(path, data_size);
        } else if (ret < 0)
                return ERR_PTR(ret);
        WARN_ON(ret > 0);
@@ -429,8 +429,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
                start = btrfs_item_ptr_offset(leaf, path->slots[0]);
                memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
                        item_len - (ptr + sub_item_len - start));
-               btrfs_truncate_item(root->fs_info, path,
-                                   item_len - sub_item_len, 1);
+               btrfs_truncate_item(path, item_len - sub_item_len, 1);
        }
        return ret;
 }
index 6fe9197..663efce 100644 (file)
@@ -260,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result)
 }
 
 /*
- * compute the csum for a btree block, and either verify it or write it
- * into the csum field of the block.
+ * Compute the csum of a btree block and store the result to provided buffer.
+ *
+ * Returns error if the extent buffer cannot be mapped.
  */
-static int csum_tree_block(struct btrfs_fs_info *fs_info,
-                          struct extent_buffer *buf,
-                          int verify)
+static int csum_tree_block(struct extent_buffer *buf, u8 *result)
 {
-       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-       char result[BTRFS_CSUM_SIZE];
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
@@ -288,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
                 */
                err = map_private_extent_buffer(buf, offset, 32,
                                        &kaddr, &map_start, &map_len);
-               if (err)
+               if (WARN_ON(err))
                        return err;
                cur_len = min(len, map_len - (offset - map_start));
                crc = btrfs_csum_data(kaddr + offset - map_start,
@@ -300,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
 
        btrfs_csum_final(crc, result);
 
-       if (verify) {
-               if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
-                       u32 val;
-                       u32 found = 0;
-                       memcpy(&found, result, csum_size);
-
-                       read_extent_buffer(buf, &val, 0, csum_size);
-                       btrfs_warn_rl(fs_info,
-                               "%s checksum verify failed on %llu wanted %X found %X level %d",
-                               fs_info->sb->s_id, buf->start,
-                               val, found, btrfs_header_level(buf));
-                       return -EUCLEAN;
-               }
-       } else {
-               write_extent_buffer(buf, result, 0, csum_size);
-       }
-
        return 0;
 }
 
@@ -414,22 +394,21 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
-static int verify_level_key(struct btrfs_fs_info *fs_info,
-                           struct extent_buffer *eb, int level,
-                           struct btrfs_key *first_key, u64 parent_transid)
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+                          struct btrfs_key *first_key, u64 parent_transid)
 {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
        int found_level;
        struct btrfs_key found_key;
        int ret;
 
        found_level = btrfs_header_level(eb);
        if (found_level != level) {
-#ifdef CONFIG_BTRFS_DEBUG
-               WARN_ON(1);
+               WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+                    KERN_ERR "BTRFS: tree level check failed\n");
                btrfs_err(fs_info,
 "tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
                          eb->start, level, found_level);
-#endif
                return -EIO;
        }
 
@@ -450,9 +429,9 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
                btrfs_item_key_to_cpu(eb, &found_key, 0);
        ret = btrfs_comp_cpu_keys(first_key, &found_key);
 
-#ifdef CONFIG_BTRFS_DEBUG
        if (ret) {
-               WARN_ON(1);
+               WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+                    KERN_ERR "BTRFS: tree first key check failed\n");
                btrfs_err(fs_info,
 "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
                          eb->start, parent_transid, first_key->objectid,
@@ -460,7 +439,6 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
                          found_key.objectid, found_key.type,
                          found_key.offset);
        }
-#endif
        return ret;
 }
 
@@ -472,11 +450,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
  * @level:             expected level, mandatory check
  * @first_key:         expected key of first slot, skip check if NULL
  */
-static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
-                                         struct extent_buffer *eb,
+static int btree_read_extent_buffer_pages(struct extent_buffer *eb,
                                          u64 parent_transid, int level,
                                          struct btrfs_key *first_key)
 {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
        struct extent_io_tree *io_tree;
        int failed = 0;
        int ret;
@@ -487,14 +465,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
        io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        while (1) {
                clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
-               ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
-                                              mirror_num);
+               ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
                if (!ret) {
                        if (verify_parent_transid(io_tree, eb,
                                                   parent_transid, 0))
                                ret = -EIO;
-                       else if (verify_level_key(fs_info, eb, level,
-                                                 first_key, parent_transid))
+                       else if (btrfs_verify_level_key(eb, level,
+                                               first_key, parent_transid))
                                ret = -EUCLEAN;
                        else
                                break;
@@ -519,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
        }
 
        if (failed && !ret && failed_mirror)
-               repair_eb_io_failure(fs_info, eb, failed_mirror);
+               btrfs_repair_eb_io_failure(eb, failed_mirror);
 
        return ret;
 }
@@ -533,7 +510,10 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
 {
        u64 start = page_offset(page);
        u64 found_start;
+       u8 result[BTRFS_CSUM_SIZE];
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        struct extent_buffer *eb;
+       int ret;
 
        eb = (struct extent_buffer *)page->private;
        if (page != eb->pages[0])
@@ -552,12 +532,28 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
        ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
                        btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
 
-       return csum_tree_block(fs_info, eb, 0);
+       if (csum_tree_block(eb, result))
+               return -EINVAL;
+
+       if (btrfs_header_level(eb))
+               ret = btrfs_check_node(eb);
+       else
+               ret = btrfs_check_leaf_full(eb);
+
+       if (ret < 0) {
+               btrfs_err(fs_info,
+               "block=%llu write time tree block corruption detected",
+                         eb->start);
+               return ret;
+       }
+       write_extent_buffer(eb, result, 0, csum_size);
+
+       return 0;
 }
 
-static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
-                                struct extent_buffer *eb)
+static int check_tree_block_fsid(struct extent_buffer *eb)
 {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        u8 fsid[BTRFS_FSID_SIZE];
        int ret = 1;
@@ -595,7 +591,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        struct extent_buffer *eb;
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        int ret = 0;
+       u8 result[BTRFS_CSUM_SIZE];
        int reads_done;
 
        if (!page->private)
@@ -625,7 +623,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                ret = -EIO;
                goto err;
        }
-       if (check_tree_block_fsid(fs_info, eb)) {
+       if (check_tree_block_fsid(eb)) {
                btrfs_err_rl(fs_info, "bad fsid on block %llu",
                             eb->start);
                ret = -EIO;
@@ -642,25 +640,44 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
                                       eb, found_level);
 
-       ret = csum_tree_block(fs_info, eb, 1);
+       ret = csum_tree_block(eb, result);
        if (ret)
                goto err;
 
+       if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
+               u32 val;
+               u32 found = 0;
+
+               memcpy(&found, result, csum_size);
+
+               read_extent_buffer(eb, &val, 0, csum_size);
+               btrfs_warn_rl(fs_info,
+               "%s checksum verify failed on %llu wanted %x found %x level %d",
+                             fs_info->sb->s_id, eb->start,
+                             val, found, btrfs_header_level(eb));
+               ret = -EUCLEAN;
+               goto err;
+       }
+
        /*
         * If this is a leaf block and it is corrupt, set the corrupt bit so
         * that we don't try and read the other copies of this block, just
         * return -EIO.
         */
-       if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
+       if (found_level == 0 && btrfs_check_leaf_full(eb)) {
                set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
                ret = -EIO;
        }
 
-       if (found_level > 0 && btrfs_check_node(fs_info, eb))
+       if (found_level > 0 && btrfs_check_node(eb))
                ret = -EIO;
 
        if (!ret)
                set_extent_buffer_uptodate(eb);
+       else
+               btrfs_err(fs_info,
+                         "block=%llu read time tree block corruption detected",
+                         eb->start);
 err:
        if (reads_done &&
            test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
@@ -867,11 +884,10 @@ static int check_async_write(struct btrfs_inode *bi)
        return 1;
 }
 
-static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
-                                         int mirror_num, unsigned long bio_flags,
-                                         u64 bio_offset)
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+                                         int mirror_num,
+                                         unsigned long bio_flags)
 {
-       struct inode *inode = private_data;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int async = check_async_write(BTRFS_I(inode));
        blk_status_t ret;
@@ -897,8 +913,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
                 * checksumming can happen in parallel across all CPUs
                 */
                ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
-                                         bio_offset, private_data,
-                                         btree_submit_bio_start);
+                                         0, inode, btree_submit_bio_start);
        }
 
        if (ret)
@@ -1017,22 +1032,23 @@ static const struct address_space_operations btree_aops = {
 void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
        struct extent_buffer *buf = NULL;
-       struct inode *btree_inode = fs_info->btree_inode;
+       int ret;
 
        buf = btrfs_find_create_tree_block(fs_info, bytenr);
        if (IS_ERR(buf))
                return;
-       read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
-                                buf, WAIT_NONE, 0);
-       free_extent_buffer(buf);
+
+       ret = read_extent_buffer_pages(buf, WAIT_NONE, 0);
+       if (ret < 0)
+               free_extent_buffer_stale(buf);
+       else
+               free_extent_buffer(buf);
 }
 
 int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
                         int mirror_num, struct extent_buffer **eb)
 {
        struct extent_buffer *buf = NULL;
-       struct inode *btree_inode = fs_info->btree_inode;
-       struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
        int ret;
 
        buf = btrfs_find_create_tree_block(fs_info, bytenr);
@@ -1041,15 +1057,14 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
 
        set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
 
-       ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
-                                      mirror_num);
+       ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num);
        if (ret) {
-               free_extent_buffer(buf);
+               free_extent_buffer_stale(buf);
                return ret;
        }
 
        if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
-               free_extent_buffer(buf);
+               free_extent_buffer_stale(buf);
                return -EIO;
        } else if (extent_buffer_uptodate(buf)) {
                *eb = buf;
@@ -1068,19 +1083,6 @@ struct extent_buffer *btrfs_find_create_tree_block(
        return alloc_extent_buffer(fs_info, bytenr);
 }
 
-
-int btrfs_write_tree_block(struct extent_buffer *buf)
-{
-       return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
-                                       buf->start + buf->len - 1);
-}
-
-void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
-{
-       filemap_fdatawait_range(buf->pages[0]->mapping,
-                               buf->start, buf->start + buf->len - 1);
-}
-
 /*
  * Read tree block at logical address @bytenr and do various basic but critical
  * verification.
@@ -1100,19 +1102,19 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
        if (IS_ERR(buf))
                return buf;
 
-       ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
+       ret = btree_read_extent_buffer_pages(buf, parent_transid,
                                             level, first_key);
        if (ret) {
-               free_extent_buffer(buf);
+               free_extent_buffer_stale(buf);
                return ERR_PTR(ret);
        }
        return buf;
 
 }
 
-void clean_tree_block(struct btrfs_fs_info *fs_info,
-                     struct extent_buffer *buf)
+void btrfs_clean_tree_block(struct extent_buffer *buf)
 {
+       struct btrfs_fs_info *fs_info = buf->fs_info;
        if (btrfs_header_generation(buf) ==
            fs_info->running_transaction->transid) {
                btrfs_assert_tree_locked(buf);
@@ -1208,7 +1210,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        root->log_transid_committed = -1;
        root->last_log_commit = 0;
        if (!dummy)
-               extent_io_tree_init(&root->dirty_log_pages, NULL);
+               extent_io_tree_init(fs_info, &root->dirty_log_pages,
+                                   IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
 
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1255,9 +1258,9 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
 #endif
 
 struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_info *fs_info,
                                     u64 objectid)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct extent_buffer *leaf;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *root;
@@ -2138,8 +2141,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
        inode->i_mapping->a_ops = &btree_aops;
 
        RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
-       extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);
-       BTRFS_I(inode)->io_tree.track_uptodate = 0;
+       extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
+                           IO_TREE_INODE_IO, inode);
+       BTRFS_I(inode)->io_tree.track_uptodate = false;
        extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
 
        BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2162,7 +2166,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
        spin_lock_init(&fs_info->qgroup_lock);
        mutex_init(&fs_info->qgroup_ioctl_lock);
        fs_info->qgroup_tree = RB_ROOT;
-       fs_info->qgroup_op_tree = RB_ROOT;
        INIT_LIST_HEAD(&fs_info->dirty_qgroups);
        fs_info->qgroup_seq = 1;
        fs_info->qgroup_ulist = NULL;
@@ -2630,11 +2633,17 @@ int open_ctree(struct super_block *sb,
                goto fail;
        }
 
-       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
+       ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_srcu;
        }
+
+       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
+       if (ret) {
+               err = ret;
+               goto fail_dio_bytes;
+       }
        fs_info->dirty_metadata_batch = PAGE_SIZE *
                                        (1 + ilog2(nr_cpu_ids));
 
@@ -2667,7 +2676,6 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->defrag_inodes_lock);
        spin_lock_init(&fs_info->tree_mod_seq_lock);
        spin_lock_init(&fs_info->super_lock);
-       spin_lock_init(&fs_info->qgroup_op_lock);
        spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->unused_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
@@ -2694,7 +2702,6 @@ int open_ctree(struct super_block *sb,
 
        atomic_set(&fs_info->async_delalloc_pages, 0);
        atomic_set(&fs_info->defrag_running, 0);
-       atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
        atomic_set(&fs_info->nr_delayed_iputs, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
@@ -2748,8 +2755,10 @@ int open_ctree(struct super_block *sb,
        fs_info->block_group_cache_tree = RB_ROOT;
        fs_info->first_logical_byte = (u64)-1;
 
-       extent_io_tree_init(&fs_info->freed_extents[0], NULL);
-       extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+       extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
+                           IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
+       extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
+                           IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
        fs_info->pinned_extents = &fs_info->freed_extents[0];
        set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
 
@@ -2776,8 +2785,6 @@ int open_ctree(struct super_block *sb,
        init_waitqueue_head(&fs_info->async_submit_wait);
        init_waitqueue_head(&fs_info->delayed_iputs_wait);
 
-       INIT_LIST_HEAD(&fs_info->pinned_chunks);
-
        /* Usable values until the real ones are cached from the superblock */
        fs_info->nodesize = 4096;
        fs_info->sectorsize = 4096;
@@ -3335,6 +3342,8 @@ fail_delalloc_bytes:
        percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
        percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
+fail_dio_bytes:
+       percpu_counter_destroy(&fs_info->dio_bytes);
 fail_srcu:
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
@@ -4016,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info)
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
 
+       if (percpu_counter_sum(&fs_info->dio_bytes))
+               btrfs_info(fs_info, "at unmount dio bytes count %lld",
+                          percpu_counter_sum(&fs_info->dio_bytes));
+
        btrfs_sysfs_remove_mounted(fs_info);
        btrfs_sysfs_remove_fsid(fs_info->fs_devices);
 
@@ -4042,25 +4055,17 @@ void close_ctree(struct btrfs_fs_info *fs_info)
                btrfsic_unmount(fs_info->fs_devices);
 #endif
 
-       btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
+       btrfs_close_devices(fs_info->fs_devices);
 
        percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
        percpu_counter_destroy(&fs_info->delalloc_bytes);
+       percpu_counter_destroy(&fs_info->dio_bytes);
        percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
        btrfs_free_stripe_hash_table(fs_info);
        btrfs_free_ref_cache(fs_info);
-
-       while (!list_empty(&fs_info->pinned_chunks)) {
-               struct extent_map *em;
-
-               em = list_first_entry(&fs_info->pinned_chunks,
-                                     struct extent_map, list);
-               list_del_init(&em->list);
-               free_extent_map(em);
-       }
 }
 
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@ -4114,7 +4119,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
         * So here we should only check item pointers, not item data.
         */
        if (btrfs_header_level(buf) == 0 &&
-           btrfs_check_leaf_relaxed(fs_info, buf)) {
+           btrfs_check_leaf_relaxed(buf)) {
                btrfs_print_leaf(buf);
                ASSERT(0);
        }
@@ -4157,10 +4162,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
                      struct btrfs_key *first_key)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
-       struct btrfs_fs_info *fs_info = root->fs_info;
-
-       return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
+       return btree_read_extent_buffer_pages(buf, parent_transid,
                                              level, first_key);
 }
 
@@ -4484,10 +4486,17 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
                                   struct btrfs_fs_info *fs_info)
 {
+       struct btrfs_device *dev, *tmp;
+
        btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
        ASSERT(list_empty(&cur_trans->dirty_bgs));
        ASSERT(list_empty(&cur_trans->io_bgs));
 
+       list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
+                                post_commit_list) {
+               list_del_init(&dev->post_commit_list);
+       }
+
        btrfs_destroy_delayed_refs(cur_trans, fs_info);
 
        cur_trans->state = TRANS_STATE_COMMIT_START;
index 987a64b..a0161aa 100644 (file)
@@ -39,6 +39,8 @@ static inline u64 btrfs_sb_offset(int mirror)
 struct btrfs_device;
 struct btrfs_fs_devices;
 
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+                          struct btrfs_key *first_key, u64 parent_transid);
 struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
                                      u64 parent_transid, int level,
                                      struct btrfs_key *first_key);
@@ -48,7 +50,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
 struct extent_buffer *btrfs_find_create_tree_block(
                                                struct btrfs_fs_info *fs_info,
                                                u64 bytenr);
-void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
+void btrfs_clean_tree_block(struct extent_buffer *buf);
 int open_ctree(struct super_block *sb,
               struct btrfs_fs_devices *fs_devices,
               char *options);
@@ -123,8 +125,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
                        extent_submit_bio_start_t *submit_bio_start);
 blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
                          int mirror_num);
-int btrfs_write_tree_block(struct extent_buffer *buf);
-void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info);
 int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
@@ -134,7 +134,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
                                  struct btrfs_fs_info *fs_info);
 struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_info *fs_info,
                                     u64 objectid);
 int btree_lock_page_hook(struct page *page, void *data,
                                void (*flush_fn)(void *));
index c588032..f79e477 100644 (file)
@@ -643,7 +643,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 
        if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
                mutex_lock(&caching_ctl->mutex);
-               ret = load_free_space_cache(fs_info, cache);
+               ret = load_free_space_cache(cache);
 
                spin_lock(&cache->lock);
                if (ret == 1) {
@@ -756,14 +756,15 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
        return NULL;
 }
 
-static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
-                            bool metadata, u64 root_objectid)
+static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
+                            struct btrfs_ref *ref)
 {
        struct btrfs_space_info *space_info;
+       s64 num_bytes = -ref->len;
        u64 flags;
 
-       if (metadata) {
-               if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
+       if (ref->type == BTRFS_REF_METADATA) {
+               if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
                        flags = BTRFS_BLOCK_GROUP_SYSTEM;
                else
                        flags = BTRFS_BLOCK_GROUP_METADATA;
@@ -1704,7 +1705,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
        type = extent_ref_type(parent, owner);
        size = btrfs_extent_inline_ref_size(type);
 
-       btrfs_extend_item(fs_info, path, size);
+       btrfs_extend_item(path, size);
 
        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
@@ -1779,7 +1780,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
                                  int *last_ref)
 {
        struct extent_buffer *leaf = path->nodes[0];
-       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_extent_item *ei;
        struct btrfs_extent_data_ref *dref = NULL;
        struct btrfs_shared_data_ref *sref = NULL;
@@ -1834,7 +1834,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
                        memmove_extent_buffer(leaf, ptr, ptr + size,
                                              end - ptr - size);
                item_size -= size;
-               btrfs_truncate_item(fs_info, path, item_size, 1);
+               btrfs_truncate_item(path, item_size, 1);
        }
        btrfs_mark_buffer_dirty(leaf);
 }
@@ -1905,7 +1905,6 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-#define in_range(b, first, len)        ((b) >= (first) && (b) < (first) + (len))
 static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
                               u64 *discarded_bytes)
 {
@@ -2043,39 +2042,28 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 
 /* Can return -ENOMEM */
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root,
-                        u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset)
+                        struct btrfs_ref *generic_ref)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int old_ref_mod, new_ref_mod;
        int ret;
 
-       BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
-              root_objectid == BTRFS_TREE_LOG_OBJECTID);
+       ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
+              generic_ref->action);
+       BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
+              generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
 
-       btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
-                          owner, offset, BTRFS_ADD_DELAYED_REF);
-
-       if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, bytenr,
-                                                num_bytes, parent,
-                                                root_objectid, (int)owner,
-                                                BTRFS_ADD_DELAYED_REF, NULL,
-                                                &old_ref_mod, &new_ref_mod);
-       } else {
-               ret = btrfs_add_delayed_data_ref(trans, bytenr,
-                                                num_bytes, parent,
-                                                root_objectid, owner, offset,
-                                                0, BTRFS_ADD_DELAYED_REF,
+       if (generic_ref->type == BTRFS_REF_METADATA)
+               ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
+                               NULL, &old_ref_mod, &new_ref_mod);
+       else
+               ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
                                                 &old_ref_mod, &new_ref_mod);
-       }
 
-       if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
-               bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+       btrfs_ref_tree_mod(fs_info, generic_ref);
 
-               add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
-       }
+       if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
+               add_pinned_bytes(fs_info, generic_ref);
 
        return ret;
 }
@@ -2877,97 +2865,6 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
        return btrfs_check_space_for_delayed_refs(trans->fs_info);
 }
 
-struct async_delayed_refs {
-       struct btrfs_root *root;
-       u64 transid;
-       int count;
-       int error;
-       int sync;
-       struct completion wait;
-       struct btrfs_work work;
-};
-
-static inline struct async_delayed_refs *
-to_async_delayed_refs(struct btrfs_work *work)
-{
-       return container_of(work, struct async_delayed_refs, work);
-}
-
-static void delayed_ref_async_start(struct btrfs_work *work)
-{
-       struct async_delayed_refs *async = to_async_delayed_refs(work);
-       struct btrfs_trans_handle *trans;
-       struct btrfs_fs_info *fs_info = async->root->fs_info;
-       int ret;
-
-       /* if the commit is already started, we don't need to wait here */
-       if (btrfs_transaction_blocked(fs_info))
-               goto done;
-
-       trans = btrfs_join_transaction(async->root);
-       if (IS_ERR(trans)) {
-               async->error = PTR_ERR(trans);
-               goto done;
-       }
-
-       /*
-        * trans->sync means that when we call end_transaction, we won't
-        * wait on delayed refs
-        */
-       trans->sync = true;
-
-       /* Don't bother flushing if we got into a different transaction */
-       if (trans->transid > async->transid)
-               goto end;
-
-       ret = btrfs_run_delayed_refs(trans, async->count);
-       if (ret)
-               async->error = ret;
-end:
-       ret = btrfs_end_transaction(trans);
-       if (ret && !async->error)
-               async->error = ret;
-done:
-       if (async->sync)
-               complete(&async->wait);
-       else
-               kfree(async);
-}
-
-int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
-                                unsigned long count, u64 transid, int wait)
-{
-       struct async_delayed_refs *async;
-       int ret;
-
-       async = kmalloc(sizeof(*async), GFP_NOFS);
-       if (!async)
-               return -ENOMEM;
-
-       async->root = fs_info->tree_root;
-       async->count = count;
-       async->error = 0;
-       async->transid = transid;
-       if (wait)
-               async->sync = 1;
-       else
-               async->sync = 0;
-       init_completion(&async->wait);
-
-       btrfs_init_work(&async->work, btrfs_extent_refs_helper,
-                       delayed_ref_async_start, NULL, NULL);
-
-       btrfs_queue_work(fs_info->extent_workers, &async->work);
-
-       if (wait) {
-               wait_for_completion(&async->wait);
-               ret = async->error;
-               kfree(async);
-               return ret;
-       }
-       return 0;
-}
-
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -3036,7 +2933,6 @@ out:
 }
 
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info,
                                u64 bytenr, u64 num_bytes, u64 flags,
                                int level, int is_data)
 {
@@ -3053,8 +2949,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        extent_op->is_data = is_data ? true : false;
        extent_op->level = level;
 
-       ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
-                                         num_bytes, extent_op);
+       ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
        if (ret)
                btrfs_free_delayed_extent_op(extent_op);
        return ret;
@@ -3246,13 +3141,12 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
        u32 nritems;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
+       struct btrfs_ref generic_ref = { 0 };
+       bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
        int i;
+       int action;
        int level;
        int ret = 0;
-       int (*process_func)(struct btrfs_trans_handle *,
-                           struct btrfs_root *,
-                           u64, u64, u64, u64, u64, u64);
-
 
        if (btrfs_is_testing(fs_info))
                return 0;
@@ -3264,15 +3158,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
                return 0;
 
-       if (inc)
-               process_func = btrfs_inc_extent_ref;
-       else
-               process_func = btrfs_free_extent;
-
        if (full_backref)
                parent = buf->start;
        else
                parent = 0;
+       if (inc)
+               action = BTRFS_ADD_DELAYED_REF;
+       else
+               action = BTRFS_DROP_DELAYED_REF;
 
        for (i = 0; i < nritems; i++) {
                if (level == 0) {
@@ -3290,16 +3183,30 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 
                        num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
                        key.offset -= btrfs_file_extent_offset(buf, fi);
-                       ret = process_func(trans, root, bytenr, num_bytes,
-                                          parent, ref_root, key.objectid,
-                                          key.offset);
+                       btrfs_init_generic_ref(&generic_ref, action, bytenr,
+                                              num_bytes, parent);
+                       generic_ref.real_root = root->root_key.objectid;
+                       btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
+                                           key.offset);
+                       generic_ref.skip_qgroup = for_reloc;
+                       if (inc)
+                               ret = btrfs_inc_extent_ref(trans, &generic_ref);
+                       else
+                               ret = btrfs_free_extent(trans, &generic_ref);
                        if (ret)
                                goto fail;
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        num_bytes = fs_info->nodesize;
-                       ret = process_func(trans, root, bytenr, num_bytes,
-                                          parent, ref_root, level - 1, 0);
+                       btrfs_init_generic_ref(&generic_ref, action, bytenr,
+                                              num_bytes, parent);
+                       generic_ref.real_root = root->root_key.objectid;
+                       btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
+                       generic_ref.skip_qgroup = for_reloc;
+                       if (inc)
+                               ret = btrfs_inc_extent_ref(trans, &generic_ref);
+                       else
+                               ret = btrfs_free_extent(trans, &generic_ref);
                        if (ret)
                                goto fail;
                }
@@ -3322,10 +3229,10 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
-                                struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 struct btrfs_block_group_cache *cache)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int ret;
        struct btrfs_root *extent_root = fs_info->extent_root;
        unsigned long bi;
@@ -3348,10 +3255,10 @@ fail:
 
 }
 
-static struct btrfs_block_group_cache *
-next_block_group(struct btrfs_fs_info *fs_info,
-                struct btrfs_block_group_cache *cache)
+static struct btrfs_block_group_cache *next_block_group(
+               struct btrfs_block_group_cache *cache)
 {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
        struct rb_node *node;
 
        spin_lock(&fs_info->block_group_cache_lock);
@@ -3404,7 +3311,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        if (trans->aborted)
                return 0;
 again:
-       inode = lookup_free_space_inode(fs_info, block_group, path);
+       inode = lookup_free_space_inode(block_group, path);
        if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
                ret = PTR_ERR(inode);
                btrfs_release_path(path);
@@ -3418,8 +3325,7 @@ again:
                if (block_group->ro)
                        goto out_free;
 
-               ret = create_free_space_inode(fs_info, trans, block_group,
-                                             path);
+               ret = create_free_space_inode(trans, block_group, path);
                if (ret)
                        goto out_free;
                goto again;
@@ -3538,9 +3444,9 @@ out:
        return ret;
 }
 
-int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info)
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_block_group_cache *cache, *tmp;
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_path *path;
@@ -3652,8 +3558,7 @@ again:
 
                if (cache->disk_cache_state == BTRFS_DC_SETUP) {
                        cache->io_ctl.inode = NULL;
-                       ret = btrfs_write_out_cache(fs_info, trans,
-                                                   cache, path);
+                       ret = btrfs_write_out_cache(trans, cache, path);
                        if (ret == 0 && cache->io_ctl.inode) {
                                num_started++;
                                should_put = 0;
@@ -3673,8 +3578,7 @@ again:
                        }
                }
                if (!ret) {
-                       ret = write_one_cache_group(trans, fs_info,
-                                                   path, cache);
+                       ret = write_one_cache_group(trans, path, cache);
                        /*
                         * Our block group might still be attached to the list
                         * of new block groups in the transaction handle of some
@@ -3744,9 +3648,9 @@ again:
        return ret;
 }
 
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
-                                  struct btrfs_fs_info *fs_info)
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_block_group_cache *cache;
        struct btrfs_transaction *cur_trans = trans->transaction;
        int ret = 0;
@@ -3809,8 +3713,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
                if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
                        cache->io_ctl.inode = NULL;
-                       ret = btrfs_write_out_cache(fs_info, trans,
-                                                   cache, path);
+                       ret = btrfs_write_out_cache(trans, cache, path);
                        if (ret == 0 && cache->io_ctl.inode) {
                                num_started++;
                                should_put = 0;
@@ -3824,8 +3727,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                        }
                }
                if (!ret) {
-                       ret = write_one_cache_group(trans, fs_info,
-                                                   path, cache);
+                       ret = write_one_cache_group(trans, path, cache);
                        /*
                         * One of the free space endio workers might have
                         * created a new block group while updating a free space
@@ -3842,8 +3744,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                        if (ret == -ENOENT) {
                                wait_event(cur_trans->writer_wait,
                                   atomic_read(&cur_trans->num_writers) == 1);
-                               ret = write_one_cache_group(trans, fs_info,
-                                                           path, cache);
+                               ret = write_one_cache_group(trans, path, cache);
                        }
                        if (ret)
                                btrfs_abort_transaction(trans, ret);
@@ -4732,6 +4633,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
        struct btrfs_space_info *space_info;
        struct btrfs_trans_handle *trans;
        u64 delalloc_bytes;
+       u64 dio_bytes;
        u64 async_pages;
        u64 items;
        long time_left;
@@ -4747,7 +4649,8 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 
        delalloc_bytes = percpu_counter_sum_positive(
                                                &fs_info->delalloc_bytes);
-       if (delalloc_bytes == 0) {
+       dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+       if (delalloc_bytes == 0 && dio_bytes == 0) {
                if (trans)
                        return;
                if (wait_ordered)
@@ -4755,8 +4658,16 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
                return;
        }
 
+       /*
+        * If we are doing more ordered than delalloc we need to just wait on
+        * ordered extents, otherwise we'll waste time trying to flush delalloc
+        * that likely won't give us the space back we need.
+        */
+       if (dio_bytes > delalloc_bytes)
+               wait_ordered = true;
+
        loops = 0;
-       while (delalloc_bytes && loops < 3) {
+       while ((delalloc_bytes || dio_bytes) && loops < 3) {
                nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
 
                /*
@@ -4806,6 +4717,7 @@ skip_async:
                }
                delalloc_bytes = percpu_counter_sum_positive(
                                                &fs_info->delalloc_bytes);
+               dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
        }
 }
 
@@ -5803,85 +5715,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
        return ret;
 }
 
-static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
-                               u64 *metadata_bytes, u64 *qgroup_bytes)
-{
-       *metadata_bytes = 0;
-       *qgroup_bytes = 0;
-
-       spin_lock(&block_rsv->lock);
-       if (block_rsv->reserved < block_rsv->size)
-               *metadata_bytes = block_rsv->size - block_rsv->reserved;
-       if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
-               *qgroup_bytes = block_rsv->qgroup_rsv_size -
-                       block_rsv->qgroup_rsv_reserved;
-       spin_unlock(&block_rsv->lock);
-}
-
-/**
- * btrfs_inode_rsv_refill - refill the inode block rsv.
- * @inode - the inode we are refilling.
- * @flush - the flushing restriction.
- *
- * Essentially the same as btrfs_block_rsv_refill, except it uses the
- * block_rsv->size as the minimum size.  We'll either refill the missing amount
- * or return if we already have enough space.  This will also handle the reserve
- * tracepoint for the reserved amount.
- */
-static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
-                                 enum btrfs_reserve_flush_enum flush)
-{
-       struct btrfs_root *root = inode->root;
-       struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
-       u64 num_bytes, last = 0;
-       u64 qgroup_num_bytes;
-       int ret = -ENOSPC;
-
-       calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
-       if (num_bytes == 0)
-               return 0;
-
-       do {
-               ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
-                                                        true);
-               if (ret)
-                       return ret;
-               ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
-               if (ret) {
-                       btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
-                       last = num_bytes;
-                       /*
-                        * If we are fragmented we can end up with a lot of
-                        * outstanding extents which will make our size be much
-                        * larger than our reserved amount.
-                        *
-                        * If the reservation happens here, it might be very
-                        * big though not needed in the end, if the delalloc
-                        * flushing happens.
-                        *
-                        * If this is the case try and do the reserve again.
-                        */
-                       if (flush == BTRFS_RESERVE_FLUSH_ALL)
-                               calc_refill_bytes(block_rsv, &num_bytes,
-                                                  &qgroup_num_bytes);
-                       if (num_bytes == 0)
-                               return 0;
-               }
-       } while (ret && last != num_bytes);
-
-       if (!ret) {
-               block_rsv_add_bytes(block_rsv, num_bytes, false);
-               trace_btrfs_space_reservation(root->fs_info, "delalloc",
-                                             btrfs_ino(inode), num_bytes, 1);
-
-               /* Don't forget to increase qgroup_rsv_reserved */
-               spin_lock(&block_rsv->lock);
-               block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
-               spin_unlock(&block_rsv->lock);
-       }
-       return ret;
-}
-
 static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
                                     struct btrfs_block_rsv *block_rsv,
                                     u64 num_bytes, u64 *qgroup_to_release)
@@ -6182,9 +6015,25 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
        spin_unlock(&block_rsv->lock);
 }
 
+static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+                                   u64 num_bytes, u64 *meta_reserve,
+                                   u64 *qgroup_reserve)
+{
+       u64 nr_extents = count_max_extents(num_bytes);
+       u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+
+       /* We add one for the inode update at finish ordered time */
+       *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
+                                               nr_extents + csum_leaves + 1);
+       *qgroup_reserve = nr_extents * fs_info->nodesize;
+}
+
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
-       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+       u64 meta_reserve, qgroup_reserve;
        unsigned nr_extents;
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret = 0;
@@ -6214,7 +6063,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
 
-       /* Add our new extents and calculate the new rsv size. */
+       /*
+        * We always want to do it this way, every other way is wrong and ends
+        * in tears.  Pre-reserving the amount we are going to add will always
+        * be the right way, because otherwise if we have enough parallelism we
+        * could end up with thousands of inodes all holding little bits of
+        * reservations they were able to make previously and the only way to
+        * reclaim that space is to ENOSPC out the operations and clear
+        * everything out and try again, which is bad.  This way we just
+        * over-reserve slightly, and clean up the mess when we are done.
+        */
+       calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
+                               &qgroup_reserve);
+       ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
+       if (ret)
+               goto out_fail;
+       ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
+       if (ret)
+               goto out_qgroup;
+
+       /*
+        * Now we need to update our outstanding extents and csum bytes _first_
+        * and then add the reservation to the block_rsv.  This keeps us from
+        * racing with an ordered completion or some such that would think it
+        * needs to free the reservation we just made.
+        */
        spin_lock(&inode->lock);
        nr_extents = count_max_extents(num_bytes);
        btrfs_mod_outstanding_extents(inode, nr_extents);
@@ -6222,22 +6095,21 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
        btrfs_calculate_inode_block_rsv_size(fs_info, inode);
        spin_unlock(&inode->lock);
 
-       ret = btrfs_inode_rsv_refill(inode, flush);
-       if (unlikely(ret))
-               goto out_fail;
+       /* Now we can safely add our space to our block rsv */
+       block_rsv_add_bytes(block_rsv, meta_reserve, false);
+       trace_btrfs_space_reservation(root->fs_info, "delalloc",
+                                     btrfs_ino(inode), meta_reserve, 1);
+
+       spin_lock(&block_rsv->lock);
+       block_rsv->qgroup_rsv_reserved += qgroup_reserve;
+       spin_unlock(&block_rsv->lock);
 
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
        return 0;
-
+out_qgroup:
+       btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
 out_fail:
-       spin_lock(&inode->lock);
-       nr_extents = count_max_extents(num_bytes);
-       btrfs_mod_outstanding_extents(inode, -nr_extents);
-       inode->csum_bytes -= num_bytes;
-       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
-       spin_unlock(&inode->lock);
-
        btrfs_inode_rsv_release(inode, true);
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
@@ -6361,9 +6233,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
 }
 
 static int update_block_group(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *info, u64 bytenr,
-                             u64 num_bytes, int alloc)
+                             u64 bytenr, u64 num_bytes, int alloc)
 {
+       struct btrfs_fs_info *info = trans->fs_info;
        struct btrfs_block_group_cache *cache = NULL;
        u64 total = num_bytes;
        u64 old_val;
@@ -6444,7 +6316,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                if (list_empty(&cache->dirty_list)) {
                        list_add_tail(&cache->dirty_list,
                                      &trans->transaction->dirty_bgs);
-                       trans->transaction->num_dirty_bgs++;
                        trans->delayed_ref_updates++;
                        btrfs_get_block_group(cache);
                }
@@ -6491,10 +6362,11 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
        return bytenr;
 }
 
-static int pin_down_extent(struct btrfs_fs_info *fs_info,
-                          struct btrfs_block_group_cache *cache,
+static int pin_down_extent(struct btrfs_block_group_cache *cache,
                           u64 bytenr, u64 num_bytes, int reserved)
 {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+
        spin_lock(&cache->space_info->lock);
        spin_lock(&cache->lock);
        cache->pinned += num_bytes;
@@ -6526,7 +6398,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
        cache = btrfs_lookup_block_group(fs_info, bytenr);
        BUG_ON(!cache); /* Logic error */
 
-       pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);
+       pin_down_extent(cache, bytenr, num_bytes, reserved);
 
        btrfs_put_block_group(cache);
        return 0;
@@ -6553,7 +6425,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
         */
        cache_block_group(cache, 1);
 
-       pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
+       pin_down_extent(cache, bytenr, num_bytes, 0);
 
        /* remove us from the free space cache (if we're there at all) */
        ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
@@ -6607,9 +6479,9 @@ out_lock:
        return ret;
 }
 
-int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
-                                struct extent_buffer *eb)
+int btrfs_exclude_logged_extents(struct extent_buffer *eb)
 {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_file_extent_item *item;
        struct btrfs_key key;
        int found_type;
@@ -7198,7 +7070,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        goto out;
                }
 
-               ret = update_block_group(trans, info, bytenr, num_bytes, 0);
+               ret = update_block_group(trans, bytenr, num_bytes, 0);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        goto out;
@@ -7272,21 +7144,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                           u64 parent, int last_ref)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_ref generic_ref = { 0 };
        int pin = 1;
        int ret;
 
+       btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
+                              buf->start, buf->len, parent);
+       btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
+                           root->root_key.objectid);
+
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
                int old_ref_mod, new_ref_mod;
 
-               btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
-                                  root->root_key.objectid,
-                                  btrfs_header_level(buf), 0,
-                                  BTRFS_DROP_DELAYED_REF);
-               ret = btrfs_add_delayed_tree_ref(trans, buf->start,
-                                                buf->len, parent,
-                                                root->root_key.objectid,
-                                                btrfs_header_level(buf),
-                                                BTRFS_DROP_DELAYED_REF, NULL,
+               btrfs_ref_tree_mod(fs_info, &generic_ref);
+               ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
                                                 &old_ref_mod, &new_ref_mod);
                BUG_ON(ret); /* -ENOMEM */
                pin = old_ref_mod >= 0 && new_ref_mod < 0;
@@ -7305,8 +7176,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                cache = btrfs_lookup_block_group(fs_info, buf->start);
 
                if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
-                       pin_down_extent(fs_info, cache, buf->start,
-                                       buf->len, 1);
+                       pin_down_extent(cache, buf->start, buf->len, 1);
                        btrfs_put_block_group(cache);
                        goto out;
                }
@@ -7320,8 +7190,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
        }
 out:
        if (pin)
-               add_pinned_bytes(fs_info, buf->len, true,
-                                root->root_key.objectid);
+               add_pinned_bytes(fs_info, &generic_ref);
 
        if (last_ref) {
                /*
@@ -7333,52 +7202,43 @@ out:
 }
 
 /* Can return -ENOMEM */
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root,
-                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-                     u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        int old_ref_mod, new_ref_mod;
        int ret;
 
        if (btrfs_is_testing(fs_info))
                return 0;
 
-       if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
-               btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
-                                  root_objectid, owner, offset,
-                                  BTRFS_DROP_DELAYED_REF);
-
        /*
         * tree log blocks never actually go into the extent allocation
         * tree, just update pinning info and exit early.
         */
-       if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
-               WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
+       if ((ref->type == BTRFS_REF_METADATA &&
+            ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+           (ref->type == BTRFS_REF_DATA &&
+            ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
                /* unlocks the pinned mutex */
-               btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
+               btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
                old_ref_mod = new_ref_mod = 0;
                ret = 0;
-       } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, bytenr,
-                                                num_bytes, parent,
-                                                root_objectid, (int)owner,
-                                                BTRFS_DROP_DELAYED_REF, NULL,
+       } else if (ref->type == BTRFS_REF_METADATA) {
+               ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
                                                 &old_ref_mod, &new_ref_mod);
        } else {
-               ret = btrfs_add_delayed_data_ref(trans, bytenr,
-                                                num_bytes, parent,
-                                                root_objectid, owner, offset,
-                                                0, BTRFS_DROP_DELAYED_REF,
+               ret = btrfs_add_delayed_data_ref(trans, ref, 0,
                                                 &old_ref_mod, &new_ref_mod);
        }
 
-       if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
-               bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+       if (!((ref->type == BTRFS_REF_METADATA &&
+              ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+             (ref->type == BTRFS_REF_DATA &&
+              ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
+               btrfs_ref_tree_mod(fs_info, ref);
 
-               add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
-       }
+       if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
+               add_pinned_bytes(fs_info, ref);
 
        return ret;
 }
@@ -7569,7 +7429,6 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
                struct find_free_extent_ctl *ffe_ctl,
                struct btrfs_block_group_cache **cluster_bg_ret)
 {
-       struct btrfs_fs_info *fs_info = bg->fs_info;
        struct btrfs_block_group_cache *cluster_bg;
        u64 aligned_cluster;
        u64 offset;
@@ -7629,9 +7488,8 @@ refill_cluster:
        aligned_cluster = max_t(u64,
                        ffe_ctl->empty_cluster + ffe_ctl->empty_size,
                        bg->full_stripe_len);
-       ret = btrfs_find_space_cluster(fs_info, bg, last_ptr,
-                       ffe_ctl->search_start, ffe_ctl->num_bytes,
-                       aligned_cluster);
+       ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
+                       ffe_ctl->num_bytes, aligned_cluster);
        if (ret == 0) {
                /* Now pull our allocation out of this cluster */
                offset = btrfs_alloc_from_cluster(bg, last_ptr,
@@ -8281,7 +8139,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
        }
 
        if (pin)
-               pin_down_extent(fs_info, cache, start, len, 1);
+               pin_down_extent(cache, start, len, 1);
        else {
                if (btrfs_test_opt(fs_info, DISCARD))
                        ret = btrfs_discard_extent(fs_info, start, len, NULL);
@@ -8370,7 +8228,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1);
+       ret = update_block_group(trans, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
                        ins->objectid, ins->offset);
@@ -8460,7 +8318,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       ret = update_block_group(trans, fs_info, extent_key.objectid,
+       ret = update_block_group(trans, extent_key.objectid,
                                 fs_info->nodesize, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -8478,19 +8336,17 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                     u64 offset, u64 ram_bytes,
                                     struct btrfs_key *ins)
 {
+       struct btrfs_ref generic_ref = { 0 };
        int ret;
 
        BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
 
-       btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
-                          root->root_key.objectid, owner, offset,
-                          BTRFS_ADD_DELAYED_EXTENT);
-
-       ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
-                                        ins->offset, 0,
-                                        root->root_key.objectid, owner,
-                                        offset, ram_bytes,
-                                        BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
+       btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+                              ins->objectid, ins->offset, 0);
+       btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
+       btrfs_ref_tree_mod(root->fs_info, &generic_ref);
+       ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
+                                        ram_bytes, NULL, NULL);
        return ret;
 }
 
@@ -8563,7 +8419,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
        btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
-       clean_tree_block(fs_info, buf);
+       btrfs_clean_tree_block(buf);
        clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
 
        btrfs_set_lock_blocking_write(buf);
@@ -8682,6 +8538,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *block_rsv;
        struct extent_buffer *buf;
        struct btrfs_delayed_extent_op *extent_op;
+       struct btrfs_ref generic_ref = { 0 };
        u64 flags = 0;
        int ret;
        u32 blocksize = fs_info->nodesize;
@@ -8736,13 +8593,12 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
                extent_op->is_data = false;
                extent_op->level = level;
 
-               btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
-                                  root_objectid, level, 0,
-                                  BTRFS_ADD_DELAYED_EXTENT);
-               ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
-                                                ins.offset, parent,
-                                                root_objectid, level,
-                                                BTRFS_ADD_DELAYED_EXTENT,
+               btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+                                      ins.objectid, ins.offset, parent);
+               generic_ref.real_root = root->root_key.objectid;
+               btrfs_init_tree_ref(&generic_ref, level, root_objectid);
+               btrfs_ref_tree_mod(fs_info, &generic_ref);
+               ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
                                                 extent_op, NULL, NULL);
                if (ret)
                        goto out_free_delayed;
@@ -8918,7 +8774,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                BUG_ON(ret); /* -ENOMEM */
                ret = btrfs_dec_ref(trans, root, eb, 0);
                BUG_ON(ret); /* -ENOMEM */
-               ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
+               ret = btrfs_set_disk_extent_flags(trans, eb->start,
                                                  eb->len, flag,
                                                  btrfs_header_level(eb), 0);
                BUG_ON(ret); /* -ENOMEM */
@@ -8987,6 +8843,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        u64 parent;
        struct btrfs_key key;
        struct btrfs_key first_key;
+       struct btrfs_ref ref = { 0 };
        struct extent_buffer *next;
        int level = wc->level;
        int reada = 0;
@@ -9159,9 +9016,10 @@ skip:
                wc->drop_level = level;
                find_next_key(path, level, &wc->drop_progress);
 
-               ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize,
-                                       parent, root->root_key.objectid,
-                                       level - 1, 0);
+               btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+                                      fs_info->nodesize, parent);
+               btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
+               ret = btrfs_free_extent(trans, &ref);
                if (ret)
                        goto out_unlock;
        }
@@ -9251,21 +9109,23 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        else
                                ret = btrfs_dec_ref(trans, root, eb, 0);
                        BUG_ON(ret); /* -ENOMEM */
-                       ret = btrfs_qgroup_trace_leaf_items(trans, eb);
-                       if (ret) {
-                               btrfs_err_rl(fs_info,
-                                            "error %d accounting leaf items. Quota is out of sync, rescan required.",
+                       if (is_fstree(root->root_key.objectid)) {
+                               ret = btrfs_qgroup_trace_leaf_items(trans, eb);
+                               if (ret) {
+                                       btrfs_err_rl(fs_info,
+       "error %d accounting leaf items, quota is out of sync, rescan required",
                                             ret);
+                               }
                        }
                }
-               /* make block locked assertion in clean_tree_block happy */
+               /* make block locked assertion in btrfs_clean_tree_block happy */
                if (!path->locks[level] &&
                    btrfs_header_generation(eb) == trans->transid) {
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking_write(eb);
                        path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
-               clean_tree_block(fs_info, eb);
+               btrfs_clean_tree_block(eb);
        }
 
        if (eb == root->node) {
@@ -9921,12 +9781,10 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
  */
 int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
-       struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_block_group_cache *block_group;
        struct btrfs_space_info *space_info;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        struct btrfs_device *device;
-       struct btrfs_trans_handle *trans;
        u64 min_free;
        u64 dev_min = 1;
        u64 dev_nr = 0;
@@ -10025,13 +9883,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
                min_free = div64_u64(min_free, dev_min);
        }
 
-       /* We need to do this so that we can look at pending chunks */
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               ret = PTR_ERR(trans);
-               goto out;
-       }
-
        mutex_lock(&fs_info->chunk_mutex);
        list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
                u64 dev_offset;
@@ -10042,7 +9893,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
                 */
                if (device->total_bytes > device->bytes_used + min_free &&
                    !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
-                       ret = find_free_dev_extent(trans, device, min_free,
+                       ret = find_free_dev_extent(device, min_free,
                                                   &dev_offset, NULL);
                        if (!ret)
                                dev_nr++;
@@ -10058,7 +9909,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
                           "no space to allocate a new chunk for block group %llu",
                           block_group->key.objectid);
        mutex_unlock(&fs_info->chunk_mutex);
-       btrfs_end_transaction(trans);
 out:
        btrfs_put_block_group(block_group);
        return ret;
@@ -10159,7 +10009,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
                        if (block_group->iref)
                                break;
                        spin_unlock(&block_group->lock);
-                       block_group = next_block_group(info, block_group);
+                       block_group = next_block_group(block_group);
                }
                if (!block_group) {
                        if (last == 0)
@@ -10660,7 +10510,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
        struct btrfs_block_group_cache *cache;
        int ret;
 
-       btrfs_set_log_full_commit(fs_info, trans);
+       btrfs_set_log_full_commit(trans);
 
        cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
        if (!cache)
@@ -10808,7 +10658,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * get the inode first so any iput calls done for the io_list
         * aren't the final iput (no unlinks allowed now)
         */
-       inode = lookup_free_space_inode(fs_info, block_group, path);
+       inode = lookup_free_space_inode(block_group, path);
 
        mutex_lock(&trans->transaction->cache_write_mutex);
        /*
@@ -10952,10 +10802,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        memcpy(&key, &block_group->key, sizeof(key));
 
        mutex_lock(&fs_info->chunk_mutex);
-       if (!list_empty(&em->list)) {
-               /* We're in the transaction->pending_chunks list. */
-               free_extent_map(em);
-       }
        spin_lock(&block_group->lock);
        block_group->removed = 1;
        /*
@@ -10982,25 +10828,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * the transaction commit has completed.
         */
        remove_em = (atomic_read(&block_group->trimming) == 0);
-       /*
-        * Make sure a trimmer task always sees the em in the pinned_chunks list
-        * if it sees block_group->removed == 1 (needs to lock block_group->lock
-        * before checking block_group->removed).
-        */
-       if (!remove_em) {
-               /*
-                * Our em might be in trans->transaction->pending_chunks which
-                * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
-                * and so is the fs_info->pinned_chunks list.
-                *
-                * So at this point we must be holding the chunk_mutex to avoid
-                * any races with chunk allocation (more specifically at
-                * volumes.c:contains_pending_extent()), to ensure it always
-                * sees the em, either in the pending_chunks list or in the
-                * pinned_chunks list.
-                */
-               list_move_tail(&em->list, &fs_info->pinned_chunks);
-       }
        spin_unlock(&block_group->lock);
 
        if (remove_em) {
@@ -11008,11 +10835,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
                em_tree = &fs_info->mapping_tree.map_tree;
                write_lock(&em_tree->lock);
-               /*
-                * The em might be in the pending_chunks list, so make sure the
-                * chunk mutex is locked, since remove_extent_mapping() will
-                * delete us from that list.
-                */
                remove_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
                /* once for the tree */
@@ -11315,11 +11137,12 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
  * held back allocations.
  */
 static int btrfs_trim_free_extents(struct btrfs_device *device,
-                                  u64 minlen, u64 *trimmed)
+                                  struct fstrim_range *range, u64 *trimmed)
 {
-       u64 start = 0, len = 0;
+       u64 start, len = 0, end = 0;
        int ret;
 
+       start = max_t(u64, range->start, SZ_1M);
        *trimmed = 0;
 
        /* Discard not supported = nothing to do. */
@@ -11338,43 +11161,52 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 
        while (1) {
                struct btrfs_fs_info *fs_info = device->fs_info;
-               struct btrfs_transaction *trans;
                u64 bytes;
 
                ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
                if (ret)
                        break;
 
-               ret = down_read_killable(&fs_info->commit_root_sem);
-               if (ret) {
+               find_first_clear_extent_bit(&device->alloc_state, start,
+                                           &start, &end,
+                                           CHUNK_TRIMMED | CHUNK_ALLOCATED);
+               /*
+                * If find_first_clear_extent_bit() finds a range that spans the
+                * end of the device it will set end to -1; in that case it's up
+                * to the caller to trim the value to the size of the device.
+                */
+               end = min(end, device->total_bytes - 1);
+               len = end - start + 1;
+
+               /* We didn't find any extents */
+               if (!len) {
                        mutex_unlock(&fs_info->chunk_mutex);
+                       ret = 0;
                        break;
                }
 
-               spin_lock(&fs_info->trans_lock);
-               trans = fs_info->running_transaction;
-               if (trans)
-                       refcount_inc(&trans->use_count);
-               spin_unlock(&fs_info->trans_lock);
-
-               if (!trans)
-                       up_read(&fs_info->commit_root_sem);
-
-               ret = find_free_dev_extent_start(trans, device, minlen, start,
-                                                &start, &len);
-               if (trans) {
-                       up_read(&fs_info->commit_root_sem);
-                       btrfs_put_transaction(trans);
+               /* Keep going until we satisfy minlen or reach end of space */
+               if (len < range->minlen) {
+                       mutex_unlock(&fs_info->chunk_mutex);
+                       start += len;
+                       continue;
                }
 
-               if (ret) {
+               /* If we are past the end of the passed range, break */
+               if (start > range->start + range->len - 1) {
                        mutex_unlock(&fs_info->chunk_mutex);
-                       if (ret == -ENOSPC)
-                               ret = 0;
                        break;
                }
 
-               ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
+               start = max(range->start, start);
+               len = min(range->len, len);
+
+               ret = btrfs_issue_discard(device->bdev, start, len,
+                                         &bytes);
+               if (!ret)
+                       set_extent_bits(&device->alloc_state, start,
+                                       start + bytes - 1,
+                                       CHUNK_TRIMMED);
                mutex_unlock(&fs_info->chunk_mutex);
 
                if (ret)
@@ -11383,6 +11215,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
                start += len;
                *trimmed += bytes;
 
+               /* We've trimmed enough */
+               if (*trimmed >= range->len)
+                       break;
+
                if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
@@ -11419,7 +11255,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
        int ret = 0;
 
        cache = btrfs_lookup_first_block_group(fs_info, range->start);
-       for (; cache; cache = next_block_group(fs_info, cache)) {
+       for (; cache; cache = next_block_group(cache)) {
                if (cache->key.objectid >= (range->start + range->len)) {
                        btrfs_put_block_group(cache);
                        break;
@@ -11466,8 +11302,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
        devices = &fs_info->fs_devices->devices;
        list_for_each_entry(device, devices, dev_list) {
-               ret = btrfs_trim_free_extents(device, range->minlen,
-                                             &group_trimmed);
+               ret = btrfs_trim_free_extents(device, range, &group_trimmed);
                if (ret) {
                        dev_failed++;
                        dev_ret = ret;
index ca8b8e7..13fca7b 100644 (file)
@@ -109,8 +109,6 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
 #define btrfs_debug_check_extent_io_range(c, s, e)     do {} while (0)
 #endif
 
-#define BUFFER_LRU_MAX 64
-
 struct tree_entry {
        u64 start;
        u64 end;
@@ -151,34 +149,51 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
                                       unsigned long bio_flags)
 {
        blk_status_t ret = 0;
-       struct bio_vec *bvec = bio_last_bvec_all(bio);
-       struct bio_vec bv;
        struct extent_io_tree *tree = bio->bi_private;
-       u64 start;
-
-       mp_bvec_last_segment(bvec, &bv);
-       start = page_offset(bv.bv_page) + bv.bv_offset;
 
        bio->bi_private = NULL;
 
        if (tree->ops)
                ret = tree->ops->submit_bio_hook(tree->private_data, bio,
-                                          mirror_num, bio_flags, start);
+                                                mirror_num, bio_flags);
        else
                btrfsic_submit_bio(bio);
 
        return blk_status_to_errno(ret);
 }
 
-static void flush_write_bio(struct extent_page_data *epd)
+/* Cleanup unsubmitted bios */
+static void end_write_bio(struct extent_page_data *epd, int ret)
 {
        if (epd->bio) {
-               int ret;
+               epd->bio->bi_status = errno_to_blk_status(ret);
+               bio_endio(epd->bio);
+               epd->bio = NULL;
+       }
+}
 
+/*
+ * Submit bio from extent page data via submit_one_bio
+ *
+ * Return 0 if everything is OK.
+ * Return <0 for error.
+ */
+static int __must_check flush_write_bio(struct extent_page_data *epd)
+{
+       int ret = 0;
+
+       if (epd->bio) {
                ret = submit_one_bio(epd->bio, 0, 0);
-               BUG_ON(ret < 0); /* -ENOMEM */
+               /*
+                * Clean up of epd->bio is handled by its endio function.
+                * And endio is either triggered by successful bio execution
+                * or the error handler of submit bio hook.
+                * So at this point, no matter what happened, we don't need
+                * to clean up epd->bio.
+                */
                epd->bio = NULL;
        }
+       return ret;
 }
 
 int __init extent_io_init(void)
@@ -232,14 +247,46 @@ void __cold extent_io_exit(void)
        bioset_exit(&btrfs_bioset);
 }
 
-void extent_io_tree_init(struct extent_io_tree *tree,
+void extent_io_tree_init(struct btrfs_fs_info *fs_info,
+                        struct extent_io_tree *tree, unsigned int owner,
                         void *private_data)
 {
+       tree->fs_info = fs_info;
        tree->state = RB_ROOT;
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
        tree->private_data = private_data;
+       tree->owner = owner;
+}
+
+void extent_io_tree_release(struct extent_io_tree *tree)
+{
+       spin_lock(&tree->lock);
+       /*
+        * Do a single barrier for the waitqueue_active check here, the state
+        * of the waitqueue should not change once extent_io_tree_release is
+        * called.
+        */
+       smp_mb();
+       while (!RB_EMPTY_ROOT(&tree->state)) {
+               struct rb_node *node;
+               struct extent_state *state;
+
+               node = rb_first(&tree->state);
+               state = rb_entry(node, struct extent_state, rb_node);
+               rb_erase(&state->rb_node, &tree->state);
+               RB_CLEAR_NODE(&state->rb_node);
+               /*
+                * btree io trees aren't supposed to have tasks waiting for
+                * changes in the flags of extent states ever.
+                */
+               ASSERT(!waitqueue_active(&state->wq));
+               free_extent_state(state);
+
+               cond_resched_lock(&tree->lock);
+       }
+       spin_unlock(&tree->lock);
 }
 
 static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -400,7 +447,7 @@ static void merge_state(struct extent_io_tree *tree,
        struct extent_state *other;
        struct rb_node *other_node;
 
-       if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+       if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
                return;
 
        other_node = rb_prev(&state->rb_node);
@@ -611,6 +658,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        int clear = 0;
 
        btrfs_debug_check_extent_io_range(tree, start, end);
+       trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
 
        if (bits & EXTENT_DELALLOC)
                bits |= EXTENT_NORESERVE;
@@ -618,7 +666,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        if (delete)
                bits |= ~EXTENT_CTLBITS;
 
-       if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+       if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
                clear = 1;
 again:
        if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -850,7 +898,7 @@ static void cache_state(struct extent_state *state,
                        struct extent_state **cached_ptr)
 {
        return cache_state_if_flags(state, cached_ptr,
-                                   EXTENT_IOBITS | EXTENT_BOUNDARY);
+                                   EXTENT_LOCKED | EXTENT_BOUNDARY);
 }
 
 /*
@@ -880,6 +928,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        u64 last_end;
 
        btrfs_debug_check_extent_io_range(tree, start, end);
+       trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
 
 again:
        if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -1112,6 +1161,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        bool first_iteration = true;
 
        btrfs_debug_check_extent_io_range(tree, start, end);
+       trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
+                                      clear_bits);
 
 again:
        if (!prealloc) {
@@ -1311,6 +1362,13 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                                changeset);
 }
 
+int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
+                          unsigned bits)
+{
+       return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
+                               GFP_NOWAIT, NULL);
+}
+
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                     unsigned bits, int wake, int delete,
                     struct extent_state **cached)
@@ -1478,6 +1536,79 @@ out:
        return ret;
 }
 
+/**
+ * find_first_clear_extent_bit - finds the first range that has @bits not set
+ * and that starts at or after @start
+ *
+ * @tree - the tree to search
+ * @start - the offset at/after which the found extent should start
+ * @start_ret - records the beginning of the range
+ * @end_ret - records the end of the range (inclusive)
+ * @bits - the set of bits which must be unset
+ *
+ * Since unallocated range is also considered one which doesn't have the bits
+ * set it's possible that @end_ret contains -1, this happens in case the range
+ * spans (last_range_end, end of device]. In this case it's up to the caller to
+ * trim @end_ret to the appropriate size.
+ */
+void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+                                u64 *start_ret, u64 *end_ret, unsigned bits)
+{
+       struct extent_state *state;
+       struct rb_node *node, *prev = NULL, *next;
+
+       spin_lock(&tree->lock);
+
+       /* Find first extent with bits cleared */
+       while (1) {
+               node = __etree_search(tree, start, &next, &prev, NULL, NULL);
+               if (!node) {
+                       node = next;
+                       if (!node) {
+                               /*
+                                * We are past the last allocated chunk,
+                                * set start at the end of the last extent. The
+                                * device alloc tree should never be empty so
+                                * prev is always set.
+                                */
+                               ASSERT(prev);
+                               state = rb_entry(prev, struct extent_state, rb_node);
+                               *start_ret = state->end + 1;
+                               *end_ret = -1;
+                               goto out;
+                       }
+               }
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (in_range(start, state->start, state->end - state->start + 1) &&
+                       (state->state & bits)) {
+                       start = state->end + 1;
+               } else {
+                       *start_ret = start;
+                       break;
+               }
+       }
+
+       /*
+        * Find the longest stretch from start until an entry which has the
+        * bits set
+        */
+       while (1) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->end >= start && !(state->state & bits)) {
+                       *end_ret = state->end;
+               } else {
+                       *end_ret = state->start - 1;
+                       break;
+               }
+
+               node = rb_next(node);
+               if (!node)
+                       break;
+       }
+out:
+       spin_unlock(&tree->lock);
+}
+
 /*
  * find a contiguous range of bytes in the file marked as delalloc, not
  * more than 'max_bytes'.  start and end are used to return the range,
@@ -2061,9 +2192,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
        return 0;
 }
 
-int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
-                        struct extent_buffer *eb, int mirror_num)
+int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
 {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
        u64 start = eb->start;
        int i, num_pages = num_extent_pages(eb);
        int ret = 0;
@@ -2409,7 +2540,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                read_mode, failrec->this_mirror, failrec->in_validation);
 
        status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
-                                        failrec->bio_flags, 0);
+                                        failrec->bio_flags);
        if (status) {
                free_io_failure(failure_tree, tree, failrec);
                bio_put(bio);
@@ -2607,8 +2738,6 @@ static void end_bio_extent_readpage(struct bio *bio)
                        if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
                                               &eb->bflags))
                                btree_readahead_hook(eb, -EIO);
-
-                       ret = -EIO;
                }
 readpage_ok:
                if (likely(uptodate)) {
@@ -3069,7 +3198,7 @@ out:
        return ret;
 }
 
-static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+static inline void contiguous_readpages(struct extent_io_tree *tree,
                                             struct page *pages[], int nr_pages,
                                             u64 start, u64 end,
                                             struct extent_map **em_cached,
@@ -3100,46 +3229,6 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
        }
 }
 
-static void __extent_readpages(struct extent_io_tree *tree,
-                              struct page *pages[],
-                              int nr_pages,
-                              struct extent_map **em_cached,
-                              struct bio **bio, unsigned long *bio_flags,
-                              u64 *prev_em_start)
-{
-       u64 start = 0;
-       u64 end = 0;
-       u64 page_start;
-       int index;
-       int first_index = 0;
-
-       for (index = 0; index < nr_pages; index++) {
-               page_start = page_offset(pages[index]);
-               if (!end) {
-                       start = page_start;
-                       end = start + PAGE_SIZE - 1;
-                       first_index = index;
-               } else if (end + 1 == page_start) {
-                       end += PAGE_SIZE;
-               } else {
-                       __do_contiguous_readpages(tree, &pages[first_index],
-                                                 index - first_index, start,
-                                                 end, em_cached,
-                                                 bio, bio_flags,
-                                                 prev_em_start);
-                       start = page_start;
-                       end = start + PAGE_SIZE - 1;
-                       first_index = index;
-               }
-       }
-
-       if (end)
-               __do_contiguous_readpages(tree, &pages[first_index],
-                                         index - first_index, start,
-                                         end, em_cached, bio,
-                                         bio_flags, prev_em_start);
-}
-
 static int __extent_read_full_page(struct extent_io_tree *tree,
                                   struct page *page,
                                   get_extent_t *get_extent,
@@ -3419,6 +3508,9 @@ done:
  * records are inserted to lock ranges in the tree, and as dirty areas
  * are found, they are marked writeback.  Then the lock bits are removed
  * and the end_io handler clears the writeback ranges
+ *
+ * Return 0 if everything goes well.
+ * Return <0 for error.
  */
 static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                              struct extent_page_data *epd)
@@ -3488,6 +3580,7 @@ done:
                end_extent_writepage(page, ret, start, page_end);
        }
        unlock_page(page);
+       ASSERT(ret <= 0);
        return ret;
 
 done_unlocked:
@@ -3500,18 +3593,26 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
                       TASK_UNINTERRUPTIBLE);
 }
 
-static noinline_for_stack int
-lock_extent_buffer_for_io(struct extent_buffer *eb,
-                         struct btrfs_fs_info *fs_info,
+/*
+ * Lock eb pages and flush the bio if we can't get the locks
+ *
+ * Return  0 if nothing went wrong
+ * Return >0 is same as 0, except bio is not submitted
+ * Return <0 if something went wrong, no page is locked
+ */
+static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
                          struct extent_page_data *epd)
 {
-       int i, num_pages;
+       struct btrfs_fs_info *fs_info = eb->fs_info;
+       int i, num_pages, failed_page_nr;
        int flush = 0;
        int ret = 0;
 
        if (!btrfs_try_tree_write_lock(eb)) {
+               ret = flush_write_bio(epd);
+               if (ret < 0)
+                       return ret;
                flush = 1;
-               flush_write_bio(epd);
                btrfs_tree_lock(eb);
        }
 
@@ -3520,7 +3621,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
                if (!epd->sync_io)
                        return 0;
                if (!flush) {
-                       flush_write_bio(epd);
+                       ret = flush_write_bio(epd);
+                       if (ret < 0)
+                               return ret;
                        flush = 1;
                }
                while (1) {
@@ -3561,7 +3664,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
 
                if (!trylock_page(p)) {
                        if (!flush) {
-                               flush_write_bio(epd);
+                               ret = flush_write_bio(epd);
+                               if (ret < 0) {
+                                       failed_page_nr = i;
+                                       goto err_unlock;
+                               }
                                flush = 1;
                        }
                        lock_page(p);
@@ -3569,6 +3676,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
        }
 
        return ret;
+err_unlock:
+       /* Unlock already locked pages */
+       for (i = 0; i < failed_page_nr; i++)
+               unlock_page(eb->pages[i]);
+       return ret;
 }
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
@@ -3672,10 +3784,10 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 }
 
 static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
-                       struct btrfs_fs_info *fs_info,
                        struct writeback_control *wbc,
                        struct extent_page_data *epd)
 {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
        struct block_device *bdev = fs_info->fs_devices->latest_bdev;
        struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        u64 offset = eb->start;
@@ -3701,7 +3813,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
                 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
                 */
                start = btrfs_item_nr_offset(nritems);
-               end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
+               end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
                memzero_extent_buffer(eb, start, end - start);
        }
 
@@ -3744,7 +3856,6 @@ int btree_write_cache_pages(struct address_space *mapping,
                                   struct writeback_control *wbc)
 {
        struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
-       struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
        struct extent_buffer *eb, *prev_eb = NULL;
        struct extent_page_data epd = {
                .bio = NULL,
@@ -3819,13 +3930,13 @@ retry:
                                continue;
 
                        prev_eb = eb;
-                       ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
+                       ret = lock_extent_buffer_for_io(eb, &epd);
                        if (!ret) {
                                free_extent_buffer(eb);
                                continue;
                        }
 
-                       ret = write_one_eb(eb, fs_info, wbc, &epd);
+                       ret = write_one_eb(eb, wbc, &epd);
                        if (ret) {
                                done = 1;
                                free_extent_buffer(eb);
@@ -3852,7 +3963,12 @@ retry:
                index = 0;
                goto retry;
        }
-       flush_write_bio(&epd);
+       ASSERT(ret <= 0);
+       if (ret < 0) {
+               end_write_bio(&epd, ret);
+               return ret;
+       }
+       ret = flush_write_bio(&epd);
        return ret;
 }
 
@@ -3949,7 +4065,8 @@ retry:
                         * tmpfs file mapping
                         */
                        if (!trylock_page(page)) {
-                               flush_write_bio(epd);
+                               ret = flush_write_bio(epd);
+                               BUG_ON(ret < 0);
                                lock_page(page);
                        }
 
@@ -3959,8 +4076,10 @@ retry:
                        }
 
                        if (wbc->sync_mode != WB_SYNC_NONE) {
-                               if (PageWriteback(page))
-                                       flush_write_bio(epd);
+                               if (PageWriteback(page)) {
+                                       ret = flush_write_bio(epd);
+                                       BUG_ON(ret < 0);
+                               }
                                wait_on_page_writeback(page);
                        }
 
@@ -3971,11 +4090,6 @@ retry:
                        }
 
                        ret = __extent_writepage(page, wbc, epd);
-
-                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
-                               unlock_page(page);
-                               ret = 0;
-                       }
                        if (ret < 0) {
                                /*
                                 * done_index is set past this page,
@@ -4029,8 +4143,14 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
        };
 
        ret = __extent_writepage(page, wbc, &epd);
+       ASSERT(ret <= 0);
+       if (ret < 0) {
+               end_write_bio(&epd, ret);
+               return ret;
+       }
 
-       flush_write_bio(&epd);
+       ret = flush_write_bio(&epd);
+       ASSERT(ret <= 0);
        return ret;
 }
 
@@ -4070,7 +4190,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
                start += PAGE_SIZE;
        }
 
-       flush_write_bio(&epd);
+       ASSERT(ret <= 0);
+       if (ret < 0) {
+               end_write_bio(&epd, ret);
+               return ret;
+       }
+       ret = flush_write_bio(&epd);
        return ret;
 }
 
@@ -4086,7 +4211,12 @@ int extent_writepages(struct address_space *mapping,
        };
 
        ret = extent_write_cache_pages(mapping, wbc, &epd);
-       flush_write_bio(&epd);
+       ASSERT(ret <= 0);
+       if (ret < 0) {
+               end_write_bio(&epd, ret);
+               return ret;
+       }
+       ret = flush_write_bio(&epd);
        return ret;
 }
 
@@ -4102,6 +4232,8 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
        u64 prev_em_start = (u64)-1;
 
        while (!list_empty(pages)) {
+               u64 contig_end = 0;
+
                for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
                        struct page *page = lru_to_page(pages);
 
@@ -4110,14 +4242,22 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
                        if (add_to_page_cache_lru(page, mapping, page->index,
                                                readahead_gfp_mask(mapping))) {
                                put_page(page);
-                               continue;
+                               break;
                        }
 
                        pagepool[nr++] = page;
+                       contig_end = page_offset(page) + PAGE_SIZE - 1;
                }
 
-               __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
-                                  &bio_flags, &prev_em_start);
+               if (nr) {
+                       u64 contig_start = page_offset(pagepool[0]);
+
+                       ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+
+                       contiguous_readpages(tree, pagepool, nr, contig_start,
+                                    contig_end, &em_cached, &bio, &bio_flags,
+                                    &prev_em_start);
+               }
        }
 
        if (em_cached)
@@ -4166,10 +4306,9 @@ static int try_release_extent_state(struct extent_io_tree *tree,
        u64 end = start + PAGE_SIZE - 1;
        int ret = 1;
 
-       if (test_range_bit(tree, start, end,
-                          EXTENT_IOBITS, 0, NULL))
+       if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
                ret = 0;
-       else {
+       } else {
                /*
                 * at this point we can safely clear everything except the
                 * locked bit and the nodatasum bit
@@ -4222,8 +4361,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
                        }
                        if (!test_range_bit(tree, em->start,
                                            extent_map_end(em) - 1,
-                                           EXTENT_LOCKED | EXTENT_WRITEBACK,
-                                           0, NULL)) {
+                                           EXTENT_LOCKED, 0, NULL)) {
                                set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                                        &btrfs_inode->runtime_flags);
                                remove_extent_mapping(map, em);
@@ -4372,8 +4510,7 @@ try_submit_last:
  * In this case, the first extent range will be cached but not emitted.
  * So we must emit it before ending extent_fiemap().
  */
-static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
-                                 struct fiemap_extent_info *fieinfo,
+static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
                                  struct fiemap_cache *cache)
 {
        int ret;
@@ -4580,7 +4717,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
 out_free:
        if (!ret)
-               ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
+               ret = emit_last_fiemap_cache(fieinfo, &cache);
        free_extent_map(em);
 out:
        btrfs_free_path(path);
@@ -4672,13 +4809,9 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
        eb->fs_info = fs_info;
        eb->bflags = 0;
        rwlock_init(&eb->lock);
-       atomic_set(&eb->write_locks, 0);
-       atomic_set(&eb->read_locks, 0);
        atomic_set(&eb->blocking_readers, 0);
        atomic_set(&eb->blocking_writers, 0);
-       atomic_set(&eb->spinning_readers, 0);
-       atomic_set(&eb->spinning_writers, 0);
-       eb->lock_nested = 0;
+       eb->lock_nested = false;
        init_waitqueue_head(&eb->write_lock_wq);
        init_waitqueue_head(&eb->read_lock_wq);
 
@@ -4695,6 +4828,13 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
                > MAX_INLINE_EXTENT_BUFFER_SIZE);
        BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       atomic_set(&eb->spinning_writers, 0);
+       atomic_set(&eb->spinning_readers, 0);
+       atomic_set(&eb->read_locks, 0);
+       atomic_set(&eb->write_locks, 0);
+#endif
+
        return eb;
 }
 
@@ -5183,8 +5323,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
        }
 }
 
-int read_extent_buffer_pages(struct extent_io_tree *tree,
-                            struct extent_buffer *eb, int wait, int mirror_num)
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 {
        int i;
        struct page *page;
@@ -5196,6 +5335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        unsigned long num_reads = 0;
        struct bio *bio = NULL;
        unsigned long bio_flags = 0;
+       struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
 
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
@@ -5746,13 +5886,13 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                btrfs_err(fs_info,
                        "memmove bogus src_offset %lu move len %lu dst len %lu",
                         src_offset, len, dst->len);
-               BUG_ON(1);
+               BUG();
        }
        if (dst_offset + len > dst->len) {
                btrfs_err(fs_info,
                        "memmove bogus dst_offset %lu move len %lu dst len %lu",
                         dst_offset, len, dst->len);
-               BUG_ON(1);
+               BUG();
        }
 
        while (len > 0) {
@@ -5793,13 +5933,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                btrfs_err(fs_info,
                          "memmove bogus src_offset %lu move len %lu len %lu",
                          src_offset, len, dst->len);
-               BUG_ON(1);
+               BUG();
        }
        if (dst_offset + len > dst->len) {
                btrfs_err(fs_info,
                          "memmove bogus dst_offset %lu move len %lu len %lu",
                          dst_offset, len, dst->len);
-               BUG_ON(1);
+               BUG();
        }
        if (dst_offset < src_offset) {
                memcpy_extent_buffer(dst, dst_offset, src_offset, len);
index 08749e0..aa18a16 100644 (file)
@@ -9,26 +9,33 @@
 
 /* bits for the extent state */
 #define EXTENT_DIRTY           (1U << 0)
-#define EXTENT_WRITEBACK       (1U << 1)
-#define EXTENT_UPTODATE                (1U << 2)
-#define EXTENT_LOCKED          (1U << 3)
-#define EXTENT_NEW             (1U << 4)
-#define EXTENT_DELALLOC                (1U << 5)
-#define EXTENT_DEFRAG          (1U << 6)
-#define EXTENT_BOUNDARY                (1U << 9)
-#define EXTENT_NODATASUM       (1U << 10)
-#define EXTENT_CLEAR_META_RESV (1U << 11)
-#define EXTENT_NEED_WAIT       (1U << 12)
-#define EXTENT_DAMAGED         (1U << 13)
-#define EXTENT_NORESERVE       (1U << 14)
-#define EXTENT_QGROUP_RESERVED (1U << 15)
-#define EXTENT_CLEAR_DATA_RESV (1U << 16)
-#define EXTENT_DELALLOC_NEW    (1U << 17)
-#define EXTENT_IOBITS          (EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_UPTODATE                (1U << 1)
+#define EXTENT_LOCKED          (1U << 2)
+#define EXTENT_NEW             (1U << 3)
+#define EXTENT_DELALLOC                (1U << 4)
+#define EXTENT_DEFRAG          (1U << 5)
+#define EXTENT_BOUNDARY                (1U << 6)
+#define EXTENT_NODATASUM       (1U << 7)
+#define EXTENT_CLEAR_META_RESV (1U << 8)
+#define EXTENT_NEED_WAIT       (1U << 9)
+#define EXTENT_DAMAGED         (1U << 10)
+#define EXTENT_NORESERVE       (1U << 11)
+#define EXTENT_QGROUP_RESERVED (1U << 12)
+#define EXTENT_CLEAR_DATA_RESV (1U << 13)
+#define EXTENT_DELALLOC_NEW    (1U << 14)
 #define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \
                                 EXTENT_CLEAR_DATA_RESV)
 #define EXTENT_CTLBITS         (EXTENT_DO_ACCOUNTING)
 
+/*
+ * Redefined bits above which are used only in the device allocation tree,
+ * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
+ * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
+ * manipulation functions
+ */
+#define CHUNK_ALLOCATED EXTENT_DIRTY
+#define CHUNK_TRIMMED   EXTENT_DEFRAG
+
 /*
  * flags for bio submission. The high bits indicate the compression
  * type for this bio
@@ -88,9 +95,6 @@ struct btrfs_inode;
 struct btrfs_io_bio;
 struct io_failure_record;
 
-typedef        blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
-                                      int mirror_num, unsigned long bio_flags,
-                                      u64 bio_offset);
 
 typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
                struct bio *bio, u64 bio_offset);
@@ -100,17 +104,34 @@ struct extent_io_ops {
         * The following callbacks must be always defined, the function
         * pointer will be called unconditionally.
         */
-       extent_submit_bio_hook_t *submit_bio_hook;
+       blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
+                                       int mirror_num, unsigned long bio_flags);
        int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
                                    struct page *page, u64 start, u64 end,
                                    int mirror);
 };
 
+enum {
+       IO_TREE_FS_INFO_FREED_EXTENTS0,
+       IO_TREE_FS_INFO_FREED_EXTENTS1,
+       IO_TREE_INODE_IO,
+       IO_TREE_INODE_IO_FAILURE,
+       IO_TREE_RELOC_BLOCKS,
+       IO_TREE_TRANS_DIRTY_PAGES,
+       IO_TREE_ROOT_DIRTY_LOG_PAGES,
+       IO_TREE_SELFTEST,
+};
+
 struct extent_io_tree {
        struct rb_root state;
+       struct btrfs_fs_info *fs_info;
        void *private_data;
        u64 dirty_bytes;
-       int track_uptodate;
+       bool track_uptodate;
+
+       /* Who owns this io tree, should be one of IO_TREE_* */
+       u8 owner;
+
        spinlock_t lock;
        const struct extent_io_ops *ops;
 };
@@ -146,14 +167,9 @@ struct extent_buffer {
        struct rcu_head rcu_head;
        pid_t lock_owner;
 
-       /* count of read lock holders on the extent buffer */
-       atomic_t write_locks;
-       atomic_t read_locks;
        atomic_t blocking_writers;
        atomic_t blocking_readers;
-       atomic_t spinning_readers;
-       atomic_t spinning_writers;
-       short lock_nested;
+       bool lock_nested;
        /* >= 0 if eb belongs to a log tree, -1 otherwise */
        short log_index;
 
@@ -171,6 +187,10 @@ struct extent_buffer {
        wait_queue_head_t read_lock_wq;
        struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
 #ifdef CONFIG_BTRFS_DEBUG
+       atomic_t spinning_writers;
+       atomic_t spinning_readers;
+       atomic_t read_locks;
+       atomic_t write_locks;
        struct list_head leak_list;
 #endif
 };
@@ -239,7 +259,10 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
                                          u64 start, u64 len,
                                          int create);
 
-void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
+void extent_io_tree_init(struct btrfs_fs_info *fs_info,
+                        struct extent_io_tree *tree, unsigned int owner,
+                        void *private_data);
+void extent_io_tree_release(struct extent_io_tree *tree);
 int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -309,6 +332,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   unsigned bits, u64 *failed_start,
                   struct extent_state **cached_state, gfp_t mask);
+int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
+                          unsigned bits);
 
 static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
                u64 end, unsigned bits)
@@ -376,6 +401,8 @@ static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
                          u64 *start_ret, u64 *end_ret, unsigned bits,
                          struct extent_state **cached_state);
+void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+                                u64 *start_ret, u64 *end_ret, unsigned bits);
 int extent_invalidatepage(struct extent_io_tree *tree,
                          struct page *page, unsigned long offset);
 int extent_write_full_page(struct page *page, struct writeback_control *wbc);
@@ -405,8 +432,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
 #define WAIT_NONE      0
 #define WAIT_COMPLETE  1
 #define WAIT_PAGE_LOCK 2
-int read_extent_buffer_pages(struct extent_io_tree *tree,
-                            struct extent_buffer *eb, int wait,
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
                             int mirror_num);
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 
@@ -487,8 +513,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
                     struct extent_io_tree *io_tree, u64 start,
                     struct page *page, u64 ino, unsigned int pg_offset);
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
-                        struct extent_buffer *eb, int mirror_num);
+int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
 
 /*
  * When IO fails, either with EIO or csum verification fails, we
index 928f729..9558d79 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include "ctree.h"
+#include "volumes.h"
 #include "extent_map.h"
 #include "compression.h"
 
@@ -337,6 +338,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
                try_merge_map(tree, em);
 }
 
+static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
+{
+       struct map_lookup *map = em->map_lookup;
+       u64 stripe_size = em->orig_block_len;
+       int i;
+
+       for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_bio_stripe *stripe = &map->stripes[i];
+               struct btrfs_device *device = stripe->dev;
+
+               set_extent_bits_nowait(&device->alloc_state, stripe->physical,
+                                stripe->physical + stripe_size - 1, bits);
+       }
+}
+
+static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
+{
+       struct map_lookup *map = em->map_lookup;
+       u64 stripe_size = em->orig_block_len;
+       int i;
+
+       for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_bio_stripe *stripe = &map->stripes[i];
+               struct btrfs_device *device = stripe->dev;
+
+               __clear_extent_bit(&device->alloc_state, stripe->physical,
+                                  stripe->physical + stripe_size - 1, bits,
+                                  0, 0, NULL, GFP_NOWAIT, NULL);
+       }
+}
+
 /**
  * add_extent_mapping - add new extent map to the extent tree
  * @tree:      tree to insert new map in
@@ -357,6 +389,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
                goto out;
 
        setup_extent_mapping(tree, em, modified);
+       if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
+               extent_map_device_set_bits(em, CHUNK_ALLOCATED);
+               extent_map_device_clear_bits(em, CHUNK_TRIMMED);
+       }
 out:
        return ret;
 }
@@ -438,6 +474,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
        rb_erase_cached(&em->rb_node, &tree->map);
        if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
                list_del_init(&em->list);
+       if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
+               extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
        RB_CLEAR_NODE(&em->rb_node);
 }
 
index cccc75d..d431ea8 100644 (file)
@@ -413,6 +413,16 @@ fail:
        return ret;
 }
 
+/*
+ * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
+ * @inode:      Owner of the data inside the bio
+ * @bio:        Contains the data to be checksummed
+ * @file_start:  offset in file this bio begins to describe
+ * @contig:     Boolean. If true/1 means all bio vecs in this bio are
+ *              contiguous and they begin at @file_start in the file. False/0
+ *              means this bio can contain potentially discontiguous bio vecs
+ *              so the logical offset of each should be calculated separately.
+ */
 blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                       u64 file_start, int contig)
 {
@@ -458,8 +468,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                        BUG_ON(!ordered); /* Logic error */
                }
 
-               data = kmap_atomic(bvec.bv_page);
-
                nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
                                                 bvec.bv_len + fs_info->sectorsize
                                                 - 1);
@@ -469,10 +477,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                                offset < ordered->file_offset) {
                                unsigned long bytes_left;
 
-                               kunmap_atomic(data);
                                sums->len = this_sum_bytes;
                                this_sum_bytes = 0;
-                               btrfs_add_ordered_sum(inode, ordered, sums);
+                               btrfs_add_ordered_sum(ordered, sums);
                                btrfs_put_ordered_extent(ordered);
 
                                bytes_left = bio->bi_iter.bi_size - total_bytes;
@@ -489,16 +496,16 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                                sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
                                        + total_bytes;
                                index = 0;
-
-                               data = kmap_atomic(bvec.bv_page);
                        }
 
                        sums->sums[index] = ~(u32)0;
+                       data = kmap_atomic(bvec.bv_page);
                        sums->sums[index]
                                = btrfs_csum_data(data + bvec.bv_offset
                                                + (i * fs_info->sectorsize),
                                                sums->sums[index],
                                                fs_info->sectorsize);
+                       kunmap_atomic(data);
                        btrfs_csum_final(sums->sums[index],
                                        (char *)(sums->sums + index));
                        index++;
@@ -507,10 +514,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                        total_bytes += fs_info->sectorsize;
                }
 
-               kunmap_atomic(data);
        }
        this_sum_bytes = 0;
-       btrfs_add_ordered_sum(inode, ordered, sums);
+       btrfs_add_ordered_sum(ordered, sums);
        btrfs_put_ordered_extent(ordered);
        return 0;
 }
@@ -551,7 +557,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
                 */
                u32 new_size = (bytenr - key->offset) >> blocksize_bits;
                new_size *= csum_size;
-               btrfs_truncate_item(fs_info, path, new_size, 1);
+               btrfs_truncate_item(path, new_size, 1);
        } else if (key->offset >= bytenr && csum_end > end_byte &&
                   end_byte > key->offset) {
                /*
@@ -563,7 +569,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
                u32 new_size = (csum_end - end_byte) >> blocksize_bits;
                new_size *= csum_size;
 
-               btrfs_truncate_item(fs_info, path, new_size, 0);
+               btrfs_truncate_item(path, new_size, 0);
 
                key->offset = end_byte;
                btrfs_set_item_key_safe(fs_info, path, key);
@@ -832,11 +838,11 @@ again:
                u32 diff;
                u32 free_space;
 
-               if (btrfs_leaf_free_space(fs_info, leaf) <
+               if (btrfs_leaf_free_space(leaf) <
                                 sizeof(struct btrfs_item) + csum_size * 2)
                        goto insert;
 
-               free_space = btrfs_leaf_free_space(fs_info, leaf) -
+               free_space = btrfs_leaf_free_space(leaf) -
                                         sizeof(struct btrfs_item) - csum_size;
                tmp = sums->len - total_bytes;
                tmp >>= fs_info->sb->s_blocksize_bits;
@@ -852,7 +858,7 @@ again:
                diff /= csum_size;
                diff *= csum_size;
 
-               btrfs_extend_item(fs_info, path, diff);
+               btrfs_extend_item(path, diff);
                ret = 0;
                goto csum;
        }
index 34fe8a5..7e85dca 100644 (file)
@@ -754,6 +754,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
+       struct btrfs_ref ref = { 0 };
        struct btrfs_key key;
        struct btrfs_key new_key;
        u64 ino = btrfs_ino(BTRFS_I(inode));
@@ -909,11 +910,14 @@ next_slot:
                        btrfs_mark_buffer_dirty(leaf);
 
                        if (update_refs && disk_bytenr > 0) {
-                               ret = btrfs_inc_extent_ref(trans, root,
-                                               disk_bytenr, num_bytes, 0,
+                               btrfs_init_generic_ref(&ref,
+                                               BTRFS_ADD_DELAYED_REF,
+                                               disk_bytenr, num_bytes, 0);
+                               btrfs_init_data_ref(&ref,
                                                root->root_key.objectid,
                                                new_key.objectid,
                                                start - extent_offset);
+                               ret = btrfs_inc_extent_ref(trans, &ref);
                                BUG_ON(ret); /* -ENOMEM */
                        }
                        key.offset = start;
@@ -993,11 +997,14 @@ delete_extent_item:
                                extent_end = ALIGN(extent_end,
                                                   fs_info->sectorsize);
                        } else if (update_refs && disk_bytenr > 0) {
-                               ret = btrfs_free_extent(trans, root,
-                                               disk_bytenr, num_bytes, 0,
+                               btrfs_init_generic_ref(&ref,
+                                               BTRFS_DROP_DELAYED_REF,
+                                               disk_bytenr, num_bytes, 0);
+                               btrfs_init_data_ref(&ref,
                                                root->root_key.objectid,
-                                               key.objectid, key.offset -
-                                               extent_offset);
+                                               key.objectid,
+                                               key.offset - extent_offset);
+                               ret = btrfs_free_extent(trans, &ref);
                                BUG_ON(ret); /* -ENOMEM */
                                inode_sub_bytes(inode,
                                                extent_end - key.offset);
@@ -1025,7 +1032,7 @@ delete_extent_item:
                        continue;
                }
 
-               BUG_ON(1);
+               BUG();
        }
 
        if (!ret && del_nr > 0) {
@@ -1050,7 +1057,7 @@ delete_extent_item:
        if (!ret && replace_extent && leafs_visited == 1 &&
            (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
             path->locks[0] == BTRFS_WRITE_LOCK) &&
-           btrfs_leaf_free_space(fs_info, leaf) >=
+           btrfs_leaf_free_space(leaf) >=
            sizeof(struct btrfs_item) + extent_item_size) {
 
                key.objectid = ino;
@@ -1142,6 +1149,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *fi;
+       struct btrfs_ref ref = { 0 };
        struct btrfs_key key;
        struct btrfs_key new_key;
        u64 bytenr;
@@ -1287,9 +1295,11 @@ again:
                                                extent_end - split);
                btrfs_mark_buffer_dirty(leaf);
 
-               ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
-                                          0, root->root_key.objectid,
-                                          ino, orig_offset);
+               btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
+                                      num_bytes, 0);
+               btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
+                                   orig_offset);
+               ret = btrfs_inc_extent_ref(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        goto out;
@@ -1311,6 +1321,9 @@ again:
 
        other_start = end;
        other_end = 0;
+       btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+                              num_bytes, 0);
+       btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
        if (extent_mergeable(leaf, path->slots[0] + 1,
                             ino, bytenr, orig_offset,
                             &other_start, &other_end)) {
@@ -1321,9 +1334,7 @@ again:
                extent_end = other_end;
                del_slot = path->slots[0] + 1;
                del_nr++;
-               ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-                                       0, root->root_key.objectid,
-                                       ino, orig_offset);
+               ret = btrfs_free_extent(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        goto out;
@@ -1341,9 +1352,7 @@ again:
                key.offset = other_start;
                del_slot = path->slots[0];
                del_nr++;
-               ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-                                       0, root->root_key.objectid,
-                                       ino, orig_offset);
+               ret = btrfs_free_extent(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        goto out;
@@ -2165,7 +2174,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                inode_unlock(inode);
                goto out;
        }
-       trans->sync = true;
 
        ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
        if (ret < 0) {
@@ -3132,6 +3140,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                        ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
                                        cur_offset, last_byte - cur_offset);
                        if (ret < 0) {
+                               cur_offset = last_byte;
                                free_extent_map(em);
                                break;
                        }
@@ -3181,7 +3190,7 @@ out:
        /* Let go of our reservation. */
        if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
                btrfs_free_reserved_data_space(inode, data_reserved,
-                               alloc_start, alloc_end - cur_offset);
+                               cur_offset, alloc_end - cur_offset);
        extent_changeset_free(data_reserved);
        return ret;
 }
index 74aa552..f74dc25 100644 (file)
@@ -88,10 +88,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
        return inode;
 }
 
-struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_block_group_cache
-                                     *block_group, struct btrfs_path *path)
+struct inode *lookup_free_space_inode(
+               struct btrfs_block_group_cache *block_group,
+               struct btrfs_path *path)
 {
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct inode *inode = NULL;
        u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
 
@@ -185,20 +186,19 @@ static int __create_free_space_inode(struct btrfs_root *root,
        return 0;
 }
 
-int create_free_space_inode(struct btrfs_fs_info *fs_info,
-                           struct btrfs_trans_handle *trans,
+int create_free_space_inode(struct btrfs_trans_handle *trans,
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_path *path)
 {
        int ret;
        u64 ino;
 
-       ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
+       ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
        if (ret < 0)
                return ret;
 
-       return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
-                                        block_group->key.objectid);
+       return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
+                                        ino, block_group->key.objectid);
 }
 
 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
@@ -812,9 +812,9 @@ free_cache:
        goto out;
 }
 
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
-                         struct btrfs_block_group_cache *block_group)
+int load_free_space_cache(struct btrfs_block_group_cache *block_group)
 {
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct inode *inode;
        struct btrfs_path *path;
@@ -858,7 +858,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
         * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
         * we will never try to read their inode item while the fs is mounted.
         */
-       inode = lookup_free_space_inode(fs_info, block_group, path);
+       inode = lookup_free_space_inode(block_group, path);
        if (IS_ERR(inode)) {
                btrfs_free_path(path);
                return 0;
@@ -1039,8 +1039,7 @@ fail:
        return -1;
 }
 
-static noinline_for_stack int
-write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
+static noinline_for_stack int write_pinned_extent_entries(
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_io_ctl *io_ctl,
                            int *entries)
@@ -1059,7 +1058,7 @@ write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
         * We shouldn't have switched the pinned extents yet so this is the
         * right one
         */
-       unpin = fs_info->pinned_extents;
+       unpin = block_group->fs_info->pinned_extents;
 
        start = block_group->key.objectid;
 
@@ -1235,7 +1234,6 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                   struct btrfs_io_ctl *io_ctl,
                                   struct btrfs_trans_handle *trans)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_state *cached_state = NULL;
        LIST_HEAD(bitmap_list);
        int entries = 0;
@@ -1293,8 +1291,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
         * If this changes while we are working we'll get added back to
         * the dirty list and redo it.  No locking needed
         */
-       ret = write_pinned_extent_entries(fs_info, block_group,
-                                         io_ctl, &entries);
+       ret = write_pinned_extent_entries(block_group, io_ctl, &entries);
        if (ret)
                goto out_nospc_locked;
 
@@ -1370,11 +1367,11 @@ out_unlock:
        goto out;
 }
 
-int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
-                         struct btrfs_trans_handle *trans,
+int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
                          struct btrfs_block_group_cache *block_group,
                          struct btrfs_path *path)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct inode *inode;
        int ret = 0;
@@ -1386,7 +1383,7 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
        }
        spin_unlock(&block_group->lock);
 
-       inode = lookup_free_space_inode(fs_info, block_group, path);
+       inode = lookup_free_space_inode(block_group, path);
        if (IS_ERR(inode))
                return 0;
 
@@ -3040,11 +3037,11 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
  * returns zero and sets up cluster if things worked out, otherwise
  * it returns -enospc
  */
-int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
-                            struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size)
 {
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry, *tmp;
        LIST_HEAD(bitmaps);
@@ -3366,10 +3363,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
                em = lookup_extent_mapping(em_tree, block_group->key.objectid,
                                           1);
                BUG_ON(!em); /* logic error, can't happen */
-               /*
-                * remove_extent_mapping() will delete us from the pinned_chunks
-                * list, which is protected by the chunk mutex.
-                */
                remove_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
                mutex_unlock(&fs_info->chunk_mutex);
index 15e30b9..8760acb 100644 (file)
@@ -38,11 +38,10 @@ struct btrfs_free_space_op {
 
 struct btrfs_io_ctl;
 
-struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_block_group_cache
-                                     *block_group, struct btrfs_path *path);
-int create_free_space_inode(struct btrfs_fs_info *fs_info,
-                           struct btrfs_trans_handle *trans,
+struct inode *lookup_free_space_inode(
+               struct btrfs_block_group_cache *block_group,
+               struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_trans_handle *trans,
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_path *path);
 
@@ -51,13 +50,11 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
                                    struct btrfs_block_group_cache *block_group,
                                    struct inode *inode);
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
-                         struct btrfs_block_group_cache *block_group);
+int load_free_space_cache(struct btrfs_block_group_cache *block_group);
 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
                        struct btrfs_block_group_cache *block_group,
                        struct btrfs_path *path);
-int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
-                         struct btrfs_trans_handle *trans,
+int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
                          struct btrfs_block_group_cache *block_group,
                          struct btrfs_path *path);
 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
@@ -95,8 +92,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                           u64 bytes);
-int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
-                            struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size);
 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
index e508908..f5dc115 100644 (file)
@@ -76,10 +76,11 @@ out:
 
 EXPORT_FOR_TESTS
 struct btrfs_free_space_info *search_free_space_info(
-               struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info,
+               struct btrfs_trans_handle *trans,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path, int cow)
 {
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_key key;
        int ret;
@@ -253,7 +254,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
                btrfs_release_path(path);
        }
 
-       info = search_free_space_info(trans, fs_info, block_group, path, 1);
+       info = search_free_space_info(trans, block_group, path, 1);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
@@ -398,7 +399,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
                btrfs_release_path(path);
        }
 
-       info = search_free_space_info(trans, fs_info, block_group, path, 1);
+       info = search_free_space_info(trans, block_group, path, 1);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
@@ -463,8 +464,7 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
        if (new_extents == 0)
                return 0;
 
-       info = search_free_space_info(trans, trans->fs_info, block_group, path,
-                                     1);
+       info = search_free_space_info(trans, block_group, path, 1);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
@@ -793,8 +793,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
                        return ret;
        }
 
-       info = search_free_space_info(NULL, trans->fs_info, block_group, path,
-                                     0);
+       info = search_free_space_info(NULL, block_group, path, 0);
        if (IS_ERR(info))
                return PTR_ERR(info);
        flags = btrfs_free_space_flags(path->nodes[0], info);
@@ -977,7 +976,6 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_path *path, u64 start, u64 size)
 {
-       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_free_space_info *info;
        u32 flags;
        int ret;
@@ -988,7 +986,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
                        return ret;
        }
 
-       info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+       info = search_free_space_info(NULL, block_group, path, 0);
        if (IS_ERR(info))
                return PTR_ERR(info);
        flags = btrfs_free_space_flags(path->nodes[0], info);
@@ -1150,7 +1148,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
                return PTR_ERR(trans);
 
        set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
-       free_space_root = btrfs_create_tree(trans, fs_info,
+       free_space_root = btrfs_create_tree(trans,
                                            BTRFS_FREE_SPACE_TREE_OBJECTID);
        if (IS_ERR(free_space_root)) {
                ret = PTR_ERR(free_space_root);
@@ -1248,7 +1246,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
        list_del(&free_space_root->dirty_list);
 
        btrfs_tree_lock(free_space_root->node);
-       clean_tree_block(fs_info, free_space_root->node);
+       btrfs_clean_tree_block(free_space_root->node);
        btrfs_tree_unlock(free_space_root->node);
        btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
                              0, 1);
@@ -1534,14 +1532,12 @@ out:
 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
 {
        struct btrfs_block_group_cache *block_group;
-       struct btrfs_fs_info *fs_info;
        struct btrfs_free_space_info *info;
        struct btrfs_path *path;
        u32 extent_count, flags;
        int ret;
 
        block_group = caching_ctl->block_group;
-       fs_info = block_group->fs_info;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -1555,7 +1551,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
        path->search_commit_root = 1;
        path->reada = READA_FORWARD;
 
-       info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+       info = search_free_space_info(NULL, block_group, path, 0);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
index 3133651..22b7602 100644 (file)
@@ -30,7 +30,6 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 struct btrfs_free_space_info *
 search_free_space_info(struct btrfs_trans_handle *trans,
-                      struct btrfs_fs_info *fs_info,
                       struct btrfs_block_group_cache *block_group,
                       struct btrfs_path *path, int cow);
 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
index a8956a3..30d62ef 100644 (file)
@@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
        memmove_extent_buffer(leaf, ptr, ptr + del_len,
                              item_size - (ptr + del_len - item_start));
 
-       btrfs_truncate_item(root->fs_info, path, item_size - del_len, 1);
+       btrfs_truncate_item(path, item_size - del_len, 1);
 
 out:
        btrfs_free_path(path);
@@ -234,7 +234,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
        item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
        memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
                              item_size - (ptr + sub_item_len - item_start));
-       btrfs_truncate_item(root->fs_info, path, item_size - sub_item_len, 1);
+       btrfs_truncate_item(path, item_size - sub_item_len, 1);
 out:
        btrfs_free_path(path);
 
@@ -288,7 +288,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
                                                   name, name_len, NULL))
                        goto out;
 
-               btrfs_extend_item(root->fs_info, path, ins_len);
+               btrfs_extend_item(path, ins_len);
                ret = 0;
        }
        if (ret < 0)
@@ -347,7 +347,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
                        goto out;
 
                old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
-               btrfs_extend_item(fs_info, path, ins_len);
+               btrfs_extend_item(path, ins_len);
                ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                     struct btrfs_inode_ref);
                ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
index ade7d0c..56929da 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/magic.h>
 #include <linux/iversion.h>
 #include <linux/swap.h>
+#include <linux/sched/mm.h>
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
@@ -73,17 +74,6 @@ struct kmem_cache *btrfs_trans_handle_cachep;
 struct kmem_cache *btrfs_path_cachep;
 struct kmem_cache *btrfs_free_space_cachep;
 
-#define S_SHIFT 12
-static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
-       [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
-       [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
-       [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
-       [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
-       [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
-       [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
-       [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
-};
-
 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct inode *inode, bool skip_writeback);
 static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
@@ -366,18 +356,24 @@ struct async_extent {
        struct list_head list;
 };
 
-struct async_cow {
+struct async_chunk {
        struct inode *inode;
-       struct btrfs_fs_info *fs_info;
        struct page *locked_page;
        u64 start;
        u64 end;
        unsigned int write_flags;
        struct list_head extents;
        struct btrfs_work work;
+       atomic_t *pending;
 };
 
-static noinline int add_async_extent(struct async_cow *cow,
+struct async_cow {
+       /* Number of chunks in flight; must be first in the structure */
+       atomic_t num_chunks;
+       struct async_chunk chunks[];
+};
+
+static noinline int add_async_extent(struct async_chunk *cow,
                                     u64 start, u64 ram_size,
                                     u64 compressed_size,
                                     struct page **pages,
@@ -444,14 +440,14 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
  * are written in the same order that the flusher thread sent them
  * down.
  */
-static noinline void compress_file_range(struct inode *inode,
-                                       struct page *locked_page,
-                                       u64 start, u64 end,
-                                       struct async_cow *async_cow,
-                                       int *num_added)
+static noinline void compress_file_range(struct async_chunk *async_chunk,
+                                        int *num_added)
 {
+       struct inode *inode = async_chunk->inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        u64 blocksize = fs_info->sectorsize;
+       u64 start = async_chunk->start;
+       u64 end = async_chunk->end;
        u64 actual_end;
        int ret = 0;
        struct page **pages = NULL;
@@ -630,7 +626,7 @@ cont:
                         * allocation on disk for these compressed pages, and
                         * will submit them to the elevator.
                         */
-                       add_async_extent(async_cow, start, total_in,
+                       add_async_extent(async_chunk, start, total_in,
                                        total_compressed, pages, nr_pages,
                                        compress_type);
 
@@ -670,14 +666,14 @@ cleanup_and_bail_uncompressed:
         * to our extent and set things up for the async work queue to run
         * cow_file_range to do the normal delalloc dance.
         */
-       if (page_offset(locked_page) >= start &&
-           page_offset(locked_page) <= end)
-               __set_page_dirty_nobuffers(locked_page);
+       if (page_offset(async_chunk->locked_page) >= start &&
+           page_offset(async_chunk->locked_page) <= end)
+               __set_page_dirty_nobuffers(async_chunk->locked_page);
                /* unlocked later on in the async handlers */
 
        if (redirty)
                extent_range_redirty_for_io(inode, start, end);
-       add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+       add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
                         BTRFS_COMPRESS_NONE);
        *num_added += 1;
 
@@ -713,38 +709,34 @@ static void free_async_extent_pages(struct async_extent *async_extent)
  * queued.  We walk all the async extents created by compress_file_range
  * and send them down to the disk.
  */
-static noinline void submit_compressed_extents(struct async_cow *async_cow)
+static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 {
-       struct inode *inode = async_cow->inode;
+       struct inode *inode = async_chunk->inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct async_extent *async_extent;
        u64 alloc_hint = 0;
        struct btrfs_key ins;
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_io_tree *io_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        int ret = 0;
 
 again:
-       while (!list_empty(&async_cow->extents)) {
-               async_extent = list_entry(async_cow->extents.next,
+       while (!list_empty(&async_chunk->extents)) {
+               async_extent = list_entry(async_chunk->extents.next,
                                          struct async_extent, list);
                list_del(&async_extent->list);
 
-               io_tree = &BTRFS_I(inode)->io_tree;
-
 retry:
+               lock_extent(io_tree, async_extent->start,
+                           async_extent->start + async_extent->ram_size - 1);
                /* did the compression code fall back to uncompressed IO? */
                if (!async_extent->pages) {
                        int page_started = 0;
                        unsigned long nr_written = 0;
 
-                       lock_extent(io_tree, async_extent->start,
-                                        async_extent->start +
-                                        async_extent->ram_size - 1);
-
                        /* allocate blocks */
-                       ret = cow_file_range(inode, async_cow->locked_page,
+                       ret = cow_file_range(inode, async_chunk->locked_page,
                                             async_extent->start,
                                             async_extent->start +
                                             async_extent->ram_size - 1,
@@ -768,15 +760,12 @@ retry:
                                                  async_extent->ram_size - 1,
                                                  WB_SYNC_ALL);
                        else if (ret)
-                               unlock_page(async_cow->locked_page);
+                               unlock_page(async_chunk->locked_page);
                        kfree(async_extent);
                        cond_resched();
                        continue;
                }
 
-               lock_extent(io_tree, async_extent->start,
-                           async_extent->start + async_extent->ram_size - 1);
-
                ret = btrfs_reserve_extent(root, async_extent->ram_size,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
@@ -855,7 +844,7 @@ retry:
                                    ins.objectid,
                                    ins.offset, async_extent->pages,
                                    async_extent->nr_pages,
-                                   async_cow->write_flags)) {
+                                   async_chunk->write_flags)) {
                        struct page *p = async_extent->pages[0];
                        const u64 start = async_extent->start;
                        const u64 end = start + async_extent->ram_size - 1;
@@ -1132,16 +1121,15 @@ out_unlock:
  */
 static noinline void async_cow_start(struct btrfs_work *work)
 {
-       struct async_cow *async_cow;
+       struct async_chunk *async_chunk;
        int num_added = 0;
-       async_cow = container_of(work, struct async_cow, work);
 
-       compress_file_range(async_cow->inode, async_cow->locked_page,
-                           async_cow->start, async_cow->end, async_cow,
-                           &num_added);
+       async_chunk = container_of(work, struct async_chunk, work);
+
+       compress_file_range(async_chunk, &num_added);
        if (num_added == 0) {
-               btrfs_add_delayed_iput(async_cow->inode);
-               async_cow->inode = NULL;
+               btrfs_add_delayed_iput(async_chunk->inode);
+               async_chunk->inode = NULL;
        }
 }
 
@@ -1150,14 +1138,12 @@ static noinline void async_cow_start(struct btrfs_work *work)
  */
 static noinline void async_cow_submit(struct btrfs_work *work)
 {
-       struct btrfs_fs_info *fs_info;
-       struct async_cow *async_cow;
+       struct async_chunk *async_chunk = container_of(work, struct async_chunk,
+                                                    work);
+       struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
        unsigned long nr_pages;
 
-       async_cow = container_of(work, struct async_cow, work);
-
-       fs_info = async_cow->fs_info;
-       nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+       nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
                PAGE_SHIFT;
 
        /* atomic_sub_return implies a barrier */
@@ -1166,22 +1152,28 @@ static noinline void async_cow_submit(struct btrfs_work *work)
                cond_wake_up_nomb(&fs_info->async_submit_wait);
 
        /*
-        * ->inode could be NULL if async_cow_start has failed to compress,
+        * ->inode could be NULL if async_cow_start has failed to compress,
         * in which case we don't have anything to submit, yet we need to
         * always adjust ->async_delalloc_pages as its paired with the init
         * happening in cow_file_range_async
         */
-       if (async_cow->inode)
-               submit_compressed_extents(async_cow);
+       if (async_chunk->inode)
+               submit_compressed_extents(async_chunk);
 }
 
 static noinline void async_cow_free(struct btrfs_work *work)
 {
-       struct async_cow *async_cow;
-       async_cow = container_of(work, struct async_cow, work);
-       if (async_cow->inode)
-               btrfs_add_delayed_iput(async_cow->inode);
-       kfree(async_cow);
+       struct async_chunk *async_chunk;
+
+       async_chunk = container_of(work, struct async_chunk, work);
+       if (async_chunk->inode)
+               btrfs_add_delayed_iput(async_chunk->inode);
+       /*
+        * Since the pointer to 'pending' is at the beginning of the array of
+        * async_chunk's, freeing it ensures the whole array has been freed.
+        */
+       if (atomic_dec_and_test(async_chunk->pending))
+               kvfree(async_chunk->pending);
 }
 
 static int cow_file_range_async(struct inode *inode, struct page *locked_page,
@@ -1190,45 +1182,73 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
                                unsigned int write_flags)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct async_cow *async_cow;
+       struct async_cow *ctx;
+       struct async_chunk *async_chunk;
        unsigned long nr_pages;
        u64 cur_end;
+       u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
+       int i;
+       bool should_compress;
+       unsigned nofs_flag;
+
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+           !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
+               num_chunks = 1;
+               should_compress = false;
+       } else {
+               should_compress = true;
+       }
+
+       nofs_flag = memalloc_nofs_save();
+       ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
+
+       if (!ctx) {
+               unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
+                       EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+                       EXTENT_DO_ACCOUNTING;
+               unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                       PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
+                       PAGE_SET_ERROR;
+
+               extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
+                                            clear_bits, page_ops);
+               return -ENOMEM;
+       }
+
+       async_chunk = ctx->chunks;
+       atomic_set(&ctx->num_chunks, num_chunks);
+
+       for (i = 0; i < num_chunks; i++) {
+               if (should_compress)
+                       cur_end = min(end, start + SZ_512K - 1);
+               else
+                       cur_end = end;
 
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
-                        1, 0, NULL);
-       while (start < end) {
-               async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
-               BUG_ON(!async_cow); /* -ENOMEM */
                /*
                 * igrab is called higher up in the call chain, take only the
                 * lightweight reference for the callback lifetime
                 */
                ihold(inode);
-               async_cow->inode = inode;
-               async_cow->fs_info = fs_info;
-               async_cow->locked_page = locked_page;
-               async_cow->start = start;
-               async_cow->write_flags = write_flags;
-
-               if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
-                   !btrfs_test_opt(fs_info, FORCE_COMPRESS))
-                       cur_end = end;
-               else
-                       cur_end = min(end, start + SZ_512K - 1);
-
-               async_cow->end = cur_end;
-               INIT_LIST_HEAD(&async_cow->extents);
-
-               btrfs_init_work(&async_cow->work,
+               async_chunk[i].pending = &ctx->num_chunks;
+               async_chunk[i].inode = inode;
+               async_chunk[i].start = start;
+               async_chunk[i].end = cur_end;
+               async_chunk[i].locked_page = locked_page;
+               async_chunk[i].write_flags = write_flags;
+               INIT_LIST_HEAD(&async_chunk[i].extents);
+
+               btrfs_init_work(&async_chunk[i].work,
                                btrfs_delalloc_helper,
                                async_cow_start, async_cow_submit,
                                async_cow_free);
 
-               nr_pages = (cur_end - start + PAGE_SIZE) >>
-                       PAGE_SHIFT;
+               nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
                atomic_add(nr_pages, &fs_info->async_delalloc_pages);
 
-               btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
+               btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
 
                *nr_written += nr_pages;
                start = cur_end + 1;
@@ -1451,7 +1471,7 @@ next_slot:
                        extent_end = ALIGN(extent_end,
                                           fs_info->sectorsize);
                } else {
-                       BUG_ON(1);
+                       BUG();
                }
 out_check:
                if (extent_end <= start) {
@@ -1964,11 +1984,11 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
  *
  *    c-3) otherwise:                  async submit
  */
-static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
-                                int mirror_num, unsigned long bio_flags,
-                                u64 bio_offset)
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+                                         int mirror_num,
+                                         unsigned long bio_flags)
+
 {
-       struct inode *inode = private_data;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
@@ -2003,8 +2023,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
                        goto mapit;
                /* we're doing a write, do the async checksumming */
                ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
-                                         bio_offset, inode,
-                                         btrfs_submit_bio_start);
+                                         0, inode, btrfs_submit_bio_start);
                goto out;
        } else if (!skip_sum) {
                ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -2531,6 +2550,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
        struct btrfs_file_extent_item *item;
        struct btrfs_ordered_extent *ordered;
        struct btrfs_trans_handle *trans;
+       struct btrfs_ref ref = { 0 };
        struct btrfs_root *root;
        struct btrfs_key key;
        struct extent_buffer *leaf;
@@ -2701,10 +2721,11 @@ again:
        inode_add_bytes(inode, len);
        btrfs_release_path(path);
 
-       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
-                       new->disk_len, 0,
-                       backref->root_id, backref->inum,
-                       new->file_pos); /* start - extent_offset */
+       btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
+                              new->disk_len, 0);
+       btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
+                           new->file_pos);  /* start - extent_offset */
+       ret = btrfs_inc_extent_ref(trans, &ref);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out_free_path;
@@ -3699,21 +3720,6 @@ cache_index:
         * inode is not a directory, logging its parent unnecessarily.
         */
        BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
-       /*
-        * Similar reasoning for last_link_trans, needs to be set otherwise
-        * for a case like the following:
-        *
-        * mkdir A
-        * touch foo
-        * ln foo A/bar
-        * echo 2 > /proc/sys/vm/drop_caches
-        * fsync foo
-        * <power failure>
-        *
-        * Would result in link bar and directory A not existing after the power
-        * failure.
-        */
-       BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;
 
        path->slots[0]++;
        if (inode->i_nlink != 1 ||
@@ -4679,7 +4685,7 @@ search_again:
 
                                btrfs_set_file_extent_ram_bytes(leaf, fi, size);
                                size = btrfs_file_extent_calc_inline_size(size);
-                               btrfs_truncate_item(root->fs_info, path, size, 1);
+                               btrfs_truncate_item(path, size, 1);
                        } else if (!del_item) {
                                /*
                                 * We have to bail so the last_size is set to
@@ -4718,12 +4724,17 @@ delete:
                if (found_extent &&
                    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
                     root == fs_info->tree_root)) {
+                       struct btrfs_ref ref = { 0 };
+
                        btrfs_set_path_blocking(path);
                        bytes_deleted += extent_num_bytes;
-                       ret = btrfs_free_extent(trans, root, extent_start,
-                                               extent_num_bytes, 0,
-                                               btrfs_header_owner(leaf),
-                                               ino, extent_offset);
+
+                       btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
+                                       extent_start, extent_num_bytes, 0);
+                       ref.real_root = root->root_key.objectid;
+                       btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+                                       ino, extent_offset);
+                       ret = btrfs_free_extent(trans, &ref);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                break;
@@ -5448,12 +5459,14 @@ no_delete:
 }
 
 /*
- * this returns the key found in the dir entry in the location pointer.
+ * Return the key found in the dir entry in the location pointer, fill @type
+ * with BTRFS_FT_*, and return 0.
+ *
  * If no dir entries were found, returns -ENOENT.
  * If found a corrupted location in dir entry, returns -EUCLEAN.
  */
 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
-                              struct btrfs_key *location)
+                              struct btrfs_key *location, u8 *type)
 {
        const char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
@@ -5482,6 +5495,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
                           __func__, name, btrfs_ino(BTRFS_I(dir)),
                           location->objectid, location->type, location->offset);
        }
+       if (!ret)
+               *type = btrfs_dir_type(path->nodes[0], di);
 out:
        btrfs_free_path(path);
        return ret;
@@ -5719,6 +5734,24 @@ static struct inode *new_simple_dir(struct super_block *s,
        return inode;
 }
 
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+       /*
+        * Compile-time asserts that generic FT_* types still match
+        * BTRFS_FT_* types
+        */
+       BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
+       BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
+       BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
+       BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
+       BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
+       BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
+       BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
+       BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
+
+       return fs_umode_to_ftype(inode->i_mode);
+}
+
 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -5726,18 +5759,31 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_root *sub_root = root;
        struct btrfs_key location;
+       u8 di_type = 0;
        int index;
        int ret = 0;
 
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
-       ret = btrfs_inode_by_name(dir, dentry, &location);
+       ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
        if (ret < 0)
                return ERR_PTR(ret);
 
        if (location.type == BTRFS_INODE_ITEM_KEY) {
                inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+               if (IS_ERR(inode))
+                       return inode;
+
+               /* Do extra check against inode mode with di_type */
+               if (btrfs_inode_type(inode) != di_type) {
+                       btrfs_crit(fs_info,
+"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+                                 inode->i_mode, btrfs_inode_type(inode),
+                                 di_type);
+                       iput(inode);
+                       return ERR_PTR(-EUCLEAN);
+               }
                return inode;
        }
 
@@ -5797,10 +5843,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
        return d_splice_alias(inode, dentry);
 }
 
-unsigned char btrfs_filetype_table[] = {
-       DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
 /*
  * All this infrastructure exists because dir_emit can fault, and we are holding
  * the tree lock when doing readdir.  For now just allocate a buffer and copy
@@ -5939,7 +5981,7 @@ again:
                name_ptr = (char *)(entry + 1);
                read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
                                   name_len);
-               put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+               put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
                                &entry->type);
                btrfs_dir_item_key_to_cpu(leaf, di, &location);
                put_unaligned(location.objectid, &entry->ino);
@@ -6342,11 +6384,6 @@ fail:
        return ERR_PTR(ret);
 }
 
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
-       return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
 /*
  * utility function to add 'inode' into 'parent_inode' with
  * a give name and a given sequence number.
@@ -6634,7 +6671,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                        if (err)
                                goto fail;
                }
-               BTRFS_I(inode)->last_link_trans = trans->transid;
                d_instantiate(dentry, inode);
                ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
                                         true, NULL);
@@ -6864,6 +6900,14 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
        extent_start = found_key.offset;
        if (extent_type == BTRFS_FILE_EXTENT_REG ||
            extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+               /* Only regular file could have regular/prealloc extent */
+               if (!S_ISREG(inode->vfs_inode.i_mode)) {
+                       ret = -EUCLEAN;
+                       btrfs_crit(fs_info,
+               "regular/prealloc extent found for non-regular inode %llu",
+                                  btrfs_ino(inode));
+                       goto out;
+               }
                extent_end = extent_start +
                       btrfs_file_extent_num_bytes(leaf, item);
 
@@ -9163,7 +9207,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->dir_index = 0;
        ei->last_unlink_trans = 0;
-       ei->last_link_trans = 0;
        ei->last_log_commit = 0;
 
        spin_lock_init(&ei->lock);
@@ -9182,10 +9225,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 
        inode = &ei->vfs_inode;
        extent_map_tree_init(&ei->extent_tree);
-       extent_io_tree_init(&ei->io_tree, inode);
-       extent_io_tree_init(&ei->io_failure_tree, inode);
-       ei->io_tree.track_uptodate = 1;
-       ei->io_failure_tree.track_uptodate = 1;
+       extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
+       extent_io_tree_init(fs_info, &ei->io_failure_tree,
+                           IO_TREE_INODE_IO_FAILURE, inode);
+       ei->io_tree.track_uptodate = true;
+       ei->io_failure_tree.track_uptodate = true;
        atomic_set(&ei->sync_writers, 0);
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
@@ -9427,7 +9471,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        /* Reference for the source. */
        if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
                /* force full log commit if subvolume involved. */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
        } else {
                btrfs_pin_log_trans(root);
                root_log_pinned = true;
@@ -9444,7 +9488,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        /* And now for the dest. */
        if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
                /* force full log commit if subvolume involved. */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
        } else {
                btrfs_pin_log_trans(dest);
                dest_log_pinned = true;
@@ -9580,7 +9624,7 @@ out_fail:
                    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
                    (new_inode &&
                     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
-                       btrfs_set_log_full_commit(fs_info, trans);
+                       btrfs_set_log_full_commit(trans);
 
                if (root_log_pinned) {
                        btrfs_end_log_trans(root);
@@ -9766,7 +9810,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        BTRFS_I(old_inode)->dir_index = 0ULL;
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                /* force full log commit if subvolume involved. */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
        } else {
                btrfs_pin_log_trans(root);
                log_pinned = true;
@@ -9887,7 +9931,7 @@ out_fail:
                    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
                    (new_inode &&
                     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
-                       btrfs_set_log_full_commit(fs_info, trans);
+                       btrfs_set_log_full_commit(trans);
 
                btrfs_end_log_trans(root);
                log_pinned = false;
@@ -10190,7 +10234,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 
        inode->i_op = &btrfs_symlink_inode_operations;
        inode_nohighmem(inode);
-       inode->i_mapping->a_ops = &btrfs_aops;
        inode_set_bytes(inode, name_len);
        btrfs_i_size_write(BTRFS_I(inode), name_len);
        err = btrfs_update_inode(trans, root, inode);
index cd4e693..6dafa85 100644 (file)
@@ -187,11 +187,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        struct btrfs_inode *binode = BTRFS_I(inode);
        struct btrfs_root *root = binode->root;
        struct btrfs_trans_handle *trans;
-       unsigned int fsflags, old_fsflags;
+       unsigned int fsflags;
        int ret;
-       u64 old_flags;
-       unsigned int old_i_flags;
-       umode_t mode;
+       const char *comp = NULL;
+       u32 binode_flags = binode->flags;
 
        if (!inode_owner_or_capable(inode))
                return -EPERM;
@@ -212,13 +211,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 
        inode_lock(inode);
 
-       old_flags = binode->flags;
-       old_i_flags = inode->i_flags;
-       mode = inode->i_mode;
-
        fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
-       old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
-       if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+       if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
+           (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                if (!capable(CAP_LINUX_IMMUTABLE)) {
                        ret = -EPERM;
                        goto out_unlock;
@@ -226,52 +221,52 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        }
 
        if (fsflags & FS_SYNC_FL)
-               binode->flags |= BTRFS_INODE_SYNC;
+               binode_flags |= BTRFS_INODE_SYNC;
        else
-               binode->flags &= ~BTRFS_INODE_SYNC;
+               binode_flags &= ~BTRFS_INODE_SYNC;
        if (fsflags & FS_IMMUTABLE_FL)
-               binode->flags |= BTRFS_INODE_IMMUTABLE;
+               binode_flags |= BTRFS_INODE_IMMUTABLE;
        else
-               binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+               binode_flags &= ~BTRFS_INODE_IMMUTABLE;
        if (fsflags & FS_APPEND_FL)
-               binode->flags |= BTRFS_INODE_APPEND;
+               binode_flags |= BTRFS_INODE_APPEND;
        else
-               binode->flags &= ~BTRFS_INODE_APPEND;
+               binode_flags &= ~BTRFS_INODE_APPEND;
        if (fsflags & FS_NODUMP_FL)
-               binode->flags |= BTRFS_INODE_NODUMP;
+               binode_flags |= BTRFS_INODE_NODUMP;
        else
-               binode->flags &= ~BTRFS_INODE_NODUMP;
+               binode_flags &= ~BTRFS_INODE_NODUMP;
        if (fsflags & FS_NOATIME_FL)
-               binode->flags |= BTRFS_INODE_NOATIME;
+               binode_flags |= BTRFS_INODE_NOATIME;
        else
-               binode->flags &= ~BTRFS_INODE_NOATIME;
+               binode_flags &= ~BTRFS_INODE_NOATIME;
        if (fsflags & FS_DIRSYNC_FL)
-               binode->flags |= BTRFS_INODE_DIRSYNC;
+               binode_flags |= BTRFS_INODE_DIRSYNC;
        else
-               binode->flags &= ~BTRFS_INODE_DIRSYNC;
+               binode_flags &= ~BTRFS_INODE_DIRSYNC;
        if (fsflags & FS_NOCOW_FL) {
-               if (S_ISREG(mode)) {
+               if (S_ISREG(inode->i_mode)) {
                        /*
                         * It's safe to turn csums off here, no extents exist.
                         * Otherwise we want the flag to reflect the real COW
                         * status of the file and will not set it.
                         */
                        if (inode->i_size == 0)
-                               binode->flags |= BTRFS_INODE_NODATACOW
-                                             | BTRFS_INODE_NODATASUM;
+                               binode_flags |= BTRFS_INODE_NODATACOW |
+                                               BTRFS_INODE_NODATASUM;
                } else {
-                       binode->flags |= BTRFS_INODE_NODATACOW;
+                       binode_flags |= BTRFS_INODE_NODATACOW;
                }
        } else {
                /*
                 * Revert back under same assumptions as above
                 */
-               if (S_ISREG(mode)) {
+               if (S_ISREG(inode->i_mode)) {
                        if (inode->i_size == 0)
-                               binode->flags &= ~(BTRFS_INODE_NODATACOW
-                                            | BTRFS_INODE_NODATASUM);
+                               binode_flags &= ~(BTRFS_INODE_NODATACOW |
+                                                 BTRFS_INODE_NODATASUM);
                } else {
-                       binode->flags &= ~BTRFS_INODE_NODATACOW;
+                       binode_flags &= ~BTRFS_INODE_NODATACOW;
                }
        }
 
@@ -281,57 +276,61 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
         * things smaller.
         */
        if (fsflags & FS_NOCOMP_FL) {
-               binode->flags &= ~BTRFS_INODE_COMPRESS;
-               binode->flags |= BTRFS_INODE_NOCOMPRESS;
-
-               ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
-               if (ret && ret != -ENODATA)
-                       goto out_drop;
+               binode_flags &= ~BTRFS_INODE_COMPRESS;
+               binode_flags |= BTRFS_INODE_NOCOMPRESS;
        } else if (fsflags & FS_COMPR_FL) {
-               const char *comp;
 
                if (IS_SWAPFILE(inode)) {
                        ret = -ETXTBSY;
                        goto out_unlock;
                }
 
-               binode->flags |= BTRFS_INODE_COMPRESS;
-               binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
+               binode_flags |= BTRFS_INODE_COMPRESS;
+               binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
 
                comp = btrfs_compress_type2str(fs_info->compress_type);
                if (!comp || comp[0] == 0)
                        comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
-
-               ret = btrfs_set_prop(inode, "btrfs.compression",
-                                    comp, strlen(comp), 0);
-               if (ret)
-                       goto out_drop;
-
        } else {
-               ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
-               if (ret && ret != -ENODATA)
-                       goto out_drop;
-               binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+               binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
 
-       trans = btrfs_start_transaction(root, 1);
+       /*
+        * 1 for inode item
+        * 2 for properties
+        */
+       trans = btrfs_start_transaction(root, 3);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               goto out_drop;
+               goto out_unlock;
+       }
+
+       if (comp) {
+               ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
+                                    strlen(comp), 0);
+               if (ret) {
+                       btrfs_abort_transaction(trans, ret);
+                       goto out_end_trans;
+               }
+               set_bit(BTRFS_INODE_COPY_EVERYTHING,
+                       &BTRFS_I(inode)->runtime_flags);
+       } else {
+               ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
+                                    0, 0);
+               if (ret && ret != -ENODATA) {
+                       btrfs_abort_transaction(trans, ret);
+                       goto out_end_trans;
+               }
        }
 
+       binode->flags = binode_flags;
        btrfs_sync_inode_flags_to_i_flags(inode);
        inode_inc_iversion(inode);
        inode->i_ctime = current_time(inode);
        ret = btrfs_update_inode(trans, root, inode);
 
+ out_end_trans:
        btrfs_end_transaction(trans);
- out_drop:
-       if (ret) {
-               binode->flags = old_flags;
-               inode->i_flags = old_i_flags;
-       }
-
  out_unlock:
        inode_unlock(inode);
        mnt_drop_write_file(file);
@@ -3260,6 +3259,19 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 {
        int ret;
        u64 i, tail_len, chunk_count;
+       struct btrfs_root *root_dst = BTRFS_I(dst)->root;
+
+       spin_lock(&root_dst->root_item_lock);
+       if (root_dst->send_in_progress) {
+               btrfs_warn_rl(root_dst->fs_info,
+"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
+                             root_dst->root_key.objectid,
+                             root_dst->send_in_progress);
+               spin_unlock(&root_dst->root_item_lock);
+               return -EAGAIN;
+       }
+       root_dst->dedupe_in_progress++;
+       spin_unlock(&root_dst->root_item_lock);
 
        tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
        chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
@@ -3268,7 +3280,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
                                              dst, dst_loff);
                if (ret)
-                       return ret;
+                       goto out;
 
                loff += BTRFS_MAX_DEDUPE_LEN;
                dst_loff += BTRFS_MAX_DEDUPE_LEN;
@@ -3277,6 +3289,10 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
        if (tail_len > 0)
                ret = btrfs_extent_same_range(src, loff, tail_len, dst,
                                              dst_loff);
+out:
+       spin_lock(&root_dst->root_item_lock);
+       root_dst->dedupe_in_progress--;
+       spin_unlock(&root_dst->root_item_lock);
 
        return ret;
 }
@@ -3735,13 +3751,16 @@ process_slot:
                                                                datal);
 
                                if (disko) {
+                                       struct btrfs_ref ref = { 0 };
                                        inode_add_bytes(inode, datal);
-                                       ret = btrfs_inc_extent_ref(trans,
-                                                       root,
-                                                       disko, diskl, 0,
-                                                       root->root_key.objectid,
-                                                       btrfs_ino(BTRFS_I(inode)),
-                                                       new_key.offset - datao);
+                                       btrfs_init_generic_ref(&ref,
+                                               BTRFS_ADD_DELAYED_REF, disko,
+                                               diskl, 0);
+                                       btrfs_init_data_ref(&ref,
+                                               root->root_key.objectid,
+                                               btrfs_ino(BTRFS_I(inode)),
+                                               new_key.offset - datao);
+                                       ret = btrfs_inc_extent_ref(trans, &ref);
                                        if (ret) {
                                                btrfs_abort_transaction(trans,
                                                                        ret);
@@ -3948,16 +3967,10 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
                        return -EXDEV;
        }
 
-       if (same_inode)
-               inode_lock(inode_in);
-       else
-               lock_two_nondirectories(inode_in, inode_out);
-
        /* don't make the dst file partly checksummed */
        if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
            (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) {
-               ret = -EINVAL;
-               goto out_unlock;
+               return -EINVAL;
        }
 
        /*
@@ -3991,26 +4004,14 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
        ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
                                       wb_len);
        if (ret < 0)
-               goto out_unlock;
+               return ret;
        ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
                                       wb_len);
        if (ret < 0)
-               goto out_unlock;
+               return ret;
 
-       ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+       return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
                                            len, remap_flags);
-       if (ret < 0 || *len == 0)
-               goto out_unlock;
-
-       return 0;
-
- out_unlock:
-       if (same_inode)
-               inode_unlock(inode_in);
-       else
-               unlock_two_nondirectories(inode_in, inode_out);
-
-       return ret;
 }
 
 loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
@@ -4025,16 +4026,22 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
        if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
                return -EINVAL;
 
+       if (same_inode)
+               inode_lock(src_inode);
+       else
+               lock_two_nondirectories(src_inode, dst_inode);
+
        ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
                                          &len, remap_flags);
        if (ret < 0 || len == 0)
-               return ret;
+               goto out_unlock;
 
        if (remap_flags & REMAP_FILE_DEDUP)
                ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff);
        else
                ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
 
+out_unlock:
        if (same_inode)
                inode_unlock(src_inode);
        else
index 82b84e4..2f6c3c7 100644 (file)
 #include "extent_io.h"
 #include "locking.h"
 
-static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
+#ifdef CONFIG_BTRFS_DEBUG
+static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
+{
+       WARN_ON(atomic_read(&eb->spinning_writers));
+       atomic_inc(&eb->spinning_writers);
+}
+
+static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
+{
+       WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+       atomic_dec(&eb->spinning_writers);
+}
+
+static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
+{
+       WARN_ON(atomic_read(&eb->spinning_writers));
+}
+
+static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
+{
+       atomic_inc(&eb->spinning_readers);
+}
+
+static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
+{
+       WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+       atomic_dec(&eb->spinning_readers);
+}
+
+static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
+{
+       atomic_inc(&eb->read_locks);
+}
+
+static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
+{
+       atomic_dec(&eb->read_locks);
+}
+
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+       BUG_ON(!atomic_read(&eb->read_locks));
+}
+
+static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
+{
+       atomic_inc(&eb->write_locks);
+}
+
+static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
+{
+       atomic_dec(&eb->write_locks);
+}
+
+void btrfs_assert_tree_locked(struct extent_buffer *eb)
+{
+       BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+#else
+static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
+static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
+static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
+static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
+static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
+void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
+static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
+#endif
 
 void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
 {
+       trace_btrfs_set_lock_blocking_read(eb);
        /*
         * No lock is required.  The lock owner may change if we have a read
         * lock, but it won't change to or away from us.  If we have the write
@@ -25,13 +97,13 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
                return;
        btrfs_assert_tree_read_locked(eb);
        atomic_inc(&eb->blocking_readers);
-       WARN_ON(atomic_read(&eb->spinning_readers) == 0);
-       atomic_dec(&eb->spinning_readers);
+       btrfs_assert_spinning_readers_put(eb);
        read_unlock(&eb->lock);
 }
 
 void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
 {
+       trace_btrfs_set_lock_blocking_write(eb);
        /*
         * No lock is required.  The lock owner may change if we have a read
         * lock, but it won't change to or away from us.  If we have the write
@@ -40,8 +112,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
        if (eb->lock_nested && current->pid == eb->lock_owner)
                return;
        if (atomic_read(&eb->blocking_writers) == 0) {
-               WARN_ON(atomic_read(&eb->spinning_writers) != 1);
-               atomic_dec(&eb->spinning_writers);
+               btrfs_assert_spinning_writers_put(eb);
                btrfs_assert_tree_locked(eb);
                atomic_inc(&eb->blocking_writers);
                write_unlock(&eb->lock);
@@ -50,6 +121,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
 
 void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
 {
+       trace_btrfs_clear_lock_blocking_read(eb);
        /*
         * No lock is required.  The lock owner may change if we have a read
         * lock, but it won't change to or away from us.  If we have the write
@@ -59,7 +131,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
                return;
        BUG_ON(atomic_read(&eb->blocking_readers) == 0);
        read_lock(&eb->lock);
-       atomic_inc(&eb->spinning_readers);
+       btrfs_assert_spinning_readers_get(eb);
        /* atomic_dec_and_test implies a barrier */
        if (atomic_dec_and_test(&eb->blocking_readers))
                cond_wake_up_nomb(&eb->read_lock_wq);
@@ -67,6 +139,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
 
 void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
 {
+       trace_btrfs_clear_lock_blocking_write(eb);
        /*
         * no lock is required.  The lock owner may change if
         * we have a read lock, but it won't change to or away
@@ -77,8 +150,7 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
                return;
        BUG_ON(atomic_read(&eb->blocking_writers) != 1);
        write_lock(&eb->lock);
-       WARN_ON(atomic_read(&eb->spinning_writers));
-       atomic_inc(&eb->spinning_writers);
+       btrfs_assert_spinning_writers_get(eb);
        /* atomic_dec_and_test implies a barrier */
        if (atomic_dec_and_test(&eb->blocking_writers))
                cond_wake_up_nomb(&eb->write_lock_wq);
@@ -90,6 +162,10 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
  */
 void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
+       u64 start_ns = 0;
+
+       if (trace_btrfs_tree_read_lock_enabled())
+               start_ns = ktime_get_ns();
 again:
        BUG_ON(!atomic_read(&eb->blocking_writers) &&
               current->pid == eb->lock_owner);
@@ -104,8 +180,9 @@ again:
                 * called on a partly (write-)locked tree.
                 */
                BUG_ON(eb->lock_nested);
-               eb->lock_nested = 1;
+               eb->lock_nested = true;
                read_unlock(&eb->lock);
+               trace_btrfs_tree_read_lock(eb, start_ns);
                return;
        }
        if (atomic_read(&eb->blocking_writers)) {
@@ -114,8 +191,9 @@ again:
                           atomic_read(&eb->blocking_writers) == 0);
                goto again;
        }
-       atomic_inc(&eb->read_locks);
-       atomic_inc(&eb->spinning_readers);
+       btrfs_assert_tree_read_locks_get(eb);
+       btrfs_assert_spinning_readers_get(eb);
+       trace_btrfs_tree_read_lock(eb, start_ns);
 }
 
 /*
@@ -133,8 +211,9 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
                read_unlock(&eb->lock);
                return 0;
        }
-       atomic_inc(&eb->read_locks);
-       atomic_inc(&eb->spinning_readers);
+       btrfs_assert_tree_read_locks_get(eb);
+       btrfs_assert_spinning_readers_get(eb);
+       trace_btrfs_tree_read_lock_atomic(eb);
        return 1;
 }
 
@@ -154,8 +233,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
                read_unlock(&eb->lock);
                return 0;
        }
-       atomic_inc(&eb->read_locks);
-       atomic_inc(&eb->spinning_readers);
+       btrfs_assert_tree_read_locks_get(eb);
+       btrfs_assert_spinning_readers_get(eb);
+       trace_btrfs_try_tree_read_lock(eb);
        return 1;
 }
 
@@ -175,9 +255,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
                write_unlock(&eb->lock);
                return 0;
        }
-       atomic_inc(&eb->write_locks);
-       atomic_inc(&eb->spinning_writers);
+       btrfs_assert_tree_write_locks_get(eb);
+       btrfs_assert_spinning_writers_get(eb);
        eb->lock_owner = current->pid;
+       trace_btrfs_try_tree_write_lock(eb);
        return 1;
 }
 
@@ -186,6 +267,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
  */
 void btrfs_tree_read_unlock(struct extent_buffer *eb)
 {
+       trace_btrfs_tree_read_unlock(eb);
        /*
         * if we're nested, we have the write lock.  No new locking
         * is needed as long as we are the lock owner.
@@ -193,13 +275,12 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
         * field only matters to the lock owner.
         */
        if (eb->lock_nested && current->pid == eb->lock_owner) {
-               eb->lock_nested = 0;
+               eb->lock_nested = false;
                return;
        }
        btrfs_assert_tree_read_locked(eb);
-       WARN_ON(atomic_read(&eb->spinning_readers) == 0);
-       atomic_dec(&eb->spinning_readers);
-       atomic_dec(&eb->read_locks);
+       btrfs_assert_spinning_readers_put(eb);
+       btrfs_assert_tree_read_locks_put(eb);
        read_unlock(&eb->lock);
 }
 
@@ -208,6 +289,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
  */
 void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
 {
+       trace_btrfs_tree_read_unlock_blocking(eb);
        /*
         * if we're nested, we have the write lock.  No new locking
         * is needed as long as we are the lock owner.
@@ -215,7 +297,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
         * field only matters to the lock owner.
         */
        if (eb->lock_nested && current->pid == eb->lock_owner) {
-               eb->lock_nested = 0;
+               eb->lock_nested = false;
                return;
        }
        btrfs_assert_tree_read_locked(eb);
@@ -223,7 +305,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
        /* atomic_dec_and_test implies a barrier */
        if (atomic_dec_and_test(&eb->blocking_readers))
                cond_wake_up_nomb(&eb->read_lock_wq);
-       atomic_dec(&eb->read_locks);
+       btrfs_assert_tree_read_locks_put(eb);
 }
 
 /*
@@ -232,6 +314,11 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
  */
 void btrfs_tree_lock(struct extent_buffer *eb)
 {
+       u64 start_ns = 0;
+
+       if (trace_btrfs_tree_lock_enabled())
+               start_ns = ktime_get_ns();
+
        WARN_ON(eb->lock_owner == current->pid);
 again:
        wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
@@ -242,10 +329,10 @@ again:
                write_unlock(&eb->lock);
                goto again;
        }
-       WARN_ON(atomic_read(&eb->spinning_writers));
-       atomic_inc(&eb->spinning_writers);
-       atomic_inc(&eb->write_locks);
+       btrfs_assert_spinning_writers_get(eb);
+       btrfs_assert_tree_write_locks_get(eb);
        eb->lock_owner = current->pid;
+       trace_btrfs_tree_lock(eb, start_ns);
 }
 
 /*
@@ -258,28 +345,18 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
        BUG_ON(blockers > 1);
 
        btrfs_assert_tree_locked(eb);
+       trace_btrfs_tree_unlock(eb);
        eb->lock_owner = 0;
-       atomic_dec(&eb->write_locks);
+       btrfs_assert_tree_write_locks_put(eb);
 
        if (blockers) {
-               WARN_ON(atomic_read(&eb->spinning_writers));
+               btrfs_assert_no_spinning_writers(eb);
                atomic_dec(&eb->blocking_writers);
                /* Use the lighter barrier after atomic */
                smp_mb__after_atomic();
                cond_wake_up_nomb(&eb->write_lock_wq);
        } else {
-               WARN_ON(atomic_read(&eb->spinning_writers) != 1);
-               atomic_dec(&eb->spinning_writers);
+               btrfs_assert_spinning_writers_put(eb);
                write_unlock(&eb->lock);
        }
 }
-
-void btrfs_assert_tree_locked(struct extent_buffer *eb)
-{
-       BUG_ON(!atomic_read(&eb->write_locks));
-}
-
-static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
-{
-       BUG_ON(!atomic_read(&eb->read_locks));
-}
index 45e3cfd..52889da 100644 (file)
@@ -195,8 +195,11 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
 
-       if (dio)
+       if (dio) {
+               percpu_counter_add_batch(&fs_info->dio_bytes, len,
+                                        fs_info->delalloc_batch);
                set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
+       }
 
        /* one ref for the tree */
        refcount_set(&entry->refs, 1);
@@ -271,13 +274,12 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
  * when an ordered extent is finished.  If the list covers more than one
  * ordered extent, it is split across multiples.
  */
-void btrfs_add_ordered_sum(struct inode *inode,
-                          struct btrfs_ordered_extent *entry,
+void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                           struct btrfs_ordered_sum *sum)
 {
        struct btrfs_ordered_inode_tree *tree;
 
-       tree = &BTRFS_I(inode)->ordered_tree;
+       tree = &BTRFS_I(entry->inode)->ordered_tree;
        spin_lock_irq(&tree->lock);
        list_add_tail(&sum->list, &entry->list);
        spin_unlock_irq(&tree->lock);
@@ -469,6 +471,10 @@ void btrfs_remove_ordered_extent(struct inode *inode,
        if (root != fs_info->tree_root)
                btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
 
+       if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
+               percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len,
+                                        fs_info->delalloc_batch);
+
        tree = &btrfs_inode->ordered_tree;
        spin_lock_irq(&tree->lock);
        node = &entry->rb_node;
index fb9a161..4c5991c 100644 (file)
@@ -167,8 +167,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
 int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
                                      u64 start, u64 len, u64 disk_len,
                                      int type, int compress_type);
-void btrfs_add_ordered_sum(struct inode *inode,
-                          struct btrfs_ordered_extent *entry,
+void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                           struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
                                                         u64 file_offset);
index df49931..1141ca5 100644 (file)
@@ -189,7 +189,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
        btrfs_info(fs_info,
                   "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
                   btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
-                  btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
+                  btrfs_leaf_free_space(l), btrfs_header_owner(l));
        print_eb_refs_lock(l);
        for (i = 0 ; i < nr ; i++) {
                item = btrfs_item_nr(i);
index 61d22a5..ca27169 100644 (file)
@@ -23,36 +23,6 @@ struct prop_handler {
        int inheritable;
 };
 
-static int prop_compression_validate(const char *value, size_t len);
-static int prop_compression_apply(struct inode *inode,
-                                 const char *value,
-                                 size_t len);
-static const char *prop_compression_extract(struct inode *inode);
-
-static struct prop_handler prop_handlers[] = {
-       {
-               .xattr_name = XATTR_BTRFS_PREFIX "compression",
-               .validate = prop_compression_validate,
-               .apply = prop_compression_apply,
-               .extract = prop_compression_extract,
-               .inheritable = 1
-       },
-};
-
-void __init btrfs_props_init(void)
-{
-       int i;
-
-       hash_init(prop_handlers_ht);
-
-       for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
-               struct prop_handler *p = &prop_handlers[i];
-               u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
-
-               hash_add(prop_handlers_ht, &p->node, h);
-       }
-}
-
 static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
 {
        struct hlist_head *h;
@@ -85,26 +55,37 @@ find_prop_handler(const char *name,
        return NULL;
 }
 
-static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
-                           struct inode *inode,
-                           const char *name,
-                           const char *value,
-                           size_t value_len,
-                           int flags)
+int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
 {
        const struct prop_handler *handler;
-       int ret;
 
        if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
                return -EINVAL;
 
+       handler = find_prop_handler(name, NULL);
+       if (!handler)
+               return -EINVAL;
+
+       if (value_len == 0)
+               return 0;
+
+       return handler->validate(value, value_len);
+}
+
+int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
+                  const char *name, const char *value, size_t value_len,
+                  int flags)
+{
+       const struct prop_handler *handler;
+       int ret;
+
        handler = find_prop_handler(name, NULL);
        if (!handler)
                return -EINVAL;
 
        if (value_len == 0) {
                ret = btrfs_setxattr(trans, inode, handler->xattr_name,
-                                      NULL, 0, flags);
+                                    NULL, 0, flags);
                if (ret)
                        return ret;
 
@@ -114,17 +95,14 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
                return ret;
        }
 
-       ret = handler->validate(value, value_len);
-       if (ret)
-               return ret;
-       ret = btrfs_setxattr(trans, inode, handler->xattr_name,
-                              value, value_len, flags);
+       ret = btrfs_setxattr(trans, inode, handler->xattr_name, value,
+                            value_len, flags);
        if (ret)
                return ret;
        ret = handler->apply(inode, value, value_len);
        if (ret) {
-               btrfs_setxattr(trans, inode, handler->xattr_name,
-                                NULL, 0, flags);
+               btrfs_setxattr(trans, inode, handler->xattr_name, NULL,
+                              0, flags);
                return ret;
        }
 
@@ -133,15 +111,6 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_set_prop(struct inode *inode,
-                  const char *name,
-                  const char *value,
-                  size_t value_len,
-                  int flags)
-{
-       return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
-}
-
 static int iterate_object_props(struct btrfs_root *root,
                                struct btrfs_path *path,
                                u64 objectid,
@@ -283,6 +252,78 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
        return ret;
 }
 
+static int prop_compression_validate(const char *value, size_t len)
+{
+       if (!value)
+               return 0;
+
+       if (!strncmp("lzo", value, 3))
+               return 0;
+       else if (!strncmp("zlib", value, 4))
+               return 0;
+       else if (!strncmp("zstd", value, 4))
+               return 0;
+
+       return -EINVAL;
+}
+
+static int prop_compression_apply(struct inode *inode, const char *value,
+                                 size_t len)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       int type;
+
+       if (len == 0) {
+               BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+               BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
+
+               return 0;
+       }
+
+       if (!strncmp("lzo", value, 3)) {
+               type = BTRFS_COMPRESS_LZO;
+               btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+       } else if (!strncmp("zlib", value, 4)) {
+               type = BTRFS_COMPRESS_ZLIB;
+       } else if (!strncmp("zstd", value, 4)) {
+               type = BTRFS_COMPRESS_ZSTD;
+               btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+       } else {
+               return -EINVAL;
+       }
+
+       BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+       BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+       BTRFS_I(inode)->prop_compress = type;
+
+       return 0;
+}
+
+static const char *prop_compression_extract(struct inode *inode)
+{
+       switch (BTRFS_I(inode)->prop_compress) {
+       case BTRFS_COMPRESS_ZLIB:
+       case BTRFS_COMPRESS_LZO:
+       case BTRFS_COMPRESS_ZSTD:
+               return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
+       default:
+               break;
+       }
+
+       return NULL;
+}
+
+static struct prop_handler prop_handlers[] = {
+       {
+               .xattr_name = XATTR_BTRFS_PREFIX "compression",
+               .validate = prop_compression_validate,
+               .apply = prop_compression_apply,
+               .extract = prop_compression_extract,
+               .inheritable = 1
+       },
+};
+
 static int inherit_props(struct btrfs_trans_handle *trans,
                         struct inode *inode,
                         struct inode *parent)
@@ -308,20 +349,38 @@ static int inherit_props(struct btrfs_trans_handle *trans,
                if (!value)
                        continue;
 
+               /*
+                * This is not strictly necessary as the property should be
+                * valid, but in case it isn't, don't propagate it further.
+                */
+               ret = h->validate(value, strlen(value));
+               if (ret)
+                       continue;
+
                num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
                ret = btrfs_block_rsv_add(root, trans->block_rsv,
                                          num_bytes, BTRFS_RESERVE_NO_FLUSH);
                if (ret)
-                       goto out;
-               ret = __btrfs_set_prop(trans, inode, h->xattr_name,
-                                      value, strlen(value), 0);
+                       return ret;
+
+               ret = btrfs_setxattr(trans, inode, h->xattr_name, value,
+                                    strlen(value), 0);
+               if (!ret) {
+                       ret = h->apply(inode, value, strlen(value));
+                       if (ret)
+                               btrfs_setxattr(trans, inode, h->xattr_name,
+                                              NULL, 0, 0);
+                       else
+                               set_bit(BTRFS_INODE_HAS_PROPS,
+                                       &BTRFS_I(inode)->runtime_flags);
+               }
+
                btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes);
                if (ret)
-                       goto out;
+                       return ret;
        }
-       ret = 0;
-out:
-       return ret;
+
+       return 0;
 }
 
 int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
@@ -364,64 +423,17 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static int prop_compression_validate(const char *value, size_t len)
-{
-       if (!strncmp("lzo", value, 3))
-               return 0;
-       else if (!strncmp("zlib", value, 4))
-               return 0;
-       else if (!strncmp("zstd", value, 4))
-               return 0;
-
-       return -EINVAL;
-}
-
-static int prop_compression_apply(struct inode *inode,
-                                 const char *value,
-                                 size_t len)
+void __init btrfs_props_init(void)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       int type;
-
-       if (len == 0) {
-               BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
-               BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
-               BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
-
-               return 0;
-       }
-
-       if (!strncmp("lzo", value, 3)) {
-               type = BTRFS_COMPRESS_LZO;
-               btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
-       } else if (!strncmp("zlib", value, 4)) {
-               type = BTRFS_COMPRESS_ZLIB;
-       } else if (!strncmp("zstd", value, 4)) {
-               type = BTRFS_COMPRESS_ZSTD;
-               btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
-       } else {
-               return -EINVAL;
-       }
+       int i;
 
-       BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
-       BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
-       BTRFS_I(inode)->prop_compress = type;
+       hash_init(prop_handlers_ht);
 
-       return 0;
-}
+       for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
+               struct prop_handler *p = &prop_handlers[i];
+               u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
 
-static const char *prop_compression_extract(struct inode *inode)
-{
-       switch (BTRFS_I(inode)->prop_compress) {
-       case BTRFS_COMPRESS_ZLIB:
-       case BTRFS_COMPRESS_LZO:
-       case BTRFS_COMPRESS_ZSTD:
-               return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
-       default:
-               break;
+               hash_add(prop_handlers_ht, &p->node, h);
        }
-
-       return NULL;
 }
 
-
index 618815b..40b2c65 100644 (file)
 
 void __init btrfs_props_init(void);
 
-int btrfs_set_prop(struct inode *inode,
-                  const char *name,
-                  const char *value,
-                  size_t value_len,
+int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
+                  const char *name, const char *value, size_t value_len,
                   int flags);
+int btrfs_validate_prop(const char *name, const char *value, size_t value_len);
 
 int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
 
index e659d9d..2f708f2 100644 (file)
@@ -918,8 +918,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        /*
         * initially create the quota tree
         */
-       quota_root = btrfs_create_tree(trans, fs_info,
-                                      BTRFS_QUOTA_TREE_OBJECTID);
+       quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
        if (IS_ERR(quota_root)) {
                ret =  PTR_ERR(quota_root);
                btrfs_abort_transaction(trans, ret);
@@ -1101,7 +1100,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        list_del(&quota_root->dirty_list);
 
        btrfs_tree_lock(quota_root->node);
-       clean_tree_block(fs_info, quota_root->node);
+       btrfs_clean_tree_block(quota_root->node);
        btrfs_tree_unlock(quota_root->node);
        btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
 
index b283d3a..5cec2c6 100644 (file)
@@ -659,36 +659,43 @@ static void dump_block_entry(struct btrfs_fs_info *fs_info,
 
 /*
  * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
- * @root: the root we are making this modification from.
- * @bytenr: the bytenr we are modifying.
- * @num_bytes: number of bytes.
- * @parent: the parent bytenr.
- * @ref_root: the original root owner of the bytenr.
- * @owner: level in the case of metadata, inode in the case of data.
- * @offset: 0 for metadata, file offset for data.
- * @action: the action that we are doing, this is the same as the delayed ref
- *     action.
  *
  * This will add an action item to the given bytenr and do sanity checks to make
  * sure we haven't messed something up.  If we are making a new allocation and
  * this block entry has history we will delete all previous actions as long as
  * our sanity checks pass as they are no longer needed.
  */
-int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
-                      u64 parent, u64 ref_root, u64 owner, u64 offset,
-                      int action)
+int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+                      struct btrfs_ref *generic_ref)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct ref_entry *ref = NULL, *exist;
        struct ref_action *ra = NULL;
        struct block_entry *be = NULL;
        struct root_entry *re = NULL;
+       int action = generic_ref->action;
        int ret = 0;
-       bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+       bool metadata;
+       u64 bytenr = generic_ref->bytenr;
+       u64 num_bytes = generic_ref->len;
+       u64 parent = generic_ref->parent;
+       u64 ref_root;
+       u64 owner;
+       u64 offset;
 
-       if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
+       if (!btrfs_test_opt(fs_info, REF_VERIFY))
                return 0;
 
+       if (generic_ref->type == BTRFS_REF_METADATA) {
+               ref_root = generic_ref->tree_ref.root;
+               owner = generic_ref->tree_ref.level;
+               offset = 0;
+       } else {
+               ref_root = generic_ref->data_ref.ref_root;
+               owner = generic_ref->data_ref.ino;
+               offset = generic_ref->data_ref.offset;
+       }
+       metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
        ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
        ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
        if (!ra || !ref) {
@@ -721,7 +728,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 
        INIT_LIST_HEAD(&ra->list);
        ra->action = action;
-       ra->root = root->root_key.objectid;
+       ra->root = generic_ref->real_root;
 
        /*
         * This is an allocation, preallocate the block_entry in case we haven't
@@ -734,7 +741,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
                 * is and the new root objectid, so let's not treat the passed
                 * in root as if it really has a ref for this bytenr.
                 */
-               be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
+               be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
                if (IS_ERR(be)) {
                        kfree(ra);
                        ret = PTR_ERR(be);
@@ -776,13 +783,13 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
                         * one we want to lookup below when we modify the
                         * re->num_refs.
                         */
-                       ref_root = root->root_key.objectid;
-                       re->root_objectid = root->root_key.objectid;
+                       ref_root = generic_ref->real_root;
+                       re->root_objectid = generic_ref->real_root;
                        re->num_refs = 0;
                }
 
-               spin_lock(&root->fs_info->ref_verify_lock);
-               be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
+               spin_lock(&fs_info->ref_verify_lock);
+               be = lookup_block_entry(&fs_info->block_tree, bytenr);
                if (!be) {
                        btrfs_err(fs_info,
 "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
@@ -851,7 +858,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
                         * didn't think of some other corner case.
                         */
                        btrfs_err(fs_info, "failed to find root %llu for %llu",
-                                 root->root_key.objectid, be->bytenr);
+                                 generic_ref->real_root, be->bytenr);
                        dump_block_entry(fs_info, be);
                        dump_ref_action(fs_info, ra);
                        kfree(ra);
@@ -870,7 +877,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
        list_add_tail(&ra->list, &be->actions);
        ret = 0;
 out_unlock:
-       spin_unlock(&root->fs_info->ref_verify_lock);
+       spin_unlock(&fs_info->ref_verify_lock);
 out:
        if (ret)
                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
index b7d2a4e..855de37 100644 (file)
@@ -9,9 +9,8 @@
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
 void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
-int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
-                      u64 parent, u64 ref_root, u64 owner, u64 offset,
-                      int action);
+int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+                      struct btrfs_ref *generic_ref);
 void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
                               u64 len);
 
@@ -30,9 +29,8 @@ static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
 {
 }
 
-static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr,
-                                    u64 num_bytes, u64 parent, u64 ref_root,
-                                    u64 owner, u64 offset, int action)
+static inline int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+                      struct btrfs_ref *generic_ref)
 {
        return 0;
 }
index ddf0285..a459ecd 100644 (file)
@@ -1643,6 +1643,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
 
        nritems = btrfs_header_nritems(leaf);
        for (i = 0; i < nritems; i++) {
+               struct btrfs_ref ref = { 0 };
+
                cond_resched();
                btrfs_item_key_to_cpu(leaf, &key, i);
                if (key.type != BTRFS_EXTENT_DATA_KEY)
@@ -1703,18 +1705,23 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
                dirty = 1;
 
                key.offset -= btrfs_file_extent_offset(leaf, fi);
-               ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
-                                          num_bytes, parent,
-                                          btrfs_header_owner(leaf),
-                                          key.objectid, key.offset);
+               btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
+                                      num_bytes, parent);
+               ref.real_root = root->root_key.objectid;
+               btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+                                   key.objectid, key.offset);
+               ret = btrfs_inc_extent_ref(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        break;
                }
 
-               ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-                                       parent, btrfs_header_owner(leaf),
-                                       key.objectid, key.offset);
+               btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+                                      num_bytes, parent);
+               ref.real_root = root->root_key.objectid;
+               btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+                                   key.objectid, key.offset);
+               ret = btrfs_free_extent(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        break;
@@ -1756,6 +1763,7 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
        struct btrfs_fs_info *fs_info = dest->fs_info;
        struct extent_buffer *eb;
        struct extent_buffer *parent;
+       struct btrfs_ref ref = { 0 };
        struct btrfs_key key;
        u64 old_bytenr;
        u64 new_bytenr;
@@ -1916,23 +1924,31 @@ again:
                                              path->slots[level], old_ptr_gen);
                btrfs_mark_buffer_dirty(path->nodes[level]);
 
-               ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
-                                       blocksize, path->nodes[level]->start,
-                                       src->root_key.objectid, level - 1, 0);
+               btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
+                                      blocksize, path->nodes[level]->start);
+               ref.skip_qgroup = true;
+               btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
+               ret = btrfs_inc_extent_ref(trans, &ref);
                BUG_ON(ret);
-               ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
-                                       blocksize, 0, dest->root_key.objectid,
-                                       level - 1, 0);
+               btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
+                                      blocksize, 0);
+               ref.skip_qgroup = true;
+               btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
+               ret = btrfs_inc_extent_ref(trans, &ref);
                BUG_ON(ret);
 
-               ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
-                                       path->nodes[level]->start,
-                                       src->root_key.objectid, level - 1, 0);
+               btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
+                                      blocksize, path->nodes[level]->start);
+               btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
+               ref.skip_qgroup = true;
+               ret = btrfs_free_extent(trans, &ref);
                BUG_ON(ret);
 
-               ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
-                                       0, dest->root_key.objectid, level - 1,
-                                       0);
+               btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
+                                      blocksize, 0);
+               btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
+               ref.skip_qgroup = true;
+               ret = btrfs_free_extent(trans, &ref);
                BUG_ON(ret);
 
                btrfs_unlock_up_safe(path, 0);
@@ -2721,6 +2737,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
        rc->backref_cache.path[node->level] = node;
        list_for_each_entry(edge, &node->upper, list[LOWER]) {
                struct btrfs_key first_key;
+               struct btrfs_ref ref = { 0 };
 
                cond_resched();
 
@@ -2826,11 +2843,13 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                                                      trans->transid);
                        btrfs_mark_buffer_dirty(upper->eb);
 
-                       ret = btrfs_inc_extent_ref(trans, root,
-                                               node->eb->start, blocksize,
-                                               upper->eb->start,
-                                               btrfs_header_owner(upper->eb),
-                                               node->level, 0);
+                       btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
+                                              node->eb->start, blocksize,
+                                              upper->eb->start);
+                       ref.real_root = root->root_key.objectid;
+                       btrfs_init_tree_ref(&ref, node->level,
+                                           btrfs_header_owner(upper->eb));
+                       ret = btrfs_inc_extent_ref(trans, &ref);
                        BUG_ON(ret);
 
                        ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
@@ -4222,7 +4241,7 @@ out:
        return inode;
 }
 
-static struct reloc_control *alloc_reloc_control(void)
+static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
 {
        struct reloc_control *rc;
 
@@ -4234,7 +4253,8 @@ static struct reloc_control *alloc_reloc_control(void)
        INIT_LIST_HEAD(&rc->dirty_subvol_roots);
        backref_cache_init(&rc->backref_cache);
        mapping_tree_init(&rc->reloc_root_tree);
-       extent_io_tree_init(&rc->processed_blocks, NULL);
+       extent_io_tree_init(fs_info, &rc->processed_blocks,
+                           IO_TREE_RELOC_BLOCKS, NULL);
        return rc;
 }
 
@@ -4276,7 +4296,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
                return -ETXTBSY;
        }
 
-       rc = alloc_reloc_control();
+       rc = alloc_reloc_control(fs_info);
        if (!rc) {
                btrfs_put_block_group(bg);
                return -ENOMEM;
@@ -4298,7 +4318,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
                goto out;
        }
 
-       inode = lookup_free_space_inode(fs_info, rc->block_group, path);
+       inode = lookup_free_space_inode(rc->block_group, path);
        btrfs_free_path(path);
 
        if (!IS_ERR(inode))
@@ -4330,27 +4350,36 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
                mutex_lock(&fs_info->cleaner_mutex);
                ret = relocate_block_group(rc);
                mutex_unlock(&fs_info->cleaner_mutex);
-               if (ret < 0) {
+               if (ret < 0)
                        err = ret;
-                       goto out;
-               }
-
-               if (rc->extents_found == 0)
-                       break;
-
-               btrfs_info(fs_info, "found %llu extents", rc->extents_found);
 
+               /*
+                * We may have gotten ENOSPC after we already dirtied some
+                * extents.  If writeout happens while we're relocating a
+                * different block group we could end up hitting the
+                * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
+                * btrfs_reloc_cow_block.  Make sure we write everything out
+                * properly so we don't trip over this problem, and then break
+                * out of the loop if we hit an error.
+                */
                if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
                        ret = btrfs_wait_ordered_range(rc->data_inode, 0,
                                                       (u64)-1);
-                       if (ret) {
+                       if (ret)
                                err = ret;
-                               goto out;
-                       }
                        invalidate_mapping_pages(rc->data_inode->i_mapping,
                                                 0, -1);
                        rc->stage = UPDATE_DATA_PTRS;
                }
+
+               if (err < 0)
+                       goto out;
+
+               if (rc->extents_found == 0)
+                       break;
+
+               btrfs_info(fs_info, "found %llu extents", rc->extents_found);
+
        }
 
        WARN_ON(rc->block_group->pinned > 0);
@@ -4472,7 +4501,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        if (list_empty(&reloc_roots))
                goto out;
 
-       rc = alloc_reloc_control();
+       rc = alloc_reloc_control(fs_info);
        if (!rc) {
                err = -ENOMEM;
                goto out;
@@ -4594,7 +4623,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
                new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
                sums->bytenr = new_bytenr;
 
-               btrfs_add_ordered_sum(inode, ordered, sums);
+               btrfs_add_ordered_sum(ordered, sums);
        }
 out:
        btrfs_put_ordered_extent(ordered);
@@ -4667,14 +4696,12 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
 void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
                              u64 *bytes_to_reserve)
 {
-       struct btrfs_root *root;
-       struct reloc_control *rc;
+       struct btrfs_root *root = pending->root;
+       struct reloc_control *rc = root->fs_info->reloc_ctl;
 
-       root = pending->root;
-       if (!root->reloc_root)
+       if (!root->reloc_root || !rc)
                return;
 
-       rc = root->fs_info->reloc_ctl;
        if (!rc->merge_reloc_tree)
                return;
 
@@ -4703,10 +4730,10 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = pending->root;
        struct btrfs_root *reloc_root;
        struct btrfs_root *new_root;
-       struct reloc_control *rc;
+       struct reloc_control *rc = root->fs_info->reloc_ctl;
        int ret;
 
-       if (!root->reloc_root)
+       if (!root->reloc_root || !rc)
                return 0;
 
        rc = root->fs_info->reloc_ctl;
index 893d12f..1b9a5d0 100644 (file)
@@ -137,11 +137,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
                goto out;
        }
 
-       if (ret != 0) {
-               btrfs_print_leaf(path->nodes[0]);
-               btrfs_crit(fs_info, "unable to update root key %llu %u %llu",
-                          key->objectid, key->type, key->offset);
-               BUG_ON(1);
+       if (ret > 0) {
+               btrfs_crit(fs_info,
+                       "unable to find root key (%llu %u %llu) in tree %llu",
+                       key->objectid, key->type, key->offset,
+                       root->root_key.objectid);
+               ret = -EUCLEAN;
+               btrfs_abort_transaction(trans, ret);
+               goto out;
        }
 
        l = path->nodes[0];
index a995885..f7b29f9 100644 (file)
@@ -3791,7 +3791,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
        struct btrfs_workqueue *scrub_parity = NULL;
 
        if (btrfs_fs_closing(fs_info))
-               return -EINVAL;
+               return -EAGAIN;
 
        if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
                /*
@@ -3999,9 +3999,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
        return 0;
 }
 
-int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
-                          struct btrfs_device *dev)
+int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
 {
+       struct btrfs_fs_info *fs_info = dev->fs_info;
        struct scrub_ctx *sctx;
 
        mutex_lock(&fs_info->scrub_lock);
index 7ea2d6b..dd38dfe 100644 (file)
@@ -1160,7 +1160,6 @@ out:
 struct backref_ctx {
        struct send_ctx *sctx;
 
-       struct btrfs_path *path;
        /* number of total found references */
        u64 found;
 
@@ -1213,8 +1212,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
 {
        struct backref_ctx *bctx = ctx_;
        struct clone_root *found;
-       int ret;
-       u64 i_size;
 
        /* First check if the root is in the list of accepted clone sources */
        found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
@@ -1230,19 +1227,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
                bctx->found_itself = 1;
        }
 
-       /*
-        * There are inodes that have extents that lie behind its i_size. Don't
-        * accept clones from these extents.
-        */
-       ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
-                              NULL, NULL, NULL);
-       btrfs_release_path(bctx->path);
-       if (ret < 0)
-               return ret;
-
-       if (offset + bctx->data_offset + bctx->extent_len > i_size)
-               return 0;
-
        /*
         * Make sure we don't consider clones from send_root that are
         * behind the current inode/offset.
@@ -1319,8 +1303,6 @@ static int find_extent_clone(struct send_ctx *sctx,
                goto out;
        }
 
-       backref_ctx->path = tmp_path;
-
        if (data_offset >= ino_size) {
                /*
                 * There may be extents that lie behind the file's size.
@@ -5082,6 +5064,7 @@ static int clone_range(struct send_ctx *sctx,
        struct btrfs_path *path;
        struct btrfs_key key;
        int ret;
+       u64 clone_src_i_size;
 
        /*
         * Prevent cloning from a zero offset with a length matching the sector
@@ -5106,6 +5089,16 @@ static int clone_range(struct send_ctx *sctx,
        if (!path)
                return -ENOMEM;
 
+       /*
+        * There are inodes that have extents that lie behind its i_size. Don't
+        * accept clones from these extents.
+        */
+       ret = __get_inode_info(clone_root->root, path, clone_root->ino,
+                              &clone_src_i_size, NULL, NULL, NULL, NULL, NULL);
+       btrfs_release_path(path);
+       if (ret < 0)
+               goto out;
+
        /*
         * We can't send a clone operation for the entire range if we find
         * extent items in the respective range in the source file that
@@ -5148,6 +5141,7 @@ static int clone_range(struct send_ctx *sctx,
                u8 type;
                u64 ext_len;
                u64 clone_len;
+               u64 clone_data_offset;
 
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(clone_root->root, path);
@@ -5201,10 +5195,30 @@ static int clone_range(struct send_ctx *sctx,
                if (key.offset >= clone_root->offset + len)
                        break;
 
+               if (key.offset >= clone_src_i_size)
+                       break;
+
+               if (key.offset + ext_len > clone_src_i_size)
+                       ext_len = clone_src_i_size - key.offset;
+
+               clone_data_offset = btrfs_file_extent_offset(leaf, ei);
+               if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
+                       clone_root->offset = key.offset;
+                       if (clone_data_offset < data_offset &&
+                               clone_data_offset + ext_len > data_offset) {
+                               u64 extent_offset;
+
+                               extent_offset = data_offset - clone_data_offset;
+                               ext_len -= extent_offset;
+                               clone_data_offset += extent_offset;
+                               clone_root->offset += extent_offset;
+                       }
+               }
+
                clone_len = min_t(u64, ext_len, len);
 
                if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
-                   btrfs_file_extent_offset(leaf, ei) == data_offset)
+                   clone_data_offset == data_offset)
                        ret = send_clone(sctx, offset, clone_len, clone_root);
                else
                        ret = send_extent_data(sctx, offset, clone_len);
@@ -6579,6 +6593,38 @@ commit_trans:
        return btrfs_commit_transaction(trans);
 }
 
+/*
+ * Make sure any existing delalloc is flushed for any root used by a send
+ * operation so that we do not miss any data and we do not race with writeback
+ * finishing and changing a tree while send is using the tree. This could
+ * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
+ * a send operation then uses the subvolume.
+ * After flushing delalloc ensure_commit_roots_uptodate() must be called.
+ */
+static int flush_delalloc_roots(struct send_ctx *sctx)
+{
+       struct btrfs_root *root = sctx->parent_root;
+       int ret;
+       int i;
+
+       if (root) {
+               ret = btrfs_start_delalloc_snapshot(root);
+               if (ret)
+                       return ret;
+               btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+       }
+
+       for (i = 0; i < sctx->clone_roots_cnt; i++) {
+               root = sctx->clone_roots[i].root;
+               ret = btrfs_start_delalloc_snapshot(root);
+               if (ret)
+                       return ret;
+               btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+       }
+
+       return 0;
+}
+
 static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
 {
        spin_lock(&root->root_item_lock);
@@ -6594,6 +6640,13 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
        spin_unlock(&root->root_item_lock);
 }
 
+static void dedupe_in_progress_warn(const struct btrfs_root *root)
+{
+       btrfs_warn_rl(root->fs_info,
+"cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
+                     root->root_key.objectid, root->dedupe_in_progress);
+}
+
 long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
 {
        int ret = 0;
@@ -6617,6 +6670,11 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
         * making it RW. This also protects against deletion.
         */
        spin_lock(&send_root->root_item_lock);
+       if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
+               dedupe_in_progress_warn(send_root);
+               spin_unlock(&send_root->root_item_lock);
+               return -EAGAIN;
+       }
        send_root->send_in_progress++;
        spin_unlock(&send_root->root_item_lock);
 
@@ -6751,6 +6809,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
                                ret = -EPERM;
                                goto out;
                        }
+                       if (clone_root->dedupe_in_progress) {
+                               dedupe_in_progress_warn(clone_root);
+                               spin_unlock(&clone_root->root_item_lock);
+                               srcu_read_unlock(&fs_info->subvol_srcu, index);
+                               ret = -EAGAIN;
+                               goto out;
+                       }
                        clone_root->send_in_progress++;
                        spin_unlock(&clone_root->root_item_lock);
                        srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6785,6 +6850,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
                        ret = -EPERM;
                        goto out;
                }
+               if (sctx->parent_root->dedupe_in_progress) {
+                       dedupe_in_progress_warn(sctx->parent_root);
+                       spin_unlock(&sctx->parent_root->root_item_lock);
+                       srcu_read_unlock(&fs_info->subvol_srcu, index);
+                       ret = -EAGAIN;
+                       goto out;
+               }
                spin_unlock(&sctx->parent_root->root_item_lock);
 
                srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6803,6 +6875,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
                        NULL);
        sort_clone_roots = 1;
 
+       ret = flush_delalloc_roots(sctx);
+       if (ret)
+               goto out;
+
        ret = ensure_commit_roots_uptodate(sctx);
        if (ret)
                goto out;
index 236f812..0645ec4 100644 (file)
@@ -1400,7 +1400,7 @@ static inline int is_subvolume_inode(struct inode *inode)
 }
 
 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
-                                  const char *device_name, struct vfsmount *mnt)
+                                  struct vfsmount *mnt)
 {
        struct dentry *root;
        int ret;
@@ -1649,7 +1649,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        }
 
        /* mount_subvol() will free subvol_name and mnt_root */
-       root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
+       root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
 
 out:
        return root;
index 8a59597..9238fd4 100644 (file)
 
 static struct vfsmount *test_mnt = NULL;
 
+const char *test_error[] = {
+       [TEST_ALLOC_FS_INFO]         = "cannot allocate fs_info",
+       [TEST_ALLOC_ROOT]            = "cannot allocate root",
+       [TEST_ALLOC_EXTENT_BUFFER]   = "cannot extent buffer",
+       [TEST_ALLOC_PATH]            = "cannot allocate path",
+       [TEST_ALLOC_INODE]           = "cannot allocate inode",
+       [TEST_ALLOC_BLOCK_GROUP]     = "cannot allocate block group",
+       [TEST_ALLOC_EXTENT_MAP]      = "cannot allocate extent map",
+};
+
 static const struct super_operations btrfs_test_super_ops = {
        .alloc_inode    = btrfs_alloc_inode,
        .destroy_inode  = btrfs_test_destroy_inode,
@@ -99,7 +109,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 
        spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->qgroup_lock);
-       spin_lock_init(&fs_info->qgroup_op_lock);
        spin_lock_init(&fs_info->super_lock);
        spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->tree_mod_seq_lock);
@@ -115,8 +124,10 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
        INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
-       extent_io_tree_init(&fs_info->freed_extents[0], NULL);
-       extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+       extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
+                           IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
+       extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
+                           IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
        fs_info->pinned_extents = &fs_info->freed_extents[0];
        set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
 
index 70ff9f9..ee277bb 100644 (file)
 int btrfs_run_sanity_tests(void);
 
 #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
-#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
+#define test_err(fmt, ...) pr_err("BTRFS: selftest: %s:%d " fmt "\n",  \
+               __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define test_std_err(index)    test_err("%s", test_error[index])
+
+enum {
+       TEST_ALLOC_FS_INFO,
+       TEST_ALLOC_ROOT,
+       TEST_ALLOC_EXTENT_BUFFER,
+       TEST_ALLOC_PATH,
+       TEST_ALLOC_INODE,
+       TEST_ALLOC_BLOCK_GROUP,
+       TEST_ALLOC_EXTENT_MAP,
+};
+
+extern const char *test_error[];
 
 struct btrfs_root;
 struct btrfs_trans_handle;
index 7d72eab..a1b9f9b 100644 (file)
@@ -30,27 +30,27 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
 
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("could not allocate fs_info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                return -ENOMEM;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("could not allocate root");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(root);
                goto out;
        }
 
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("could not allocate path");
+               test_std_err(TEST_ALLOC_PATH);
                ret = -ENOMEM;
                goto out;
        }
 
        path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize);
        if (!eb) {
-               test_err("could not allocate dummy buffer");
+               test_std_err(TEST_ALLOC_EXTENT_BUFFER);
                ret = -ENOMEM;
                goto out;
        }
index 3c46d7f..7bf4d57 100644 (file)
@@ -73,11 +73,15 @@ static int test_find_delalloc(u32 sectorsize)
 
        inode = btrfs_new_test_inode();
        if (!inode) {
-               test_err("failed to allocate test inode");
+               test_std_err(TEST_ALLOC_INODE);
                return -ENOMEM;
        }
 
-       extent_io_tree_init(&tmp, NULL);
+       /*
+        * Passing NULL as we don't have fs_info but tracepoints are not used
+        * at this point
+        */
+       extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL);
 
        /*
         * First go through and create and mark all of our pages dirty, we pin
@@ -374,8 +378,8 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 {
        struct btrfs_fs_info *fs_info;
        unsigned long len;
-       unsigned long *bitmap;
-       struct extent_buffer *eb;
+       unsigned long *bitmap = NULL;
+       struct extent_buffer *eb = NULL;
        int ret;
 
        test_msg("running extent buffer bitmap tests");
@@ -388,18 +392,23 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
                ? sectorsize * 4 : sectorsize;
 
        fs_info = btrfs_alloc_dummy_fs_info(len, len);
+       if (!fs_info) {
+               test_std_err(TEST_ALLOC_FS_INFO);
+               return -ENOMEM;
+       }
 
        bitmap = kmalloc(len, GFP_KERNEL);
        if (!bitmap) {
                test_err("couldn't allocate test bitmap");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out;
        }
 
        eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
        if (!eb) {
-               test_err("couldn't allocate test extent buffer");
-               kfree(bitmap);
-               return -ENOMEM;
+               test_std_err(TEST_ALLOC_ROOT);
+               ret = -ENOMEM;
+               goto out;
        }
 
        ret = __test_eb_bitmaps(bitmap, eb, len);
@@ -408,17 +417,18 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 
        /* Do it over again with an extent buffer which isn't page-aligned. */
        free_extent_buffer(eb);
-       eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
+       eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len);
        if (!eb) {
-               test_err("couldn't allocate test extent buffer");
-               kfree(bitmap);
-               return -ENOMEM;
+               test_std_err(TEST_ALLOC_ROOT);
+               ret = -ENOMEM;
+               goto out;
        }
 
        ret = __test_eb_bitmaps(bitmap, eb, len);
 out:
        free_extent_buffer(eb);
        kfree(bitmap);
+       btrfs_free_dummy_fs_info(fs_info);
        return ret;
 }
 
@@ -434,6 +444,5 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
 
        ret = test_eb_bitmaps(sectorsize, nodesize);
 out:
-       test_msg("extent I/O tests finished");
        return ret;
 }
index bf15d3a..87aeabe 100644 (file)
@@ -47,7 +47,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
  *                                    ->add_extent_mapping(0, 16K)
  *                                    -> #handle -EEXIST
  */
-static void test_case_1(struct btrfs_fs_info *fs_info,
+static int test_case_1(struct btrfs_fs_info *fs_info,
                struct extent_map_tree *em_tree)
 {
        struct extent_map *em;
@@ -56,9 +56,10 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
        int ret;
 
        em = alloc_extent_map();
-       if (!em)
-               /* Skip the test on error. */
-               return;
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               return -ENOMEM;
+       }
 
        /* Add [0, 16K) */
        em->start = 0;
@@ -66,25 +67,37 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
        em->block_start = 0;
        em->block_len = SZ_16K;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [0, 16K)");
+               goto out;
+       }
        free_extent_map(em);
 
        /* Add [16K, 20K) following [0, 16K)  */
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
 
        em->start = SZ_16K;
        em->len = SZ_4K;
        em->block_start = SZ_32K; /* avoid merging */
        em->block_len = SZ_4K;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [16K, 20K)");
+               goto out;
+       }
        free_extent_map(em);
 
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
 
        /* Add [0, 8K), should return [0, 16K) instead. */
        em->start = start;
@@ -92,19 +105,24 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
        em->block_start = start;
        em->block_len = len;
        ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
-       if (ret)
+       if (ret) {
                test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
+               goto out;
+       }
        if (em &&
            (em->start != 0 || extent_map_end(em) != SZ_16K ||
-            em->block_start != 0 || em->block_len != SZ_16K))
+            em->block_start != 0 || em->block_len != SZ_16K)) {
                test_err(
 "case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
                         start, start + len, ret, em->start, em->len,
                         em->block_start, em->block_len);
+               ret = -EINVAL;
+       }
        free_extent_map(em);
 out:
-       /* free memory */
        free_extent_map_tree(em_tree);
+
+       return ret;
 }
 
 /*
@@ -113,16 +131,17 @@ out:
  * Reading the inline ending up with EEXIST, ie. read an inline
  * extent and discard page cache and read it again.
  */
-static void test_case_2(struct btrfs_fs_info *fs_info,
+static int test_case_2(struct btrfs_fs_info *fs_info,
                struct extent_map_tree *em_tree)
 {
        struct extent_map *em;
        int ret;
 
        em = alloc_extent_map();
-       if (!em)
-               /* Skip the test on error. */
-               return;
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               return -ENOMEM;
+       }
 
        /* Add [0, 1K) */
        em->start = 0;
@@ -130,25 +149,37 @@ static void test_case_2(struct btrfs_fs_info *fs_info,
        em->block_start = EXTENT_MAP_INLINE;
        em->block_len = (u64)-1;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [0, 1K)");
+               goto out;
+       }
        free_extent_map(em);
 
-       /* Add [4K, 4K) following [0, 1K)  */
+       /* Add [4K, 8K) following [0, 1K)  */
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
 
        em->start = SZ_4K;
        em->len = SZ_4K;
        em->block_start = SZ_4K;
        em->block_len = SZ_4K;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [4K, 8K)");
+               goto out;
+       }
        free_extent_map(em);
 
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
 
        /* Add [0, 1K) */
        em->start = 0;
@@ -156,22 +187,27 @@ static void test_case_2(struct btrfs_fs_info *fs_info,
        em->block_start = EXTENT_MAP_INLINE;
        em->block_len = (u64)-1;
        ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
-       if (ret)
+       if (ret) {
                test_err("case2 [0 1K]: ret %d", ret);
+               goto out;
+       }
        if (em &&
            (em->start != 0 || extent_map_end(em) != SZ_1K ||
-            em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1))
+            em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) {
                test_err(
 "case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
                         ret, em->start, em->len, em->block_start,
                         em->block_len);
+               ret = -EINVAL;
+       }
        free_extent_map(em);
 out:
-       /* free memory */
        free_extent_map_tree(em_tree);
+
+       return ret;
 }
 
-static void __test_case_3(struct btrfs_fs_info *fs_info,
+static int __test_case_3(struct btrfs_fs_info *fs_info,
                struct extent_map_tree *em_tree, u64 start)
 {
        struct extent_map *em;
@@ -179,9 +215,10 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
        int ret;
 
        em = alloc_extent_map();
-       if (!em)
-               /* Skip this test on error. */
-               return;
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               return -ENOMEM;
+       }
 
        /* Add [4K, 8K) */
        em->start = SZ_4K;
@@ -189,12 +226,18 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
        em->block_start = SZ_4K;
        em->block_len = SZ_4K;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [4K, 8K)");
+               goto out;
+       }
        free_extent_map(em);
 
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
 
        /* Add [0, 16K) */
        em->start = 0;
@@ -202,24 +245,29 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
        em->block_start = 0;
        em->block_len = SZ_16K;
        ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
-       if (ret)
+       if (ret) {
                test_err("case3 [0x%llx 0x%llx): ret %d",
                         start, start + len, ret);
+               goto out;
+       }
        /*
         * Since bytes within em are contiguous, em->block_start is identical to
         * em->start.
         */
        if (em &&
            (start < em->start || start + len > extent_map_end(em) ||
-            em->start != em->block_start || em->len != em->block_len))
+            em->start != em->block_start || em->len != em->block_len)) {
                test_err(
 "case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
                         start, start + len, ret, em->start, em->len,
                         em->block_start, em->block_len);
+               ret = -EINVAL;
+       }
        free_extent_map(em);
 out:
-       /* free memory */
        free_extent_map_tree(em_tree);
+
+       return ret;
 }
 
 /*
@@ -238,15 +286,23 @@ out:
  *   -> add_extent_mapping()
  *                            -> add_extent_mapping()
  */
-static void test_case_3(struct btrfs_fs_info *fs_info,
+static int test_case_3(struct btrfs_fs_info *fs_info,
                struct extent_map_tree *em_tree)
 {
-       __test_case_3(fs_info, em_tree, 0);
-       __test_case_3(fs_info, em_tree, SZ_8K);
-       __test_case_3(fs_info, em_tree, (12 * 1024ULL));
+       int ret;
+
+       ret = __test_case_3(fs_info, em_tree, 0);
+       if (ret)
+               return ret;
+       ret = __test_case_3(fs_info, em_tree, SZ_8K);
+       if (ret)
+               return ret;
+       ret = __test_case_3(fs_info, em_tree, (12 * SZ_1K));
+
+       return ret;
 }
 
-static void __test_case_4(struct btrfs_fs_info *fs_info,
+static int __test_case_4(struct btrfs_fs_info *fs_info,
                struct extent_map_tree *em_tree, u64 start)
 {
        struct extent_map *em;
@@ -254,9 +310,10 @@ static void __test_case_4(struct btrfs_fs_info *fs_info,
        int ret;
 
        em = alloc_extent_map();
-       if (!em)
-               /* Skip this test on error. */
-               return;
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               return -ENOMEM;
+       }
 
        /* Add [0K, 8K) */
        em->start = 0;
@@ -264,44 +321,60 @@ static void __test_case_4(struct btrfs_fs_info *fs_info,
        em->block_start = 0;
        em->block_len = SZ_8K;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [0, 8K)");
+               goto out;
+       }
        free_extent_map(em);
 
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
 
-       /* Add [8K, 24K) */
+       /* Add [8K, 32K) */
        em->start = SZ_8K;
-       em->len = 24 * 1024ULL;
+       em->len = 24 * SZ_1K;
        em->block_start = SZ_16K; /* avoid merging */
-       em->block_len = 24 * 1024ULL;
+       em->block_len = 24 * SZ_1K;
        ret = add_extent_mapping(em_tree, em, 0);
-       ASSERT(ret == 0);
+       if (ret < 0) {
+               test_err("cannot add extent range [8K, 32K)");
+               goto out;
+       }
        free_extent_map(em);
 
        em = alloc_extent_map();
-       if (!em)
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
                goto out;
+       }
        /* Add [0K, 32K) */
        em->start = 0;
        em->len = SZ_32K;
        em->block_start = 0;
        em->block_len = SZ_32K;
        ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
-       if (ret)
+       if (ret) {
                test_err("case4 [0x%llx 0x%llx): ret %d",
                         start, len, ret);
-       if (em &&
-           (start < em->start || start + len > extent_map_end(em)))
+               goto out;
+       }
+       if (em && (start < em->start || start + len > extent_map_end(em))) {
                test_err(
 "case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
                         start, len, ret, em->start, em->len, em->block_start,
                         em->block_len);
+               ret = -EINVAL;
+       }
        free_extent_map(em);
 out:
-       /* free memory */
        free_extent_map_tree(em_tree);
+
+       return ret;
 }
 
 /*
@@ -329,17 +402,24 @@ out:
  *                                             # handle -EEXIST when adding
  *                                             # [0, 32K)
  */
-static void test_case_4(struct btrfs_fs_info *fs_info,
+static int test_case_4(struct btrfs_fs_info *fs_info,
                struct extent_map_tree *em_tree)
 {
-       __test_case_4(fs_info, em_tree, 0);
-       __test_case_4(fs_info, em_tree, SZ_4K);
+       int ret;
+
+       ret = __test_case_4(fs_info, em_tree, 0);
+       if (ret)
+               return ret;
+       ret = __test_case_4(fs_info, em_tree, SZ_4K);
+
+       return ret;
 }
 
 int btrfs_test_extent_map(void)
 {
        struct btrfs_fs_info *fs_info = NULL;
        struct extent_map_tree *em_tree;
+       int ret = 0;
 
        test_msg("running extent_map tests");
 
@@ -349,25 +429,32 @@ int btrfs_test_extent_map(void)
         */
        fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
        if (!fs_info) {
-               test_msg("Couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                return -ENOMEM;
        }
 
        em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
-       if (!em_tree)
-               /* Skip the test on error. */
+       if (!em_tree) {
+               ret = -ENOMEM;
                goto out;
+       }
 
        extent_map_tree_init(em_tree);
 
-       test_case_1(fs_info, em_tree);
-       test_case_2(fs_info, em_tree);
-       test_case_3(fs_info, em_tree);
-       test_case_4(fs_info, em_tree);
+       ret = test_case_1(fs_info, em_tree);
+       if (ret)
+               goto out;
+       ret = test_case_2(fs_info, em_tree);
+       if (ret)
+               goto out;
+       ret = test_case_3(fs_info, em_tree);
+       if (ret)
+               goto out;
+       ret = test_case_4(fs_info, em_tree);
 
-       kfree(em_tree);
 out:
+       kfree(em_tree);
        btrfs_free_dummy_fs_info(fs_info);
 
-       return 0;
+       return ret;
 }
index 5c2f77e..af89f66 100644 (file)
@@ -404,7 +404,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
        };
        const struct btrfs_free_space_op *orig_free_space_ops;
 
-       test_msg("running space stealing from bitmap to extent");
+       test_msg("running space stealing from bitmap to extent tests");
 
        /*
         * For this test, we want to ensure we end up with an extent entry
@@ -834,9 +834,10 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
 
        test_msg("running btrfs free space cache tests");
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
-       if (!fs_info)
+       if (!fs_info) {
+               test_std_err(TEST_ALLOC_FS_INFO);
                return -ENOMEM;
-
+       }
 
        /*
         * For ppc64 (with 64k page size), bytes per bitmap might be
@@ -846,13 +847,14 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
        cache = btrfs_alloc_dummy_block_group(fs_info,
                                      BITS_PER_BITMAP * sectorsize + PAGE_SIZE);
        if (!cache) {
-               test_err("couldn't run the tests");
+               test_std_err(TEST_ALLOC_BLOCK_GROUP);
                btrfs_free_dummy_fs_info(fs_info);
                return 0;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(root);
                goto out;
        }
@@ -874,6 +876,5 @@ out:
        btrfs_free_dummy_block_group(cache);
        btrfs_free_dummy_root(root);
        btrfs_free_dummy_fs_info(fs_info);
-       test_msg("free space cache tests finished");
        return ret;
 }
index f7a969b..a90dad1 100644 (file)
@@ -30,7 +30,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
        unsigned int i;
        int ret;
 
-       info = search_free_space_info(trans, fs_info, cache, path, 0);
+       info = search_free_space_info(trans, cache, path, 0);
        if (IS_ERR(info)) {
                test_err("could not find free space info");
                ret = PTR_ERR(info);
@@ -115,7 +115,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
        u32 flags;
        int ret;
 
-       info = search_free_space_info(trans, fs_info, cache, path, 0);
+       info = search_free_space_info(trans, cache, path, 0);
        if (IS_ERR(info)) {
                test_err("could not find free space info");
                btrfs_release_path(path);
@@ -444,14 +444,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
 
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                ret = -ENOMEM;
                goto out;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("couldn't allocate dummy root");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(root);
                goto out;
        }
@@ -463,7 +463,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
 
        root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
        if (!root->node) {
-               test_err("couldn't allocate dummy buffer");
+               test_std_err(TEST_ALLOC_EXTENT_BUFFER);
                ret = -ENOMEM;
                goto out;
        }
@@ -473,7 +473,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
 
        cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
        if (!cache) {
-               test_err("couldn't allocate dummy block group cache");
+               test_std_err(TEST_ALLOC_BLOCK_GROUP);
                ret = -ENOMEM;
                goto out;
        }
@@ -486,7 +486,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
 
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("couldn't allocate path");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = -ENOMEM;
                goto out;
        }
index af0c8e3..bc6dbd1 100644 (file)
@@ -226,31 +226,34 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
        u64 offset;
        int ret = -ENOMEM;
 
+       test_msg("running btrfs_get_extent tests");
+
        inode = btrfs_new_test_inode();
        if (!inode) {
-               test_err("couldn't allocate inode");
+               test_std_err(TEST_ALLOC_INODE);
                return ret;
        }
 
+       inode->i_mode = S_IFREG;
        BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
        BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
        BTRFS_I(inode)->location.offset = 0;
 
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                goto out;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("couldn't allocate root");
+               test_std_err(TEST_ALLOC_ROOT);
                goto out;
        }
 
        root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
        if (!root->node) {
-               test_err("couldn't allocate dummy buffer");
+               test_std_err(TEST_ALLOC_ROOT);
                goto out;
        }
 
@@ -827,9 +830,11 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
        struct extent_map *em = NULL;
        int ret = -ENOMEM;
 
+       test_msg("running hole first btrfs_get_extent test");
+
        inode = btrfs_new_test_inode();
        if (!inode) {
-               test_err("couldn't allocate inode");
+               test_std_err(TEST_ALLOC_INODE);
                return ret;
        }
 
@@ -839,19 +844,19 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
 
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                goto out;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("couldn't allocate root");
+               test_std_err(TEST_ALLOC_ROOT);
                goto out;
        }
 
        root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
        if (!root->node) {
-               test_err("couldn't allocate dummy buffer");
+               test_std_err(TEST_ALLOC_ROOT);
                goto out;
        }
 
@@ -927,21 +932,23 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        struct btrfs_root *root = NULL;
        int ret = -ENOMEM;
 
+       test_msg("running outstanding_extents tests");
+
        inode = btrfs_new_test_inode();
        if (!inode) {
-               test_err("couldn't allocate inode");
+               test_std_err(TEST_ALLOC_INODE);
                return ret;
        }
 
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                goto out;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("couldn't allocate root");
+               test_std_err(TEST_ALLOC_ROOT);
                goto out;
        }
 
@@ -1110,17 +1117,16 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
 {
        int ret;
 
+       test_msg("running inode tests");
+
        set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
        set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
 
-       test_msg("running btrfs_get_extent tests");
        ret = test_btrfs_get_extent(sectorsize, nodesize);
        if (ret)
                return ret;
-       test_msg("running hole first btrfs_get_extent test");
        ret = test_hole_first(sectorsize, nodesize);
        if (ret)
                return ret;
-       test_msg("running outstanding_extents tests");
        return test_extent_accounting(sectorsize, nodesize);
 }
index 412b910..09aaca1 100644 (file)
@@ -32,7 +32,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
 
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("couldn't allocate path");
+               test_std_err(TEST_ALLOC_ROOT);
                return -ENOMEM;
        }
 
@@ -82,7 +82,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("couldn't allocate path");
+               test_std_err(TEST_ALLOC_ROOT);
                return -ENOMEM;
        }
 
@@ -132,7 +132,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
 
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("couldn't allocate path");
+               test_std_err(TEST_ALLOC_ROOT);
                return -ENOMEM;
        }
        path->leave_spinning = 1;
@@ -166,7 +166,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
 
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("couldn't allocate path");
+               test_std_err(TEST_ALLOC_ROOT);
                return -ENOMEM;
        }
 
@@ -215,7 +215,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 
        btrfs_init_dummy_trans(&trans, fs_info);
 
-       test_msg("qgroup basic add");
+       test_msg("running qgroup add/remove tests");
        ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
        if (ret) {
                test_err("couldn't create a qgroup %d", ret);
@@ -316,7 +316,7 @@ static int test_multiple_refs(struct btrfs_root *root,
 
        btrfs_init_dummy_trans(&trans, fs_info);
 
-       test_msg("qgroup multiple refs test");
+       test_msg("running qgroup multiple refs test");
 
        /*
         * We have BTRFS_FS_TREE_OBJECTID created already from the
@@ -457,13 +457,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
 
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                return -ENOMEM;
        }
 
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("couldn't allocate root");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(root);
                goto out;
        }
@@ -495,7 +495,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
 
        tmp_root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(tmp_root)) {
-               test_err("couldn't allocate a fs root");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(tmp_root);
                goto out;
        }
@@ -510,7 +510,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
 
        tmp_root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(tmp_root)) {
-               test_err("couldn't allocate a fs root");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(tmp_root);
                goto out;
        }
index e4e665f..3f6811c 100644 (file)
@@ -50,14 +50,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
                        btrfs_err(transaction->fs_info,
                                  "pending csums is %llu",
                                  transaction->delayed_refs.pending_csums);
-               while (!list_empty(&transaction->pending_chunks)) {
-                       struct extent_map *em;
-
-                       em = list_first_entry(&transaction->pending_chunks,
-                                             struct extent_map, list);
-                       list_del_init(&em->list);
-                       free_extent_map(em);
-               }
                /*
                 * If any block groups are found in ->deleted_bgs then it's
                 * because the transaction was aborted and a commit did not
@@ -75,39 +67,11 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
                        btrfs_put_block_group_trimming(cache);
                        btrfs_put_block_group(cache);
                }
+               WARN_ON(!list_empty(&transaction->dev_update_list));
                kfree(transaction);
        }
 }
 
-static void clear_btree_io_tree(struct extent_io_tree *tree)
-{
-       spin_lock(&tree->lock);
-       /*
-        * Do a single barrier for the waitqueue_active check here, the state
-        * of the waitqueue should not change once clear_btree_io_tree is
-        * called.
-        */
-       smp_mb();
-       while (!RB_EMPTY_ROOT(&tree->state)) {
-               struct rb_node *node;
-               struct extent_state *state;
-
-               node = rb_first(&tree->state);
-               state = rb_entry(node, struct extent_state, rb_node);
-               rb_erase(&state->rb_node, &tree->state);
-               RB_CLEAR_NODE(&state->rb_node);
-               /*
-                * btree io trees aren't supposed to have tasks waiting for
-                * changes in the flags of extent states ever.
-                */
-               ASSERT(!waitqueue_active(&state->wq));
-               free_extent_state(state);
-
-               cond_resched_lock(&tree->lock);
-       }
-       spin_unlock(&tree->lock);
-}
-
 static noinline void switch_commit_roots(struct btrfs_transaction *trans)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -121,7 +85,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
                root->commit_root = btrfs_root_node(root);
                if (is_fstree(root->root_key.objectid))
                        btrfs_unpin_free_ino(root);
-               clear_btree_io_tree(&root->dirty_log_pages);
+               extent_io_tree_release(&root->dirty_log_pages);
                btrfs_qgroup_clean_swapped_blocks(root);
        }
 
@@ -263,19 +227,18 @@ loop:
        spin_lock_init(&cur_trans->delayed_refs.lock);
 
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
-       INIT_LIST_HEAD(&cur_trans->pending_chunks);
+       INIT_LIST_HEAD(&cur_trans->dev_update_list);
        INIT_LIST_HEAD(&cur_trans->switch_commits);
        INIT_LIST_HEAD(&cur_trans->dirty_bgs);
        INIT_LIST_HEAD(&cur_trans->io_bgs);
        INIT_LIST_HEAD(&cur_trans->dropped_roots);
        mutex_init(&cur_trans->cache_write_mutex);
-       cur_trans->num_dirty_bgs = 0;
        spin_lock_init(&cur_trans->dirty_bgs_lock);
        INIT_LIST_HEAD(&cur_trans->deleted_bgs);
        spin_lock_init(&cur_trans->dropped_roots_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
-       extent_io_tree_init(&cur_trans->dirty_pages,
-                            fs_info->btree_inode);
+       extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
+                       IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
        fs_info->generation++;
        cur_trans->transid = fs_info->generation;
        fs_info->running_transaction = cur_trans;
@@ -928,7 +891,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
                 * superblock that points to btree nodes/leafs for which
                 * writeback hasn't finished yet (and without errors).
                 * We cleanup any entries left in the io tree when committing
-                * the transaction (through clear_btree_io_tree()).
+                * the transaction (through extent_io_tree_release()).
                 */
                if (err == -ENOMEM) {
                        err = 0;
@@ -973,7 +936,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
                 * left in the io tree. For a log commit, we don't remove them
                 * after committing the log because the tree can be accessed
                 * concurrently - we do it only at transaction commit time when
-                * it's safe to do it (through clear_btree_io_tree()).
+                * it's safe to do it (through extent_io_tree_release()).
                 */
                err = clear_extent_bit(dirty_pages, start, end,
                                       EXTENT_NEED_WAIT, 0, 0, &cached_state);
@@ -1051,7 +1014,7 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
        blk_finish_plug(&plug);
        ret2 = btrfs_wait_extents(fs_info, dirty_pages);
 
-       clear_btree_io_tree(&trans->transaction->dirty_pages);
+       extent_io_tree_release(&trans->transaction->dirty_pages);
 
        if (ret)
                return ret;
@@ -1130,17 +1093,17 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
        if (ret)
                return ret;
 
-       ret = btrfs_run_dev_stats(trans, fs_info);
+       ret = btrfs_run_dev_stats(trans);
        if (ret)
                return ret;
-       ret = btrfs_run_dev_replace(trans, fs_info);
+       ret = btrfs_run_dev_replace(trans);
        if (ret)
                return ret;
        ret = btrfs_run_qgroups(trans);
        if (ret)
                return ret;
 
-       ret = btrfs_setup_space_cache(trans, fs_info);
+       ret = btrfs_setup_space_cache(trans);
        if (ret)
                return ret;
 
@@ -1168,7 +1131,7 @@ again:
        }
 
        while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
-               ret = btrfs_write_dirty_block_groups(trans, fs_info);
+               ret = btrfs_write_dirty_block_groups(trans);
                if (ret)
                        return ret;
                ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
@@ -2241,8 +2204,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
        memcpy(fs_info->super_for_commit, fs_info->super_copy,
               sizeof(*fs_info->super_copy));
 
-       btrfs_update_commit_device_size(fs_info);
-       btrfs_update_commit_device_bytes_used(cur_trans);
+       btrfs_commit_device_sizes(cur_trans);
 
        clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
        clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
index f1ba789..78c446c 100644 (file)
@@ -51,7 +51,7 @@ struct btrfs_transaction {
        wait_queue_head_t writer_wait;
        wait_queue_head_t commit_wait;
        struct list_head pending_snapshots;
-       struct list_head pending_chunks;
+       struct list_head dev_update_list;
        struct list_head switch_commits;
        struct list_head dirty_bgs;
 
@@ -80,7 +80,6 @@ struct btrfs_transaction {
         */
        struct mutex cache_write_mutex;
        spinlock_t dirty_bgs_lock;
-       unsigned int num_dirty_bgs;
        /* Protected by spin lock fs_info->unused_bgs_lock. */
        struct list_head deleted_bgs;
        spinlock_t dropped_roots_lock;
@@ -120,7 +119,6 @@ struct btrfs_trans_handle {
        bool allocating_chunk;
        bool can_flush_pending_bgs;
        bool reloc_reserved;
-       bool sync;
        bool dirty;
        struct btrfs_root *root;
        struct btrfs_fs_info *fs_info;
index a62e1e8..748cd15 100644 (file)
@@ -15,6 +15,9 @@
  * carefully reviewed otherwise so it does not prevent mount of valid images.
  */
 
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/error-injection.h>
 #include "ctree.h"
 #include "tree-checker.h"
 #include "disk-io.h"
  * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
  * Allows callers to customize the output.
  */
-__printf(4, 5)
+__printf(3, 4)
 __cold
-static void generic_err(const struct btrfs_fs_info *fs_info,
-                       const struct extent_buffer *eb, int slot,
+static void generic_err(const struct extent_buffer *eb, int slot,
                        const char *fmt, ...)
 {
+       const struct btrfs_fs_info *fs_info = eb->fs_info;
        struct va_format vaf;
        va_list args;
 
@@ -66,12 +69,12 @@ static void generic_err(const struct btrfs_fs_info *fs_info,
  * Customized reporter for extent data item, since its key objectid and
  * offset have their own meaning.
  */
-__printf(4, 5)
+__printf(3, 4)
 __cold
-static void file_extent_err(const struct btrfs_fs_info *fs_info,
-                           const struct extent_buffer *eb, int slot,
+static void file_extent_err(const struct extent_buffer *eb, int slot,
                            const char *fmt, ...)
 {
+       const struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_key key;
        struct va_format vaf;
        va_list args;
@@ -94,26 +97,26 @@ static void file_extent_err(const struct btrfs_fs_info *fs_info,
  * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
  * Else return 1
  */
-#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment)           \
+#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)                    \
 ({                                                                           \
        if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
-               file_extent_err((fs_info), (leaf), (slot),                    \
+               file_extent_err((leaf), (slot),                               \
        "invalid %s for file extent, have %llu, should be aligned to %u",     \
                        (#name), btrfs_file_extent_##name((leaf), (fi)),      \
                        (alignment));                                         \
        (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
 })
 
-static int check_extent_data_item(struct btrfs_fs_info *fs_info,
-                                 struct extent_buffer *leaf,
+static int check_extent_data_item(struct extent_buffer *leaf,
                                  struct btrfs_key *key, int slot)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_file_extent_item *fi;
        u32 sectorsize = fs_info->sectorsize;
        u32 item_size = btrfs_item_size_nr(leaf, slot);
 
        if (!IS_ALIGNED(key->offset, sectorsize)) {
-               file_extent_err(fs_info, leaf, slot,
+               file_extent_err(leaf, slot,
 "unaligned file_offset for file extent, have %llu should be aligned to %u",
                        key->offset, sectorsize);
                return -EUCLEAN;
@@ -122,7 +125,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
        fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 
        if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
-               file_extent_err(fs_info, leaf, slot,
+               file_extent_err(leaf, slot,
                "invalid type for file extent, have %u expect range [0, %u]",
                        btrfs_file_extent_type(leaf, fi),
                        BTRFS_FILE_EXTENT_TYPES);
@@ -134,14 +137,14 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
         * and must be caught in open_ctree().
         */
        if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
-               file_extent_err(fs_info, leaf, slot,
+               file_extent_err(leaf, slot,
        "invalid compression for file extent, have %u expect range [0, %u]",
                        btrfs_file_extent_compression(leaf, fi),
                        BTRFS_COMPRESS_TYPES);
                return -EUCLEAN;
        }
        if (btrfs_file_extent_encryption(leaf, fi)) {
-               file_extent_err(fs_info, leaf, slot,
+               file_extent_err(leaf, slot,
                        "invalid encryption for file extent, have %u expect 0",
                        btrfs_file_extent_encryption(leaf, fi));
                return -EUCLEAN;
@@ -149,7 +152,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
        if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
                /* Inline extent must have 0 as key offset */
                if (key->offset) {
-                       file_extent_err(fs_info, leaf, slot,
+                       file_extent_err(leaf, slot,
                "invalid file_offset for inline file extent, have %llu expect 0",
                                key->offset);
                        return -EUCLEAN;
@@ -163,7 +166,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
                /* Uncompressed inline extent size must match item size */
                if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
                    btrfs_file_extent_ram_bytes(leaf, fi)) {
-                       file_extent_err(fs_info, leaf, slot,
+                       file_extent_err(leaf, slot,
        "invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
                                item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
                                btrfs_file_extent_ram_bytes(leaf, fi));
@@ -174,41 +177,41 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
 
        /* Regular or preallocated extent has fixed item size */
        if (item_size != sizeof(*fi)) {
-               file_extent_err(fs_info, leaf, slot,
+               file_extent_err(leaf, slot,
        "invalid item size for reg/prealloc file extent, have %u expect %zu",
                        item_size, sizeof(*fi));
                return -EUCLEAN;
        }
-       if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) ||
-           CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) ||
-           CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) ||
-           CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) ||
-           CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize))
+       if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
+           CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
+           CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
+           CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
+           CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
                return -EUCLEAN;
        return 0;
 }
 
-static int check_csum_item(struct btrfs_fs_info *fs_info,
-                          struct extent_buffer *leaf, struct btrfs_key *key,
+static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
                           int slot)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        u32 sectorsize = fs_info->sectorsize;
        u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
 
        if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
-               generic_err(fs_info, leaf, slot,
+               generic_err(leaf, slot,
                "invalid key objectid for csum item, have %llu expect %llu",
                        key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
                return -EUCLEAN;
        }
        if (!IS_ALIGNED(key->offset, sectorsize)) {
-               generic_err(fs_info, leaf, slot,
+               generic_err(leaf, slot,
        "unaligned key offset for csum item, have %llu should be aligned to %u",
                        key->offset, sectorsize);
                return -EUCLEAN;
        }
        if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
-               generic_err(fs_info, leaf, slot,
+               generic_err(leaf, slot,
        "unaligned item size for csum item, have %u should be aligned to %u",
                        btrfs_item_size_nr(leaf, slot), csumsize);
                return -EUCLEAN;
@@ -220,12 +223,12 @@ static int check_csum_item(struct btrfs_fs_info *fs_info,
  * Customized reporter for dir_item, only important new info is key->objectid,
  * which represents the inode number
  */
-__printf(4, 5)
+__printf(3, 4)
 __cold
-static void dir_item_err(const struct btrfs_fs_info *fs_info,
-                        const struct extent_buffer *eb, int slot,
+static void dir_item_err(const struct extent_buffer *eb, int slot,
                         const char *fmt, ...)
 {
+       const struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_key key;
        struct va_format vaf;
        va_list args;
@@ -244,10 +247,10 @@ static void dir_item_err(const struct btrfs_fs_info *fs_info,
        va_end(args);
 }
 
-static int check_dir_item(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *leaf,
+static int check_dir_item(struct extent_buffer *leaf,
                          struct btrfs_key *key, int slot)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_dir_item *di;
        u32 item_size = btrfs_item_size_nr(leaf, slot);
        u32 cur = 0;
@@ -263,7 +266,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
 
                /* header itself should not cross item boundary */
                if (cur + sizeof(*di) > item_size) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                "dir item header crosses item boundary, have %zu boundary %u",
                                cur + sizeof(*di), item_size);
                        return -EUCLEAN;
@@ -272,7 +275,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
                /* dir type check */
                dir_type = btrfs_dir_type(leaf, di);
                if (dir_type >= BTRFS_FT_MAX) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                        "invalid dir item type, have %u expect [0, %u)",
                                dir_type, BTRFS_FT_MAX);
                        return -EUCLEAN;
@@ -280,14 +283,14 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
 
                if (key->type == BTRFS_XATTR_ITEM_KEY &&
                    dir_type != BTRFS_FT_XATTR) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                "invalid dir item type for XATTR key, have %u expect %u",
                                dir_type, BTRFS_FT_XATTR);
                        return -EUCLEAN;
                }
                if (dir_type == BTRFS_FT_XATTR &&
                    key->type != BTRFS_XATTR_ITEM_KEY) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                        "xattr dir type found for non-XATTR key");
                        return -EUCLEAN;
                }
@@ -300,13 +303,13 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
                name_len = btrfs_dir_name_len(leaf, di);
                data_len = btrfs_dir_data_len(leaf, di);
                if (name_len > max_name_len) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                        "dir item name len too long, have %u max %u",
                                name_len, max_name_len);
                        return -EUCLEAN;
                }
                if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                        "dir item name and data len too long, have %u max %u",
                                name_len + data_len,
                                BTRFS_MAX_XATTR_SIZE(fs_info));
@@ -314,7 +317,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
                }
 
                if (data_len && dir_type != BTRFS_FT_XATTR) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                        "dir item with invalid data len, have %u expect 0",
                                data_len);
                        return -EUCLEAN;
@@ -324,7 +327,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
 
                /* header and name/data should not cross item boundary */
                if (cur + total_size > item_size) {
-                       dir_item_err(fs_info, leaf, slot,
+                       dir_item_err(leaf, slot,
                "dir item data crosses item boundary, have %u boundary %u",
                                cur + total_size, item_size);
                        return -EUCLEAN;
@@ -342,7 +345,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
                                        (unsigned long)(di + 1), name_len);
                        name_hash = btrfs_name_hash(namebuf, name_len);
                        if (key->offset != name_hash) {
-                               dir_item_err(fs_info, leaf, slot,
+                               dir_item_err(leaf, slot,
                "name hash mismatch with key, have 0x%016x expect 0x%016llx",
                                        name_hash, key->offset);
                                return -EUCLEAN;
@@ -354,12 +357,12 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
-__printf(4, 5)
+__printf(3, 4)
 __cold
-static void block_group_err(const struct btrfs_fs_info *fs_info,
-                           const struct extent_buffer *eb, int slot,
+static void block_group_err(const struct extent_buffer *eb, int slot,
                            const char *fmt, ...)
 {
+       const struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_key key;
        struct va_format vaf;
        va_list args;
@@ -378,8 +381,7 @@ static void block_group_err(const struct btrfs_fs_info *fs_info,
        va_end(args);
 }
 
-static int check_block_group_item(struct btrfs_fs_info *fs_info,
-                                 struct extent_buffer *leaf,
+static int check_block_group_item(struct extent_buffer *leaf,
                                  struct btrfs_key *key, int slot)
 {
        struct btrfs_block_group_item bgi;
@@ -392,13 +394,13 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
         * handle it.  We care more about the size.
         */
        if (key->offset == 0) {
-               block_group_err(fs_info, leaf, slot,
+               block_group_err(leaf, slot,
                                "invalid block group size 0");
                return -EUCLEAN;
        }
 
        if (item_size != sizeof(bgi)) {
-               block_group_err(fs_info, leaf, slot,
+               block_group_err(leaf, slot,
                        "invalid item size, have %u expect %zu",
                                item_size, sizeof(bgi));
                return -EUCLEAN;
@@ -408,7 +410,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
                           sizeof(bgi));
        if (btrfs_block_group_chunk_objectid(&bgi) !=
            BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
-               block_group_err(fs_info, leaf, slot,
+               block_group_err(leaf, slot,
                "invalid block group chunk objectid, have %llu expect %llu",
                                btrfs_block_group_chunk_objectid(&bgi),
                                BTRFS_FIRST_CHUNK_TREE_OBJECTID);
@@ -416,7 +418,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
        }
 
        if (btrfs_block_group_used(&bgi) > key->offset) {
-               block_group_err(fs_info, leaf, slot,
+               block_group_err(leaf, slot,
                        "invalid block group used, have %llu expect [0, %llu)",
                                btrfs_block_group_used(&bgi), key->offset);
                return -EUCLEAN;
@@ -424,7 +426,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
 
        flags = btrfs_block_group_flags(&bgi);
        if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
-               block_group_err(fs_info, leaf, slot,
+               block_group_err(leaf, slot,
 "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
                        flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
                        hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
@@ -437,7 +439,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
            type != BTRFS_BLOCK_GROUP_SYSTEM &&
            type != (BTRFS_BLOCK_GROUP_METADATA |
                           BTRFS_BLOCK_GROUP_DATA)) {
-               block_group_err(fs_info, leaf, slot,
+               block_group_err(leaf, slot,
 "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
                        type, hweight64(type),
                        BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
@@ -448,37 +450,367 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+/*
+ * Report a corrupted chunk item through btrfs_crit().
+ *
+ * @leaf:    extent buffer the chunk lives in, either a tree leaf or the
+ *           superblock eb carrying the sys chunk array
+ * @chunk:   the offending chunk item inside @leaf
+ * @logical: logical start of the chunk, printed as chunk_start
+ * @fmt:     printf-style description of the problem, followed by its args
+ */
+__printf(4, 5)
+__cold
+static void chunk_err(const struct extent_buffer *leaf,
+                     const struct btrfs_chunk *chunk, u64 logical,
+                     const char *fmt, ...)
+{
+       const struct btrfs_fs_info *fs_info = leaf->fs_info;
+       /* No eb other than the superblock one starts at such a small offset */
+       const bool from_super = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
+       const unsigned long chunk_offset = (unsigned long)chunk;
+       struct va_format vaf;
+       va_list args;
+       int found_slot = -1;
+       int nr;
+
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       if (from_super) {
+               btrfs_crit(fs_info,
+               "corrupt superblock syschunk array: chunk_start=%llu, %pV",
+                          logical, &vaf);
+               va_end(args);
+               return;
+       }
+
+       /*
+        * Tree leaf: walk the slots looking for the item whose data offset
+        * equals @chunk, so the message can name the slot.  -1 is printed
+        * when no slot matches.
+        */
+       for (nr = 0; nr < btrfs_header_nritems(leaf); nr++) {
+               if (btrfs_item_ptr_offset(leaf, nr) != chunk_offset)
+                       continue;
+               found_slot = nr;
+               break;
+       }
+
+       btrfs_crit(fs_info,
+       "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
+                  BTRFS_CHUNK_TREE_OBJECTID, leaf->start, found_slot,
+                  logical, &vaf);
+       va_end(args);
+}
+
+/*
+ * The common chunk check which could also work on super block sys chunk array.
+ *
+ * @leaf:    extent buffer holding the chunk item; its fs_info supplies the
+ *           sectorsize and the super block copy consulted by the checks
+ * @chunk:   the chunk item inside @leaf to validate
+ * @logical: logical start of the chunk; checked for sectorsize alignment and
+ *           echoed in every error message
+ *
+ * The checks run in a fixed order and the first failure is reported through
+ * chunk_err() before returning.
+ *
+ * Return -EUCLEAN if anything is corrupted.
+ * Return 0 if everything is OK.
+ */
+int btrfs_check_chunk_valid(struct extent_buffer *leaf,
+                           struct btrfs_chunk *chunk, u64 logical)
+{
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
+       u64 length;
+       u64 stripe_len;
+       u16 num_stripes;
+       u16 sub_stripes;
+       u64 type;
+       u64 features;
+       bool mixed = false;
+
+       length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+       type = btrfs_chunk_type(leaf, chunk);
+
+       if (!num_stripes) {
+               chunk_err(leaf, chunk, logical,
+                         "invalid chunk num_stripes, have %u", num_stripes);
+               return -EUCLEAN;
+       }
+       if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
+               chunk_err(leaf, chunk, logical,
+               "invalid chunk logical, have %llu should aligned to %u",
+                         logical, fs_info->sectorsize);
+               return -EUCLEAN;
+       }
+       if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
+               chunk_err(leaf, chunk, logical,
+                         "invalid chunk sectorsize, have %u expect %u",
+                         btrfs_chunk_sector_size(leaf, chunk),
+                         fs_info->sectorsize);
+               return -EUCLEAN;
+       }
+       if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
+               chunk_err(leaf, chunk, logical,
+                         "invalid chunk length, have %llu", length);
+               return -EUCLEAN;
+       }
+       /* BTRFS_STRIPE_LEN is the only stripe length accepted here */
+       if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
+               chunk_err(leaf, chunk, logical,
+                         "invalid chunk stripe length: %llu",
+                         stripe_len);
+               return -EUCLEAN;
+       }
+       /* Reject any bit outside the known type and profile masks */
+       if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+           type) {
+               chunk_err(leaf, chunk, logical,
+                         "unrecognized chunk type: 0x%llx",
+                         ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                           BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+                         btrfs_chunk_type(leaf, chunk));
+               return -EUCLEAN;
+       }
+
+       /* At most one profile bit may be set; no profile bit is accepted too */
+       if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+           (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
+               chunk_err(leaf, chunk, logical,
+               "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
+                         type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+               return -EUCLEAN;
+       }
+       if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+               chunk_err(leaf, chunk, logical,
+       "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
+                         type, BTRFS_BLOCK_GROUP_TYPE_MASK);
+               return -EUCLEAN;
+       }
+
+       /* SYSTEM must not be combined with DATA or METADATA */
+       if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+           (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+               chunk_err(leaf, chunk, logical,
+                         "system chunk with data or metadata type: 0x%llx",
+                         type);
+               return -EUCLEAN;
+       }
+
+       /*
+        * DATA and METADATA may only be set together when the super block
+        * carries the MIXED_GROUPS incompat flag.
+        */
+       features = btrfs_super_incompat_flags(fs_info->super_copy);
+       if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+               mixed = true;
+
+       if (!mixed) {
+               if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+                   (type & BTRFS_BLOCK_GROUP_DATA)) {
+                       chunk_err(leaf, chunk, logical,
+                       "mixed chunk type in non-mixed mode: 0x%llx", type);
+                       return -EUCLEAN;
+               }
+       }
+
+       /* Each profile dictates the stripe counts it accepts */
+       if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+           (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
+           ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
+               chunk_err(leaf, chunk, logical,
+                       "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+                       num_stripes, sub_stripes,
+                       type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+               return -EUCLEAN;
+       }
+
+       return 0;
+}
+
+/*
+ * Report a corrupted DEV_ITEM through btrfs_crit().
+ *
+ * @eb:   extent buffer holding the item
+ * @slot: slot of the item inside @eb
+ * @fmt:  printf-style description of the problem, followed by its args
+ *
+ * The devid printed in the message prefix is taken from the key offset:
+ * check_dev_item() requires the key objectid to be the constant
+ * BTRFS_DEV_ITEMS_OBJECTID and matches the item's device id against
+ * key->offset, so the objectid never identifies the device.
+ */
+__printf(3, 4)
+__cold
+static void dev_item_err(const struct extent_buffer *eb, int slot,
+                        const char *fmt, ...)
+{
+       struct btrfs_key key;
+       struct va_format vaf;
+       va_list args;
+
+       btrfs_item_key_to_cpu(eb, &key, slot);
+       va_start(args, fmt);
+
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       btrfs_crit(eb->fs_info,
+       "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
+               btrfs_header_level(eb) == 0 ? "leaf" : "node",
+               btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+               key.offset, &vaf);
+       va_end(args);
+}
+
+/*
+ * Validate one DEV_ITEM of @leaf at @slot.
+ *
+ * @leaf: leaf holding the item
+ * @key:  cpu key of the item; objectid must be BTRFS_DEV_ITEMS_OBJECTID and
+ *        offset must equal the device id stored in the item
+ * @slot: slot of the item inside @leaf
+ *
+ * Return 0 if the item passes all checks, -EUCLEAN after reporting the first
+ * problem through dev_item_err().
+ */
+static int check_dev_item(struct extent_buffer *leaf,
+                         struct btrfs_key *key, int slot)
+{
+       struct btrfs_dev_item *ditem;
+
+       if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
+               dev_item_err(leaf, slot,
+                            "invalid objectid: has=%llu expect=%llu",
+                            key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
+               return -EUCLEAN;
+       }
+
+       /*
+        * No upper bound is enforced on the devid (key->offset).
+        * BTRFS_MAX_DEVS() and BTRFS_MAX_DEVS_SYS_CHUNK are not limits on
+        * devid values, and a devid can grow past them on a filesystem that
+        * has seen repeated device add/remove, so comparing against them
+        * rejects valid filesystems.
+        */
+       ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
+       if (btrfs_device_id(leaf, ditem) != key->offset) {
+               dev_item_err(leaf, slot,
+                            "devid mismatch: key has=%llu item has=%llu",
+                            key->offset, btrfs_device_id(leaf, ditem));
+               return -EUCLEAN;
+       }
+
+       /*
+        * For device total_bytes, we don't have reliable way to check it, as
+        * it can be 0 for device removal. Device size check can only be done
+        * by dev extents check.
+        */
+       if (btrfs_device_bytes_used(leaf, ditem) >
+           btrfs_device_total_bytes(leaf, ditem)) {
+               dev_item_err(leaf, slot,
+                            "invalid bytes used: have %llu expect [0, %llu]",
+                            btrfs_device_bytes_used(leaf, ditem),
+                            btrfs_device_total_bytes(leaf, ditem));
+               return -EUCLEAN;
+       }
+       /*
+        * Remaining members like io_align/type/gen/dev_group aren't really
+        * utilized.  Skip them to make later usage of them easier.
+        */
+       return 0;
+}
+
+/*
+ * Inode item error output has the same format as dir_item_err().
+ *
+ * The fs_info argument is accepted but not used by the expansion; only eb,
+ * slot, fmt and the variadic arguments are forwarded to dir_item_err().
+ */
+#define inode_item_err(fs_info, eb, slot, fmt, ...)                    \
+       dir_item_err(eb, slot, fmt, __VA_ARGS__)
+
+/*
+ * Validate one INODE_ITEM of @leaf at @slot.
+ *
+ * @leaf: leaf holding the item; its fs_info provides the super block copy
+ *        whose generation bounds the generation and transid checks
+ * @key:  cpu key of the item; its objectid and offset are validated
+ * @slot: slot of the item inside @leaf
+ *
+ * Checks, in order: key objectid range, key offset, inode generation, inode
+ * transid, unknown mode bits, file type bits, directory nlink and unknown
+ * inode flags.
+ *
+ * Return 0 if the item passes all checks, -EUCLEAN after reporting the first
+ * problem.
+ */
+static int check_inode_item(struct extent_buffer *leaf,
+                           struct btrfs_key *key, int slot)
+{
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
+       struct btrfs_inode_item *iitem;
+       u64 super_gen = btrfs_super_generation(fs_info->super_copy);
+       u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
+       u32 mode;
+
+       if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
+            key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
+           key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
+           key->objectid != BTRFS_FREE_INO_OBJECTID) {
+               generic_err(leaf, slot,
+       "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
+                           key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
+                           BTRFS_FIRST_FREE_OBJECTID,
+                           BTRFS_LAST_FREE_OBJECTID,
+                           BTRFS_FREE_INO_OBJECTID);
+               return -EUCLEAN;
+       }
+       if (key->offset != 0) {
+               inode_item_err(fs_info, leaf, slot,
+                       "invalid key offset: has %llu expect 0",
+                       key->offset);
+               return -EUCLEAN;
+       }
+       iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
+
+       /* Here we use super block generation + 1 to handle log tree */
+       if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
+               inode_item_err(fs_info, leaf, slot,
+                       "invalid inode generation: has %llu expect (0, %llu]",
+                              btrfs_inode_generation(leaf, iitem),
+                              super_gen + 1);
+               return -EUCLEAN;
+       }
+       /*
+        * Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0.
+        * The message names the transid explicitly so that this failure can
+        * be told apart from the generation check above.
+        */
+       if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
+               inode_item_err(fs_info, leaf, slot,
+                       "invalid inode transid: has %llu expect [0, %llu]",
+                              btrfs_inode_transid(leaf, iitem), super_gen + 1);
+               return -EUCLEAN;
+       }
+
+       /*
+        * For size and nbytes it's better not to be too strict, as for dir
+        * item its size/nbytes can easily get wrong, but doesn't affect
+        * anything in the fs. So here we skip the check.
+        */
+       mode = btrfs_inode_mode(leaf, iitem);
+       if (mode & ~valid_mask) {
+               inode_item_err(fs_info, leaf, slot,
+                              "unknown mode bit detected: 0x%x",
+                              mode & ~valid_mask);
+               return -EUCLEAN;
+       }
+
+       /*
+        * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
+        * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
+        * Only needs to check BLK, LNK and SOCKS
+        */
+       if (!is_power_of_2(mode & S_IFMT)) {
+               if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
+                       inode_item_err(fs_info, leaf, slot,
+                       "invalid mode: has 0%o expect valid S_IF* bit(s)",
+                                      mode & S_IFMT);
+                       return -EUCLEAN;
+               }
+       }
+       if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
+               inode_item_err(fs_info, leaf, slot,
+                      "invalid nlink: has %u expect no more than 1 for dir",
+                       btrfs_inode_nlink(leaf, iitem));
+               return -EUCLEAN;
+       }
+       if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
+               inode_item_err(fs_info, leaf, slot,
+                              "unknown flags detected: 0x%llx",
+                              btrfs_inode_flags(leaf, iitem) &
+                              ~BTRFS_INODE_FLAG_MASK);
+               return -EUCLEAN;
+       }
+       return 0;
+}
+
 /*
  * Common point to switch the item-specific validation.
  */
-static int check_leaf_item(struct btrfs_fs_info *fs_info,
-                          struct extent_buffer *leaf,
+static int check_leaf_item(struct extent_buffer *leaf,
                           struct btrfs_key *key, int slot)
 {
        int ret = 0;
+       struct btrfs_chunk *chunk;
 
        switch (key->type) {
        case BTRFS_EXTENT_DATA_KEY:
-               ret = check_extent_data_item(fs_info, leaf, key, slot);
+               ret = check_extent_data_item(leaf, key, slot);
                break;
        case BTRFS_EXTENT_CSUM_KEY:
-               ret = check_csum_item(fs_info, leaf, key, slot);
+               ret = check_csum_item(leaf, key, slot);
                break;
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
        case BTRFS_XATTR_ITEM_KEY:
-               ret = check_dir_item(fs_info, leaf, key, slot);
+               ret = check_dir_item(leaf, key, slot);
                break;
        case BTRFS_BLOCK_GROUP_ITEM_KEY:
-               ret = check_block_group_item(fs_info, leaf, key, slot);
+               ret = check_block_group_item(leaf, key, slot);
+               break;
+       case BTRFS_CHUNK_ITEM_KEY:
+               chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+               ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
+               break;
+       case BTRFS_DEV_ITEM_KEY:
+               ret = check_dev_item(leaf, key, slot);
+               break;
+       case BTRFS_INODE_ITEM_KEY:
+               ret = check_inode_item(leaf, key, slot);
                break;
        }
        return ret;
 }
 
-static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
-                     bool check_item_data)
+static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        /* No valid key type is 0, so all key should be larger than this key */
        struct btrfs_key prev_key = {0, 0, 0};
        struct btrfs_key key;
@@ -486,7 +818,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
        int slot;
 
        if (btrfs_header_level(leaf) != 0) {
-               generic_err(fs_info, leaf, 0,
+               generic_err(leaf, 0,
                        "invalid level for leaf, have %d expect 0",
                        btrfs_header_level(leaf));
                return -EUCLEAN;
@@ -502,7 +834,6 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
         */
        if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
                u64 owner = btrfs_header_owner(leaf);
-               struct btrfs_root *check_root;
 
                /* These trees must never be empty */
                if (owner == BTRFS_ROOT_TREE_OBJECTID ||
@@ -511,34 +842,11 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
                    owner == BTRFS_DEV_TREE_OBJECTID ||
                    owner == BTRFS_FS_TREE_OBJECTID ||
                    owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
-                       generic_err(fs_info, leaf, 0,
+                       generic_err(leaf, 0,
                        "invalid root, root %llu must never be empty",
                                    owner);
                        return -EUCLEAN;
                }
-               key.objectid = owner;
-               key.type = BTRFS_ROOT_ITEM_KEY;
-               key.offset = (u64)-1;
-
-               check_root = btrfs_get_fs_root(fs_info, &key, false);
-               /*
-                * The only reason we also check NULL here is that during
-                * open_ctree() some roots has not yet been set up.
-                */
-               if (!IS_ERR_OR_NULL(check_root)) {
-                       struct extent_buffer *eb;
-
-                       eb = btrfs_root_node(check_root);
-                       /* if leaf is the root, then it's fine */
-                       if (leaf != eb) {
-                               generic_err(fs_info, leaf, 0,
-               "invalid nritems, have %u should not be 0 for non-root leaf",
-                                       nritems);
-                               free_extent_buffer(eb);
-                               return -EUCLEAN;
-                       }
-                       free_extent_buffer(eb);
-               }
                return 0;
        }
 
@@ -564,7 +872,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
 
                /* Make sure the keys are in the right order */
                if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
-                       generic_err(fs_info, leaf, slot,
+                       generic_err(leaf, slot,
        "bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
                                prev_key.objectid, prev_key.type,
                                prev_key.offset, key.objectid, key.type,
@@ -583,7 +891,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
                        item_end_expected = btrfs_item_offset_nr(leaf,
                                                                 slot - 1);
                if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
-                       generic_err(fs_info, leaf, slot,
+                       generic_err(leaf, slot,
                                "unexpected item end, have %u expect %u",
                                btrfs_item_end_nr(leaf, slot),
                                item_end_expected);
@@ -597,7 +905,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
                 */
                if (btrfs_item_end_nr(leaf, slot) >
                    BTRFS_LEAF_DATA_SIZE(fs_info)) {
-                       generic_err(fs_info, leaf, slot,
+                       generic_err(leaf, slot,
                        "slot end outside of leaf, have %u expect range [0, %u]",
                                btrfs_item_end_nr(leaf, slot),
                                BTRFS_LEAF_DATA_SIZE(fs_info));
@@ -607,7 +915,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
                /* Also check if the item pointer overlaps with btrfs item. */
                if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
                    btrfs_item_ptr_offset(leaf, slot)) {
-                       generic_err(fs_info, leaf, slot,
+                       generic_err(leaf, slot,
                "slot overlaps with its data, item end %lu data start %lu",
                                btrfs_item_nr_offset(slot) +
                                sizeof(struct btrfs_item),
@@ -620,7 +928,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
                         * Check if the item size and content meet other
                         * criteria
                         */
-                       ret = check_leaf_item(fs_info, leaf, &key, slot);
+                       ret = check_leaf_item(leaf, &key, slot);
                        if (ret < 0)
                                return ret;
                }
@@ -633,20 +941,20 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
        return 0;
 }
 
-int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *leaf)
+int btrfs_check_leaf_full(struct extent_buffer *leaf)
 {
-       return check_leaf(fs_info, leaf, true);
+       return check_leaf(leaf, true);
 }
+ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
 
-int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
-                            struct extent_buffer *leaf)
+int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
 {
-       return check_leaf(fs_info, leaf, false);
+       return check_leaf(leaf, false);
 }
 
-int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
+int btrfs_check_node(struct extent_buffer *node)
 {
+       struct btrfs_fs_info *fs_info = node->fs_info;
        unsigned long nr = btrfs_header_nritems(node);
        struct btrfs_key key, next_key;
        int slot;
@@ -655,7 +963,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
        int ret = 0;
 
        if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
-               generic_err(fs_info, node, 0,
+               generic_err(node, 0,
                        "invalid level for node, have %d expect [1, %d]",
                        level, BTRFS_MAX_LEVEL - 1);
                return -EUCLEAN;
@@ -675,13 +983,13 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
                btrfs_node_key_to_cpu(node, &next_key, slot + 1);
 
                if (!bytenr) {
-                       generic_err(fs_info, node, slot,
+                       generic_err(node, slot,
                                "invalid NULL node pointer");
                        ret = -EUCLEAN;
                        goto out;
                }
                if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
-                       generic_err(fs_info, node, slot,
+                       generic_err(node, slot,
                        "unaligned pointer, have %llu should be aligned to %u",
                                bytenr, fs_info->sectorsize);
                        ret = -EUCLEAN;
@@ -689,7 +997,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
                }
 
                if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
-                       generic_err(fs_info, node, slot,
+                       generic_err(node, slot,
        "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
                                key.objectid, key.type, key.offset,
                                next_key.objectid, next_key.type,
@@ -701,3 +1009,4 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
 out:
        return ret;
 }
+ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
index ff04327..32fecc9 100644 (file)
  * Will check not only the item pointers, but also every possible member
  * in item data.
  */
-int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *leaf);
+int btrfs_check_leaf_full(struct extent_buffer *leaf);
 
 /*
  * Less strict leaf checker.
  * Will only check item pointers, not reading item data.
  */
-int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
-                            struct extent_buffer *leaf);
-int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node);
+int btrfs_check_leaf_relaxed(struct extent_buffer *leaf);
+int btrfs_check_node(struct extent_buffer *node);
+
+int btrfs_check_chunk_valid(struct extent_buffer *leaf,
+                           struct btrfs_chunk *chunk, u64 logical);
 
 #endif
index 561884f..6adcd8a 100644 (file)
@@ -139,7 +139,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
        mutex_lock(&root->log_mutex);
 
        if (root->log_root) {
-               if (btrfs_need_log_full_commit(fs_info, trans)) {
+               if (btrfs_need_log_full_commit(trans)) {
                        ret = -EAGAIN;
                        goto out;
                }
@@ -225,6 +225,17 @@ void btrfs_end_log_trans(struct btrfs_root *root)
        }
 }
 
+/*
+ * Call filemap_fdatawrite_range() on the byte range
+ * [buf->start, buf->start + buf->len - 1] in the mapping of @buf's first page
+ * and hand back its return value.
+ */
+static int btrfs_write_tree_block(struct extent_buffer *buf)
+{
+       struct address_space *mapping = buf->pages[0]->mapping;
+       const u64 last_byte = buf->start + buf->len - 1;
+
+       return filemap_fdatawrite_range(mapping, buf->start, last_byte);
+}
+
+/*
+ * Call filemap_fdatawait_range() on the byte range
+ * [buf->start, buf->start + buf->len - 1] in the mapping of @buf's first
+ * page.  The return value of that call is discarded.
+ */
+static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
+{
+       struct address_space *mapping = buf->pages[0]->mapping;
+       const u64 last_byte = buf->start + buf->len - 1;
+
+       filemap_fdatawait_range(mapping, buf->start, last_byte);
+}
 
 /*
  * the walk control struct is used to pass state down the chain when
@@ -304,7 +315,7 @@ static int process_one_buffer(struct btrfs_root *log,
 
        if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
                if (wc->pin && btrfs_header_level(eb) == 0)
-                       ret = btrfs_exclude_logged_extents(fs_info, eb);
+                       ret = btrfs_exclude_logged_extents(eb);
                if (wc->write)
                        btrfs_write_tree_block(eb);
                if (wc->wait)
@@ -333,7 +344,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
                                   struct extent_buffer *eb, int slot,
                                   struct btrfs_key *key)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
        u32 item_size;
        u64 saved_i_size = 0;
@@ -454,10 +464,9 @@ insert:
                found_size = btrfs_item_size_nr(path->nodes[0],
                                                path->slots[0]);
                if (found_size > item_size)
-                       btrfs_truncate_item(fs_info, path, item_size, 1);
+                       btrfs_truncate_item(path, item_size, 1);
                else if (found_size < item_size)
-                       btrfs_extend_item(fs_info, path,
-                                         item_size - found_size);
+                       btrfs_extend_item(path, item_size - found_size);
        } else if (ret) {
                return ret;
        }
@@ -694,9 +703,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                        goto out;
 
                if (ins.objectid > 0) {
+                       struct btrfs_ref ref = { 0 };
                        u64 csum_start;
                        u64 csum_end;
                        LIST_HEAD(ordered_sums);
+
                        /*
                         * is this extent already allocated in the extent
                         * allocation tree?  If so, just add a reference
@@ -704,10 +715,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                        ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
                                                ins.offset);
                        if (ret == 0) {
-                               ret = btrfs_inc_extent_ref(trans, root,
-                                               ins.objectid, ins.offset,
-                                               0, root->root_key.objectid,
+                               btrfs_init_generic_ref(&ref,
+                                               BTRFS_ADD_DELAYED_REF,
+                                               ins.objectid, ins.offset, 0);
+                               btrfs_init_data_ref(&ref,
+                                               root->root_key.objectid,
                                                key->objectid, offset);
+                               ret = btrfs_inc_extent_ref(trans, &ref);
                                if (ret)
                                        goto out;
                        } else {
@@ -2725,7 +2739,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking_write(next);
-                                       clean_tree_block(fs_info, next);
+                                       btrfs_clean_tree_block(next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                } else {
@@ -2809,7 +2823,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking_write(next);
-                                       clean_tree_block(fs_info, next);
+                                       btrfs_clean_tree_block(next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                } else {
@@ -2891,7 +2905,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                        if (trans) {
                                btrfs_tree_lock(next);
                                btrfs_set_lock_blocking_write(next);
-                               clean_tree_block(fs_info, next);
+                               btrfs_clean_tree_block(next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
                        } else {
@@ -3066,7 +3080,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        }
 
        /* bail out if we need to do a full commit */
-       if (btrfs_need_log_full_commit(fs_info, trans)) {
+       if (btrfs_need_log_full_commit(trans)) {
                ret = -EAGAIN;
                mutex_unlock(&root->log_mutex);
                goto out;
@@ -3085,7 +3099,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        if (ret) {
                blk_finish_plug(&plug);
                btrfs_abort_transaction(trans, ret);
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                mutex_unlock(&root->log_mutex);
                goto out;
        }
@@ -3127,7 +3141,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                        list_del_init(&root_log_ctx.list);
 
                blk_finish_plug(&plug);
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
 
                if (ret != -ENOSPC) {
                        btrfs_abort_transaction(trans, ret);
@@ -3173,7 +3187,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         * now that we've moved on to the tree of log tree roots,
         * check the full commit flag again
         */
-       if (btrfs_need_log_full_commit(fs_info, trans)) {
+       if (btrfs_need_log_full_commit(trans)) {
                blk_finish_plug(&plug);
                btrfs_wait_tree_log_extents(log, mark);
                mutex_unlock(&log_root_tree->log_mutex);
@@ -3186,7 +3200,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                                         EXTENT_DIRTY | EXTENT_NEW);
        blk_finish_plug(&plug);
        if (ret) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                btrfs_abort_transaction(trans, ret);
                mutex_unlock(&log_root_tree->log_mutex);
                goto out_wake_log_root;
@@ -3196,7 +3210,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                ret = btrfs_wait_tree_log_extents(log_root_tree,
                                                  EXTENT_NEW | EXTENT_DIRTY);
        if (ret) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                mutex_unlock(&log_root_tree->log_mutex);
                goto out_wake_log_root;
        }
@@ -3218,7 +3232,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         */
        ret = write_all_supers(fs_info, 1);
        if (ret) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                btrfs_abort_transaction(trans, ret);
                goto out_wake_log_root;
        }
@@ -3422,7 +3436,7 @@ fail:
 out_unlock:
        mutex_unlock(&dir->log_mutex);
        if (ret == -ENOSPC) {
-               btrfs_set_log_full_commit(root->fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = 0;
        } else if (ret < 0)
                btrfs_abort_transaction(trans, ret);
@@ -3438,7 +3452,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
                               const char *name, int name_len,
                               struct btrfs_inode *inode, u64 dirid)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *log;
        u64 index;
        int ret;
@@ -3456,7 +3469,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
                                  dirid, &index);
        mutex_unlock(&inode->log_mutex);
        if (ret == -ENOSPC) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = 0;
        } else if (ret < 0 && ret != -ENOENT)
                btrfs_abort_transaction(trans, ret);
@@ -5442,7 +5455,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
                 * Make sure any commits to the log are forced to be full
                 * commits.
                 */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = true;
        }
        mutex_unlock(&inode->log_mutex);
@@ -5819,6 +5832,190 @@ out:
        return ret;
 }
 
+static int log_new_ancestors(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_path *path,
+                            struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_key found_key;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+       /* Walk upwards: each pass handles the inode in found_key.offset. */
+       while (true) {
+               struct btrfs_fs_info *fs_info = root->fs_info;
+               const u64 last_committed = fs_info->last_trans_committed;
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+               struct btrfs_key search_key;
+               struct inode *inode;
+               int ret = 0;
+
+               btrfs_release_path(path);
+
+               search_key.objectid = found_key.offset;
+               search_key.type = BTRFS_INODE_ITEM_KEY;
+               search_key.offset = 0;
+               inode = btrfs_iget(fs_info->sb, &search_key, root, NULL);
+               if (IS_ERR(inode))
+                       return PTR_ERR(inode);
+               /* Skip inodes whose generation is not past last_committed. */
+               if (BTRFS_I(inode)->generation > last_committed)
+                       ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
+                                             LOG_INODE_EXISTS,
+                                             0, LLONG_MAX, ctx);
+               iput(inode);
+               if (ret)
+                       return ret;
+               /* Stop at BTRFS_FIRST_FREE_OBJECTID (presumably top dir). */
+               if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID)
+                       break;
+               /* Find this inode's first INODE_REF item to continue from. */
+               search_key.type = BTRFS_INODE_REF_KEY;
+               ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+               if (ret < 0)
+                       return ret;
+               /* The search may stop one past the leaf's last item. */
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       else if (ret > 0)
+                               return -ENOENT;
+                       leaf = path->nodes[0];
+                       slot = path->slots[0];
+               }
+               /* Anything but this inode's INODE_REF ends in -ENOENT. */
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+               if (found_key.objectid != search_key.objectid ||
+                   found_key.type != BTRFS_INODE_REF_KEY)
+                       return -ENOENT;
+       }
+       return 0;
+}
+
+static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
+                                 struct btrfs_inode *inode,
+                                 struct dentry *parent,
+                                 struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct dentry *old_parent = NULL;
+       struct super_block *sb = inode->vfs_inode.i_sb;
+       int ret = 0;
+       /* Walk up the dentry chain, logging each new ancestor inode. */
+       while (true) {
+               if (!parent || d_really_is_negative(parent) ||
+                   sb != parent->d_sb)
+                       break;
+               /* Stop when the parent belongs to a different root. */
+               inode = BTRFS_I(d_inode(parent));
+               if (root != inode->root)
+                       break;
+               /* Only log when generation is past last_trans_committed. */
+               if (inode->generation > fs_info->last_trans_committed) {
+                       ret = btrfs_log_inode(trans, root, inode,
+                                       LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
+                       if (ret)
+                               break;
+               }
+               if (IS_ROOT(parent))
+                       break;
+               /* Step to the parent, dropping the ref taken last time. */
+               parent = dget_parent(parent);
+               dput(old_parent);
+               old_parent = parent;
+       }
+       dput(old_parent);
+
+       return ret;
+}
+
+static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
+                                struct btrfs_inode *inode,
+                                struct dentry *parent,
+                                struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_root *root = inode->root;
+       const u64 ino = btrfs_ino(inode);
+       struct btrfs_path *path;
+       struct btrfs_key search_key;
+       int ret;
+
+       /*
+        * For a single hard link case, go through a fast path that does not
+        * need to iterate the fs/subvolume tree.
+        */
+       if (inode->vfs_inode.i_nlink < 2)
+               return log_new_ancestors_fast(trans, inode, parent, ctx);
+       /* Several hard links: walk the inode's INODE_REF items in the tree. */
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+       /* Begin at the lowest possible INODE_REF key of this inode. */
+       search_key.objectid = ino;
+       search_key.type = BTRFS_INODE_REF_KEY;
+       search_key.offset = 0;
+again:
+       ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       if (ret == 0)
+               path->slots[0]++; /* resume after the ref already handled */
+
+       while (true) {
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+               struct btrfs_key found_key;
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       else if (ret > 0)
+                               break;
+                       continue;
+               }
+               /* Stop once the keys no longer belong to this inode's refs. */
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+               if (found_key.objectid != ino ||
+                   found_key.type > BTRFS_INODE_EXTREF_KEY)
+                       break;
+
+               /*
+                * Don't deal with extended references because they are rare
+                * cases and too complex to deal with (we would need to keep
+                * track of which subitem we are processing for each item in
+                * this loop, etc). So just return some error to fallback to
+                * a transaction commit.
+                */
+               if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
+                       ret = -EMLINK;
+                       goto out;
+               }
+
+               /*
+                * Logging ancestors needs to do more searches on the fs/subvol
+                * tree, so it releases the path as needed to avoid deadlocks.
+                * Keep track of the last inode ref key and resume from that key
+                * after logging all new ancestors for the current hard link.
+                */
+               memcpy(&search_key, &found_key, sizeof(search_key));
+
+               ret = log_new_ancestors(trans, root, path, ctx);
+               if (ret)
+                       goto out;
+               btrfs_release_path(path);
+               goto again;
+       }
+       ret = 0;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
 /*
  * helper function around btrfs_log_inode to make sure newly created
  * parent directories also end up in the log.  A minimal inode and backref
@@ -5836,11 +6033,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct super_block *sb;
-       struct dentry *old_parent = NULL;
        int ret = 0;
        u64 last_committed = fs_info->last_trans_committed;
        bool log_dentries = false;
-       struct btrfs_inode *orig_inode = inode;
 
        sb = inode->vfs_inode.i_sb;
 
@@ -5946,56 +6141,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
         * and has a link count of 2.
         */
        if (inode->last_unlink_trans > last_committed) {
-               ret = btrfs_log_all_parents(trans, orig_inode, ctx);
+               ret = btrfs_log_all_parents(trans, inode, ctx);
                if (ret)
                        goto end_trans;
        }
 
-       /*
-        * If a new hard link was added to the inode in the current transaction
-        * and its link count is now greater than 1, we need to fallback to a
-        * transaction commit, otherwise we can end up not logging all its new
-        * parents for all the hard links. Here just from the dentry used to
-        * fsync, we can not visit the ancestor inodes for all the other hard
-        * links to figure out if any is new, so we fallback to a transaction
-        * commit (instead of adding a lot of complexity of scanning a btree,
-        * since this scenario is not a common use case).
-        */
-       if (inode->vfs_inode.i_nlink > 1 &&
-           inode->last_link_trans > last_committed) {
-               ret = -EMLINK;
+       ret = log_all_new_ancestors(trans, inode, parent, ctx);
+       if (ret)
                goto end_trans;
-       }
 
-       while (1) {
-               if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
-                       break;
-
-               inode = BTRFS_I(d_inode(parent));
-               if (root != inode->root)
-                       break;
-
-               if (inode->generation > last_committed) {
-                       ret = btrfs_log_inode(trans, root, inode,
-                                       LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
-                       if (ret)
-                               goto end_trans;
-               }
-               if (IS_ROOT(parent))
-                       break;
-
-               parent = dget_parent(parent);
-               dput(old_parent);
-               old_parent = parent;
-       }
        if (log_dentries)
-               ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+               ret = log_new_dir_dentries(trans, root, inode, ctx);
        else
                ret = 0;
 end_trans:
-       dput(old_parent);
        if (ret < 0) {
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
                ret = 1;
        }
 
index 0fab84a..132e43d 100644 (file)
@@ -30,16 +30,14 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
        INIT_LIST_HEAD(&ctx->list);
 }
 
-static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
-                                            struct btrfs_trans_handle *trans)
+static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans)
 {
-       WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid);
+       WRITE_ONCE(trans->fs_info->last_trans_log_full_commit, trans->transid);
 }
 
-static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
-                                            struct btrfs_trans_handle *trans)
+static inline int btrfs_need_log_full_commit(struct btrfs_trans_handle *trans)
 {
-       return READ_ONCE(fs_info->last_trans_log_full_commit) ==
+       return READ_ONCE(trans->fs_info->last_trans_log_full_commit) ==
                trans->transid;
 }
 
index 3b2ae34..91caab6 100644 (file)
@@ -121,12 +121,12 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
                 * An item with that type already exists.
                 * Extend the item and store the new subid at the end.
                 */
-               btrfs_extend_item(fs_info, path, sizeof(subid_le));
+               btrfs_extend_item(path, sizeof(subid_le));
                eb = path->nodes[0];
                slot = path->slots[0];
                offset = btrfs_item_ptr_offset(eb, slot);
                offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
-       } else if (ret < 0) {
+       } else {
                btrfs_warn(fs_info,
                           "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
                           ret, (unsigned long long)key.objectid,
@@ -219,7 +219,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
        move_src = offset + sizeof(subid);
        move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
        memmove_extent_buffer(eb, move_dst, move_src, move_len);
-       btrfs_truncate_item(fs_info, path, item_size - sizeof(subid), 1);
+       btrfs_truncate_item(path, item_size - sizeof(subid), 1);
 
 out:
        btrfs_free_path(path);
index db934ce..1c2a6e4 100644 (file)
@@ -27,6 +27,7 @@
 #include "math.h"
 #include "dev-replace.h"
 #include "sysfs.h"
+#include "tree-checker.h"
 
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = {
@@ -184,8 +185,7 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
 out_overflow:;
 }
 
-static int init_first_rw_device(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info);
+static int init_first_rw_device(struct btrfs_trans_handle *trans);
 static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
@@ -318,7 +318,6 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
        mutex_init(&fs_devs->device_list_mutex);
 
        INIT_LIST_HEAD(&fs_devs->devices);
-       INIT_LIST_HEAD(&fs_devs->resized_devices);
        INIT_LIST_HEAD(&fs_devs->alloc_list);
        INIT_LIST_HEAD(&fs_devs->fs_list);
        if (fsid)
@@ -334,7 +333,9 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
 
 void btrfs_free_device(struct btrfs_device *device)
 {
+       WARN_ON(!list_empty(&device->post_commit_list));
        rcu_string_free(device->name);
+       extent_io_tree_release(&device->alloc_state);
        bio_put(device->flush_bio);
        kfree(device);
 }
@@ -402,7 +403,7 @@ static struct btrfs_device *__alloc_device(void)
 
        INIT_LIST_HEAD(&dev->dev_list);
        INIT_LIST_HEAD(&dev->dev_alloc_list);
-       INIT_LIST_HEAD(&dev->resized_list);
+       INIT_LIST_HEAD(&dev->post_commit_list);
 
        spin_lock_init(&dev->io_lock);
 
@@ -411,6 +412,7 @@ static struct btrfs_device *__alloc_device(void)
        btrfs_device_data_ordered_init(dev);
        INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+       extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
 
        return dev;
 }
@@ -1230,14 +1232,6 @@ again:
        mutex_unlock(&uuid_mutex);
 }
 
-static void free_device_rcu(struct rcu_head *head)
-{
-       struct btrfs_device *device;
-
-       device = container_of(head, struct btrfs_device, rcu);
-       btrfs_free_device(device);
-}
-
 static void btrfs_close_bdev(struct btrfs_device *device)
 {
        if (!device->bdev)
@@ -1285,7 +1279,8 @@ static void btrfs_close_one_device(struct btrfs_device *device)
        list_replace_rcu(&device->dev_list, &new_device->dev_list);
        new_device->fs_devices = device->fs_devices;
 
-       call_rcu(&device->rcu, free_device_rcu);
+       synchronize_rcu();
+       btrfs_free_device(device);
 }
 
 static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
@@ -1505,58 +1500,29 @@ error_bdev_put:
        return device;
 }
 
-static int contains_pending_extent(struct btrfs_transaction *transaction,
-                                  struct btrfs_device *device,
-                                  u64 *start, u64 len)
+/*
+ * Try to find a chunk that intersects [start, start + len) range and when one
+ * such is found, record the first byte past its (inclusive) end in *start
+ */
+static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
+                                   u64 len)
 {
-       struct btrfs_fs_info *fs_info = device->fs_info;
-       struct extent_map *em;
-       struct list_head *search_list = &fs_info->pinned_chunks;
-       int ret = 0;
-       u64 physical_start = *start;
+       u64 physical_start, physical_end;
 
-       if (transaction)
-               search_list = &transaction->pending_chunks;
-again:
-       list_for_each_entry(em, search_list, list) {
-               struct map_lookup *map;
-               int i;
+       lockdep_assert_held(&device->fs_info->chunk_mutex);
 
-               map = em->map_lookup;
-               for (i = 0; i < map->num_stripes; i++) {
-                       u64 end;
+       if (!find_first_extent_bit(&device->alloc_state, *start,
+                                  &physical_start, &physical_end,
+                                  CHUNK_ALLOCATED, NULL)) {
 
-                       if (map->stripes[i].dev != device)
-                               continue;
-                       if (map->stripes[i].physical >= physical_start + len ||
-                           map->stripes[i].physical + em->orig_block_len <=
-                           physical_start)
-                               continue;
-                       /*
-                        * Make sure that while processing the pinned list we do
-                        * not override our *start with a lower value, because
-                        * we can have pinned chunks that fall within this
-                        * device hole and that have lower physical addresses
-                        * than the pending chunks we processed before. If we
-                        * do not take this special care we can end up getting
-                        * 2 pending chunks that start at the same physical
-                        * device offsets because the end offset of a pinned
-                        * chunk can be equal to the start offset of some
-                        * pending chunk.
-                        */
-                       end = map->stripes[i].physical + em->orig_block_len;
-                       if (end > *start) {
-                               *start = end;
-                               ret = 1;
-                       }
+               if (in_range(physical_start, *start, len) ||
+                   in_range(*start, physical_start,
+                            physical_end + 1 - physical_start)) {
+                       *start = physical_end + 1;
+                       return true;
                }
        }
-       if (search_list != &fs_info->pinned_chunks) {
-               search_list = &fs_info->pinned_chunks;
-               goto again;
-       }
-
-       return ret;
+       return false;
 }
 
 
@@ -1581,8 +1547,7 @@ again:
  * But if we don't find suitable free space, it is used to store the size of
  * the max free space.
  */
-int find_free_dev_extent_start(struct btrfs_transaction *transaction,
-                              struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
                               u64 search_start, u64 *start, u64 *len)
 {
        struct btrfs_fs_info *fs_info = device->fs_info;
@@ -1667,15 +1632,12 @@ again:
                         * Have to check before we set max_hole_start, otherwise
                         * we could end up sending back this offset anyway.
                         */
-                       if (contains_pending_extent(transaction, device,
-                                                   &search_start,
+                       if (contains_pending_extent(device, &search_start,
                                                    hole_size)) {
-                               if (key.offset >= search_start) {
+                               if (key.offset >= search_start)
                                        hole_size = key.offset - search_start;
-                               } else {
-                                       WARN_ON_ONCE(1);
+                               else
                                        hole_size = 0;
-                               }
                        }
 
                        if (hole_size > max_hole_size) {
@@ -1716,8 +1678,7 @@ next:
        if (search_end > search_start) {
                hole_size = search_end - search_start;
 
-               if (contains_pending_extent(transaction, device, &search_start,
-                                           hole_size)) {
+               if (contains_pending_extent(device, &search_start, hole_size)) {
                        btrfs_release_path(path);
                        goto again;
                }
@@ -1742,13 +1703,11 @@ out:
        return ret;
 }
 
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                        struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
                         u64 *start, u64 *len)
 {
        /* FIXME use last free of some kind */
-       return find_free_dev_extent_start(trans->transaction, device,
-                                         num_bytes, 0, start, len);
+       return find_free_dev_extent_start(device, num_bytes, 0, start, len);
 }
 
 static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
@@ -1982,10 +1941,9 @@ static void update_dev_time(const char *path_name)
        filp_close(filp, NULL);
 }
 
-static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,
-                            struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_device *device)
 {
-       struct btrfs_root *root = fs_info->chunk_root;
+       struct btrfs_root *root = device->fs_info->chunk_root;
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;
@@ -2186,12 +2144,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
         * counter although write_all_supers() is not locked out. This
         * could give a filesystem state which requires a degraded mount.
         */
-       ret = btrfs_rm_dev_item(fs_info, device);
+       ret = btrfs_rm_dev_item(device);
        if (ret)
                goto error_undo;
 
        clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
-       btrfs_scrub_cancel_dev(fs_info, device);
+       btrfs_scrub_cancel_dev(device);
 
        /*
         * the device list mutex makes sure that we don't change
@@ -2242,7 +2200,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
                btrfs_scratch_superblocks(device->bdev, device->name->str);
 
        btrfs_close_bdev(device);
-       call_rcu(&device->rcu, free_device_rcu);
+       synchronize_rcu();
+       btrfs_free_device(device);
 
        if (cur_devices->open_devices == 0) {
                while (fs_devices) {
@@ -2299,9 +2258,9 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
                fs_devices->open_devices--;
 }
 
-void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_device *srcdev)
+void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
 {
+       struct btrfs_fs_info *fs_info = srcdev->fs_info;
        struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
 
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
@@ -2310,7 +2269,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
        }
 
        btrfs_close_bdev(srcdev);
-       call_rcu(&srcdev->rcu, free_device_rcu);
+       synchronize_rcu();
+       btrfs_free_device(srcdev);
 
        /* if this is no devs we rather delete the fs_devices */
        if (!fs_devices->num_devices) {
@@ -2368,7 +2328,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
        btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
 
        btrfs_close_bdev(tgtdev);
-       call_rcu(&tgtdev->rcu, free_device_rcu);
+       synchronize_rcu();
+       btrfs_free_device(tgtdev);
 }
 
 static struct btrfs_device *btrfs_find_device_by_path(
@@ -2503,9 +2464,9 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
 /*
  * Store the expected generation for seed devices in device items.
  */
-static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
-                              struct btrfs_fs_info *fs_info)
+static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_root *root = fs_info->chunk_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -2705,7 +2666,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
        if (seeding_dev) {
                mutex_lock(&fs_info->chunk_mutex);
-               ret = init_first_rw_device(trans, fs_info);
+               ret = init_first_rw_device(trans);
                mutex_unlock(&fs_info->chunk_mutex);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
@@ -2722,7 +2683,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        if (seeding_dev) {
                char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
 
-               ret = btrfs_finish_sprout(trans, fs_info);
+               ret = btrfs_finish_sprout(trans);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        goto error_sysfs;
@@ -2852,7 +2813,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
 {
        struct btrfs_fs_info *fs_info = device->fs_info;
        struct btrfs_super_block *super_copy = fs_info->super_copy;
-       struct btrfs_fs_devices *fs_devices;
        u64 old_total;
        u64 diff;
 
@@ -2871,8 +2831,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
 
-       fs_devices = fs_info->fs_devices;
-
        btrfs_set_super_total_bytes(super_copy,
                        round_down(old_total + diff, fs_info->sectorsize));
        device->fs_devices->total_rw_bytes += diff;
@@ -2880,9 +2838,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
        btrfs_device_set_total_bytes(device, new_size);
        btrfs_device_set_disk_total_bytes(device, new_size);
        btrfs_clear_space_info_full(device->fs_info);
-       if (list_empty(&device->resized_list))
-               list_add_tail(&device->resized_list,
-                             &fs_devices->resized_devices);
+       if (list_empty(&device->post_commit_list))
+               list_add_tail(&device->post_commit_list,
+                             &trans->transaction->dev_update_list);
        mutex_unlock(&fs_info->chunk_mutex);
 
        return btrfs_update_device(trans, device);
@@ -3601,10 +3559,10 @@ static int chunk_soft_convert_filter(u64 chunk_type,
        return 0;
 }
 
-static int should_balance_chunk(struct btrfs_fs_info *fs_info,
-                               struct extent_buffer *leaf,
+static int should_balance_chunk(struct extent_buffer *leaf,
                                struct btrfs_chunk *chunk, u64 chunk_offset)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
        struct btrfs_balance_args *bargs = NULL;
        u64 chunk_type = btrfs_chunk_type(leaf, chunk);
@@ -3784,8 +3742,7 @@ again:
                        spin_unlock(&fs_info->balance_lock);
                }
 
-               ret = should_balance_chunk(fs_info, leaf, chunk,
-                                          found_key.offset);
+               ret = should_balance_chunk(leaf, chunk, found_key.offset);
 
                btrfs_release_path(path);
                if (!ret) {
@@ -4661,8 +4618,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       uuid_root = btrfs_create_tree(trans, fs_info,
-                                     BTRFS_UUID_TREE_OBJECTID);
+       uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
        if (IS_ERR(uuid_root)) {
                ret = PTR_ERR(uuid_root);
                btrfs_abort_transaction(trans, ret);
@@ -4722,15 +4678,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        int slot;
        int failed = 0;
        bool retried = false;
-       bool checked_pending_chunks = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = fs_info->super_copy;
        u64 old_total = btrfs_super_total_bytes(super_copy);
        u64 old_size = btrfs_device_get_total_bytes(device);
        u64 diff;
+       u64 start;
 
        new_size = round_down(new_size, fs_info->sectorsize);
+       start = new_size;
        diff = round_down(old_size - new_size, fs_info->sectorsize);
 
        if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
@@ -4742,6 +4699,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 
        path->reada = READA_BACK;
 
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
+               return PTR_ERR(trans);
+       }
+
        mutex_lock(&fs_info->chunk_mutex);
 
        btrfs_device_set_total_bytes(device, new_size);
@@ -4749,7 +4712,21 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                device->fs_devices->total_rw_bytes -= diff;
                atomic64_sub(diff, &fs_info->free_chunk_space);
        }
-       mutex_unlock(&fs_info->chunk_mutex);
+
+       /*
+        * Once the device's size has been set to the new size, ensure all
+        * in-memory chunks are synced to disk so that the loop below sees them
+        * and relocates them accordingly.
+        */
+       if (contains_pending_extent(device, &start, diff)) {
+               mutex_unlock(&fs_info->chunk_mutex);
+               ret = btrfs_commit_transaction(trans);
+               if (ret)
+                       goto done;
+       } else {
+               mutex_unlock(&fs_info->chunk_mutex);
+               btrfs_end_transaction(trans);
+       }
 
 again:
        key.objectid = device->devid;
@@ -4840,40 +4817,10 @@ again:
        }
 
        mutex_lock(&fs_info->chunk_mutex);
-
-       /*
-        * We checked in the above loop all device extents that were already in
-        * the device tree. However before we have updated the device's
-        * total_bytes to the new size, we might have had chunk allocations that
-        * have not complete yet (new block groups attached to transaction
-        * handles), and therefore their device extents were not yet in the
-        * device tree and we missed them in the loop above. So if we have any
-        * pending chunk using a device extent that overlaps the device range
-        * that we can not use anymore, commit the current transaction and
-        * repeat the search on the device tree - this way we guarantee we will
-        * not have chunks using device extents that end beyond 'new_size'.
-        */
-       if (!checked_pending_chunks) {
-               u64 start = new_size;
-               u64 len = old_size - new_size;
-
-               if (contains_pending_extent(trans->transaction, device,
-                                           &start, len)) {
-                       mutex_unlock(&fs_info->chunk_mutex);
-                       checked_pending_chunks = true;
-                       failed = 0;
-                       retried = false;
-                       ret = btrfs_commit_transaction(trans);
-                       if (ret)
-                               goto done;
-                       goto again;
-               }
-       }
-
        btrfs_device_set_disk_total_bytes(device, new_size);
-       if (list_empty(&device->resized_list))
-               list_add_tail(&device->resized_list,
-                             &fs_info->fs_devices->resized_devices);
+       if (list_empty(&device->post_commit_list))
+               list_add_tail(&device->post_commit_list,
+                             &trans->transaction->dev_update_list);
 
        WARN_ON(diff > old_total);
        btrfs_set_super_total_bytes(super_copy,
@@ -4957,15 +4904,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
        btrfs_set_fs_incompat(info, RAID56);
 }
 
-#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)       \
-                       - sizeof(struct btrfs_chunk))           \
-                       / sizeof(struct btrfs_stripe) + 1)
-
-#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE       \
-                               - 2 * sizeof(struct btrfs_disk_key)     \
-                               - 2 * sizeof(struct btrfs_chunk))       \
-                               / sizeof(struct btrfs_stripe) + 1)
-
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                               u64 start, u64 type)
 {
@@ -5038,7 +4976,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        } else {
                btrfs_err(info, "invalid chunk type 0x%llx requested",
                       type);
-               BUG_ON(1);
+               BUG();
        }
 
        /* We don't want a chunk larger than 10% of writable space */
@@ -5079,7 +5017,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                if (total_avail == 0)
                        continue;
 
-               ret = find_free_dev_extent(trans, device,
+               ret = find_free_dev_extent(device,
                                           max_stripe_size * dev_stripes,
                                           &dev_offset, &max_avail);
                if (ret && ret != -ENOSPC)
@@ -5213,18 +5151,20 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                free_extent_map(em);
                goto error;
        }
-
-       list_add_tail(&em->list, &trans->transaction->pending_chunks);
-       refcount_inc(&em->refs);
        write_unlock(&em_tree->lock);
 
        ret = btrfs_make_block_group(trans, 0, type, start, chunk_size);
        if (ret)
                goto error_del_extent;
 
-       for (i = 0; i < map->num_stripes; i++)
-               btrfs_device_set_bytes_used(map->stripes[i].dev,
-                               map->stripes[i].dev->bytes_used + stripe_size);
+       for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_device *dev = map->stripes[i].dev;
+
+               btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size);
+               if (list_empty(&dev->post_commit_list))
+                       list_add_tail(&dev->post_commit_list,
+                                     &trans->transaction->dev_update_list);
+       }
 
        atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
 
@@ -5243,8 +5183,6 @@ error_del_extent:
        free_extent_map(em);
        /* One for the tree reference */
        free_extent_map(em);
-       /* One for the pending_chunks list reference */
-       free_extent_map(em);
 error:
        kfree(devices_info);
        return ret;
@@ -5364,9 +5302,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
        return __btrfs_alloc_chunk(trans, chunk_offset, type);
 }
 
-static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
-                                        struct btrfs_fs_info *fs_info)
+static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        u64 chunk_offset;
        u64 sys_chunk_offset;
        u64 alloc_profile;
@@ -6714,99 +6652,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
        return dev;
 }
 
-/* Return -EIO if any error, otherwise return 0. */
-static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
-                                  struct extent_buffer *leaf,
-                                  struct btrfs_chunk *chunk, u64 logical)
-{
-       u64 length;
-       u64 stripe_len;
-       u16 num_stripes;
-       u16 sub_stripes;
-       u64 type;
-       u64 features;
-       bool mixed = false;
-
-       length = btrfs_chunk_length(leaf, chunk);
-       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
-       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
-       sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
-       type = btrfs_chunk_type(leaf, chunk);
-
-       if (!num_stripes) {
-               btrfs_err(fs_info, "invalid chunk num_stripes: %u",
-                         num_stripes);
-               return -EIO;
-       }
-       if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
-               btrfs_err(fs_info, "invalid chunk logical %llu", logical);
-               return -EIO;
-       }
-       if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
-               btrfs_err(fs_info, "invalid chunk sectorsize %u",
-                         btrfs_chunk_sector_size(leaf, chunk));
-               return -EIO;
-       }
-       if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
-               btrfs_err(fs_info, "invalid chunk length %llu", length);
-               return -EIO;
-       }
-       if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
-               btrfs_err(fs_info, "invalid chunk stripe length: %llu",
-                         stripe_len);
-               return -EIO;
-       }
-       if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
-           type) {
-               btrfs_err(fs_info, "unrecognized chunk type: %llu",
-                         ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
-                           BTRFS_BLOCK_GROUP_PROFILE_MASK) &
-                         btrfs_chunk_type(leaf, chunk));
-               return -EIO;
-       }
-
-       if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
-               btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
-               return -EIO;
-       }
-
-       if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
-           (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
-               btrfs_err(fs_info,
-                       "system chunk with data or metadata type: 0x%llx", type);
-               return -EIO;
-       }
-
-       features = btrfs_super_incompat_flags(fs_info->super_copy);
-       if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
-               mixed = true;
-
-       if (!mixed) {
-               if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
-                   (type & BTRFS_BLOCK_GROUP_DATA)) {
-                       btrfs_err(fs_info,
-                       "mixed chunk type in non-mixed mode: 0x%llx", type);
-                       return -EIO;
-               }
-       }
-
-       if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
-           (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
-           (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
-           (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
-           (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
-           ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
-            num_stripes != 1)) {
-               btrfs_err(fs_info,
-                       "invalid num_stripes:sub_stripes %u:%u for profile %llu",
-                       num_stripes, sub_stripes,
-                       type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
-               return -EIO;
-       }
-
-       return 0;
-}
-
 static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
                                        u64 devid, u8 *uuid, bool error)
 {
@@ -6818,10 +6663,30 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
                              devid, uuid);
 }
 
-static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
-                         struct extent_buffer *leaf,
+static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
+{
+       int index = btrfs_bg_flags_to_raid_index(type);
+       int ncopies = btrfs_raid_array[index].ncopies;
+       int data_stripes;
+
+       switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+       case BTRFS_BLOCK_GROUP_RAID5:
+               data_stripes = num_stripes - 1;
+               break;
+       case BTRFS_BLOCK_GROUP_RAID6:
+               data_stripes = num_stripes - 2;
+               break;
+       default:
+               data_stripes = num_stripes / ncopies;
+               break;
+       }
+       return div_u64(chunk_len, data_stripes);
+}
+
+static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
                          struct btrfs_chunk *chunk)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        struct map_lookup *map;
        struct extent_map *em;
@@ -6837,9 +6702,15 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
        length = btrfs_chunk_length(leaf, chunk);
        num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
 
-       ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical);
-       if (ret)
-               return ret;
+       /*
+        * Only need to verify chunk item if we're reading from sys chunk array,
+        * as chunk item in tree block is already verified by tree-checker.
+        */
+       if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
+               ret = btrfs_check_chunk_valid(leaf, chunk, logical);
+               if (ret)
+                       return ret;
+       }
 
        read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6877,6 +6748,8 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
        map->type = btrfs_chunk_type(leaf, chunk);
        map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
        map->verified_stripes = 0;
+       em->orig_block_len = calc_stripe_length(map->type, em->len,
+                                               map->num_stripes);
        for (i = 0; i < num_stripes; i++) {
                map->stripes[i].physical =
                        btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -7001,10 +6874,10 @@ out:
        return fs_devices;
 }
 
-static int read_one_dev(struct btrfs_fs_info *fs_info,
-                       struct extent_buffer *leaf,
+static int read_one_dev(struct extent_buffer *leaf,
                        struct btrfs_dev_item *dev_item)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        struct btrfs_device *device;
        u64 devid;
@@ -7193,7 +7066,7 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
                        if (cur_offset + len > array_size)
                                goto out_short_read;
 
-                       ret = read_one_chunk(fs_info, &key, sb, chunk);
+                       ret = read_one_chunk(&key, sb, chunk);
                        if (ret)
                                break;
                } else {
@@ -7334,14 +7207,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
                        struct btrfs_dev_item *dev_item;
                        dev_item = btrfs_item_ptr(leaf, slot,
                                                  struct btrfs_dev_item);
-                       ret = read_one_dev(fs_info, leaf, dev_item);
+                       ret = read_one_dev(leaf, dev_item);
                        if (ret)
                                goto error;
                        total_dev++;
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
-                       ret = read_one_chunk(fs_info, &found_key, leaf, chunk);
+                       ret = read_one_chunk(&found_key, leaf, chunk);
                        if (ret)
                                goto error;
                }
@@ -7530,9 +7403,9 @@ out:
 /*
  * called from commit_transaction. Writes all changed device stats to disk.
  */
-int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
-                       struct btrfs_fs_info *fs_info)
+int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        struct btrfs_device *device;
        int stats_cnt;
@@ -7674,51 +7547,34 @@ void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_pat
 }
 
 /*
- * Update the size of all devices, which is used for writing out the
- * super blocks.
+ * Update the size and bytes used for each device where it changed.  This is
+ * delayed since we would otherwise get errors while writing out the
+ * superblocks.
+ *
+ * Must be invoked during transaction commit.
  */
-void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
+void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
 {
-       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        struct btrfs_device *curr, *next;
 
-       if (list_empty(&fs_devices->resized_devices))
-               return;
-
-       mutex_lock(&fs_devices->device_list_mutex);
-       mutex_lock(&fs_info->chunk_mutex);
-       list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
-                                resized_list) {
-               list_del_init(&curr->resized_list);
-               curr->commit_total_bytes = curr->disk_total_bytes;
-       }
-       mutex_unlock(&fs_info->chunk_mutex);
-       mutex_unlock(&fs_devices->device_list_mutex);
-}
+       ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
 
-/* Must be invoked during the transaction commit */
-void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
-{
-       struct btrfs_fs_info *fs_info = trans->fs_info;
-       struct extent_map *em;
-       struct map_lookup *map;
-       struct btrfs_device *dev;
-       int i;
-
-       if (list_empty(&trans->pending_chunks))
+       if (list_empty(&trans->dev_update_list))
                return;
 
-       /* In order to kick the device replace finish process */
-       mutex_lock(&fs_info->chunk_mutex);
-       list_for_each_entry(em, &trans->pending_chunks, list) {
-               map = em->map_lookup;
-
-               for (i = 0; i < map->num_stripes; i++) {
-                       dev = map->stripes[i].dev;
-                       dev->commit_bytes_used = dev->bytes_used;
-               }
+       /*
+        * We don't need the device_list_mutex here.  This list is owned by the
+        * transaction and the transaction must complete before the device is
+        * released.
+        */
+       mutex_lock(&trans->fs_info->chunk_mutex);
+       list_for_each_entry_safe(curr, next, &trans->dev_update_list,
+                                post_commit_list) {
+               list_del_init(&curr->post_commit_list);
+               curr->commit_total_bytes = curr->disk_total_bytes;
+               curr->commit_bytes_used = curr->bytes_used;
        }
-       mutex_unlock(&fs_info->chunk_mutex);
+       mutex_unlock(&trans->fs_info->chunk_mutex);
 }
 
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
@@ -7751,25 +7607,6 @@ int btrfs_bg_type_to_factor(u64 flags)
 }
 
 
-static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
-{
-       int index = btrfs_bg_flags_to_raid_index(type);
-       int ncopies = btrfs_raid_array[index].ncopies;
-       int data_stripes;
-
-       switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
-       case BTRFS_BLOCK_GROUP_RAID5:
-               data_stripes = num_stripes - 1;
-               break;
-       case BTRFS_BLOCK_GROUP_RAID6:
-               data_stripes = num_stripes - 2;
-               break;
-       default:
-               data_stripes = num_stripes / ncopies;
-               break;
-       }
-       return div_u64(chunk_len, data_stripes);
-}
 
 static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
                                 u64 chunk_offset, u64 devid,
index 3ad9d58..b8a0e8d 100644 (file)
@@ -45,6 +45,7 @@ struct btrfs_pending_bios {
 struct btrfs_device {
        struct list_head dev_list;
        struct list_head dev_alloc_list;
+       struct list_head post_commit_list; /* chunk mutex */
        struct btrfs_fs_devices *fs_devices;
        struct btrfs_fs_info *fs_info;
 
@@ -102,18 +103,12 @@ struct btrfs_device {
         * size of the device on the current transaction
         *
         * This variant is update when committing the transaction,
-        * and protected by device_list_mutex
+        * and protected by chunk mutex
         */
        u64 commit_total_bytes;
 
        /* bytes used on the current transaction */
        u64 commit_bytes_used;
-       /*
-        * used to manage the device which is resized
-        *
-        * It is protected by chunk_lock.
-        */
-       struct list_head resized_list;
 
        /* for sending down flush barriers */
        struct bio *flush_bio;
@@ -123,7 +118,6 @@ struct btrfs_device {
        struct scrub_ctx *scrub_ctx;
 
        struct btrfs_work work;
-       struct rcu_head rcu;
 
        /* readahead state */
        atomic_t reada_in_flight;
@@ -139,6 +133,8 @@ struct btrfs_device {
        /* Counter to record the change of device stats */
        atomic_t dev_stats_ccnt;
        atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
+
+       struct extent_io_tree alloc_state;
 };
 
 /*
@@ -235,7 +231,6 @@ struct btrfs_fs_devices {
        struct mutex device_list_mutex;
        struct list_head devices;
 
-       struct list_head resized_devices;
        /* devices not currently being allocated */
        struct list_head alloc_list;
 
@@ -258,6 +253,15 @@ struct btrfs_fs_devices {
 
 #define BTRFS_BIO_INLINE_CSUM_SIZE     64
 
+#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)       \
+                       - sizeof(struct btrfs_chunk))           \
+                       / sizeof(struct btrfs_stripe) + 1)
+
+#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE       \
+                               - 2 * sizeof(struct btrfs_disk_key)     \
+                               - 2 * sizeof(struct btrfs_chunk))       \
+                               / sizeof(struct btrfs_stripe) + 1)
+
 /*
  * we need the mirror number and stripe index to be passed around
  * the call chain while we are processing end_io (especially errors).
@@ -449,22 +453,18 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
 int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
-int find_free_dev_extent_start(struct btrfs_transaction *transaction,
-                        struct btrfs_device *device, u64 num_bytes,
-                        u64 search_start, u64 *start, u64 *max_avail);
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                        struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
+                              u64 search_start, u64 *start, u64 *max_avail);
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
                         u64 *start, u64 *max_avail);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
                        struct btrfs_ioctl_get_dev_stats *stats);
 void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
 int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
-int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
-                       struct btrfs_fs_info *fs_info);
+int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
 void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
-void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_device *srcdev);
+void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
@@ -558,8 +558,7 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
 
 const char *get_raid_name(enum btrfs_raid_types type);
 
-void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
-void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
+void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
 
 struct list_head *btrfs_get_fs_uuids(void);
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
index f141b45..78b6ba2 100644 (file)
@@ -76,9 +76,8 @@ out:
        return ret;
 }
 
-static int do_setxattr(struct btrfs_trans_handle *trans,
-                      struct inode *inode, const char *name,
-                      const void *value, size_t size, int flags)
+int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
+                  const char *name, const void *value, size_t size, int flags)
 {
        struct btrfs_dir_item *di = NULL;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -87,6 +86,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
        size_t name_len = strlen(name);
        int ret = 0;
 
+       ASSERT(trans);
+
        if (name_len + size > BTRFS_MAX_XATTR_SIZE(root->fs_info))
                return -ENOSPC;
 
@@ -174,7 +175,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                char *ptr;
 
                if (size > old_data_len) {
-                       if (btrfs_leaf_free_space(fs_info, leaf) <
+                       if (btrfs_leaf_free_space(leaf) <
                            (size - old_data_len)) {
                                ret = -ENOSPC;
                                goto out;
@@ -184,17 +185,15 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                if (old_data_len + name_len + sizeof(*di) == item_size) {
                        /* No other xattrs packed in the same leaf item. */
                        if (size > old_data_len)
-                               btrfs_extend_item(fs_info, path,
-                                                 size - old_data_len);
+                               btrfs_extend_item(path, size - old_data_len);
                        else if (size < old_data_len)
-                               btrfs_truncate_item(fs_info, path,
-                                                   data_size, 1);
+                               btrfs_truncate_item(path, data_size, 1);
                } else {
                        /* There are other xattrs packed in the same item. */
                        ret = btrfs_delete_one_dir_name(trans, root, path, di);
                        if (ret)
                                goto out;
-                       btrfs_extend_item(fs_info, path, data_size);
+                       btrfs_extend_item(path, data_size);
                }
 
                item = btrfs_item_nr(slot);
@@ -220,24 +219,18 @@ out:
 /*
  * @value: "" makes the attribute to empty, NULL removes it
  */
-int btrfs_setxattr(struct btrfs_trans_handle *trans,
-                    struct inode *inode, const char *name,
-                    const void *value, size_t size, int flags)
+int btrfs_setxattr_trans(struct inode *inode, const char *name,
+                        const void *value, size_t size, int flags)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_trans_handle *trans;
        int ret;
 
-       if (btrfs_root_readonly(root))
-               return -EROFS;
-
-       if (trans)
-               return do_setxattr(trans, inode, name, value, size, flags);
-
        trans = btrfs_start_transaction(root, 2);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       ret = do_setxattr(trans, inode, name, value, size, flags);
+       ret = btrfs_setxattr(trans, inode, name, value, size, flags);
        if (ret)
                goto out;
 
@@ -370,7 +363,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
                                   size_t size, int flags)
 {
        name = xattr_full_name(handler, name);
-       return btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+       return btrfs_setxattr_trans(inode, name, buffer, size, flags);
 }
 
 static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
@@ -378,8 +371,32 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
                                        const char *name, const void *value,
                                        size_t size, int flags)
 {
+       int ret;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
        name = xattr_full_name(handler, name);
-       return btrfs_set_prop(inode, name, value, size, flags);
+       ret = btrfs_validate_prop(name, value, size);
+       if (ret)
+               return ret;
+
+       trans = btrfs_start_transaction(root, 2);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       ret = btrfs_set_prop(trans, inode, name, value, size, flags);
+       if (!ret) {
+               inode_inc_iversion(inode);
+               inode->i_ctime = current_time(inode);
+               set_bit(BTRFS_INODE_COPY_EVERYTHING,
+                       &BTRFS_I(inode)->runtime_flags);
+               ret = btrfs_update_inode(trans, root, inode);
+               BUG_ON(ret);
+       }
+
+       btrfs_end_transaction(trans);
+
+       return ret;
 }
 
 static const struct xattr_handler btrfs_security_xattr_handler = {
@@ -419,10 +436,10 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
 };
 
 static int btrfs_initxattrs(struct inode *inode,
-                           const struct xattr *xattr_array, void *fs_info)
+                           const struct xattr *xattr_array, void *fs_private)
 {
+       struct btrfs_trans_handle *trans = fs_private;
        const struct xattr *xattr;
-       struct btrfs_trans_handle *trans = fs_info;
        unsigned int nofs_flag;
        char *name;
        int err = 0;
@@ -442,7 +459,7 @@ static int btrfs_initxattrs(struct inode *inode,
                strcpy(name, XATTR_SECURITY_PREFIX);
                strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
                err = btrfs_setxattr(trans, inode, name, xattr->value,
-                               xattr->value_len, 0);
+                                    xattr->value_len, 0);
                kfree(name);
                if (err < 0)
                        break;
index 471fcac..1cd3fc0 100644 (file)
@@ -12,9 +12,10 @@ extern const struct xattr_handler *btrfs_xattr_handlers[];
 
 int btrfs_getxattr(struct inode *inode, const char *name,
                void *buffer, size_t size);
-int btrfs_setxattr(struct btrfs_trans_handle *trans,
-                           struct inode *inode, const char *name,
-                           const void *value, size_t size, int flags);
+int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
+                  const char *name, const void *value, size_t size, int flags);
+int btrfs_setxattr_trans(struct inode *inode, const char *name,
+                        const void *value, size_t size, int flags);
 ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 
 int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
index 6b9e29d..a6ff07c 100644 (file)
@@ -90,6 +90,9 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
        return container_of(list, struct workspace, list);
 }
 
+static void zstd_free_workspace(struct list_head *ws);
+static struct list_head *zstd_alloc_workspace(unsigned int level);
+
 /*
  * zstd_reclaim_timer_fn - reclaim timer
  * @t: timer
@@ -124,7 +127,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
                level = victim->level;
                list_del(&victim->lru_list);
                list_del(&victim->list);
-               wsm.ops->free_workspace(&victim->list);
+               zstd_free_workspace(&victim->list);
 
                if (list_empty(&wsm.idle_ws[level - 1]))
                        clear_bit(level - 1, &wsm.active_map);
@@ -180,7 +183,7 @@ static void zstd_init_workspace_manager(void)
        for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
                INIT_LIST_HEAD(&wsm.idle_ws[i]);
 
-       ws = wsm.ops->alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
+       ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
        if (IS_ERR(ws)) {
                pr_warn(
                "BTRFS: cannot preallocate zstd compression workspace\n");
@@ -202,7 +205,7 @@ static void zstd_cleanup_workspace_manager(void)
                                                 struct workspace, list);
                        list_del(&workspace->list);
                        list_del(&workspace->lru_list);
-                       wsm.ops->free_workspace(&workspace->list);
+                       zstd_free_workspace(&workspace->list);
                }
        }
        spin_unlock(&wsm.lock);
@@ -272,7 +275,7 @@ again:
                return ws;
 
        nofs_flag = memalloc_nofs_save();
-       ws = wsm.ops->alloc_workspace(level);
+       ws = zstd_alloc_workspace(level);
        memalloc_nofs_restore(nofs_flag);
 
        if (IS_ERR(ws)) {
index b9b7465..f9eff01 100644 (file)
@@ -27,6 +27,7 @@ struct btrfs_work;
 struct __btrfs_workqueue;
 struct btrfs_qgroup_extent_record;
 struct btrfs_qgroup;
+struct extent_io_tree;
 struct prelim_ref;
 
 TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
@@ -77,6 +78,17 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
                { BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" },    \
                { BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
 
+#define show_extent_io_tree_owner(owner)                                      \
+       __print_symbolic(owner,                                                \
+               { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" },          \
+               { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" },          \
+               { IO_TREE_INODE_IO,               "INODE_IO" },                \
+               { IO_TREE_INODE_IO_FAILURE,       "INODE_IO_FAILURE" },        \
+               { IO_TREE_RELOC_BLOCKS,           "RELOC_BLOCKS" },            \
+               { IO_TREE_TRANS_DIRTY_PAGES,      "TRANS_DIRTY_PAGES" },       \
+               { IO_TREE_ROOT_DIRTY_LOG_PAGES,   "ROOT_DIRTY_LOG_PAGES" },    \
+               { IO_TREE_SELFTEST,               "SELFTEST" })
+
 #define BTRFS_GROUP_FLAGS      \
        { BTRFS_BLOCK_GROUP_DATA,       "DATA"},        \
        { BTRFS_BLOCK_GROUP_SYSTEM,     "SYSTEM"},      \
@@ -88,11 +100,34 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
        { BTRFS_BLOCK_GROUP_RAID5,      "RAID5"},       \
        { BTRFS_BLOCK_GROUP_RAID6,      "RAID6"}
 
+#define EXTENT_FLAGS                                           \
+       { EXTENT_DIRTY,                 "DIRTY"},               \
+       { EXTENT_UPTODATE,              "UPTODATE"},            \
+       { EXTENT_LOCKED,                "LOCKED"},              \
+       { EXTENT_NEW,                   "NEW"},                 \
+       { EXTENT_DELALLOC,              "DELALLOC"},            \
+       { EXTENT_DEFRAG,                "DEFRAG"},              \
+       { EXTENT_BOUNDARY,              "BOUNDARY"},            \
+       { EXTENT_NODATASUM,             "NODATASUM"},           \
+       { EXTENT_CLEAR_META_RESV,       "CLEAR_META_RESV"},     \
+       { EXTENT_NEED_WAIT,             "NEED_WAIT"},           \
+       { EXTENT_DAMAGED,               "DAMAGED"},             \
+       { EXTENT_NORESERVE,             "NORESERVE"},           \
+       { EXTENT_QGROUP_RESERVED,       "QGROUP_RESERVED"},     \
+       { EXTENT_CLEAR_DATA_RESV,       "CLEAR_DATA_RESV"},     \
+       { EXTENT_DELALLOC_NEW,          "DELALLOC_NEW"}
+
 #define BTRFS_FSID_SIZE 16
 #define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_FSID_SIZE)
 
 #define TP_fast_assign_fsid(fs_info)                                   \
-       memcpy(__entry->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)
+({                                                                     \
+       if (fs_info)                                                    \
+               memcpy(__entry->fsid, fs_info->fs_devices->fsid,        \
+                      BTRFS_FSID_SIZE);                                \
+       else                                                            \
+               memset(__entry->fsid, 0, BTRFS_FSID_SIZE);              \
+})
 
 #define TP_STRUCT__entry_btrfs(args...)                                        \
        TP_STRUCT__entry(                                               \
@@ -1850,6 +1885,212 @@ DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group,
        TP_ARGS(bg_cache)
 );
 
+TRACE_EVENT(btrfs_set_extent_bit,
+       TP_PROTO(const struct extent_io_tree *tree,
+                u64 start, u64 len, unsigned set_bits),
+
+       TP_ARGS(tree, start, len, set_bits),
+
+       TP_STRUCT__entry_btrfs(
+               __field(        unsigned,       owner   )
+               __field(        u64,            ino     )
+               __field(        u64,            rootid  )
+               __field(        u64,            start   )
+               __field(        u64,            len     )
+               __field(        unsigned,       set_bits)
+       ),
+
+       TP_fast_assign_btrfs(tree->fs_info,
+               __entry->owner = tree->owner;
+               if (tree->private_data) {
+                       struct inode *inode = tree->private_data;
+
+                       __entry->ino    = btrfs_ino(BTRFS_I(inode));
+                       __entry->rootid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+               } else {
+                       __entry->ino    = 0;
+                       __entry->rootid = 0;
+               }
+               __entry->start          = start;
+               __entry->len            = len;
+               __entry->set_bits       = set_bits;
+       ),
+
+       TP_printk_btrfs(
+               "io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s",
+               show_extent_io_tree_owner(__entry->owner), __entry->ino,
+               __entry->rootid, __entry->start, __entry->len,
+               __print_flags(__entry->set_bits, "|", EXTENT_FLAGS))
+);
+
+TRACE_EVENT(btrfs_clear_extent_bit,
+       TP_PROTO(const struct extent_io_tree *tree,
+                u64 start, u64 len, unsigned clear_bits),
+
+       TP_ARGS(tree, start, len, clear_bits),
+
+       TP_STRUCT__entry_btrfs(
+               __field(        unsigned,       owner   )
+               __field(        u64,            ino     )
+               __field(        u64,            rootid  )
+               __field(        u64,            start   )
+               __field(        u64,            len     )
+               __field(        unsigned,       clear_bits)
+       ),
+
+       TP_fast_assign_btrfs(tree->fs_info,
+               __entry->owner = tree->owner;
+               if (tree->private_data) {
+                       struct inode *inode = tree->private_data;
+
+                       __entry->ino    = btrfs_ino(BTRFS_I(inode));
+                       __entry->rootid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+               } else {
+                       __entry->ino    = 0;
+                       __entry->rootid = 0;
+               }
+               __entry->start          = start;
+               __entry->len            = len;
+               __entry->clear_bits     = clear_bits;
+       ),
+
+       TP_printk_btrfs(
+               "io_tree=%s ino=%llu root=%llu start=%llu len=%llu clear_bits=%s",
+               show_extent_io_tree_owner(__entry->owner), __entry->ino,
+               __entry->rootid, __entry->start, __entry->len,
+               __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
+);
+
+TRACE_EVENT(btrfs_convert_extent_bit,
+       TP_PROTO(const struct extent_io_tree *tree,
+                u64 start, u64 len, unsigned set_bits, unsigned clear_bits),
+
+       TP_ARGS(tree, start, len, set_bits, clear_bits),
+
+       TP_STRUCT__entry_btrfs(
+               __field(        unsigned,       owner   )
+               __field(        u64,            ino     )
+               __field(        u64,            rootid  )
+               __field(        u64,            start   )
+               __field(        u64,            len     )
+               __field(        unsigned,       set_bits)
+               __field(        unsigned,       clear_bits)
+       ),
+
+       TP_fast_assign_btrfs(tree->fs_info,
+               __entry->owner = tree->owner;
+               if (tree->private_data) {
+                       struct inode *inode = tree->private_data;
+
+                       __entry->ino    = btrfs_ino(BTRFS_I(inode));
+                       __entry->rootid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+               } else {
+                       __entry->ino    = 0;
+                       __entry->rootid = 0;
+               }
+               __entry->start          = start;
+               __entry->len            = len;
+               __entry->set_bits       = set_bits;
+               __entry->clear_bits     = clear_bits;
+       ),
+
+       TP_printk_btrfs(
+"io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s clear_bits=%s",
+                 show_extent_io_tree_owner(__entry->owner), __entry->ino,
+                 __entry->rootid, __entry->start, __entry->len,
+                 __print_flags(__entry->set_bits , "|", EXTENT_FLAGS),
+                 __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
+);
+
+DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock,
+       TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
+
+       TP_ARGS(eb, start_ns),
+
+       TP_STRUCT__entry_btrfs(
+               __field(        u64,    block           )
+               __field(        u64,    generation      )
+               __field(        u64,    start_ns        )
+               __field(        u64,    end_ns          )
+               __field(        u64,    diff_ns         )
+               __field(        u64,    owner           )
+               __field(        int,    is_log_tree     )
+       ),
+
+       TP_fast_assign_btrfs(eb->fs_info,
+               __entry->block          = eb->start;
+               __entry->generation     = btrfs_header_generation(eb);
+               __entry->start_ns       = start_ns;
+               __entry->end_ns         = ktime_get_ns();
+               __entry->diff_ns        = __entry->end_ns - start_ns;
+               __entry->owner          = btrfs_header_owner(eb);
+               __entry->is_log_tree    = (eb->log_index >= 0);
+       ),
+
+       TP_printk_btrfs(
+"block=%llu generation=%llu start_ns=%llu end_ns=%llu diff_ns=%llu owner=%llu is_log_tree=%d",
+               __entry->block, __entry->generation,
+               __entry->start_ns, __entry->end_ns, __entry->diff_ns,
+               __entry->owner, __entry->is_log_tree)
+);
+
+DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_read_lock,
+       TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
+
+       TP_ARGS(eb, start_ns)
+);
+
+DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_lock,
+       TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
+
+       TP_ARGS(eb, start_ns)
+);
+
+DECLARE_EVENT_CLASS(btrfs_locking_events,
+       TP_PROTO(const struct extent_buffer *eb),
+
+       TP_ARGS(eb),
+
+       TP_STRUCT__entry_btrfs(
+               __field(        u64,    block           )
+               __field(        u64,    generation      )
+               __field(        u64,    owner           )
+               __field(        int,    is_log_tree     )
+       ),
+
+       TP_fast_assign_btrfs(eb->fs_info,
+               __entry->block          = eb->start;
+               __entry->generation     = btrfs_header_generation(eb);
+               __entry->owner          = btrfs_header_owner(eb);
+               __entry->is_log_tree    = (eb->log_index >= 0);
+       ),
+
+       TP_printk_btrfs("block=%llu generation=%llu owner=%llu is_log_tree=%d",
+               __entry->block, __entry->generation,
+               __entry->owner, __entry->is_log_tree)
+);
+
+#define DEFINE_BTRFS_LOCK_EVENT(name)                          \
+DEFINE_EVENT(btrfs_locking_events, name,                       \
+               TP_PROTO(const struct extent_buffer *eb),       \
+                                                               \
+               TP_ARGS(eb)                                     \
+)
+
+DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_unlock);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_read);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_write);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
+DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
index e974f4b..421239b 100644 (file)
  *
  * Used by:
  * struct btrfs_dir_item.type
+ *
+ * Values 0..7 must match common file type values in fs_types.h.
  */
 #define BTRFS_FT_UNKNOWN       0
 #define BTRFS_FT_REG_FILE      1