Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 May 2019 18:34:19 +0000 (11:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 May 2019 18:34:19 +0000 (11:34 -0700)
Pull btrfs updates from David Sterba:
 "This time the majority of changes are cleanups, though there's still a
  number of changes of user interest.

  User visible changes:

   - better read-time and write-time checks to catch errors early, before
     writing data to disk (to catch potential memory corruption in data
     that gets checksummed)

   - qgroups + metadata relocation: the last speed-up patch in the series
     to address the slowness; there should be no overhead when comparing
     balance with and without qgroups

   - FIEMAP ioctl does not start a transaction unnecessarily; this can
     result in a speed-up and less blocking due to IO (a user-space
     sketch follows this list)

   - LOGICAL_INO (v1, v2) does not start a transaction unnecessarily;
     this can speed up the mentioned ioctls and scrub as well

   - fsync on files with many (but not too many) hardlinks is faster,
     with a finer-grained decision on whether the links should be fsynced
     individually or completely

   - send tries harder to find ranges to clone

   - trim/discard will skip unallocated chunks that haven't been touched
     since the last mount
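
   As a user-space sketch of the FIEMAP item above (this is the
   long-standing generic FIEMAP interface, nothing new; only the
   kernel-side cost of servicing it went down), a minimal mapping query
   looks like:

     #include <stdio.h>
     #include <stdlib.h>
     #include <fcntl.h>
     #include <unistd.h>
     #include <sys/ioctl.h>
     #include <linux/fs.h>
     #include <linux/fiemap.h>

     int main(int argc, char **argv)
     {
             /* Room for the fiemap header plus one returned extent. */
             struct fiemap *fm = calloc(1, sizeof(*fm) +
                                        sizeof(struct fiemap_extent));
             int fd;

             if (argc < 2 || !fm)
                     return 1;
             fd = open(argv[1], O_RDONLY);
             if (fd < 0)
                     return 1;
             fm->fm_start = 0;
             fm->fm_length = ~0ULL;          /* map the whole file */
             fm->fm_flags = FIEMAP_FLAG_SYNC;
             fm->fm_extent_count = 1;
             if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0 && fm->fm_mapped_extents)
                     printf("first extent: logical=%llu physical=%llu len=%llu\n",
                            (unsigned long long)fm->fm_extents[0].fe_logical,
                            (unsigned long long)fm->fm_extents[0].fe_physical,
                            (unsigned long long)fm->fm_extents[0].fe_length);
             close(fd);
             free(fm);
             return 0;
     }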

  Fixes:

   - send flushes delayed allocation before starting; otherwise it could
     miss some changes in case of a very recent rw->ro switch of a
     subvolume

   - fix fallocate with qgroups, which could otherwise lead to a space
     accounting underflow, reported as a warning

   - trim/discard ioctl honours the requested range

   - starting send and dedupe on a subvolume at the same time will let
     only one of them succeed; this prevents changes that send could miss
     due to dedupe (a sketch follows this list); both operations are
     restartable
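
   A hedged sketch of the send vs dedupe exclusion described above; the
   per-root counters (send_in_progress and dedupe_in_progress, protected
   by root_item_lock) are the real fields this pull uses, but the helper
   shown here is illustrative, not the exact kernel function:

     /* Called before send starts on a subvolume root. */
     static int try_start_send(struct btrfs_root *root)
     {
             spin_lock(&root->root_item_lock);
             if (root->dedupe_in_progress) {
                     /* Dedupe is running; refuse, caller may retry. */
                     spin_unlock(&root->root_item_lock);
                     return -EAGAIN;
             }
             root->send_in_progress++;
             spin_unlock(&root->root_item_lock);
             return 0;
     }

   Dedupe does the symmetric check against send_in_progress before
   bumping dedupe_in_progress, so at most one of the two can start, and
   either side can simply be retried later.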

  Core changes:

   - more tree-checker validations, catching errors reported by fuzzing
     tools:
      - device item
      - inode item
      - block group profiles

   - tracepoints for extent buffer locking

   - async cow preallocates memory up front to avoid errors happening too
     deep in the call chain (a sketch of the pattern follows this list)

   - metadata reservations for delalloc reworked to adapt better to
     many-writers/low-space scenarios

   - improved space flushing logic for intense DIO vs buffered workloads

   - lots of cleanups
      - removed unused struct members
      - redundant argument removal
      - properties and xattrs
      - extent buffer locking
      - selftests
      - use common file type conversions
      - many-argument functions reduction"
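
  The async cow preallocation mentioned in the core changes boils down to
  one up-front allocation of all per-chunk contexts plus a shared
  refcount, so later stages cannot fail on memory.  A condensed sketch of
  the pattern (lifted from the cow_file_range_async() rework in the diff
  below, simplified and with error handling trimmed):

    struct async_cow {
            /* Number of chunks in flight; must be first in the struct */
            atomic_t num_chunks;
            struct async_chunk chunks[];
    };

    nofs_flag = memalloc_nofs_save();
    ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
    memalloc_nofs_restore(nofs_flag);
    if (!ctx)
            return -ENOMEM;         /* fail early, not deep in the chain */
    atomic_set(&ctx->num_chunks, num_chunks);
    for (i = 0; i < num_chunks; i++)
            ctx->chunks[i].pending = &ctx->num_chunks;

    /*
     * Each worker drops one reference when it finishes; the last one
     * frees the whole array, since 'pending' points at its first member.
     */
    if (atomic_dec_and_test(async_chunk->pending))
            kvfree(async_chunk->pending);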

* tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits)
  btrfs: Use kvmalloc for allocating compressed path context
  btrfs: Factor out common extent locking code in submit_compressed_extents
  btrfs: Set io_tree only once in submit_compressed_extents
  btrfs: Replace clear_extent_bit with unlock_extent
  btrfs: Make compress_file_range take only struct async_chunk
  btrfs: Remove fs_info from struct async_chunk
  btrfs: Rename async_cow to async_chunk
  btrfs: Preallocate chunks in cow_file_range_async
  btrfs: reserve delalloc metadata differently
  btrfs: track DIO bytes in flight
  btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop
  btrfs: delete unused function btrfs_set_prop_trans
  btrfs: start transaction in xattr_handler_set_prop
  btrfs: drop local copy of inode i_mode
  btrfs: drop old_fsflags in btrfs_ioctl_setflags
  btrfs: modify local copy of btrfs_inode flags
  btrfs: drop useless inode i_flags copy and restore
  btrfs: start transaction in btrfs_ioctl_setflags()
  btrfs: export btrfs_set_prop
  btrfs: refactor btrfs_set_props to validate externally
  ...

fs/btrfs/ctree.h
fs/btrfs/inode.c
fs/btrfs/ref-verify.c
fs/btrfs/super.c
fs/btrfs/tests/free-space-tree-tests.c
include/trace/events/btrfs.h

diff --combined fs/btrfs/ctree.h
@@@ -41,6 -41,7 +41,7 @@@ extern struct kmem_cache *btrfs_bit_rad
  extern struct kmem_cache *btrfs_path_cachep;
  extern struct kmem_cache *btrfs_free_space_cachep;
  struct btrfs_ordered_sum;
+ struct btrfs_ref;
  
  #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
  
@@@ -1015,6 -1016,7 +1016,7 @@@ struct btrfs_fs_info 
        /* used to keep from writing metadata until there is a nice batch */
        struct percpu_counter dirty_metadata_bytes;
        struct percpu_counter delalloc_bytes;
+       struct percpu_counter dio_bytes;
        s32 dirty_metadata_batch;
        s32 delalloc_batch;
  
  
        /* holds configuration and tracking. Protected by qgroup_lock */
        struct rb_root qgroup_tree;
-       struct rb_root qgroup_op_tree;
        spinlock_t qgroup_lock;
-       spinlock_t qgroup_op_lock;
-       atomic_t qgroup_op_seq;
  
        /*
         * used to avoid frequently calling ulist_alloc()/ulist_free()
        struct mutex unused_bg_unpin_mutex;
        struct mutex delete_unused_bgs_mutex;
  
-       /*
-        * Chunks that can't be freed yet (under a trim/discard operation)
-        * and will be latter freed. Protected by fs_info->chunk_mutex.
-        */
-       struct list_head pinned_chunks;
        /* Cached block sizes */
        u32 nodesize;
        u32 sectorsize;
@@@ -1348,6 -1341,12 +1341,12 @@@ struct btrfs_root 
         * manipulation with the read-only status via SUBVOL_SETFLAGS
         */
        int send_in_progress;
+       /*
+        * Number of currently running deduplication operations that have a
+        * destination inode belonging to this root. Protected by the lock
+        * root_item_lock.
+        */
+       int dedupe_in_progress;
        struct btrfs_subvolume_writers *subv_writers;
        atomic_t will_be_snapshotted;
        atomic_t snapshot_force_cow;
@@@ -1540,6 -1539,21 +1539,21 @@@ do 
  
  #define BTRFS_INODE_ROOT_ITEM_INIT    (1 << 31)
  
+ #define BTRFS_INODE_FLAG_MASK                                         \
+       (BTRFS_INODE_NODATASUM |                                        \
+        BTRFS_INODE_NODATACOW |                                        \
+        BTRFS_INODE_READONLY |                                         \
+        BTRFS_INODE_NOCOMPRESS |                                       \
+        BTRFS_INODE_PREALLOC |                                         \
+        BTRFS_INODE_SYNC |                                             \
+        BTRFS_INODE_IMMUTABLE |                                        \
+        BTRFS_INODE_APPEND |                                           \
+        BTRFS_INODE_NODUMP |                                           \
+        BTRFS_INODE_NOATIME |                                          \
+        BTRFS_INODE_DIRSYNC |                                          \
+        BTRFS_INODE_COMPRESS |                                         \
+        BTRFS_INODE_ROOT_ITEM_INIT)
  struct btrfs_map_token {
        const struct extent_buffer *eb;
        char *kaddr;
@@@ -2163,18 -2177,16 +2177,16 @@@ static inline int btrfs_header_flag(con
        return (btrfs_header_flags(eb) & flag) == flag;
  }
  
- static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+ static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
  {
        u64 flags = btrfs_header_flags(eb);
        btrfs_set_header_flags(eb, flags | flag);
-       return (flags & flag) == flag;
  }
  
- static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+ static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
  {
        u64 flags = btrfs_header_flags(eb);
        btrfs_set_header_flags(eb, flags & ~flag);
-       return (flags & flag) == flag;
  }
  
  static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
@@@ -2445,13 -2457,12 +2457,12 @@@ static inline int btrfs_super_csum_size
   * this returns the address of the start of the last item,
   * which is the stop of the leaf data stack
   */
- static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info,
-                                        const struct extent_buffer *leaf)
+ static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
  {
        u32 nr = btrfs_header_nritems(leaf);
  
        if (nr == 0)
-               return BTRFS_LEAF_DATA_SIZE(fs_info);
+               return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
        return btrfs_item_offset_nr(leaf, nr - 1);
  }
  
@@@ -2698,8 -2709,6 +2709,6 @@@ void btrfs_wait_nocow_writers(struct bt
  void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
  int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           unsigned long count);
- int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
-                                unsigned long count, u64 transid, int wait);
  void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
                                  struct btrfs_delayed_ref_root *delayed_refs,
                                  struct btrfs_delayed_ref_head *head);
@@@ -2711,8 -2720,7 +2720,7 @@@ int btrfs_pin_extent(struct btrfs_fs_in
                     u64 bytenr, u64 num, int reserved);
  int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
                                    u64 bytenr, u64 num_bytes);
- int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
-                                struct extent_buffer *eb);
+ int btrfs_exclude_logged_extents(struct extent_buffer *eb);
  int btrfs_cross_ref_exist(struct btrfs_root *root,
                          u64 objectid, u64 offset, u64 bytenr);
  struct btrfs_block_group_cache *btrfs_lookup_block_group(
@@@ -2745,13 -2753,9 +2753,9 @@@ int btrfs_inc_ref(struct btrfs_trans_ha
  int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct extent_buffer *buf, int full_backref);
  int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info,
                                u64 bytenr, u64 num_bytes, u64 flags,
                                int level, int is_data);
- int btrfs_free_extent(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root,
-                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-                     u64 owner, u64 offset);
+ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
  
  int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
                               u64 start, u64 len, int delalloc);
@@@ -2760,15 -2764,11 +2764,11 @@@ int btrfs_free_and_pin_reserved_extent(
  void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
  int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
  int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root,
-                        u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset);
+                        struct btrfs_ref *generic_ref);
  
  int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
- int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
-                                  struct btrfs_fs_info *fs_info);
- int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info);
+ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
+ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
  int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
  int btrfs_free_block_groups(struct btrfs_fs_info *info);
  int btrfs_read_block_groups(struct btrfs_fs_info *info);
@@@ -2936,10 -2936,8 +2936,8 @@@ int btrfs_copy_root(struct btrfs_trans_
                      struct extent_buffer **cow_ret, u64 new_root_objectid);
  int btrfs_block_can_be_shared(struct btrfs_root *root,
                              struct extent_buffer *buf);
- void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-                      u32 data_size);
- void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
-                        struct btrfs_path *path, u32 new_size, int from_end);
+ void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
+ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
  int btrfs_split_item(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root,
                     struct btrfs_path *path,
@@@ -3015,8 -3013,7 +3013,7 @@@ static inline int btrfs_next_item(struc
  {
        return btrfs_next_old_item(root, p, 0);
  }
- int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *leaf);
+ int btrfs_leaf_free_space(struct extent_buffer *leaf);
  int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
                                     struct btrfs_block_rsv *block_rsv,
                                     int update_ref, int for_reloc);
@@@ -3267,7 -3264,6 +3264,7 @@@ void btrfs_evict_inode(struct inode *in
  int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
  struct inode *btrfs_alloc_inode(struct super_block *sb);
  void btrfs_destroy_inode(struct inode *inode);
 +void btrfs_free_inode(struct inode *inode);
  int btrfs_drop_inode(struct inode *inode);
  int __init btrfs_init_cachep(void);
  void __cold btrfs_destroy_cachep(void);
@@@ -3756,8 -3752,7 +3753,7 @@@ int btrfs_scrub_dev(struct btrfs_fs_inf
  void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
  void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
  int btrfs_scrub_cancel(struct btrfs_fs_info *info);
- int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
-                          struct btrfs_device *dev);
+ int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
  int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
                         struct btrfs_scrub_progress *progress);
  static inline void btrfs_init_full_stripe_locks_tree(
@@@ -3806,6 -3801,8 +3802,8 @@@ static inline int btrfs_defrag_cancelle
        return signal_pending(current);
  }
  
+ #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
  /* Sanity test specific functions */
  #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
  void btrfs_test_inode_set_ops(struct inode *inode);
diff --combined fs/btrfs/inode.c
@@@ -28,6 -28,7 +28,7 @@@
  #include <linux/magic.h>
  #include <linux/iversion.h>
  #include <linux/swap.h>
+ #include <linux/sched/mm.h>
  #include <asm/unaligned.h>
  #include "ctree.h"
  #include "disk-io.h"
@@@ -73,17 -74,6 +74,6 @@@ struct kmem_cache *btrfs_trans_handle_c
  struct kmem_cache *btrfs_path_cachep;
  struct kmem_cache *btrfs_free_space_cachep;
  
- #define S_SHIFT 12
- static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
-       [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
-       [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
-       [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
-       [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
-       [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
-       [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
-       [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
- };
  static int btrfs_setsize(struct inode *inode, struct iattr *attr);
  static int btrfs_truncate(struct inode *inode, bool skip_writeback);
  static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
@@@ -366,18 -356,24 +356,24 @@@ struct async_extent 
        struct list_head list;
  };
  
- struct async_cow {
+ struct async_chunk {
        struct inode *inode;
-       struct btrfs_fs_info *fs_info;
        struct page *locked_page;
        u64 start;
        u64 end;
        unsigned int write_flags;
        struct list_head extents;
        struct btrfs_work work;
+       atomic_t *pending;
  };
  
- static noinline int add_async_extent(struct async_cow *cow,
+ struct async_cow {
+       /* Number of chunks in flight; must be first in the structure */
+       atomic_t num_chunks;
+       struct async_chunk chunks[];
+ };
+ static noinline int add_async_extent(struct async_chunk *cow,
                                     u64 start, u64 ram_size,
                                     u64 compressed_size,
                                     struct page **pages,
@@@ -444,14 -440,14 +440,14 @@@ static inline void inode_should_defrag(
   * are written in the same order that the flusher thread sent them
   * down.
   */
- static noinline void compress_file_range(struct inode *inode,
-                                       struct page *locked_page,
-                                       u64 start, u64 end,
-                                       struct async_cow *async_cow,
-                                       int *num_added)
+ static noinline void compress_file_range(struct async_chunk *async_chunk,
+                                        int *num_added)
  {
+       struct inode *inode = async_chunk->inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        u64 blocksize = fs_info->sectorsize;
+       u64 start = async_chunk->start;
+       u64 end = async_chunk->end;
        u64 actual_end;
        int ret = 0;
        struct page **pages = NULL;
@@@ -630,7 -626,7 +626,7 @@@ cont
                         * allocation on disk for these compressed pages, and
                         * will submit them to the elevator.
                         */
-                       add_async_extent(async_cow, start, total_in,
+                       add_async_extent(async_chunk, start, total_in,
                                        total_compressed, pages, nr_pages,
                                        compress_type);
  
@@@ -670,14 -666,14 +666,14 @@@ cleanup_and_bail_uncompressed
         * to our extent and set things up for the async work queue to run
         * cow_file_range to do the normal delalloc dance.
         */
-       if (page_offset(locked_page) >= start &&
-           page_offset(locked_page) <= end)
-               __set_page_dirty_nobuffers(locked_page);
+       if (page_offset(async_chunk->locked_page) >= start &&
+           page_offset(async_chunk->locked_page) <= end)
+               __set_page_dirty_nobuffers(async_chunk->locked_page);
                /* unlocked later on in the async handlers */
  
        if (redirty)
                extent_range_redirty_for_io(inode, start, end);
-       add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+       add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
                         BTRFS_COMPRESS_NONE);
        *num_added += 1;
  
@@@ -713,38 -709,34 +709,34 @@@ static void free_async_extent_pages(str
   * queued.  We walk all the async extents created by compress_file_range
   * and send them down to the disk.
   */
- static noinline void submit_compressed_extents(struct async_cow *async_cow)
+ static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
  {
-       struct inode *inode = async_cow->inode;
+       struct inode *inode = async_chunk->inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct async_extent *async_extent;
        u64 alloc_hint = 0;
        struct btrfs_key ins;
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_io_tree *io_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        int ret = 0;
  
  again:
-       while (!list_empty(&async_cow->extents)) {
-               async_extent = list_entry(async_cow->extents.next,
+       while (!list_empty(&async_chunk->extents)) {
+               async_extent = list_entry(async_chunk->extents.next,
                                          struct async_extent, list);
                list_del(&async_extent->list);
  
-               io_tree = &BTRFS_I(inode)->io_tree;
  retry:
+               lock_extent(io_tree, async_extent->start,
+                           async_extent->start + async_extent->ram_size - 1);
                /* did the compression code fall back to uncompressed IO? */
                if (!async_extent->pages) {
                        int page_started = 0;
                        unsigned long nr_written = 0;
  
-                       lock_extent(io_tree, async_extent->start,
-                                        async_extent->start +
-                                        async_extent->ram_size - 1);
                        /* allocate blocks */
-                       ret = cow_file_range(inode, async_cow->locked_page,
+                       ret = cow_file_range(inode, async_chunk->locked_page,
                                             async_extent->start,
                                             async_extent->start +
                                             async_extent->ram_size - 1,
                                                  async_extent->ram_size - 1,
                                                  WB_SYNC_ALL);
                        else if (ret)
-                               unlock_page(async_cow->locked_page);
+                               unlock_page(async_chunk->locked_page);
                        kfree(async_extent);
                        cond_resched();
                        continue;
                }
  
-               lock_extent(io_tree, async_extent->start,
-                           async_extent->start + async_extent->ram_size - 1);
                ret = btrfs_reserve_extent(root, async_extent->ram_size,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                    ins.objectid,
                                    ins.offset, async_extent->pages,
                                    async_extent->nr_pages,
-                                   async_cow->write_flags)) {
+                                   async_chunk->write_flags)) {
                        struct page *p = async_extent->pages[0];
                        const u64 start = async_extent->start;
                        const u64 end = start + async_extent->ram_size - 1;
@@@ -1132,16 -1121,15 +1121,15 @@@ out_unlock
   */
  static noinline void async_cow_start(struct btrfs_work *work)
  {
-       struct async_cow *async_cow;
+       struct async_chunk *async_chunk;
        int num_added = 0;
-       async_cow = container_of(work, struct async_cow, work);
  
-       compress_file_range(async_cow->inode, async_cow->locked_page,
-                           async_cow->start, async_cow->end, async_cow,
-                           &num_added);
+       async_chunk = container_of(work, struct async_chunk, work);
+       compress_file_range(async_chunk, &num_added);
        if (num_added == 0) {
-               btrfs_add_delayed_iput(async_cow->inode);
-               async_cow->inode = NULL;
+               btrfs_add_delayed_iput(async_chunk->inode);
+               async_chunk->inode = NULL;
        }
  }
  
   */
  static noinline void async_cow_submit(struct btrfs_work *work)
  {
-       struct btrfs_fs_info *fs_info;
-       struct async_cow *async_cow;
+       struct async_chunk *async_chunk = container_of(work, struct async_chunk,
+                                                    work);
+       struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
        unsigned long nr_pages;
  
-       async_cow = container_of(work, struct async_cow, work);
-       fs_info = async_cow->fs_info;
-       nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+       nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
                PAGE_SHIFT;
  
        /* atomic_sub_return implies a barrier */
                cond_wake_up_nomb(&fs_info->async_submit_wait);
  
        /*
-        * ->inode could be NULL if async_cow_start has failed to compress,
+        * ->inode could be NULL if async_chunk_start has failed to compress,
         * in which case we don't have anything to submit, yet we need to
         * always adjust ->async_delalloc_pages as its paired with the init
         * happening in cow_file_range_async
         */
-       if (async_cow->inode)
-               submit_compressed_extents(async_cow);
+       if (async_chunk->inode)
+               submit_compressed_extents(async_chunk);
  }
  
  static noinline void async_cow_free(struct btrfs_work *work)
  {
-       struct async_cow *async_cow;
-       async_cow = container_of(work, struct async_cow, work);
-       if (async_cow->inode)
-               btrfs_add_delayed_iput(async_cow->inode);
-       kfree(async_cow);
+       struct async_chunk *async_chunk;
+       async_chunk = container_of(work, struct async_chunk, work);
+       if (async_chunk->inode)
+               btrfs_add_delayed_iput(async_chunk->inode);
+       /*
+        * Since the pointer to 'pending' is at the beginning of the array of
+        * async_chunk's, freeing it ensures the whole array has been freed.
+        */
+       if (atomic_dec_and_test(async_chunk->pending))
+               kvfree(async_chunk->pending);
  }
  
  static int cow_file_range_async(struct inode *inode, struct page *locked_page,
                                unsigned int write_flags)
  {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct async_cow *async_cow;
+       struct async_cow *ctx;
+       struct async_chunk *async_chunk;
        unsigned long nr_pages;
        u64 cur_end;
+       u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
+       int i;
+       bool should_compress;
+       unsigned nofs_flag;
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+           !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
+               num_chunks = 1;
+               should_compress = false;
+       } else {
+               should_compress = true;
+       }
+       nofs_flag = memalloc_nofs_save();
+       ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
+       if (!ctx) {
+               unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
+                       EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+                       EXTENT_DO_ACCOUNTING;
+               unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                       PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
+                       PAGE_SET_ERROR;
+               extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
+                                            clear_bits, page_ops);
+               return -ENOMEM;
+       }
+       async_chunk = ctx->chunks;
+       atomic_set(&ctx->num_chunks, num_chunks);
+       for (i = 0; i < num_chunks; i++) {
+               if (should_compress)
+                       cur_end = min(end, start + SZ_512K - 1);
+               else
+                       cur_end = end;
  
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
-                        1, 0, NULL);
-       while (start < end) {
-               async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
-               BUG_ON(!async_cow); /* -ENOMEM */
                /*
                 * igrab is called higher up in the call chain, take only the
                 * lightweight reference for the callback lifetime
                 */
                ihold(inode);
-               async_cow->inode = inode;
-               async_cow->fs_info = fs_info;
-               async_cow->locked_page = locked_page;
-               async_cow->start = start;
-               async_cow->write_flags = write_flags;
-               if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
-                   !btrfs_test_opt(fs_info, FORCE_COMPRESS))
-                       cur_end = end;
-               else
-                       cur_end = min(end, start + SZ_512K - 1);
-               async_cow->end = cur_end;
-               INIT_LIST_HEAD(&async_cow->extents);
-               btrfs_init_work(&async_cow->work,
+               async_chunk[i].pending = &ctx->num_chunks;
+               async_chunk[i].inode = inode;
+               async_chunk[i].start = start;
+               async_chunk[i].end = cur_end;
+               async_chunk[i].locked_page = locked_page;
+               async_chunk[i].write_flags = write_flags;
+               INIT_LIST_HEAD(&async_chunk[i].extents);
+               btrfs_init_work(&async_chunk[i].work,
                                btrfs_delalloc_helper,
                                async_cow_start, async_cow_submit,
                                async_cow_free);
  
-               nr_pages = (cur_end - start + PAGE_SIZE) >>
-                       PAGE_SHIFT;
+               nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
                atomic_add(nr_pages, &fs_info->async_delalloc_pages);
  
-               btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
+               btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
  
                *nr_written += nr_pages;
                start = cur_end + 1;
@@@ -1451,7 -1471,7 +1471,7 @@@ next_slot
                        extent_end = ALIGN(extent_end,
                                           fs_info->sectorsize);
                } else {
-                       BUG_ON(1);
+                       BUG();
                }
  out_check:
                if (extent_end <= start) {
@@@ -1964,11 -1984,11 +1984,11 @@@ static blk_status_t btrfs_submit_bio_st
   *
   *    c-3) otherwise:                 async submit
   */
- static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
-                                int mirror_num, unsigned long bio_flags,
-                                u64 bio_offset)
+ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+                                         int mirror_num,
+                                         unsigned long bio_flags)
  {
-       struct inode *inode = private_data;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
                        goto mapit;
                /* we're doing a write, do the async checksumming */
                ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
-                                         bio_offset, inode,
-                                         btrfs_submit_bio_start);
+                                         0, inode, btrfs_submit_bio_start);
                goto out;
        } else if (!skip_sum) {
                ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@@ -2531,6 -2550,7 +2550,7 @@@ static noinline int relink_extent_backr
        struct btrfs_file_extent_item *item;
        struct btrfs_ordered_extent *ordered;
        struct btrfs_trans_handle *trans;
+       struct btrfs_ref ref = { 0 };
        struct btrfs_root *root;
        struct btrfs_key key;
        struct extent_buffer *leaf;
@@@ -2701,10 -2721,11 +2721,11 @@@ again
        inode_add_bytes(inode, len);
        btrfs_release_path(path);
  
-       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
-                       new->disk_len, 0,
-                       backref->root_id, backref->inum,
-                       new->file_pos); /* start - extent_offset */
+       btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
+                              new->disk_len, 0);
+       btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
+                           new->file_pos);  /* start - extent_offset */
+       ret = btrfs_inc_extent_ref(trans, &ref);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out_free_path;
@@@ -3699,21 -3720,6 +3720,6 @@@ cache_index
         * inode is not a directory, logging its parent unnecessarily.
         */
        BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
-       /*
-        * Similar reasoning for last_link_trans, needs to be set otherwise
-        * for a case like the following:
-        *
-        * mkdir A
-        * touch foo
-        * ln foo A/bar
-        * echo 2 > /proc/sys/vm/drop_caches
-        * fsync foo
-        * <power failure>
-        *
-        * Would result in link bar and directory A not existing after the power
-        * failure.
-        */
-       BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;
  
        path->slots[0]++;
        if (inode->i_nlink != 1 ||
@@@ -4679,7 -4685,7 +4685,7 @@@ search_again
  
                                btrfs_set_file_extent_ram_bytes(leaf, fi, size);
                                size = btrfs_file_extent_calc_inline_size(size);
-                               btrfs_truncate_item(root->fs_info, path, size, 1);
+                               btrfs_truncate_item(path, size, 1);
                        } else if (!del_item) {
                                /*
                                 * We have to bail so the last_size is set to
@@@ -4718,12 -4724,17 +4724,17 @@@ delete
                if (found_extent &&
                    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
                     root == fs_info->tree_root)) {
+                       struct btrfs_ref ref = { 0 };
                        btrfs_set_path_blocking(path);
                        bytes_deleted += extent_num_bytes;
-                       ret = btrfs_free_extent(trans, root, extent_start,
-                                               extent_num_bytes, 0,
-                                               btrfs_header_owner(leaf),
-                                               ino, extent_offset);
+                       btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
+                                       extent_start, extent_num_bytes, 0);
+                       ref.real_root = root->root_key.objectid;
+                       btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+                                       ino, extent_offset);
+                       ret = btrfs_free_extent(trans, &ref);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                break;
@@@ -5448,12 -5459,14 +5459,14 @@@ no_delete
  }
  
  /*
-  * this returns the key found in the dir entry in the location pointer.
+  * Return the key found in the dir entry in the location pointer, fill @type
+  * with BTRFS_FT_*, and return 0.
+  *
   * If no dir entries were found, returns -ENOENT.
   * If found a corrupted location in dir entry, returns -EUCLEAN.
   */
  static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
-                              struct btrfs_key *location)
+                              struct btrfs_key *location, u8 *type)
  {
        const char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
                           __func__, name, btrfs_ino(BTRFS_I(dir)),
                           location->objectid, location->type, location->offset);
        }
+       if (!ret)
+               *type = btrfs_dir_type(path->nodes[0], di);
  out:
        btrfs_free_path(path);
        return ret;
@@@ -5719,6 -5734,24 +5734,24 @@@ static struct inode *new_simple_dir(str
        return inode;
  }
  
+ static inline u8 btrfs_inode_type(struct inode *inode)
+ {
+       /*
+        * Compile-time asserts that generic FT_* types still match
+        * BTRFS_FT_* types
+        */
+       BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
+       BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
+       BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
+       BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
+       BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
+       BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
+       BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
+       BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
+       return fs_umode_to_ftype(inode->i_mode);
+ }
  struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
  {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_root *sub_root = root;
        struct btrfs_key location;
+       u8 di_type = 0;
        int index;
        int ret = 0;
  
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
  
-       ret = btrfs_inode_by_name(dir, dentry, &location);
+       ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
        if (ret < 0)
                return ERR_PTR(ret);
  
        if (location.type == BTRFS_INODE_ITEM_KEY) {
                inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+               if (IS_ERR(inode))
+                       return inode;
+               /* Do extra check against inode mode with di_type */
+               if (btrfs_inode_type(inode) != di_type) {
+                       btrfs_crit(fs_info,
+ "inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+                                 inode->i_mode, btrfs_inode_type(inode),
+                                 di_type);
+                       iput(inode);
+                       return ERR_PTR(-EUCLEAN);
+               }
                return inode;
        }
  
@@@ -5797,10 -5843,6 +5843,6 @@@ static struct dentry *btrfs_lookup(stru
        return d_splice_alias(inode, dentry);
  }
  
- unsigned char btrfs_filetype_table[] = {
-       DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
- };
  /*
   * All this infrastructure exists because dir_emit can fault, and we are holding
   * the tree lock when doing readdir.  For now just allocate a buffer and copy
@@@ -5939,7 -5981,7 +5981,7 @@@ again
                name_ptr = (char *)(entry + 1);
                read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
                                   name_len);
-               put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+               put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
                                &entry->type);
                btrfs_dir_item_key_to_cpu(leaf, di, &location);
                put_unaligned(location.objectid, &entry->ino);
@@@ -6342,11 -6384,6 +6384,6 @@@ fail
        return ERR_PTR(ret);
  }
  
- static inline u8 btrfs_inode_type(struct inode *inode)
- {
-       return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
- }
  /*
   * utility function to add 'inode' into 'parent_inode' with
   * a give name and a given sequence number.
@@@ -6634,7 -6671,6 +6671,6 @@@ static int btrfs_link(struct dentry *ol
                        if (err)
                                goto fail;
                }
-               BTRFS_I(inode)->last_link_trans = trans->transid;
                d_instantiate(dentry, inode);
                ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
                                         true, NULL);
@@@ -6783,7 -6819,7 +6819,7 @@@ struct extent_map *btrfs_get_extent(str
        u64 extent_start = 0;
        u64 extent_end = 0;
        u64 objectid = btrfs_ino(inode);
 -      u8 extent_type;
 +      int extent_type = -1;
        struct btrfs_path *path = NULL;
        struct btrfs_root *root = inode->root;
        struct btrfs_file_extent_item *item;
        extent_start = found_key.offset;
        if (extent_type == BTRFS_FILE_EXTENT_REG ||
            extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+               /* Only regular file could have regular/prealloc extent */
+               if (!S_ISREG(inode->vfs_inode.i_mode)) {
+                       ret = -EUCLEAN;
+                       btrfs_crit(fs_info,
+               "regular/prealloc extent found for non-regular inode %llu",
+                                  btrfs_ino(inode));
+                       goto out;
+               }
                extent_end = extent_start +
                       btrfs_file_extent_num_bytes(leaf, item);
  
@@@ -9163,7 -9207,6 +9207,6 @@@ struct inode *btrfs_alloc_inode(struct 
        ei->index_cnt = (u64)-1;
        ei->dir_index = 0;
        ei->last_unlink_trans = 0;
-       ei->last_link_trans = 0;
        ei->last_log_commit = 0;
  
        spin_lock_init(&ei->lock);
  
        inode = &ei->vfs_inode;
        extent_map_tree_init(&ei->extent_tree);
-       extent_io_tree_init(&ei->io_tree, inode);
-       extent_io_tree_init(&ei->io_failure_tree, inode);
-       ei->io_tree.track_uptodate = 1;
-       ei->io_failure_tree.track_uptodate = 1;
+       extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
+       extent_io_tree_init(fs_info, &ei->io_failure_tree,
+                           IO_TREE_INODE_IO_FAILURE, inode);
+       ei->io_tree.track_uptodate = true;
+       ei->io_failure_tree.track_uptodate = true;
        atomic_set(&ei->sync_writers, 0);
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
@@@ -9206,8 -9250,9 +9250,8 @@@ void btrfs_test_destroy_inode(struct in
  }
  #endif
  
 -static void btrfs_i_callback(struct rcu_head *head)
 +void btrfs_free_inode(struct inode *inode)
  {
 -      struct inode *inode = container_of(head, struct inode, i_rcu);
        kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
  }
  
@@@ -9233,7 -9278,7 +9277,7 @@@ void btrfs_destroy_inode(struct inode *
         * created.
         */
        if (!root)
 -              goto free;
 +              return;
  
        while (1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
        btrfs_qgroup_check_reserved_leak(inode);
        inode_tree_del(inode);
        btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
 -free:
 -      call_rcu(&inode->i_rcu, btrfs_i_callback);
  }
  
  int btrfs_drop_inode(struct inode *inode)
@@@ -9427,7 -9474,7 +9471,7 @@@ static int btrfs_rename_exchange(struc
        /* Reference for the source. */
        if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
                /* force full log commit if subvolume involved. */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
        } else {
                btrfs_pin_log_trans(root);
                root_log_pinned = true;
        /* And now for the dest. */
        if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
                /* force full log commit if subvolume involved. */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
        } else {
                btrfs_pin_log_trans(dest);
                dest_log_pinned = true;
@@@ -9580,7 -9627,7 +9624,7 @@@ out_fail
                    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
                    (new_inode &&
                     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
-                       btrfs_set_log_full_commit(fs_info, trans);
+                       btrfs_set_log_full_commit(trans);
  
                if (root_log_pinned) {
                        btrfs_end_log_trans(root);
@@@ -9766,7 -9813,7 +9810,7 @@@ static int btrfs_rename(struct inode *o
        BTRFS_I(old_inode)->dir_index = 0ULL;
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                /* force full log commit if subvolume involved. */
-               btrfs_set_log_full_commit(fs_info, trans);
+               btrfs_set_log_full_commit(trans);
        } else {
                btrfs_pin_log_trans(root);
                log_pinned = true;
@@@ -9887,7 -9934,7 +9931,7 @@@ out_fail
                    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
                    (new_inode &&
                     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
-                       btrfs_set_log_full_commit(fs_info, trans);
+                       btrfs_set_log_full_commit(trans);
  
                btrfs_end_log_trans(root);
                log_pinned = false;
@@@ -10190,7 -10237,6 +10234,6 @@@ static int btrfs_symlink(struct inode *
  
        inode->i_op = &btrfs_symlink_inode_operations;
        inode_nohighmem(inode);
-       inode->i_mapping->a_ops = &btrfs_aops;
        inode_set_bytes(inode, name_len);
        btrfs_i_size_write(BTRFS_I(inode), name_len);
        err = btrfs_update_inode(trans, root, inode);
diff --combined fs/btrfs/ref-verify.c
@@@ -205,17 -205,28 +205,17 @@@ static struct root_entry *lookup_root_e
  #ifdef CONFIG_STACKTRACE
  static void __save_stack_trace(struct ref_action *ra)
  {
 -      struct stack_trace stack_trace;
 -
 -      stack_trace.max_entries = MAX_TRACE;
 -      stack_trace.nr_entries = 0;
 -      stack_trace.entries = ra->trace;
 -      stack_trace.skip = 2;
 -      save_stack_trace(&stack_trace);
 -      ra->trace_len = stack_trace.nr_entries;
 +      ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
  }
  
  static void __print_stack_trace(struct btrfs_fs_info *fs_info,
                                struct ref_action *ra)
  {
 -      struct stack_trace trace;
 -
        if (ra->trace_len == 0) {
                btrfs_err(fs_info, "  ref-verify: no stacktrace");
                return;
        }
 -      trace.nr_entries = ra->trace_len;
 -      trace.entries = ra->trace;
 -      print_stack_trace(&trace, 2);
 +      stack_trace_print(ra->trace, ra->trace_len, 2);
  }
  #else
  static void inline __save_stack_trace(struct ref_action *ra)
@@@ -659,36 -670,43 +659,43 @@@ static void dump_block_entry(struct btr
  
  /*
   * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
-  * @root: the root we are making this modification from.
-  * @bytenr: the bytenr we are modifying.
-  * @num_bytes: number of bytes.
-  * @parent: the parent bytenr.
-  * @ref_root: the original root owner of the bytenr.
-  * @owner: level in the case of metadata, inode in the case of data.
-  * @offset: 0 for metadata, file offset for data.
-  * @action: the action that we are doing, this is the same as the delayed ref
-  *    action.
   *
   * This will add an action item to the given bytenr and do sanity checks to make
   * sure we haven't messed something up.  If we are making a new allocation and
   * this block entry has history we will delete all previous actions as long as
   * our sanity checks pass as they are no longer needed.
   */
- int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
-                      u64 parent, u64 ref_root, u64 owner, u64 offset,
-                      int action)
+ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
+                      struct btrfs_ref *generic_ref)
  {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct ref_entry *ref = NULL, *exist;
        struct ref_action *ra = NULL;
        struct block_entry *be = NULL;
        struct root_entry *re = NULL;
+       int action = generic_ref->action;
        int ret = 0;
-       bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+       bool metadata;
+       u64 bytenr = generic_ref->bytenr;
+       u64 num_bytes = generic_ref->len;
+       u64 parent = generic_ref->parent;
+       u64 ref_root;
+       u64 owner;
+       u64 offset;
  
-       if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
+       if (!btrfs_test_opt(fs_info, REF_VERIFY))
                return 0;
  
+       if (generic_ref->type == BTRFS_REF_METADATA) {
+               ref_root = generic_ref->tree_ref.root;
+               owner = generic_ref->tree_ref.level;
+               offset = 0;
+       } else {
+               ref_root = generic_ref->data_ref.ref_root;
+               owner = generic_ref->data_ref.ino;
+               offset = generic_ref->data_ref.offset;
+       }
+       metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
        ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
        ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
        if (!ra || !ref) {
  
        INIT_LIST_HEAD(&ra->list);
        ra->action = action;
-       ra->root = root->root_key.objectid;
+       ra->root = generic_ref->real_root;
  
        /*
         * This is an allocation, preallocate the block_entry in case we haven't
                 * is and the new root objectid, so let's not treat the passed
                 * in root as if it really has a ref for this bytenr.
                 */
-               be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
+               be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
                if (IS_ERR(be)) {
                        kfree(ra);
                        ret = PTR_ERR(be);
                         * one we want to lookup below when we modify the
                         * re->num_refs.
                         */
-                       ref_root = root->root_key.objectid;
-                       re->root_objectid = root->root_key.objectid;
+                       ref_root = generic_ref->real_root;
+                       re->root_objectid = generic_ref->real_root;
                        re->num_refs = 0;
                }
  
-               spin_lock(&root->fs_info->ref_verify_lock);
-               be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
+               spin_lock(&fs_info->ref_verify_lock);
+               be = lookup_block_entry(&fs_info->block_tree, bytenr);
                if (!be) {
                        btrfs_err(fs_info,
  "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
                         * didn't think of some other corner case.
                         */
                        btrfs_err(fs_info, "failed to find root %llu for %llu",
-                                 root->root_key.objectid, be->bytenr);
+                                 generic_ref->real_root, be->bytenr);
                        dump_block_entry(fs_info, be);
                        dump_ref_action(fs_info, ra);
                        kfree(ra);
        list_add_tail(&ra->list, &be->actions);
        ret = 0;
  out_unlock:
-       spin_unlock(&root->fs_info->ref_verify_lock);
+       spin_unlock(&fs_info->ref_verify_lock);
  out:
        if (ret)
                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
diff --combined fs/btrfs/super.c
@@@ -1400,7 -1400,7 +1400,7 @@@ static inline int is_subvolume_inode(st
  }
  
  static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
-                                  const char *device_name, struct vfsmount *mnt)
+                                  struct vfsmount *mnt)
  {
        struct dentry *root;
        int ret;
@@@ -1649,7 -1649,7 +1649,7 @@@ static struct dentry *btrfs_mount(struc
        }
  
        /* mount_subvol() will free subvol_name and mnt_root */
-       root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
+       root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
  
  out:
        return root;
@@@ -2298,7 -2298,6 +2298,7 @@@ static const struct super_operations bt
        .show_devname   = btrfs_show_devname,
        .alloc_inode    = btrfs_alloc_inode,
        .destroy_inode  = btrfs_destroy_inode,
 +      .free_inode     = btrfs_free_inode,
        .statfs         = btrfs_statfs,
        .remount_fs     = btrfs_remount,
        .freeze_fs      = btrfs_freeze,
@@@ -30,7 -30,7 +30,7 @@@ static int __check_free_space_extents(s
        unsigned int i;
        int ret;
  
-       info = search_free_space_info(trans, fs_info, cache, path, 0);
+       info = search_free_space_info(trans, cache, path, 0);
        if (IS_ERR(info)) {
                test_err("could not find free space info");
                ret = PTR_ERR(info);
@@@ -115,7 -115,7 +115,7 @@@ static int check_free_space_extents(str
        u32 flags;
        int ret;
  
-       info = search_free_space_info(trans, fs_info, cache, path, 0);
+       info = search_free_space_info(trans, cache, path, 0);
        if (IS_ERR(info)) {
                test_err("could not find free space info");
                btrfs_release_path(path);
@@@ -444,14 -444,14 +444,14 @@@ static int run_test(test_func_t test_fu
  
        fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
        if (!fs_info) {
-               test_err("couldn't allocate dummy fs info");
+               test_std_err(TEST_ALLOC_FS_INFO);
                ret = -ENOMEM;
                goto out;
        }
  
        root = btrfs_alloc_dummy_root(fs_info);
        if (IS_ERR(root)) {
-               test_err("couldn't allocate dummy root");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = PTR_ERR(root);
                goto out;
        }
  
        root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
        if (!root->node) {
-               test_err("couldn't allocate dummy buffer");
+               test_std_err(TEST_ALLOC_EXTENT_BUFFER);
                ret = -ENOMEM;
                goto out;
        }
  
        cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
        if (!cache) {
-               test_err("couldn't allocate dummy block group cache");
+               test_std_err(TEST_ALLOC_BLOCK_GROUP);
                ret = -ENOMEM;
                goto out;
        }
  
        path = btrfs_alloc_path();
        if (!path) {
-               test_err("couldn't allocate path");
+               test_std_err(TEST_ALLOC_ROOT);
                ret = -ENOMEM;
                goto out;
        }
@@@ -539,7 -539,7 +539,7 @@@ static int run_test_both_formats(test_f
        ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
        if (ret) {
                test_err(
 -      "%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
 +      "%ps failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
                         test_func, sectorsize, nodesize, alignment);
                test_ret = ret;
        }
        ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
        if (ret) {
                test_err(
 -      "%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
 +      "%ps failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
                         test_func, sectorsize, nodesize, alignment);
                test_ret = ret;
        }
@@@ -27,6 -27,7 +27,7 @@@ struct btrfs_work
  struct __btrfs_workqueue;
  struct btrfs_qgroup_extent_record;
  struct btrfs_qgroup;
+ struct extent_io_tree;
  struct prelim_ref;
  
  TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
@@@ -77,6 -78,17 +78,17 @@@ TRACE_DEFINE_ENUM(COMMIT_TRANS)
                { BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" },    \
                { BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
  
+ #define show_extent_io_tree_owner(owner)                                     \
+       __print_symbolic(owner,                                                \
+               { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" },          \
+               { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" },          \
+               { IO_TREE_INODE_IO,               "INODE_IO" },                \
+               { IO_TREE_INODE_IO_FAILURE,       "INODE_IO_FAILURE" },        \
+               { IO_TREE_RELOC_BLOCKS,           "RELOC_BLOCKS" },            \
+               { IO_TREE_TRANS_DIRTY_PAGES,      "TRANS_DIRTY_PAGES" },       \
+               { IO_TREE_ROOT_DIRTY_LOG_PAGES,   "ROOT_DIRTY_LOG_PAGES" },    \
+               { IO_TREE_SELFTEST,               "SELFTEST" })
  #define BTRFS_GROUP_FLAGS     \
        { BTRFS_BLOCK_GROUP_DATA,       "DATA"},        \
        { BTRFS_BLOCK_GROUP_SYSTEM,     "SYSTEM"},      \
        { BTRFS_BLOCK_GROUP_RAID5,      "RAID5"},       \
        { BTRFS_BLOCK_GROUP_RAID6,      "RAID6"}
  
+ #define EXTENT_FLAGS                                          \
+       { EXTENT_DIRTY,                 "DIRTY"},               \
+       { EXTENT_UPTODATE,              "UPTODATE"},            \
+       { EXTENT_LOCKED,                "LOCKED"},              \
+       { EXTENT_NEW,                   "NEW"},                 \
+       { EXTENT_DELALLOC,              "DELALLOC"},            \
+       { EXTENT_DEFRAG,                "DEFRAG"},              \
+       { EXTENT_BOUNDARY,              "BOUNDARY"},            \
+       { EXTENT_NODATASUM,             "NODATASUM"},           \
+       { EXTENT_CLEAR_META_RESV,       "CLEAR_META_RESV"},     \
+       { EXTENT_NEED_WAIT,             "NEED_WAIT"},           \
+       { EXTENT_DAMAGED,               "DAMAGED"},             \
+       { EXTENT_NORESERVE,             "NORESERVE"},           \
+       { EXTENT_QGROUP_RESERVED,       "QGROUP_RESERVED"},     \
+       { EXTENT_CLEAR_DATA_RESV,       "CLEAR_DATA_RESV"},     \
+       { EXTENT_DELALLOC_NEW,          "DELALLOC_NEW"}
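
[Editor's note] EXTENT_FLAGS is consumed by __print_flags(), which renders a bitmask as a "|"-separated list of names in the trace output. A self-contained sketch of that decoding; the bit positions below are stand-ins for the example and do not match the kernel's EXTENT_* definitions.

#include <stdio.h>

#define EX_DIRTY    (1U << 0)	/* illustrative bits, not the kernel layout */
#define EX_LOCKED   (1U << 1)
#define EX_DELALLOC (1U << 2)

static void print_flags(unsigned bits)
{
	static const struct { unsigned bit; const char *name; } table[] = {
		{ EX_DIRTY,    "DIRTY"    },
		{ EX_LOCKED,   "LOCKED"   },
		{ EX_DELALLOC, "DELALLOC" },
	};
	const char *sep = "";

	for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		if (bits & table[i].bit) {
			printf("%s%s", sep, table[i].name);
			sep = "|";
		}
	}
	printf("\n");
}

int main(void)
{
	print_flags(EX_DIRTY | EX_DELALLOC);	/* prints "DIRTY|DELALLOC" */
	return 0;
}
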
+
  #define BTRFS_FSID_SIZE 16
  #define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_FSID_SIZE)
  
  #define TP_fast_assign_fsid(fs_info)                                  \
-       memcpy(__entry->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)
+ ({                                                                    \
+       if (fs_info)                                                    \
+               memcpy(__entry->fsid, fs_info->fs_devices->fsid,        \
+                      BTRFS_FSID_SIZE);                                \
+       else                                                            \
+               memset(__entry->fsid, 0, BTRFS_FSID_SIZE);              \
+ })
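
[Editor's note] The reworked TP_fast_assign_fsid tolerates a NULL fs_info, which the new extent_io_tree events can plausibly see (for instance from selftest io trees not attached to a real filesystem), recording an all-zero fsid in that case. The same guard pattern in a standalone sketch, with the struct layout simplified:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FSID_SIZE 16

struct fs_devices { uint8_t fsid[FSID_SIZE]; };
struct fs_info    { struct fs_devices *fs_devices; };

/* Copy the fsid if we have one; otherwise record all zeros. */
static void capture_fsid(uint8_t dst[FSID_SIZE], const struct fs_info *info)
{
	if (info)
		memcpy(dst, info->fs_devices->fsid, FSID_SIZE);
	else
		memset(dst, 0, FSID_SIZE);
}

int main(void)
{
	uint8_t out[FSID_SIZE];

	capture_fsid(out, NULL);	/* safe: falls back to zeros */
	printf("first byte: %u\n", out[0]);
	return 0;
}
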
  
  #define TP_STRUCT__entry_btrfs(args...)                                       \
        TP_STRUCT__entry(                                               \
@@@ -1345,7 -1380,7 +1380,7 @@@ DECLARE_EVENT_CLASS(btrfs__work
                __entry->normal_work    = &work->normal_work;
        ),
  
 -      TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%pf ordered_func=%p "
 +      TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%ps ordered_func=%p "
                  "ordered_free=%p",
                  __entry->work, __entry->normal_work, __entry->wq,
                   __entry->func, __entry->ordered_func, __entry->ordered_free)
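
[Editor's note] The %pf -> %ps conversions in this hunk (and in the selftest messages earlier) track printk's move away from the old %pf/%pF function-pointer specifiers toward %ps/%pS, which resolve the symbol name from a plain function pointer and also behave correctly on function-descriptor architectures such as ia64 and ppc64. A minimal module sketch (hypothetical name) showing the specifier:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init ps_demo_init(void)
{
	/* %ps resolves a function pointer to its symbol name */
	pr_info("ps_demo: loaded via %ps\n", ps_demo_init);
	return 0;
}

static void __exit ps_demo_exit(void)
{
}

module_init(ps_demo_init);
module_exit(ps_demo_exit);
MODULE_LICENSE("GPL");
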
@@@ -1850,6 -1885,212 +1885,212 @@@ DEFINE_EVENT(btrfs__block_group, btrfs_
        TP_ARGS(bg_cache)
  );
  
+ TRACE_EVENT(btrfs_set_extent_bit,
+       TP_PROTO(const struct extent_io_tree *tree,
+                u64 start, u64 len, unsigned set_bits),
+       TP_ARGS(tree, start, len, set_bits),
+       TP_STRUCT__entry_btrfs(
+               __field(        unsigned,       owner   )
+               __field(        u64,            ino     )
+               __field(        u64,            rootid  )
+               __field(        u64,            start   )
+               __field(        u64,            len     )
+               __field(        unsigned,       set_bits)
+       ),
+       TP_fast_assign_btrfs(tree->fs_info,
+               __entry->owner = tree->owner;
+               if (tree->private_data) {
+                       struct inode *inode = tree->private_data;
+                       __entry->ino    = btrfs_ino(BTRFS_I(inode));
+                       __entry->rootid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+               } else {
+                       __entry->ino    = 0;
+                       __entry->rootid = 0;
+               }
+               __entry->start          = start;
+               __entry->len            = len;
+               __entry->set_bits       = set_bits;
+       ),
+       TP_printk_btrfs(
+               "io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s",
+               show_extent_io_tree_owner(__entry->owner), __entry->ino,
+               __entry->rootid, __entry->start, __entry->len,
+               __print_flags(__entry->set_bits, "|", EXTENT_FLAGS))
+ );
+
+ TRACE_EVENT(btrfs_clear_extent_bit,
+       TP_PROTO(const struct extent_io_tree *tree,
+                u64 start, u64 len, unsigned clear_bits),
+       TP_ARGS(tree, start, len, clear_bits),
+       TP_STRUCT__entry_btrfs(
+               __field(        unsigned,       owner   )
+               __field(        u64,            ino     )
+               __field(        u64,            rootid  )
+               __field(        u64,            start   )
+               __field(        u64,            len     )
+               __field(        unsigned,       clear_bits)
+       ),
+       TP_fast_assign_btrfs(tree->fs_info,
+               __entry->owner = tree->owner;
+               if (tree->private_data) {
+                       struct inode *inode = tree->private_data;
+                       __entry->ino    = btrfs_ino(BTRFS_I(inode));
+                       __entry->rootid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+               } else {
+                       __entry->ino    = 0;
+                       __entry->rootid = 0;
+               }
+               __entry->start          = start;
+               __entry->len            = len;
+               __entry->clear_bits     = clear_bits;
+       ),
+       TP_printk_btrfs(
+               "io_tree=%s ino=%llu root=%llu start=%llu len=%llu clear_bits=%s",
+               show_extent_io_tree_owner(__entry->owner), __entry->ino,
+               __entry->rootid, __entry->start, __entry->len,
+               __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
+ );
+
+ TRACE_EVENT(btrfs_convert_extent_bit,
+       TP_PROTO(const struct extent_io_tree *tree,
+                u64 start, u64 len, unsigned set_bits, unsigned clear_bits),
+       TP_ARGS(tree, start, len, set_bits, clear_bits),
+       TP_STRUCT__entry_btrfs(
+               __field(        unsigned,       owner   )
+               __field(        u64,            ino     )
+               __field(        u64,            rootid  )
+               __field(        u64,            start   )
+               __field(        u64,            len     )
+               __field(        unsigned,       set_bits)
+               __field(        unsigned,       clear_bits)
+       ),
+       TP_fast_assign_btrfs(tree->fs_info,
+               __entry->owner = tree->owner;
+               if (tree->private_data) {
+                       struct inode *inode = tree->private_data;
+                       __entry->ino    = btrfs_ino(BTRFS_I(inode));
+                       __entry->rootid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+               } else {
+                       __entry->ino    = 0;
+                       __entry->rootid = 0;
+               }
+               __entry->start          = start;
+               __entry->len            = len;
+               __entry->set_bits       = set_bits;
+               __entry->clear_bits     = clear_bits;
+       ),
+       TP_printk_btrfs(
+ "io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s clear_bits=%s",
+                 show_extent_io_tree_owner(__entry->owner), __entry->ino,
+                 __entry->rootid, __entry->start, __entry->len,
+                 __print_flags(__entry->set_bits, "|", EXTENT_FLAGS),
+                 __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
+ );
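
[Editor's note] Once merged, the three extent-bit events above show up under the btrfs group in tracefs. A hedged usage sketch that enables btrfs_set_extent_bit and streams the formatted output; the mount point used here (/sys/kernel/tracing) varies by system, and the program needs root:

#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f;

	/* Enable the event (tracefs may also be at /sys/kernel/debug/tracing). */
	f = fopen("/sys/kernel/tracing/events/btrfs/btrfs_set_extent_bit/enable", "w");
	if (!f) {
		perror("enable");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);

	/* Stream formatted events; fgets() blocks until something fires. */
	f = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!f) {
		perror("trace_pipe");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
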
+
+ DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock,
+       TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
+       TP_ARGS(eb, start_ns),
+       TP_STRUCT__entry_btrfs(
+               __field(        u64,    block           )
+               __field(        u64,    generation      )
+               __field(        u64,    start_ns        )
+               __field(        u64,    end_ns          )
+               __field(        u64,    diff_ns         )
+               __field(        u64,    owner           )
+               __field(        int,    is_log_tree     )
+       ),
+       TP_fast_assign_btrfs(eb->fs_info,
+               __entry->block          = eb->start;
+               __entry->generation     = btrfs_header_generation(eb);
+               __entry->start_ns       = start_ns;
+               __entry->end_ns         = ktime_get_ns();
+               __entry->diff_ns        = __entry->end_ns - start_ns;
+               __entry->owner          = btrfs_header_owner(eb);
+               __entry->is_log_tree    = (eb->log_index >= 0);
+       ),
+       TP_printk_btrfs(
+ "block=%llu generation=%llu start_ns=%llu end_ns=%llu diff_ns=%llu owner=%llu is_log_tree=%d",
+               __entry->block, __entry->generation,
+               __entry->start_ns, __entry->end_ns, __entry->diff_ns,
+               __entry->owner, __entry->is_log_tree)
+ );
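
[Editor's note] btrfs_sleep_tree_lock captures start_ns before the task blocks and samples ktime_get_ns() again in TP_fast_assign, so diff_ns is the time spent waiting for the extent buffer lock. A userspace analogue of that measurement pattern, with clock_gettime(CLOCK_MONOTONIC) standing in for ktime_get_ns():

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
	uint64_t start_ns, diff_ns;

	start_ns = now_ns();		/* sampled before blocking */
	pthread_mutex_lock(&lock);
	diff_ns = now_ns() - start_ns;	/* time spent waiting for the lock */
	printf("waited %llu ns\n", (unsigned long long)diff_ns);
	pthread_mutex_unlock(&lock);
	return 0;
}
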
+
+ DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_read_lock,
+       TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
+       TP_ARGS(eb, start_ns)
+ );
+
+ DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_lock,
+       TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
+       TP_ARGS(eb, start_ns)
+ );
+
+ DECLARE_EVENT_CLASS(btrfs_locking_events,
+       TP_PROTO(const struct extent_buffer *eb),
+       TP_ARGS(eb),
+       TP_STRUCT__entry_btrfs(
+               __field(        u64,    block           )
+               __field(        u64,    generation      )
+               __field(        u64,    owner           )
+               __field(        int,    is_log_tree     )
+       ),
+       TP_fast_assign_btrfs(eb->fs_info,
+               __entry->block          = eb->start;
+               __entry->generation     = btrfs_header_generation(eb);
+               __entry->owner          = btrfs_header_owner(eb);
+               __entry->is_log_tree    = (eb->log_index >= 0);
+       ),
+       TP_printk_btrfs("block=%llu generation=%llu owner=%llu is_log_tree=%d",
+               __entry->block, __entry->generation,
+               __entry->owner, __entry->is_log_tree)
+ );
+
+ #define DEFINE_BTRFS_LOCK_EVENT(name)                         \
+ DEFINE_EVENT(btrfs_locking_events, name,                      \
+               TP_PROTO(const struct extent_buffer *eb),       \
+                                                               \
+               TP_ARGS(eb)                                     \
+ )
+
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_unlock);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_read);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_write);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
+ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
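
[Editor's note] DEFINE_BTRFS_LOCK_EVENT just saves the repeated TP_PROTO/TP_ARGS boilerplate when attaching the ten events above to the one btrfs_locking_events class. The same "declare the shape once, stamp out named instances" trick in plain C, with hypothetical names:

#include <stdio.h>

#define DEFINE_LOCK_EVENT(name)					\
static void trace_##name(unsigned long long block)		\
{								\
	printf(#name ": block=%llu\n", block);			\
}

DEFINE_LOCK_EVENT(tree_unlock)
DEFINE_LOCK_EVENT(tree_read_unlock)

int main(void)
{
	trace_tree_unlock(16384);	/* "tree_unlock: block=16384" */
	trace_tree_read_unlock(32768);
	return 0;
}
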
  #endif /* _TRACE_BTRFS_H */
  
  /* This part must be outside protection */