Merge tag 'for-5.11/drivers-2020-12-14' of git://git.kernel.dk/linux-block
[linux-2.6-microblaze.git] / fs / btrfs / inode.c
index 7e8d816..8e23780 100644 (file)
@@ -45,7 +45,6 @@
 #include "compression.h"
 #include "locking.h"
 #include "free-space-cache.h"
-#include "inode-map.h"
 #include "props.h"
 #include "qgroup.h"
 #include "delalloc-space.h"
@@ -62,7 +61,6 @@ struct btrfs_dio_data {
        loff_t length;
        ssize_t submitted;
        struct extent_changeset *data_reserved;
-       bool sync;
 };
 
 static const struct inode_operations btrfs_dir_inode_operations;
@@ -95,6 +93,51 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
                                         const u64 offset, const u64 bytes,
                                         const bool uptodate);
 
+/*
+ * btrfs_inode_lock - lock inode i_rwsem based on the flags passed
+ *
+ * ilock_flags can have the following bits set:
+ *
+ * BTRFS_ILOCK_SHARED - acquire a shared lock on the inode (else exclusive)
+ * BTRFS_ILOCK_TRY - make a single trylock attempt; if it fails, return
+ *                  -EAGAIN. Returns 0 whenever the lock was acquired.
+ */
+int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
+{
+       if (ilock_flags & BTRFS_ILOCK_SHARED) {
+               if (ilock_flags & BTRFS_ILOCK_TRY) {
+                       if (!inode_trylock_shared(inode))
+                               return -EAGAIN;
+                       else
+                               return 0;
+               }
+               inode_lock_shared(inode);
+       } else {
+               if (ilock_flags & BTRFS_ILOCK_TRY) {
+                       if (!inode_trylock(inode))
+                               return -EAGAIN;
+                       else
+                               return 0;
+               }
+               inode_lock(inode);
+       }
+       return 0;
+}
+
+/*
+ * btrfs_inode_unlock - unlock inode i_rwsem
+ *
+ * ilock_flags must have the same BTRFS_ILOCK_SHARED state as was passed to
+ * btrfs_inode_lock(), since it selects the shared or the exclusive unlock.
+ */
+void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
+{
+       if (ilock_flags & BTRFS_ILOCK_SHARED)
+               inode_unlock_shared(inode);
+       else
+               inode_unlock(inode);
+}
+
 /*
  * Cleanup all submitted ordered extents in specified range to handle errors
  * from the btrfs_run_delalloc_range() callback.
@@ -158,7 +201,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
  * no overlapping inline items exist in the btree
  */
 static int insert_inline_extent(struct btrfs_trans_handle *trans,
-                               struct btrfs_path *path, int extent_inserted,
+                               struct btrfs_path *path, bool extent_inserted,
                                struct btrfs_root *root, struct inode *inode,
                                u64 start, size_t size, size_t compressed_size,
                                int compress_type,
@@ -179,8 +222,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
        if (compressed_size && compressed_pages)
                cur_size = compressed_size;
 
-       inode_add_bytes(inode, size);
-
        if (!extent_inserted) {
                struct btrfs_key key;
                size_t datasize;
@@ -190,7 +231,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
                key.type = BTRFS_EXTENT_DATA_KEY;
 
                datasize = btrfs_file_extent_calc_inline_size(cur_size);
-               path->leave_spinning = 1;
                ret = btrfs_insert_empty_item(trans, root, path, &key,
                                              datasize);
                if (ret)
@@ -256,8 +296,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
         * could end up racing with unlink.
         */
        BTRFS_I(inode)->disk_i_size = inode->i_size;
-       ret = btrfs_update_inode(trans, root, inode);
-
 fail:
        return ret;
 }
@@ -273,6 +311,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
                                          int compress_type,
                                          struct page **compressed_pages)
 {
+       struct btrfs_drop_extents_args drop_args = { 0 };
        struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_trans_handle *trans;
@@ -283,8 +322,6 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
        u64 data_len = inline_len;
        int ret;
        struct btrfs_path *path;
-       int extent_inserted = 0;
-       u32 extent_item_size;
 
        if (compressed_size)
                data_len = compressed_size;
@@ -310,16 +347,20 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
        }
        trans->block_rsv = &inode->block_rsv;
 
+       drop_args.path = path;
+       drop_args.start = start;
+       drop_args.end = aligned_end;
+       drop_args.drop_cache = true;
+       drop_args.replace_extent = true;
+
        if (compressed_size && compressed_pages)
-               extent_item_size = btrfs_file_extent_calc_inline_size(
+               drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
                   compressed_size);
        else
-               extent_item_size = btrfs_file_extent_calc_inline_size(
+               drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
                    inline_len);
 
-       ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
-                                  NULL, 1, 1, extent_item_size,
-                                  &extent_inserted);
+       ret = btrfs_drop_extents(trans, root, inode, &drop_args);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out;
@@ -327,7 +368,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
 
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
-       ret = insert_inline_extent(trans, path, extent_inserted,
+       ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
                                   root, &inode->vfs_inode, start,
                                   inline_len, compressed_size,
                                   compress_type, compressed_pages);
@@ -339,8 +380,17 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
                goto out;
        }
 
+       btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
+       ret = btrfs_update_inode(trans, root, inode);
+       if (ret && ret != -ENOSPC) {
+               btrfs_abort_transaction(trans, ret);
+               goto out;
+       } else if (ret == -ENOSPC) {
+               ret = 1;
+               goto out;
+       }
+
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
-       btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 out:
        /*
         * Don't forget to free the reserved space, as for inlined extent
@@ -1598,6 +1648,15 @@ next_slot:
                                goto out_check;
                        if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
                                goto out_check;
+
+                       /*
+                        * The following checks can be expensive, as they need to
+                        * take other locks and do btree or rbtree searches, so
+                        * release the path to avoid blocking other tasks for too
+                        * long.
+                        */
+                       btrfs_release_path(path);
+
                        /* If extent is RO, we must COW it */
                        if (btrfs_extent_readonly(fs_info, disk_bytenr))
                                goto out_check;
@@ -1673,12 +1732,12 @@ out_check:
                        cur_offset = extent_end;
                        if (cur_offset > end)
                                break;
+                       if (!path->nodes[0])
+                               continue;
                        path->slots[0]++;
                        goto next_slot;
                }
 
-               btrfs_release_path(path);
-
                /*
                 * COW range from cow_start to found_key.offset - 1. As the key
                 * will contain the beginning of the first extent that can be
@@ -2098,6 +2157,8 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
                spin_lock(&inode->lock);
                ASSERT(inode->new_delalloc_bytes >= len);
                inode->new_delalloc_bytes -= len;
+               if (*bits & EXTENT_ADD_INODE_BYTES)
+                       inode_add_bytes(&inode->vfs_inode, len);
                spin_unlock(&inode->lock);
        }
 }
@@ -2121,7 +2182,7 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
 {
        struct inode *inode = page->mapping->host;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       u64 logical = (u64)bio->bi_iter.bi_sector << 9;
+       u64 logical = bio->bi_iter.bi_sector << 9;
        u64 length = 0;
        u64 map_length;
        int ret;
@@ -2150,11 +2211,9 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
-                                   u64 bio_offset)
+static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
+                                          u64 dio_file_offset)
 {
-       struct inode *inode = private_data;
-
        return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
 }
 
@@ -2187,7 +2246,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
        int skip_sum;
        int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
-       skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+       skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+                  !fs_info->csum_root;
 
        if (btrfs_is_free_space_inode(BTRFS_I(inode)))
                metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
@@ -2202,8 +2262,13 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
                                                           mirror_num,
                                                           bio_flags);
                        goto out;
-               } else if (!skip_sum) {
-                       ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL);
+               } else {
+                       /*
+                        * Lookup bio sums does extra checks around whether we
+                        * need to csum or not, which is why we ignore skip_sum
+                        * here.
+                        */
+                       ret = btrfs_lookup_bio_sums(inode, bio, NULL);
                        if (ret)
                                goto out;
                }
@@ -2213,8 +2278,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
                if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
                        goto mapit;
                /* we're doing a write, do the async checksumming */
-               ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
-                                         0, inode, btrfs_submit_bio_start);
+               ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
+                                         0, btrfs_submit_bio_start);
                goto out;
        } else if (!skip_sum) {
                ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
@@ -2282,8 +2347,8 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
 
                ret = set_extent_bit(&inode->io_tree, search_start,
                                     search_start + em_len - 1,
-                                    EXTENT_DELALLOC_NEW,
-                                    NULL, cached_state, GFP_NOFS);
+                                    EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
+                                    GFP_NOFS, NULL);
 next:
                search_start = extent_map_end(em);
                free_extent_map(em);
@@ -2511,9 +2576,11 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
                                       struct btrfs_inode *inode, u64 file_pos,
                                       struct btrfs_file_extent_item *stack_fi,
+                                      const bool update_inode_bytes,
                                       u64 qgroup_reserved)
 {
        struct btrfs_root *root = inode->root;
+       const u64 sectorsize = root->fs_info->sectorsize;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key ins;
@@ -2521,7 +2588,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
        u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
        u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
-       int extent_inserted = 0;
+       struct btrfs_drop_extents_args drop_args = { 0 };
        int ret;
 
        path = btrfs_alloc_path();
@@ -2537,18 +2604,20 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
         * the caller is expected to unpin it and allow it to be merged
         * with the others.
         */
-       ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
-                                  file_pos + num_bytes, NULL, 0,
-                                  1, sizeof(*stack_fi), &extent_inserted);
+       drop_args.path = path;
+       drop_args.start = file_pos;
+       drop_args.end = file_pos + num_bytes;
+       drop_args.replace_extent = true;
+       drop_args.extent_item_size = sizeof(*stack_fi);
+       ret = btrfs_drop_extents(trans, root, inode, &drop_args);
        if (ret)
                goto out;
 
-       if (!extent_inserted) {
+       if (!drop_args.extent_inserted) {
                ins.objectid = btrfs_ino(inode);
                ins.offset = file_pos;
                ins.type = BTRFS_EXTENT_DATA_KEY;
 
-               path->leave_spinning = 1;
                ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                              sizeof(*stack_fi));
                if (ret)
@@ -2563,7 +2632,24 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 
-       inode_add_bytes(&inode->vfs_inode, num_bytes);
+       /*
+        * If we dropped an inline extent here, we know the range it covered
+        * was not marked with the EXTENT_DELALLOC_NEW bit, so we update the
+        * number of bytes only for that range containing the inline extent.
+        * The remainder of the range will be processed when clearing the
+        * EXTENT_DELALLOC_NEW bit through the ordered extent completion.
+        */
+       if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
+               u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
+
+               inline_size = drop_args.bytes_found - inline_size;
+               btrfs_update_inode_bytes(inode, sectorsize, inline_size);
+               drop_args.bytes_found -= inline_size;
+               num_bytes -= sectorsize;
+       }
+
+       if (update_inode_bytes)
+               btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
 
        ins.objectid = disk_bytenr;
        ins.offset = disk_num_bytes;
@@ -2601,6 +2687,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
 {
        struct btrfs_file_extent_item stack_fi;
        u64 logical_len;
+       bool update_inode_bytes;
 
        memset(&stack_fi, 0, sizeof(stack_fi));
        btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
@@ -2616,9 +2703,18 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
        btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
        /* Encryption and other encoding is reserved and all 0 */
 
+       /*
+        * For delalloc, when completing an ordered extent we update the inode's
+        * bytes when clearing the range in the inode's io tree, so pass false
+        * as the argument 'update_inode_bytes' to insert_reserved_file_extent(),
+        * except if the ordered extent was truncated.
+        */
+       update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
+                            test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
+
        return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
                                           oe->file_offset, &stack_fi,
-                                          oe->qgroup_rsv);
+                                          update_inode_bytes, oe->qgroup_rsv);
 }
 
 /*
@@ -2628,11 +2724,11 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
  */
 static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 {
-       struct inode *inode = ordered_extent->inode;
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_trans_handle *trans = NULL;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_io_tree *io_tree = &inode->io_tree;
        struct extent_state *cached_state = NULL;
        u64 start, end;
        int compress_type = 0;
@@ -2640,10 +2736,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        u64 logical_len = ordered_extent->num_bytes;
        bool freespace_inode;
        bool truncated = false;
-       bool range_locked = false;
-       bool clear_new_delalloc_bytes = false;
        bool clear_reserved_extent = true;
-       unsigned int clear_bits;
+       unsigned int clear_bits = EXTENT_DEFRAG;
 
        start = ordered_extent->file_offset;
        end = start + ordered_extent->num_bytes - 1;
@@ -2651,16 +2745,16 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
            !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
            !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
-               clear_new_delalloc_bytes = true;
+               clear_bits |= EXTENT_DELALLOC_NEW;
 
-       freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
+       freespace_inode = btrfs_is_free_space_inode(inode);
 
        if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
                ret = -EIO;
                goto out;
        }
 
-       btrfs_free_io_failure_record(BTRFS_I(inode), start, end);
+       btrfs_free_io_failure_record(inode, start, end);
 
        if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
                truncated = true;
@@ -2683,14 +2777,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                        trans = NULL;
                        goto out;
                }
-               trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+               trans->block_rsv = &inode->block_rsv;
                ret = btrfs_update_inode_fallback(trans, root, inode);
                if (ret) /* -ENOMEM or corruption */
                        btrfs_abort_transaction(trans, ret);
                goto out;
        }
 
-       range_locked = true;
+       clear_bits |= EXTENT_LOCKED;
        lock_extent_bits(io_tree, start, end, &cached_state);
 
        if (freespace_inode)
@@ -2703,13 +2797,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
-       trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+       trans->block_rsv = &inode->block_rsv;
 
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
                compress_type = ordered_extent->compress_type;
        if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
                BUG_ON(compress_type);
-               ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
+               ret = btrfs_mark_extent_written(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->file_offset +
                                                logical_len);
@@ -2723,8 +2817,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                                                ordered_extent->disk_num_bytes);
                }
        }
-       unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
-                          ordered_extent->file_offset,
+       unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
                           ordered_extent->num_bytes, trans->transid);
        if (ret < 0) {
                btrfs_abort_transaction(trans, ret);
@@ -2737,6 +2830,17 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
+       /*
+        * If this is a new delalloc range, clear its new delalloc flag to
+        * update the inode's number of bytes. This needs to be done first
+        * before updating the inode item.
+        */
+       if ((clear_bits & EXTENT_DELALLOC_NEW) &&
+           !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
+               clear_extent_bit(&inode->io_tree, start, end,
+                                EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
+                                0, 0, &cached_state);
+
        btrfs_inode_safe_disk_i_size_write(inode, 0);
        ret = btrfs_update_inode_fallback(trans, root, inode);
        if (ret) { /* -ENOMEM or corruption */
@@ -2745,12 +2849,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        }
        ret = 0;
 out:
-       clear_bits = EXTENT_DEFRAG;
-       if (range_locked)
-               clear_bits |= EXTENT_LOCKED;
-       if (clear_new_delalloc_bytes)
-               clear_bits |= EXTENT_DELALLOC_NEW;
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits,
+       clear_extent_bit(&inode->io_tree, start, end, clear_bits,
                         (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
                         &cached_state);
 
@@ -2765,7 +2864,7 @@ out:
                clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
 
                /* Drop the cache for the part of the extent we didn't write. */
-               btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);
+               btrfs_drop_extent_cache(inode, unwritten_start, end, 0);
 
                /*
                 * If the ordered extent had an IOERR or something else went
@@ -2800,7 +2899,7 @@ out:
         * This needs to be done to make sure anybody waiting knows we are done
         * updating everything for this ordered extent.
         */
-       btrfs_remove_ordered_extent(BTRFS_I(inode), ordered_extent);
+       btrfs_remove_ordered_extent(inode, ordered_extent);
 
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
@@ -2841,18 +2940,32 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
        btrfs_queue_work(wq, &ordered_extent->work);
 }
 
+/*
+ * check_data_csum - verify checksum of one sector of uncompressed data
+ * @inode:     inode
+ * @io_bio:    btrfs_io_bio which contains the csum
+ * @bio_offset:        offset to the beginning of the bio (in bytes)
+ * @page:      page containing the data to be verified
+ * @pgoff:     offset inside the page
+ *
+ * The length of such check is always one sector size.
+ */
 static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
-                          int icsum, struct page *page, int pgoff, u64 start,
-                          size_t len)
+                          u32 bio_offset, struct page *page, u32 pgoff)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        char *kaddr;
-       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+       u32 len = fs_info->sectorsize;
+       const u32 csum_size = fs_info->csum_size;
+       unsigned int offset_sectors;
        u8 *csum_expected;
        u8 csum[BTRFS_CSUM_SIZE];
 
-       csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
+       ASSERT(pgoff + len <= PAGE_SIZE);
+
+       offset_sectors = bio_offset >> fs_info->sectorsize_bits;
+       csum_expected = ((u8 *)io_bio->csum) + offset_sectors * csum_size;
 
        kaddr = kmap_atomic(page);
        shash->tfm = fs_info->csum_shash;
@@ -2865,8 +2978,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
        kunmap_atomic(kaddr);
        return 0;
 zeroit:
-       btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
-                                   io_bio->mirror_num);
+       btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
+                                   csum, csum_expected, io_bio->mirror_num);
        if (io_bio->device)
                btrfs_dev_stat_inc_and_print(io_bio->device,
                                             BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -2877,17 +2990,23 @@ zeroit:
 }
 
 /*
- * when reads are done, we need to check csums to verify the data is correct
+ * When reads are done, we need to check csums to verify the data is correct.
  * if there's a match, we allow the bio to finish.  If not, the code in
  * extent_io.c will try to find good copies for us.
+ *
+ * @bio_offset:        offset to the beginning of the bio (in bytes)
+ * @start:     file offset of the range start
+ * @end:       file offset of the range end (inclusive)
+ * @mirror:    mirror number
  */
-int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
+int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
                           struct page *page, u64 start, u64 end, int mirror)
 {
-       size_t offset = start - page_offset(page);
        struct inode *inode = page->mapping->host;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       const u32 sectorsize = root->fs_info->sectorsize;
+       u32 pg_off;
 
        if (PageChecked(page)) {
                ClearPageChecked(page);
@@ -2897,15 +3016,27 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
                return 0;
 
+       if (!root->fs_info->csum_root)
+               return 0;
+
        if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
            test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
                clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
                return 0;
        }
 
-       phy_offset >>= inode->i_sb->s_blocksize_bits;
-       return check_data_csum(inode, io_bio, phy_offset, page, offset, start,
-                              (size_t)(end - start + 1));
+       ASSERT(page_offset(page) <= start &&
+              end <= page_offset(page) + PAGE_SIZE - 1);
+       for (pg_off = offset_in_page(start);
+            pg_off < offset_in_page(end);
+            pg_off += sectorsize, bio_offset += sectorsize) {
+               int ret;
+
+               ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
+               if (ret < 0)
+                       return -EIO;
+       }
+       return 0;
 }
 
 /*
@@ -3515,7 +3646,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
  * copy everything in the in-memory inode into the btree.
  */
 static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root, struct inode *inode)
+                               struct btrfs_root *root,
+                               struct btrfs_inode *inode)
 {
        struct btrfs_inode_item *inode_item;
        struct btrfs_path *path;
@@ -3526,9 +3658,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       path->leave_spinning = 1;
-       ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
-                                1);
+       ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
        if (ret) {
                if (ret > 0)
                        ret = -ENOENT;
@@ -3539,9 +3669,9 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
 
-       fill_inode_item(trans, leaf, inode_item, inode);
+       fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
+       btrfs_set_inode_last_trans(trans, inode);
        ret = 0;
 failed:
        btrfs_free_path(path);
@@ -3552,7 +3682,8 @@ failed:
  * copy everything in the in-memory inode into the btree.
  */
 noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root, struct inode *inode)
+                               struct btrfs_root *root,
+                               struct btrfs_inode *inode)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
@@ -3564,23 +3695,22 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
         * The data relocation inode should also be directly updated
         * without delay
         */
-       if (!btrfs_is_free_space_inode(BTRFS_I(inode))
+       if (!btrfs_is_free_space_inode(inode)
            && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
            && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
                btrfs_update_root_times(trans, root);
 
                ret = btrfs_delayed_update_inode(trans, root, inode);
                if (!ret)
-                       btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
+                       btrfs_set_inode_last_trans(trans, inode);
                return ret;
        }
 
        return btrfs_update_inode_item(trans, root, inode);
 }
 
-noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
-                                        struct btrfs_root *root,
-                                        struct inode *inode)
+int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct btrfs_inode *inode)
 {
        int ret;
 
@@ -3615,7 +3745,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       path->leave_spinning = 1;
        di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
                                    name, name_len, -1);
        if (IS_ERR_OR_NULL(di)) {
@@ -3695,7 +3824,7 @@ err:
        inode_inc_iversion(&dir->vfs_inode);
        inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
                dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
-       ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
+       ret = btrfs_update_inode(trans, root, dir);
 out:
        return ret;
 }
@@ -3709,7 +3838,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
        if (!ret) {
                drop_nlink(&inode->vfs_inode);
-               ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
+               ret = btrfs_update_inode(trans, root, inode);
        }
        return ret;
 }
@@ -3858,7 +3987,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
        inode_inc_iversion(dir);
        dir->i_mtime = dir->i_ctime = current_time(dir);
-       ret = btrfs_update_inode_fallback(trans, root, dir);
+       ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
        if (ret)
                btrfs_abort_transaction(trans, ret);
 out:
@@ -3995,7 +4124,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
        struct btrfs_block_rsv block_rsv;
        u64 root_flags;
        int ret;
-       int err;
 
        /*
         * Don't allow to delete a subvolume with send in progress. This is
@@ -4017,8 +4145,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
 
        down_write(&fs_info->subvol_sem);
 
-       err = may_destroy_subvol(dest);
-       if (err)
+       ret = may_destroy_subvol(dest);
+       if (ret)
                goto out_up_write;
 
        btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
@@ -4027,13 +4155,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
         * two for dir entries,
         * two for root ref/backref.
         */
-       err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
-       if (err)
+       ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+       if (ret)
                goto out_up_write;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
-               err = PTR_ERR(trans);
+               ret = PTR_ERR(trans);
                goto out_release;
        }
        trans->block_rsv = &block_rsv;
@@ -4043,7 +4171,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
 
        ret = btrfs_unlink_subvol(trans, dir, dentry);
        if (ret) {
-               err = ret;
                btrfs_abort_transaction(trans, ret);
                goto out_end_trans;
        }
@@ -4052,7 +4179,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
 
        memset(&dest->root_item.drop_progress, 0,
                sizeof(dest->root_item.drop_progress));
-       dest->root_item.drop_level = 0;
+       btrfs_set_root_drop_level(&dest->root_item, 0);
        btrfs_set_root_refs(&dest->root_item, 0);
 
        if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
@@ -4061,7 +4188,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
                                        dest->root_key.objectid);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
-                       err = ret;
                        goto out_end_trans;
                }
        }
@@ -4071,7 +4197,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
                                  dest->root_key.objectid);
        if (ret && ret != -ENOENT) {
                btrfs_abort_transaction(trans, ret);
-               err = ret;
                goto out_end_trans;
        }
        if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
@@ -4081,7 +4206,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
                                          dest->root_key.objectid);
                if (ret && ret != -ENOENT) {
                        btrfs_abort_transaction(trans, ret);
-                       err = ret;
                        goto out_end_trans;
                }
        }
@@ -4092,14 +4216,12 @@ out_end_trans:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
        ret = btrfs_end_transaction(trans);
-       if (ret && !err)
-               err = ret;
        inode->i_flags |= S_DEAD;
 out_release:
        btrfs_subvolume_release_metadata(root, &block_rsv);
 out_up_write:
        up_write(&fs_info->subvol_sem);
-       if (err) {
+       if (ret) {
                spin_lock(&dest->root_item_lock);
                root_flags = btrfs_root_flags(&dest->root_item);
                btrfs_set_root_flags(&dest->root_item,
@@ -4109,15 +4231,9 @@ out_up_write:
                d_invalidate(dentry);
                btrfs_prune_dentries(dest);
                ASSERT(dest->send_in_progress == 0);
-
-               /* the last ref */
-               if (dest->ino_cache_inode) {
-                       iput(dest->ino_cache_inode);
-                       dest->ino_cache_inode = NULL;
-               }
        }
 
-       return err;
+       return ret;
 }
 
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -4194,7 +4310,7 @@ out:
  */
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
-                              struct inode *inode,
+                              struct btrfs_inode *inode,
                               u64 new_size, u32 min_type)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4215,7 +4331,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        int pending_del_slot = 0;
        int extent_type = -1;
        int ret;
-       u64 ino = btrfs_ino(BTRFS_I(inode));
+       u64 ino = btrfs_ino(inode);
        u64 bytes_deleted = 0;
        bool be_nice = false;
        bool should_throttle = false;
@@ -4229,7 +4345,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
         * off from time to time.  This means all inodes in subvolume roots,
         * reloc roots, and data reloc roots.
         */
-       if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
+       if (!btrfs_is_free_space_inode(inode) &&
            test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
                be_nice = true;
 
@@ -4239,7 +4355,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        path->reada = READA_BACK;
 
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
-               lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+               lock_extent_bits(&inode->io_tree, lock_start, (u64)-1,
                                 &cached_state);
 
                /*
@@ -4247,7 +4363,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                 * new size is not block aligned since we will be keeping the
                 * last block of the extent just the way it is.
                 */
-               btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
+               btrfs_drop_extent_cache(inode, ALIGN(new_size,
                                        fs_info->sectorsize),
                                        (u64)-1, 0);
        }
@@ -4258,8 +4374,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
         * it is used to drop the logged items. So we shouldn't kill the delayed
         * items.
         */
-       if (min_type == 0 && root == BTRFS_I(inode)->root)
-               btrfs_kill_delayed_inode_items(BTRFS_I(inode));
+       if (min_type == 0 && root == inode->root)
+               btrfs_kill_delayed_inode_items(inode);
 
        key.objectid = ino;
        key.offset = (u64)-1;
@@ -4315,14 +4431,13 @@ search_again:
                                    btrfs_file_extent_num_bytes(leaf, fi);
 
                                trace_btrfs_truncate_show_fi_regular(
-                                       BTRFS_I(inode), leaf, fi,
-                                       found_key.offset);
+                                       inode, leaf, fi, found_key.offset);
                        } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                                item_end += btrfs_file_extent_ram_bytes(leaf,
                                                                        fi);
 
                                trace_btrfs_truncate_show_fi_inline(
-                                       BTRFS_I(inode), leaf, fi, path->slots[0],
+                                       inode, leaf, fi, path->slots[0],
                                        found_key.offset);
                        }
                        item_end--;
@@ -4361,7 +4476,8 @@ search_again:
                                if (test_bit(BTRFS_ROOT_SHAREABLE,
                                             &root->state) &&
                                    extent_start != 0)
-                                       inode_sub_bytes(inode, num_dec);
+                                       inode_sub_bytes(&inode->vfs_inode,
+                                                       num_dec);
                                btrfs_mark_buffer_dirty(leaf);
                        } else {
                                extent_num_bytes =
@@ -4376,7 +4492,8 @@ search_again:
                                        found_extent = 1;
                                        if (test_bit(BTRFS_ROOT_SHAREABLE,
                                                     &root->state))
-                                               inode_sub_bytes(inode, num_dec);
+                                               inode_sub_bytes(&inode->vfs_inode,
+                                                               num_dec);
                                }
                        }
                        clear_len = num_dec;
@@ -4411,7 +4528,8 @@ search_again:
                        }
 
                        if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
-                               inode_sub_bytes(inode, item_end + 1 - new_size);
+                               inode_sub_bytes(&inode->vfs_inode,
+                                               item_end + 1 - new_size);
                }
 delete:
                /*
@@ -4419,8 +4537,8 @@ delete:
                 * multiple fsyncs, and in this case we don't want to clear the
                 * file extent range because it's just the log.
                 */
-               if (root == BTRFS_I(inode)->root) {
-                       ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+               if (root == inode->root) {
+                       ret = btrfs_inode_clear_file_extent_range(inode,
                                                  clear_start, clear_len);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
@@ -4529,8 +4647,8 @@ out:
                if (!ret && last_size > new_size)
                        last_size = new_size;
                btrfs_inode_safe_disk_i_size_write(inode, last_size);
-               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
-                                    (u64)-1, &cached_state);
+               unlock_extent_cached(&inode->io_tree, lock_start, (u64)-1,
+                                    &cached_state);
        }
 
        btrfs_free_path(path);
@@ -4548,12 +4666,12 @@ out:
  * This will find the block for the "from" offset and cow the block and zero the
  * part we want to zero.  This is used with truncate and hole punching.
  */
-int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
-                       int front)
+int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
+                        int front)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct address_space *mapping = inode->i_mapping;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct address_space *mapping = inode->vfs_inode.i_mapping;
+       struct extent_io_tree *io_tree = &inode->io_tree;
        struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
@@ -4576,30 +4694,29 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
        block_start = round_down(from, blocksize);
        block_end = block_start + blocksize - 1;
 
-       ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
-                                         block_start, blocksize);
+       ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
+                                         blocksize);
        if (ret < 0) {
-               if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
-                                          &write_bytes) > 0) {
+               if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
                        /* For nocow case, no need to reserve data space */
                        only_release_metadata = true;
                } else {
                        goto out;
                }
        }
-       ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+       ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
        if (ret < 0) {
                if (!only_release_metadata)
-                       btrfs_free_reserved_data_space(BTRFS_I(inode),
-                                       data_reserved, block_start, blocksize);
+                       btrfs_free_reserved_data_space(inode, data_reserved,
+                                                      block_start, blocksize);
                goto out;
        }
 again:
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
-               btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
-                                            block_start, blocksize, true);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+               btrfs_delalloc_release_space(inode, data_reserved, block_start,
+                                            blocksize, true);
+               btrfs_delalloc_release_extents(inode, blocksize);
                ret = -ENOMEM;
                goto out;
        }
@@ -4622,7 +4739,7 @@ again:
        lock_extent_bits(io_tree, block_start, block_end, &cached_state);
        set_page_extent_mapped(page);
 
-       ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
+       ordered = btrfs_lookup_ordered_extent(inode, block_start);
        if (ordered) {
                unlock_extent_cached(io_tree, block_start, block_end,
                                     &cached_state);
@@ -4633,11 +4750,11 @@ again:
                goto again;
        }
 
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
+       clear_extent_bit(&inode->io_tree, block_start, block_end,
                         EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                         0, 0, &cached_state);
 
-       ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
+       ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
                                        &cached_state);
        if (ret) {
                unlock_extent_cached(io_tree, block_start, block_end,
@@ -4663,34 +4780,33 @@ again:
        unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
 
        if (only_release_metadata)
-               set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
-                               block_end, EXTENT_NORESERVE, NULL, NULL,
-                               GFP_NOFS);
+               set_extent_bit(&inode->io_tree, block_start, block_end,
+                              EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
 
 out_unlock:
        if (ret) {
                if (only_release_metadata)
-                       btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                       blocksize, true);
+                       btrfs_delalloc_release_metadata(inode, blocksize, true);
                else
-                       btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+                       btrfs_delalloc_release_space(inode, data_reserved,
                                        block_start, blocksize, true);
        }
-       btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+       btrfs_delalloc_release_extents(inode, blocksize);
        unlock_page(page);
        put_page(page);
 out:
        if (only_release_metadata)
-               btrfs_check_nocow_unlock(BTRFS_I(inode));
+               btrfs_check_nocow_unlock(inode);
        extent_changeset_free(data_reserved);
        return ret;
 }
 
-static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
                             u64 offset, u64 len)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_trans_handle *trans;
+       struct btrfs_drop_extents_args drop_args = { 0 };
        int ret;
 
        /*
@@ -4698,9 +4814,9 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
         * that any holes get logged if we fsync.
         */
        if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
-               BTRFS_I(inode)->last_trans = fs_info->generation;
-               BTRFS_I(inode)->last_sub_trans = root->log_transid;
-               BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+               inode->last_trans = fs_info->generation;
+               inode->last_sub_trans = root->log_transid;
+               inode->last_log_commit = root->last_log_commit;
                return 0;
        }
 
@@ -4713,19 +4829,25 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+       drop_args.start = offset;
+       drop_args.end = offset + len;
+       drop_args.drop_cache = true;
+
+       ret = btrfs_drop_extents(trans, root, inode, &drop_args);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                btrfs_end_transaction(trans);
                return ret;
        }
 
-       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
                        offset, 0, 0, len, 0, len, 0, 0, 0);
-       if (ret)
+       if (ret) {
                btrfs_abort_transaction(trans, ret);
-       else
+       } else {
+               btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found);
                btrfs_update_inode(trans, root, inode);
+       }
        btrfs_end_transaction(trans);
        return ret;
 }
@@ -4736,14 +4858,14 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
  * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for
  * the range between oldsize and size
  */
-int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
+int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct extent_io_tree *io_tree = &inode->io_tree;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map_tree *em_tree = &inode->extent_tree;
        u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
        u64 block_end = ALIGN(size, fs_info->sectorsize);
        u64 last_byte;
@@ -4763,11 +4885,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
        if (size <= hole_start)
                return 0;
 
-       btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), hole_start,
-                                          block_end - 1, &cached_state);
+       btrfs_lock_and_flush_ordered_range(inode, hole_start, block_end - 1,
+                                          &cached_state);
        cur_offset = hole_start;
        while (1) {
-               em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
+               em = btrfs_get_extent(inode, NULL, 0, cur_offset,
                                      block_end - cur_offset);
                if (IS_ERR(em)) {
                        err = PTR_ERR(em);
@@ -4786,17 +4908,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        if (err)
                                break;
 
-                       err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+                       err = btrfs_inode_set_file_extent_range(inode,
                                                        cur_offset, hole_size);
                        if (err)
                                break;
 
-                       btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
+                       btrfs_drop_extent_cache(inode, cur_offset,
                                                cur_offset + hole_size - 1, 0);
                        hole_em = alloc_extent_map();
                        if (!hole_em) {
                                set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                                       &BTRFS_I(inode)->runtime_flags);
+                                       &inode->runtime_flags);
                                goto next;
                        }
                        hole_em->start = cur_offset;
@@ -4816,14 +4938,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                write_unlock(&em_tree->lock);
                                if (err != -EEXIST)
                                        break;
-                               btrfs_drop_extent_cache(BTRFS_I(inode),
-                                                       cur_offset,
+                               btrfs_drop_extent_cache(inode, cur_offset,
                                                        cur_offset +
                                                        hole_size - 1, 0);
                        }
                        free_extent_map(hole_em);
                } else {
-                       err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+                       err = btrfs_inode_set_file_extent_range(inode,
                                                        cur_offset, hole_size);
                        if (err)
                                break;
@@ -4871,7 +4992,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                 * this truncation.
                 */
                btrfs_drew_write_lock(&root->snapshot_lock);
-               ret = btrfs_cont_expand(inode, oldsize, newsize);
+               ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize);
                if (ret) {
                        btrfs_drew_write_unlock(&root->snapshot_lock);
                        return ret;
@@ -4884,9 +5005,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                }
 
                i_size_write(inode, newsize);
-               btrfs_inode_safe_disk_i_size_write(inode, 0);
+               btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
                pagecache_isize_extended(inode, oldsize, newsize);
-               ret = btrfs_update_inode(trans, root, inode);
+               ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
                btrfs_drew_write_unlock(&root->snapshot_lock);
                btrfs_end_transaction(trans);
        } else {
@@ -5157,7 +5278,8 @@ void btrfs_evict_inode(struct inode *inode)
 
                trans->block_rsv = rsv;
 
-               ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+               ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
+                                                0, 0);
                trans->block_rsv = &fs_info->trans_block_rsv;
                btrfs_end_transaction(trans);
                btrfs_btree_balance_dirty(fs_info);
@@ -5184,10 +5306,6 @@ void btrfs_evict_inode(struct inode *inode)
                btrfs_end_transaction(trans);
        }
 
-       if (!(root == fs_info->tree_root ||
-             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
-               btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
-
 free_rsv:
        btrfs_free_block_rsv(fs_info, rsv);
 no_delete:
@@ -5797,7 +5915,7 @@ static int btrfs_dirty_inode(struct inode *inode)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       ret = btrfs_update_inode(trans, root, inode);
+       ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        if (ret && ret == -ENOSPC) {
                /* whoops, lets try again with the full transaction */
                btrfs_end_transaction(trans);
@@ -5805,7 +5923,7 @@ static int btrfs_dirty_inode(struct inode *inode)
                if (IS_ERR(trans))
                        return PTR_ERR(trans);
 
-               ret = btrfs_update_inode(trans, root, inode);
+               ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        }
        btrfs_end_transaction(trans);
        if (BTRFS_I(inode)->delayed_node)
@@ -6068,7 +6186,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                goto fail;
        }
 
-       path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
        if (ret != 0)
                goto fail_unlock;
@@ -6194,7 +6311,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
                parent_inode->vfs_inode.i_mtime = now;
                parent_inode->vfs_inode.i_ctime = now;
        }
-       ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
+       ret = btrfs_update_inode(trans, root, parent_inode);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
@@ -6254,7 +6371,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_ino(root, &objectid);
+       err = btrfs_find_free_objectid(root, &objectid);
        if (err)
                goto out_unlock;
 
@@ -6285,7 +6402,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (err)
                goto out_unlock;
 
-       btrfs_update_inode(trans, root, inode);
+       btrfs_update_inode(trans, root, BTRFS_I(inode));
        d_instantiate_new(dentry, inode);
 
 out_unlock:
@@ -6318,7 +6435,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_ino(root, &objectid);
+       err = btrfs_find_free_objectid(root, &objectid);
        if (err)
                goto out_unlock;
 
@@ -6344,7 +6461,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        if (err)
                goto out_unlock;
 
-       err = btrfs_update_inode(trans, root, inode);
+       err = btrfs_update_inode(trans, root, BTRFS_I(inode));
        if (err)
                goto out_unlock;
 
@@ -6416,7 +6533,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        } else {
                struct dentry *parent = dentry->d_parent;
 
-               err = btrfs_update_inode(trans, root, inode);
+               err = btrfs_update_inode(trans, root, BTRFS_I(inode));
                if (err)
                        goto fail;
                if (inode->i_nlink == 1) {
@@ -6462,7 +6579,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_ino(root, &objectid);
+       err = btrfs_find_free_objectid(root, &objectid);
        if (err)
                goto out_fail;
 
@@ -6484,7 +6601,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto out_fail;
 
        btrfs_i_size_write(BTRFS_I(inode), 0);
-       err = btrfs_update_inode(trans, root, inode);
+       err = btrfs_update_inode(trans, root, BTRFS_I(inode));
        if (err)
                goto out_fail;
 
@@ -6621,12 +6738,14 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
        path->reada = READA_FORWARD;
 
        /*
-        * Unless we're going to uncompress the inline extent, no sleep would
-        * happen.
+        * The same explanation in load_free_space_cache applies here as well,
+        * we only read when we're loading the free space cache, and at that
+        * point the commit_root has everything we need.
         */
-       path->leave_spinning = 1;
-
-       path->recurse = btrfs_is_free_space_inode(inode);
+       if (btrfs_is_free_space_inode(inode)) {
+               path->search_commit_root = 1;
+               path->skip_locking = 1;
+       }
 
        ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
        if (ret < 0) {
@@ -6728,7 +6847,6 @@ next:
                em->orig_start = em->start;
                ptr = btrfs_file_extent_inline_start(item) + extent_offset;
 
-               btrfs_set_path_blocking(path);
                if (!PageUptodate(page)) {
                        if (btrfs_file_extent_compression(leaf, item) !=
                            BTRFS_COMPRESS_NONE) {
@@ -7377,17 +7495,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
        int ret = 0;
        u64 len = length;
        bool unlock_extents = false;
-       bool sync = (current->journal_info == BTRFS_DIO_SYNC_STUB);
-
-       /*
-        * We used current->journal_info here to see if we were sync, but
-        * there's a lot of tests in the enospc machinery to not do flushing if
-        * we have a journal_info set, so we need to clear this out and re-set
-        * it in iomap_end.
-        */
-       ASSERT(current->journal_info == NULL ||
-              current->journal_info == BTRFS_DIO_SYNC_STUB);
-       current->journal_info = NULL;
 
        if (!write)
                len = min_t(u64, len, fs_info->sectorsize);
@@ -7413,7 +7520,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
        if (!dio_data)
                return -ENOMEM;
 
-       dio_data->sync = sync;
        dio_data->length = length;
        if (write) {
                dio_data->reserve = round_up(length, fs_info->sectorsize);
@@ -7561,14 +7667,6 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
                extent_changeset_free(dio_data->data_reserved);
        }
 out:
-       /*
-        * We're all done, we can re-set the current->journal_info now safely
-        * for our endio.
-        */
-       if (dio_data->sync) {
-               ASSERT(current->journal_info == NULL);
-               current->journal_info = BTRFS_DIO_SYNC_STUB;
-       }
        kfree(dio_data);
        iomap->private = NULL;
 
@@ -7632,7 +7730,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
        struct bio_vec bvec;
        struct bvec_iter iter;
        u64 start = io_bio->logical;
-       int icsum = 0;
+       u32 bio_offset = 0;
        blk_status_t err = BLK_STS_OK;
 
        __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) {
@@ -7643,9 +7741,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
                for (i = 0; i < nr_sectors; i++) {
                        ASSERT(pgoff < PAGE_SIZE);
                        if (uptodate &&
-                           (!csum || !check_data_csum(inode, io_bio, icsum,
-                                                      bvec.bv_page, pgoff,
-                                                      start, sectorsize))) {
+                           (!csum || !check_data_csum(inode, io_bio,
+                                       bio_offset, bvec.bv_page, pgoff))) {
                                clean_io_failure(fs_info, failure_tree, io_tree,
                                                 start, bvec.bv_page,
                                                 btrfs_ino(BTRFS_I(inode)),
@@ -7653,6 +7750,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
                        } else {
                                blk_status_t status;
 
+                               ASSERT((start - io_bio->logical) < UINT_MAX);
                                status = btrfs_submit_read_repair(inode,
                                                        &io_bio->bio,
                                                        start - io_bio->logical,
@@ -7665,7 +7763,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
                                        err = status;
                        }
                        start += sectorsize;
-                       icsum++;
+                       ASSERT(bio_offset + sectorsize > bio_offset);
+                       bio_offset += sectorsize;
                        pgoff += sectorsize;
                }
        }
@@ -7715,12 +7814,11 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
        }
 }
 
-static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
-                                   struct bio *bio, u64 offset)
+static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
+                                                    struct bio *bio,
+                                                    u64 dio_file_offset)
 {
-       struct inode *inode = private_data;
-
-       return btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
+       return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, 1);
 }
 
 static void btrfs_end_dio_bio(struct bio *bio)
@@ -7732,8 +7830,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
                btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
                           "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
                           btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
-                          bio->bi_opf,
-                          (unsigned long long)bio->bi_iter.bi_sector,
+                          bio->bi_opf, bio->bi_iter.bi_sector,
                           bio->bi_iter.bi_size, err);
 
        if (bio_op(bio) == REQ_OP_READ) {
@@ -7770,8 +7867,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
                goto map;
 
        if (write && async_submit) {
-               ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
-                                         file_offset, inode,
+               ret = btrfs_wq_submit_bio(inode, bio, 0, 0, file_offset,
                                          btrfs_submit_bio_start_direct_io);
                goto err;
        } else if (write) {
@@ -7786,8 +7882,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
                u64 csum_offset;
 
                csum_offset = file_offset - dip->logical_offset;
-               csum_offset >>= inode->i_sb->s_blocksize_bits;
-               csum_offset *= btrfs_super_csum_size(fs_info->super_copy);
+               csum_offset >>= fs_info->sectorsize_bits;
+               csum_offset *= fs_info->csum_size;
                btrfs_io_bio(bio)->csum = dip->csums + csum_offset;
        }
 map:
@@ -7812,11 +7908,10 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
        dip_size = sizeof(*dip);
        if (!write && csum) {
                struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-               const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
                size_t nblocks;
 
-               nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
-               dip_size += csum_size * nblocks;
+               nblocks = dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits;
+               dip_size += fs_info->csum_size * nblocks;
        }
 
        dip = kzalloc(dip_size, GFP_NOFS);
@@ -7826,7 +7921,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
        dip->inode = inode;
        dip->logical_offset = file_offset;
        dip->bytes = dio_bio->bi_iter.bi_size;
-       dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
+       dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
        dip->dio_bio = dio_bio;
        refcount_set(&dip->refs, 1);
        return dip;
@@ -7836,7 +7931,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                struct bio *dio_bio, loff_t file_offset)
 {
        const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
-       const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
                             BTRFS_BLOCK_GROUP_RAID56_MASK);
@@ -7863,13 +7957,14 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                return BLK_QC_T_NONE;
        }
 
-       if (!write && csum) {
+       if (!write) {
                /*
                 * Load the csums up front to reduce csum tree searches and
                 * contention when submitting bios.
+                *
+                * If we have csums disabled this will do nothing.
                 */
-               status = btrfs_lookup_bio_sums(inode, dio_bio, file_offset,
-                                              dip->csums);
+               status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
                if (status != BLK_STS_OK)
                        goto out_err;
        }
@@ -7944,129 +8039,15 @@ out_err:
        return BLK_QC_T_NONE;
 }
 
-static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
-                              const struct iov_iter *iter, loff_t offset)
-{
-       int seg;
-       int i;
-       unsigned int blocksize_mask = fs_info->sectorsize - 1;
-       ssize_t retval = -EINVAL;
-
-       if (offset & blocksize_mask)
-               goto out;
-
-       if (iov_iter_alignment(iter) & blocksize_mask)
-               goto out;
-
-       /* If this is a write we don't need to check anymore */
-       if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
-               return 0;
-       /*
-        * Check to make sure we don't have duplicate iov_base's in this
-        * iovec, if so return EINVAL, otherwise we'll get csum errors
-        * when reading back.
-        */
-       for (seg = 0; seg < iter->nr_segs; seg++) {
-               for (i = seg + 1; i < iter->nr_segs; i++) {
-                       if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
-                               goto out;
-               }
-       }
-       retval = 0;
-out:
-       return retval;
-}
-
-static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size,
-                                          int error, unsigned flags)
-{
-       /*
-        * Now if we're still in the context of our submitter we know we can't
-        * safely run generic_write_sync(), so clear our flag here so that the
-        * caller knows to follow up with a sync.
-        */
-       if (current->journal_info == BTRFS_DIO_SYNC_STUB) {
-               current->journal_info = NULL;
-               return error;
-       }
-
-       if (error)
-               return error;
-
-       if (size) {
-               iocb->ki_flags |= IOCB_DSYNC;
-               return generic_write_sync(iocb, size);
-       }
-
-       return 0;
-}
-
-static const struct iomap_ops btrfs_dio_iomap_ops = {
+const struct iomap_ops btrfs_dio_iomap_ops = {
        .iomap_begin            = btrfs_dio_iomap_begin,
        .iomap_end              = btrfs_dio_iomap_end,
 };
 
-static const struct iomap_dio_ops btrfs_dio_ops = {
+const struct iomap_dio_ops btrfs_dio_ops = {
        .submit_io              = btrfs_submit_direct,
 };
 
-static const struct iomap_dio_ops btrfs_sync_dops = {
-       .submit_io              = btrfs_submit_direct,
-       .end_io                 = btrfs_maybe_fsync_end_io,
-};
-
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-       struct file *file = iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct extent_changeset *data_reserved = NULL;
-       loff_t offset = iocb->ki_pos;
-       size_t count = 0;
-       bool relock = false;
-       ssize_t ret;
-
-       if (check_direct_IO(fs_info, iter, offset))
-               return 0;
-
-       count = iov_iter_count(iter);
-       if (iov_iter_rw(iter) == WRITE) {
-               /*
-                * If the write DIO is beyond the EOF, we need update
-                * the isize, but it is protected by i_mutex. So we can
-                * not unlock the i_mutex at this case.
-                */
-               if (offset + count <= inode->i_size) {
-                       inode_unlock(inode);
-                       relock = true;
-               }
-               down_read(&BTRFS_I(inode)->dio_sem);
-       }
-
-       /*
-        * We have are actually a sync iocb, so we need our fancy endio to know
-        * if we need to sync.
-        */
-       if (current->journal_info)
-               ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
-                                  &btrfs_sync_dops, is_sync_kiocb(iocb));
-       else
-               ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
-                                  &btrfs_dio_ops, is_sync_kiocb(iocb));
-
-       if (ret == -ENOTBLK)
-               ret = 0;
-
-       if (iov_iter_rw(iter) == WRITE)
-               up_read(&BTRFS_I(inode)->dio_sem);
-
-       if (relock)
-               inode_lock(inode);
-
-       extent_changeset_free(data_reserved);
-       return ret;
-}
-
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        u64 start, u64 len)
 {
@@ -8186,6 +8167,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
        u64 start;
        u64 end;
        int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
+       bool found_ordered = false;
+       bool completed_ordered = false;
 
        /*
         * we have the page locked, so new writeback can't start,
@@ -8207,15 +8190,17 @@ again:
        start = page_start;
        ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
        if (ordered) {
+               found_ordered = true;
                end = min(page_end,
                          ordered->file_offset + ordered->num_bytes - 1);
                /*
-                * IO on this page will never be started, so we need
-                * to account for any ordered extents now
+                * IO on this page will never be started, so we need to account
+                * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
+                * here, must leave that up for the ordered extent completion.
                 */
                if (!inode_evicting)
                        clear_extent_bit(tree, start, end,
-                                        EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+                                        EXTENT_DELALLOC |
                                         EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
                                         EXTENT_DEFRAG, 1, 0, &cached_state);
                /*
@@ -8237,8 +8222,10 @@ again:
 
                        if (btrfs_dec_test_ordered_pending(inode, &ordered,
                                                           start,
-                                                          end - start + 1, 1))
+                                                          end - start + 1, 1)) {
                                btrfs_finish_ordered_io(ordered);
+                               completed_ordered = true;
+                       }
                }
                btrfs_put_ordered_extent(ordered);
                if (!inode_evicting) {
@@ -8267,10 +8254,23 @@ again:
         */
        btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
        if (!inode_evicting) {
+               bool delete = true;
+
+               /*
+                * If there's an ordered extent for this range and we have not
+                * finished it ourselves, we must leave EXTENT_DELALLOC_NEW set
+                * in the range for the ordered extent completion. We must also
+                * not delete the range, otherwise we would lose that bit (and
+                * any other bits set in the range). Make sure EXTENT_UPTODATE
+                * is cleared if we don't delete, otherwise it can lead to
+                * corruptions if the i_size is extended later.
+                */
+               if (found_ordered && !completed_ordered)
+                       delete = false;
                clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
-                                EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
-                                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
-                                &cached_state);
+                                EXTENT_DELALLOC | EXTENT_UPTODATE |
+                                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
+                                delete, &cached_state);
 
                __btrfs_releasepage(page, GFP_NOFS);
        }
@@ -8519,14 +8519,14 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
        trans->block_rsv = rsv;
 
        while (1) {
-               ret = btrfs_truncate_inode_items(trans, root, inode,
+               ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
                trans->block_rsv = &fs_info->trans_block_rsv;
                if (ret != -ENOSPC && ret != -EAGAIN)
                        break;
 
-               ret = btrfs_update_inode(trans, root, inode);
+               ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
                if (ret)
                        break;
 
@@ -8557,7 +8557,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
                btrfs_end_transaction(trans);
                btrfs_btree_balance_dirty(fs_info);
 
-               ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
+               ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0);
                if (ret)
                        goto out;
                trans = btrfs_start_transaction(root, 1);
@@ -8565,14 +8565,14 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
                        ret = PTR_ERR(trans);
                        goto out;
                }
-               btrfs_inode_safe_disk_i_size_write(inode, 0);
+               btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
        }
 
        if (trans) {
                int ret2;
 
                trans->block_rsv = &fs_info->trans_block_rsv;
-               ret2 = btrfs_update_inode(trans, root, inode);
+               ret2 = btrfs_update_inode(trans, root, BTRFS_I(inode));
                if (ret2 && !ret)
                        ret = ret2;
 
@@ -8618,7 +8618,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
                          "error inheriting subvolume %llu properties: %d",
                          new_root->root_key.objectid, err);
 
-       err = btrfs_update_inode(trans, new_root, inode);
+       err = btrfs_update_inode(trans, new_root, BTRFS_I(inode));
 
        iput(inode);
        return err;
@@ -8680,7 +8680,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ei->delalloc_inodes);
        INIT_LIST_HEAD(&ei->delayed_iput);
        RB_CLEAR_NODE(&ei->rb_node);
-       init_rwsem(&ei->dio_sem);
 
        return inode;
 }
@@ -8820,6 +8819,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
                         u32 request_mask, unsigned int flags)
 {
        u64 delalloc_bytes;
+       u64 inode_bytes;
        struct inode *inode = d_inode(path->dentry);
        u32 blocksize = inode->i_sb->s_blocksize;
        u32 bi_flags = BTRFS_I(inode)->flags;
@@ -8846,8 +8846,9 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
 
        spin_lock(&BTRFS_I(inode)->lock);
        delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
+       inode_bytes = inode_get_bytes(inode);
        spin_unlock(&BTRFS_I(inode)->lock);
-       stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
+       stat->blocks = (ALIGN(inode_bytes, blocksize) +
                        ALIGN(delalloc_bytes, blocksize)) >> 9;
        return 0;
 }
@@ -8973,7 +8974,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                           old_dentry->d_name.name,
                                           old_dentry->d_name.len);
                if (!ret)
-                       ret = btrfs_update_inode(trans, root, old_inode);
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
        }
        if (ret) {
                btrfs_abort_transaction(trans, ret);
@@ -8989,7 +8990,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                           new_dentry->d_name.name,
                                           new_dentry->d_name.len);
                if (!ret)
-                       ret = btrfs_update_inode(trans, dest, new_inode);
+                       ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
        }
        if (ret) {
                btrfs_abort_transaction(trans, ret);
@@ -9078,7 +9079,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
        u64 objectid;
        u64 index;
 
-       ret = btrfs_find_free_ino(root, &objectid);
+       ret = btrfs_find_free_objectid(root, &objectid);
        if (ret)
                return ret;
 
@@ -9109,7 +9110,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
        if (ret)
                goto out;
 
-       ret = btrfs_update_inode(trans, root, inode);
+       ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 out:
        unlock_new_inode(inode);
        if (ret)
@@ -9243,7 +9244,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                        old_dentry->d_name.name,
                                        old_dentry->d_name.len);
                if (!ret)
-                       ret = btrfs_update_inode(trans, root, old_inode);
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
        }
        if (ret) {
                btrfs_abort_transaction(trans, ret);
@@ -9541,7 +9542,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_ino(root, &objectid);
+       err = btrfs_find_free_objectid(root, &objectid);
        if (err)
                goto out_unlock;
 
@@ -9603,7 +9604,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        inode_nohighmem(inode);
        inode_set_bytes(inode, name_len);
        btrfs_i_size_write(BTRFS_I(inode), name_len);
-       err = btrfs_update_inode(trans, root, inode);
+       err = btrfs_update_inode(trans, root, BTRFS_I(inode));
        /*
         * Last step, add directory indexes for our symlink inode. This is the
         * last step to avoid extra cleanup of these indexes if an error happens
@@ -9629,7 +9630,8 @@ out_unlock:
 
 static struct btrfs_trans_handle *insert_prealloc_file_extent(
                                       struct btrfs_trans_handle *trans_in,
-                                      struct inode *inode, struct btrfs_key *ins,
+                                      struct btrfs_inode *inode,
+                                      struct btrfs_key *ins,
                                       u64 file_offset)
 {
        struct btrfs_file_extent_item stack_fi;
@@ -9650,13 +9652,14 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
        /* Encryption and other encoding is reserved and all 0 */
 
-       ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
+       ret = btrfs_qgroup_release_data(inode, file_offset, len);
        if (ret < 0)
                return ERR_PTR(ret);
 
        if (trans) {
-               ret = insert_reserved_file_extent(trans, BTRFS_I(inode),
-                                                 file_offset, &stack_fi, ret);
+               ret = insert_reserved_file_extent(trans, inode,
+                                                 file_offset, &stack_fi,
+                                                 true, ret);
                if (ret)
                        return ERR_PTR(ret);
                return trans;
@@ -9676,7 +9679,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        if (!path)
                return ERR_PTR(-ENOMEM);
 
-       ret = btrfs_replace_file_extents(inode, path, file_offset,
+       ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset,
                                     file_offset + len - 1, &extent_info,
                                     &trans);
        btrfs_free_path(path);
@@ -9732,7 +9735,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                clear_offset += ins.offset;
 
                last_alloc = ins.offset;
-               trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
+               trans = insert_prealloc_file_extent(trans, BTRFS_I(inode),
+                                                   &ins, cur_offset);
                /*
                 * Now that we inserted the prealloc extent we can finally
                 * decrement the number of reservations in the block group.
@@ -9794,10 +9798,10 @@ next:
                        else
                                i_size = cur_offset;
                        i_size_write(inode, i_size);
-                       btrfs_inode_safe_disk_i_size_write(inode, 0);
+                       btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
                }
 
-               ret = btrfs_update_inode(trans, root, inode);
+               ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
@@ -9872,7 +9876,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       ret = btrfs_find_free_ino(root, &objectid);
+       ret = btrfs_find_free_objectid(root, &objectid);
        if (ret)
                goto out;
 
@@ -9893,7 +9897,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (ret)
                goto out;
 
-       ret = btrfs_update_inode(trans, root, inode);
+       ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        if (ret)
                goto out;
        ret = btrfs_orphan_add(trans, BTRFS_I(inode));
@@ -10272,6 +10276,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 }
 #endif
 
+/*
+ * Apply a change in the number of bytes used to the VFS inode. Ranges whose
+ * extents get replaced (clone, dedupe, fallocate's zero range) both remove
+ * and add bytes; the subtraction and the addition are done inside a single
+ * inode->lock critical section so that concurrent stat(2) calls always get
+ * a correct value. Nothing is touched when both amounts are equal.
+ */
+void btrfs_update_inode_bytes(struct btrfs_inode *inode,
+                             const u64 add_bytes,
+                             const u64 del_bytes)
+{
+       if (add_bytes != del_bytes) {
+               spin_lock(&inode->lock);
+               if (del_bytes != 0)
+                       inode_sub_bytes(&inode->vfs_inode, del_bytes);
+               if (add_bytes != 0)
+                       inode_add_bytes(&inode->vfs_inode, add_bytes);
+               spin_unlock(&inode->lock);
+       }
+}
+
 static const struct inode_operations btrfs_dir_inode_operations = {
        .getattr        = btrfs_getattr,
        .lookup         = btrfs_lookup,