Merge tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block
[linux-2.6-microblaze.git] / fs / btrfs / file.c
index 234a418..bb824c7 100644 (file)
@@ -500,18 +500,18 @@ next:
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-int btrfs_dirty_pages(struct inode *inode, struct page **pages,
+int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
                      size_t num_pages, loff_t pos, size_t write_bytes,
                      struct extent_state **cached)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        int err = 0;
        int i;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
        u64 end_pos = pos + write_bytes;
-       loff_t isize = i_size_read(inode);
+       loff_t isize = i_size_read(&inode->vfs_inode);
        unsigned int extra_bits = 0;
 
        start_pos = pos & ~((u64) fs_info->sectorsize - 1);
@@ -524,13 +524,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
         * The pages may have already been dirty, clear out old accounting so
         * we can set things up properly
         */
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block,
+       clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
                         EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                         0, 0, cached);
 
-       if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
+       if (!btrfs_is_free_space_inode(inode)) {
                if (start_pos >= isize &&
-                   !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
+                   !(inode->flags & BTRFS_INODE_PREALLOC)) {
                        /*
                         * There can't be any extents following eof in this case
                         * so just set the delalloc new bit for the range
@@ -538,8 +538,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
                         */
                        extra_bits |= EXTENT_DELALLOC_NEW;
                } else {
-                       err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
-                                                           start_pos,
+                       err = btrfs_find_new_delalloc_bytes(inode, start_pos,
                                                            num_bytes, cached);
                        if (err)
                                return err;
@@ -564,7 +563,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
         * at this time.
         */
        if (end_pos > isize)
-               i_size_write(inode, end_pos);
+               i_size_write(&inode->vfs_inode, end_pos);
        return 0;
 }
 
@@ -731,7 +730,7 @@ next:
  * is deleted from the tree.
  */
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root, struct inode *inode,
+                        struct btrfs_root *root, struct btrfs_inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
                         u64 *drop_end, int drop_cache,
                         int replace_extent,
@@ -744,7 +743,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        struct btrfs_ref ref = { 0 };
        struct btrfs_key key;
        struct btrfs_key new_key;
-       u64 ino = btrfs_ino(BTRFS_I(inode));
+       struct inode *vfs_inode = &inode->vfs_inode;
+       u64 ino = btrfs_ino(inode);
        u64 search_start = start;
        u64 disk_bytenr = 0;
        u64 num_bytes = 0;
@@ -762,9 +762,9 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int leafs_visited = 0;
 
        if (drop_cache)
-               btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);
+               btrfs_drop_extent_cache(inode, start, end - 1, 0);
 
-       if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
+       if (start >= inode->disk_i_size && !replace_extent)
                modify_tree = 0;
 
        update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
@@ -935,7 +935,7 @@ next_slot:
                                                        extent_end - end);
                        btrfs_mark_buffer_dirty(leaf);
                        if (update_refs && disk_bytenr > 0)
-                               inode_sub_bytes(inode, end - key.offset);
+                               inode_sub_bytes(vfs_inode, end - key.offset);
                        break;
                }
 
@@ -955,7 +955,7 @@ next_slot:
                                                        start - key.offset);
                        btrfs_mark_buffer_dirty(leaf);
                        if (update_refs && disk_bytenr > 0)
-                               inode_sub_bytes(inode, extent_end - start);
+                               inode_sub_bytes(vfs_inode, extent_end - start);
                        if (end == extent_end)
                                break;
 
@@ -979,7 +979,7 @@ delete_extent_item:
 
                        if (update_refs &&
                            extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                               inode_sub_bytes(inode,
+                               inode_sub_bytes(vfs_inode,
                                                extent_end - key.offset);
                                extent_end = ALIGN(extent_end,
                                                   fs_info->sectorsize);
@@ -993,7 +993,7 @@ delete_extent_item:
                                                key.offset - extent_offset);
                                ret = btrfs_free_extent(trans, &ref);
                                BUG_ON(ret); /* -ENOMEM */
-                               inode_sub_bytes(inode,
+                               inode_sub_bytes(vfs_inode,
                                                extent_end - key.offset);
                        }
 
@@ -1082,8 +1082,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
-                                  drop_cache, 0, 0, NULL);
+       ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start,
+                                  end, NULL, drop_cache, 0, 0, NULL);
        btrfs_free_path(path);
        return ret;
 }
@@ -1532,8 +1532,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
        return ret;
 }
 
-static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
-                                   size_t *write_bytes)
+static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
+                          size_t *write_bytes, bool nowait)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_root *root = inode->root;
@@ -1541,32 +1541,87 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
        u64 num_bytes;
        int ret;
 
-       if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
+       if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
+               return 0;
+
+       if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
                return -EAGAIN;
 
        lockstart = round_down(pos, fs_info->sectorsize);
        lockend = round_up(pos + *write_bytes,
                           fs_info->sectorsize) - 1;
+       num_bytes = lockend - lockstart + 1;
 
-       btrfs_lock_and_flush_ordered_range(inode, lockstart,
-                                          lockend, NULL);
+       if (nowait) {
+               struct btrfs_ordered_extent *ordered;
+
+               if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
+                       return -EAGAIN;
+
+               ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                    num_bytes);
+               if (ordered) {
+                       btrfs_put_ordered_extent(ordered);
+                       ret = -EAGAIN;
+                       goto out_unlock;
+               }
+       } else {
+               btrfs_lock_and_flush_ordered_range(inode, lockstart,
+                                                  lockend, NULL);
+       }
 
-       num_bytes = lockend - lockstart + 1;
        ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
                        NULL, NULL, NULL);
        if (ret <= 0) {
                ret = 0;
-               btrfs_drew_write_unlock(&root->snapshot_lock);
+               if (!nowait)
+                       btrfs_drew_write_unlock(&root->snapshot_lock);
        } else {
                *write_bytes = min_t(size_t, *write_bytes ,
                                     num_bytes - pos + lockstart);
        }
-
+out_unlock:
        unlock_extent(&inode->io_tree, lockstart, lockend);
 
        return ret;
 }
 
+static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
+                             size_t *write_bytes)
+{
+       return check_can_nocow(inode, pos, write_bytes, true);
+}
+
+/*
+ * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
+ *
+ * @pos:        File offset
+ * @write_bytes: The length to write, will be updated to the nocow writeable
+ *              range
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks.
+ *
+ * Return:
+ * >0          and update @write_bytes if we can do nocow write
+ *  0          if we can't do nocow write
+ * -EAGAIN     if we can't get the needed lock or there are ordered extents
+ *             for * (nowait == true) case
+ * <0          if other error happened
+ *
+ * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
+ */
+int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+                          size_t *write_bytes)
+{
+       return check_can_nocow(inode, pos, write_bytes, false);
+}
+
+void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
+{
+       btrfs_drew_write_unlock(&inode->root->snapshot_lock);
+}
+
 static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                               struct iov_iter *i)
 {
@@ -1574,7 +1629,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
        loff_t pos = iocb->ki_pos;
        struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
        struct extent_changeset *data_reserved = NULL;
        u64 release_bytes = 0;
@@ -1627,13 +1681,12 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                fs_info->sectorsize);
 
                extent_changeset_release(data_reserved);
-               ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
+               ret = btrfs_check_data_free_space(BTRFS_I(inode),
+                                                 &data_reserved, pos,
                                                  write_bytes);
                if (ret < 0) {
-                       if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-                                                     BTRFS_INODE_PREALLOC)) &&
-                           check_can_nocow(BTRFS_I(inode), pos,
-                                       &write_bytes) > 0) {
+                       if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
+                                                  &write_bytes) > 0) {
                                /*
                                 * For nodata cow case, no need to reserve
                                 * data space.
@@ -1658,11 +1711,11 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                reserve_bytes);
                if (ret) {
                        if (!only_release_metadata)
-                               btrfs_free_reserved_data_space(inode,
+                               btrfs_free_reserved_data_space(BTRFS_I(inode),
                                                data_reserved, pos,
                                                write_bytes);
                        else
-                               btrfs_drew_write_unlock(&root->snapshot_lock);
+                               btrfs_check_nocow_unlock(BTRFS_I(inode));
                        break;
                }
 
@@ -1732,7 +1785,7 @@ again:
                                __pos = round_down(pos,
                                                   fs_info->sectorsize) +
                                        (dirty_pages << PAGE_SHIFT);
-                               btrfs_delalloc_release_space(inode,
+                               btrfs_delalloc_release_space(BTRFS_I(inode),
                                                data_reserved, __pos,
                                                release_bytes, true);
                        }
@@ -1742,8 +1795,9 @@ again:
                                        fs_info->sectorsize);
 
                if (copied > 0)
-                       ret = btrfs_dirty_pages(inode, pages, dirty_pages,
-                                               pos, copied, &cached_state);
+                       ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
+                                               dirty_pages, pos, copied,
+                                               &cached_state);
 
                /*
                 * If we have not locked the extent range, because the range's
@@ -1766,7 +1820,7 @@ again:
 
                release_bytes = 0;
                if (only_release_metadata)
-                       btrfs_drew_write_unlock(&root->snapshot_lock);
+                       btrfs_check_nocow_unlock(BTRFS_I(inode));
 
                if (only_release_metadata && copied > 0) {
                        lockstart = round_down(pos,
@@ -1784,8 +1838,6 @@ again:
                cond_resched();
 
                balance_dirty_pages_ratelimited(inode->i_mapping);
-               if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
-                       btrfs_btree_balance_dirty(fs_info);
 
                pos += copied;
                num_written += copied;
@@ -1795,11 +1847,12 @@ again:
 
        if (release_bytes) {
                if (only_release_metadata) {
-                       btrfs_drew_write_unlock(&root->snapshot_lock);
+                       btrfs_check_nocow_unlock(BTRFS_I(inode));
                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
                                        release_bytes, true);
                } else {
-                       btrfs_delalloc_release_space(inode, data_reserved,
+                       btrfs_delalloc_release_space(BTRFS_I(inode),
+                                       data_reserved,
                                        round_down(pos, fs_info->sectorsize),
                                        release_bytes, true);
                }
@@ -1904,13 +1957,23 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        pos = iocb->ki_pos;
        count = iov_iter_count(from);
        if (iocb->ki_flags & IOCB_NOWAIT) {
+               size_t nocow_bytes = count;
+
                /*
                 * We will allocate space in case nodatacow is not set,
                 * so bail
                 */
-               if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-                                             BTRFS_INODE_PREALLOC)) ||
-                   check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+               if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes)
+                   <= 0) {
+                       inode_unlock(inode);
+                       return -EAGAIN;
+               }
+               /*
+                * There are holes in the range or parts of the range that must
+                * be COWed (shared extents, RO block groups, etc), so just bail
+                * out.
+                */
+               if (nocow_bytes < count) {
                        inode_unlock(inode);
                        return -EAGAIN;
                }
@@ -2570,7 +2633,7 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
 
        cur_offset = start;
        while (cur_offset < end) {
-               ret = __btrfs_drop_extents(trans, root, inode, path,
+               ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path,
                                           cur_offset, end + 1, &drop_end,
                                           1, 0, 0, NULL);
                if (ret != -ENOSPC) {
@@ -3148,14 +3211,14 @@ reserve_space:
                if (ret < 0)
                        goto out;
                space_reserved = true;
-               ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
-                                               alloc_start, bytes_to_reserve);
-               if (ret)
-                       goto out;
                ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
                                                  &cached_state);
                if (ret)
                        goto out;
+               ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
+                                               alloc_start, bytes_to_reserve);
+               if (ret)
+                       goto out;
                ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
                                                alloc_end - alloc_start,
                                                i_blocksize(inode),
@@ -3171,7 +3234,7 @@ reserve_space:
        ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
  out:
        if (ret && space_reserved)
-               btrfs_free_reserved_data_space(inode, data_reserved,
+               btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
                                               alloc_start, bytes_to_reserve);
        extent_changeset_free(data_reserved);
 
@@ -3322,8 +3385,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                                free_extent_map(em);
                                break;
                        }
-                       ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
-                                       cur_offset, last_byte - cur_offset);
+                       ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
+                                       &data_reserved, cur_offset,
+                                       last_byte - cur_offset);
                        if (ret < 0) {
                                cur_offset = last_byte;
                                free_extent_map(em);
@@ -3335,8 +3399,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                         * range, free reserved data space first, otherwise
                         * it'll result in false ENOSPC error.
                         */
-                       btrfs_free_reserved_data_space(inode, data_reserved,
-                                       cur_offset, last_byte - cur_offset);
+                       btrfs_free_reserved_data_space(BTRFS_I(inode),
+                               data_reserved, cur_offset,
+                               last_byte - cur_offset);
                }
                free_extent_map(em);
                cur_offset = last_byte;
@@ -3353,7 +3418,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                                        range->len, i_blocksize(inode),
                                        offset + len, &alloc_hint);
                else
-                       btrfs_free_reserved_data_space(inode,
+                       btrfs_free_reserved_data_space(BTRFS_I(inode),
                                        data_reserved, range->start,
                                        range->len);
                list_del(&range->list);
@@ -3374,7 +3439,7 @@ out:
        inode_unlock(inode);
        /* Let go of our reservation. */
        if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
-               btrfs_free_reserved_data_space(inode, data_reserved,
+               btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
                                cur_offset, alloc_end - cur_offset);
        extent_changeset_free(data_reserved);
        return ret;
@@ -3481,6 +3546,7 @@ const struct file_operations btrfs_file_operations = {
        .read_iter      = generic_file_read_iter,
        .splice_read    = generic_file_splice_read,
        .write_iter     = btrfs_file_write_iter,
+       .splice_write   = iter_file_splice_write,
        .mmap           = btrfs_file_mmap,
        .open           = btrfs_file_open,
        .release        = btrfs_release_file,