Merge tag 'pm-5.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
[linux-2.6-microblaze.git] / fs / btrfs / inode.c
index a8e0a6b..35bfa05 100644 (file)
@@ -50,6 +50,7 @@
 #include "delalloc-space.h"
 #include "block-group.h"
 #include "space-info.h"
+#include "zoned.h"
 
 struct btrfs_iget_args {
        u64 ino;
@@ -692,8 +693,7 @@ cont:
                                                     NULL,
                                                     clear_flags,
                                                     PAGE_UNLOCK |
-                                                    PAGE_CLEAR_DIRTY |
-                                                    PAGE_SET_WRITEBACK |
+                                                    PAGE_START_WRITEBACK |
                                                     page_error_op |
                                                     PAGE_END_WRITEBACK);
 
@@ -917,7 +917,6 @@ retry:
                                                ins.objectid,
                                                async_extent->ram_size,
                                                ins.offset,
-                                               BTRFS_ORDERED_COMPRESSED,
                                                async_extent->compress_type);
                if (ret) {
                        btrfs_drop_extent_cache(inode, async_extent->start,
@@ -934,8 +933,7 @@ retry:
                                async_extent->start +
                                async_extent->ram_size - 1,
                                NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
-                               PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
-                               PAGE_SET_WRITEBACK);
+                               PAGE_UNLOCK | PAGE_START_WRITEBACK);
                if (btrfs_submit_compressed_write(inode, async_extent->start,
                                    async_extent->ram_size,
                                    ins.objectid,
@@ -971,9 +969,8 @@ out_free:
                                     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
                                     EXTENT_DELALLOC_NEW |
                                     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
-                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
-                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
-                                    PAGE_SET_ERROR);
+                                    PAGE_UNLOCK | PAGE_START_WRITEBACK |
+                                    PAGE_END_WRITEBACK | PAGE_SET_ERROR);
        free_async_extent_pages(async_extent);
        kfree(async_extent);
        goto again;
@@ -1071,8 +1068,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                                     EXTENT_LOCKED | EXTENT_DELALLOC |
                                     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
                                     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
-                                    PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
-                                    PAGE_END_WRITEBACK);
+                                    PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
                        *nr_written = *nr_written +
                             (end - start + PAGE_SIZE) / PAGE_SIZE;
                        *page_started = 1;
@@ -1127,7 +1123,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                free_extent_map(em);
 
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-                                              ram_size, cur_alloc_size, 0);
+                                              ram_size, cur_alloc_size,
+                                              BTRFS_ORDERED_REGULAR);
                if (ret)
                        goto out_drop_extent_cache;
 
@@ -1194,8 +1191,7 @@ out_reserve:
 out_unlock:
        clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
                EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
-       page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
-               PAGE_END_WRITEBACK;
+       page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
        /*
         * If we reserved an extent for our delalloc range (or a subrange) and
         * failed to create the respective ordered extent, then it means that
@@ -1320,9 +1316,8 @@ static int cow_file_range_async(struct btrfs_inode *inode,
                unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
                        EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
                        EXTENT_DO_ACCOUNTING;
-               unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
-                       PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
-                       PAGE_SET_ERROR;
+               unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
+                                        PAGE_END_WRITEBACK | PAGE_SET_ERROR;
 
                extent_clear_unlock_delalloc(inode, start, end, locked_page,
                                             clear_bits, page_ops);
@@ -1399,6 +1394,29 @@ static int cow_file_range_async(struct btrfs_inode *inode,
        return 0;
 }
 
+static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+                                      struct page *locked_page, u64 start,
+                                      u64 end, int *page_started,
+                                      unsigned long *nr_written)
+{
+       int ret;
+
+       ret = cow_file_range(inode, locked_page, start, end, page_started,
+                            nr_written, 0);
+       if (ret)
+               return ret;
+
+       if (*page_started)
+               return 0;
+
+       __set_page_dirty_nobuffers(locked_page);
+       account_page_redirty(locked_page);
+       extent_write_locked_range(&inode->vfs_inode, start, end, WB_SYNC_ALL);
+       *page_started = 1;
+
+       return 0;
+}
+
 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
                                        u64 bytenr, u64 num_bytes)
 {
@@ -1519,8 +1537,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
                                             EXTENT_LOCKED | EXTENT_DELALLOC |
                                             EXTENT_DO_ACCOUNTING |
                                             EXTENT_DEFRAG, PAGE_UNLOCK |
-                                            PAGE_CLEAR_DIRTY |
-                                            PAGE_SET_WRITEBACK |
+                                            PAGE_START_WRITEBACK |
                                             PAGE_END_WRITEBACK);
                return -ENOMEM;
        }
@@ -1657,9 +1674,6 @@ next_slot:
                         */
                        btrfs_release_path(path);
 
-                       /* If extent is RO, we must COW it */
-                       if (btrfs_extent_readonly(fs_info, disk_bytenr))
-                               goto out_check;
                        ret = btrfs_cross_ref_exist(root, ino,
                                                    found_key.offset -
                                                    extent_offset, disk_bytenr, false);
@@ -1706,6 +1720,7 @@ next_slot:
                                WARN_ON_ONCE(freespace_inode);
                                goto out_check;
                        }
+                       /* If the extent's block group is RO, we must COW */
                        if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
                                goto out_check;
                        nocow = true;
@@ -1842,8 +1857,7 @@ error:
                                             locked_page, EXTENT_LOCKED |
                                             EXTENT_DELALLOC | EXTENT_DEFRAG |
                                             EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
-                                            PAGE_CLEAR_DIRTY |
-                                            PAGE_SET_WRITEBACK |
+                                            PAGE_START_WRITEBACK |
                                             PAGE_END_WRITEBACK);
        btrfs_free_path(path);
        return ret;
@@ -1878,17 +1892,24 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 {
        int ret;
        int force_cow = need_force_cow(inode, start, end);
+       const bool zoned = btrfs_is_zoned(inode->root->fs_info);
 
        if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+               ASSERT(!zoned);
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 1, nr_written);
        } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+               ASSERT(!zoned);
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
        } else if (!inode_can_compress(inode) ||
                   !inode_need_compress(inode, start, end)) {
-               ret = cow_file_range(inode, locked_page, start, end,
-                                    page_started, nr_written, 1);
+               if (zoned)
+                       ret = run_delalloc_zoned(inode, locked_page, start, end,
+                                                page_started, nr_written);
+               else
+                       ret = cow_file_range(inode, locked_page, start, end,
+                                            page_started, nr_written, 1);
        } else {
                set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
                ret = cow_file_range_async(inode, wbc, locked_page, start, end,
@@ -2183,9 +2204,10 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
        struct inode *inode = page->mapping->host;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        u64 logical = bio->bi_iter.bi_sector << 9;
+       struct extent_map *em;
        u64 length = 0;
        u64 map_length;
-       int ret;
+       int ret = 0;
        struct btrfs_io_geometry geom;
 
        if (bio_flags & EXTENT_BIO_COMPRESSED)
@@ -2193,14 +2215,19 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
 
        length = bio->bi_iter.bi_size;
        map_length = length;
-       ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
-                                   &geom);
+       em = btrfs_get_chunk_map(fs_info, logical, map_length);
+       if (IS_ERR(em))
+               return PTR_ERR(em);
+       ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical,
+                                   map_length, &geom);
        if (ret < 0)
-               return ret;
+               goto out;
 
        if (geom.len < length + size)
-               return 1;
-       return 0;
+               ret = 1;
+out:
+       free_extent_map(em);
+       return ret;
 }
 
 /*
@@ -2217,6 +2244,119 @@ static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
        return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
 }
 
+bool btrfs_bio_fits_in_ordered_extent(struct page *page, struct bio *bio,
+                                     unsigned int size)
+{
+       struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct btrfs_ordered_extent *ordered;
+       u64 len = bio->bi_iter.bi_size + size;
+       bool ret = true;
+
+       ASSERT(btrfs_is_zoned(fs_info));
+       ASSERT(fs_info->max_zone_append_size > 0);
+       ASSERT(bio_op(bio) == REQ_OP_ZONE_APPEND);
+
+       /* Ordered extent not yet created, so we're good */
+       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+       if (!ordered)
+               return ret;
+
+       if ((bio->bi_iter.bi_sector << SECTOR_SHIFT) + len >
+           ordered->disk_bytenr + ordered->disk_num_bytes)
+               ret = false;
+
+       btrfs_put_ordered_extent(ordered);
+
+       return ret;
+}
+
+static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
+                                          struct bio *bio, loff_t file_offset)
+{
+       struct btrfs_ordered_extent *ordered;
+       struct extent_map *em = NULL, *em_new = NULL;
+       struct extent_map_tree *em_tree = &inode->extent_tree;
+       u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
+       u64 len = bio->bi_iter.bi_size;
+       u64 end = start + len;
+       u64 ordered_end;
+       u64 pre, post;
+       int ret = 0;
+
+       ordered = btrfs_lookup_ordered_extent(inode, file_offset);
+       if (WARN_ON_ONCE(!ordered))
+               return BLK_STS_IOERR;
+
+       /* No need to split */
+       if (ordered->disk_num_bytes == len)
+               goto out;
+
+       /* We cannot split once end_bio'd ordered extent */
+       if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* We cannot split a compressed ordered extent */
+       if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ordered_end = ordered->disk_bytenr + ordered->disk_num_bytes;
+       /* bio must be in one ordered extent */
+       if (WARN_ON_ONCE(start < ordered->disk_bytenr || end > ordered_end)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* Checksum list should be empty */
+       if (WARN_ON_ONCE(!list_empty(&ordered->list))) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       pre = start - ordered->disk_bytenr;
+       post = ordered_end - end;
+
+       ret = btrfs_split_ordered_extent(ordered, pre, post);
+       if (ret)
+               goto out;
+
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, ordered->file_offset, len);
+       if (!em) {
+               read_unlock(&em_tree->lock);
+               ret = -EIO;
+               goto out;
+       }
+       read_unlock(&em_tree->lock);
+
+       ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
+       /*
+        * We cannot reuse em_new here but have to create a new one, as
+        * unpin_extent_cache() expects the start of the extent map to be the
+        * logical offset of the file, which does not hold true anymore after
+        * splitting.
+        */
+       em_new = create_io_em(inode, em->start + pre, len,
+                             em->start + pre, em->block_start + pre, len,
+                             len, len, BTRFS_COMPRESS_NONE,
+                             BTRFS_ORDERED_REGULAR);
+       if (IS_ERR(em_new)) {
+               ret = PTR_ERR(em_new);
+               goto out;
+       }
+       free_extent_map(em_new);
+
+out:
+       free_extent_map(em);
+       btrfs_put_ordered_extent(ordered);
+
+       return errno_to_blk_status(ret);
+}
+
 /*
  * extent_io.c submission hook. This does the right thing for csum calculation
  * on write, or reading the csums from the tree before a read.
@@ -2252,7 +2392,16 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
        if (btrfs_is_free_space_inode(BTRFS_I(inode)))
                metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
 
-       if (bio_op(bio) != REQ_OP_WRITE) {
+       if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+               struct page *page = bio_first_bvec_all(bio)->bv_page;
+               loff_t file_offset = page_offset(page);
+
+               ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
+               if (ret)
+                       goto out;
+       }
+
+       if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
                ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
                if (ret)
                        goto out;
@@ -2754,6 +2903,9 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
+       if (ordered_extent->disk)
+               btrfs_rewrite_logical_zoned(ordered_extent);
+
        btrfs_free_io_failure_record(inode, start, end);
 
        if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
@@ -3103,14 +3255,16 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 }
 
 /**
- * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running
- * @fs_info - the fs_info for this fs
- * @return - EINTR if we were killed, 0 if nothing's pending
+ * Wait for flushing all delayed iputs
+ *
+ * @fs_info:  the filesystem
  *
  * This will wait on any delayed iputs that are currently running with KILLABLE
  * set.  Once they are all done running we will return, unless we are killed in
  * which case we return EINTR. This helps in user operations like fallocate etc
  * that might get blocked on the iputs.
+ *
+ * Return EINTR if we were killed, 0 if nothing's pending
  */
 int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
 {
@@ -4720,6 +4874,9 @@ again:
                ret = -ENOMEM;
                goto out;
        }
+       ret = set_page_extent_mapped(page);
+       if (ret < 0)
+               goto out_unlock;
 
        if (!PageUptodate(page)) {
                ret = btrfs_readpage(NULL, page);
@@ -4737,7 +4894,6 @@ again:
        wait_on_page_writeback(page);
 
        lock_extent_bits(io_tree, block_start, block_end, &cached_state);
-       set_page_extent_mapped(page);
 
        ordered = btrfs_lookup_ordered_extent(inode, block_start);
        if (ordered) {
@@ -5011,6 +5167,15 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                btrfs_drew_write_unlock(&root->snapshot_lock);
                btrfs_end_transaction(trans);
        } else {
+               struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+
+               if (btrfs_is_zoned(fs_info)) {
+                       ret = btrfs_wait_ordered_range(inode,
+                                       ALIGN(newsize, fs_info->sectorsize),
+                                       (u64)-1);
+                       if (ret)
+                               return ret;
+               }
 
                /*
                 * We're truncating a file that used to have good data down to
@@ -5045,7 +5210,8 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
        return ret;
 }
 
-static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                        struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5054,7 +5220,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        if (btrfs_root_readonly(root))
                return -EROFS;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
@@ -5065,12 +5231,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid) {
-               setattr_copy(inode, attr);
+               setattr_copy(&init_user_ns, inode, attr);
                inode_inc_iversion(inode);
                err = btrfs_dirty_inode(inode);
 
                if (!err && attr->ia_valid & ATTR_MODE)
-                       err = posix_acl_chmod(inode, inode->i_mode);
+                       err = posix_acl_chmod(&init_user_ns, inode,
+                                             inode->i_mode);
        }
 
        return err;
@@ -5916,7 +6083,7 @@ static int btrfs_dirty_inode(struct inode *inode)
                return PTR_ERR(trans);
 
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
-       if (ret && ret == -ENOSPC) {
+       if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
                /* whoops, lets try again with the full transaction */
                btrfs_end_transaction(trans);
                trans = btrfs_start_transaction(root, 1);
@@ -6190,7 +6357,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (ret != 0)
                goto fail_unlock;
 
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode_set_bytes(inode, 0);
 
        inode->i_mtime = current_time(inode);
@@ -6351,8 +6518,8 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
        return err;
 }
 
-static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
-                       umode_t mode, dev_t rdev)
+static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -6371,7 +6538,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_objectid(root, &objectid);
+       err = btrfs_get_free_objectid(root, &objectid);
        if (err)
                goto out_unlock;
 
@@ -6415,8 +6582,8 @@ out_unlock:
        return err;
 }
 
-static int btrfs_create(struct inode *dir, struct dentry *dentry,
-                       umode_t mode, bool excl)
+static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -6435,7 +6602,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_objectid(root, &objectid);
+       err = btrfs_get_free_objectid(root, &objectid);
        if (err)
                goto out_unlock;
 
@@ -6560,7 +6727,8 @@ fail:
        return err;
 }
 
-static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct inode *inode = NULL;
@@ -6579,7 +6747,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_objectid(root, &objectid);
+       err = btrfs_get_free_objectid(root, &objectid);
        if (err)
                goto out_fail;
 
@@ -7103,9 +7271,6 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
  * @strict:    if true, omit optimizations that might force us into unnecessary
  *             cow. e.g., don't trust generation number.
  *
- * This function will flush ordered extents in the range to ensure proper
- * nocow checks for (nowait == false) case.
- *
  * Return:
  * >0  and update @len if we can do nocow write
  *  0  if we can't do nocow write
@@ -7613,6 +7778,9 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
        iomap->bdev = fs_info->fs_devices->latest_bdev;
        iomap->length = len;
 
+       if (write && btrfs_use_zone_append(BTRFS_I(inode), em))
+               iomap->flags |= IOMAP_F_ZONE_APPEND;
+
        free_extent_map(em);
 
        return 0;
@@ -7682,7 +7850,7 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
        if (!refcount_dec_and_test(&dip->refs))
                return;
 
-       if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
+       if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) {
                __endio_write_update_ordered(BTRFS_I(dip->inode),
                                             dip->logical_offset,
                                             dip->bytes,
@@ -7797,10 +7965,8 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
                                        NULL);
                        btrfs_queue_work(wq, &ordered->work);
                }
-               /*
-                * If btrfs_dec_test_ordered_pending does not find any ordered
-                * extent in the range, we can exit.
-                */
+
+               /* No ordered extent found in the range, exit */
                if (ordered_offset == last_offset)
                        return;
                /*
@@ -7841,6 +8007,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
        if (err)
                dip->dio_bio->bi_status = err;
 
+       btrfs_record_physical_zoned(dip->inode, dip->logical_offset, bio);
+
        bio_put(bio);
        btrfs_dio_private_put(dip);
 }
@@ -7850,7 +8018,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_dio_private *dip = bio->bi_private;
-       bool write = bio_op(bio) == REQ_OP_WRITE;
+       bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
        blk_status_t ret;
 
        /* Check btrfs_submit_bio_hook() for rules about async submit. */
@@ -7900,7 +8068,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
                                                          struct inode *inode,
                                                          loff_t file_offset)
 {
-       const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+       const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
        const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
        size_t dip_size;
        struct btrfs_dio_private *dip;
@@ -7930,7 +8098,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
 static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                struct bio *dio_bio, loff_t file_offset)
 {
-       const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+       const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
                             BTRFS_BLOCK_GROUP_RAID56_MASK);
@@ -7941,10 +8109,12 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
        u64 submit_len;
        int clone_offset = 0;
        int clone_len;
+       u64 logical;
        int ret;
        blk_status_t status;
        struct btrfs_io_geometry geom;
        struct btrfs_dio_data *dio_data = iomap->private;
+       struct extent_map *em = NULL;
 
        dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
        if (!dip) {
@@ -7973,12 +8143,18 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
        submit_len = dio_bio->bi_iter.bi_size;
 
        do {
-               ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio),
-                                           start_sector << 9, submit_len,
-                                           &geom);
+               logical = start_sector << 9;
+               em = btrfs_get_chunk_map(fs_info, logical, submit_len);
+               if (IS_ERR(em)) {
+                       status = errno_to_blk_status(PTR_ERR(em));
+                       em = NULL;
+                       goto out_err_em;
+               }
+               ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
+                                           logical, submit_len, &geom);
                if (ret) {
                        status = errno_to_blk_status(ret);
-                       goto out_err;
+                       goto out_err_em;
                }
                ASSERT(geom.len <= INT_MAX);
 
@@ -7993,6 +8169,19 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                bio->bi_end_io = btrfs_end_dio_bio;
                btrfs_io_bio(bio)->logical = file_offset;
 
+               WARN_ON_ONCE(write && btrfs_is_zoned(fs_info) &&
+                            fs_info->max_zone_append_size &&
+                            bio_op(bio) != REQ_OP_ZONE_APPEND);
+
+               if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+                       status = extract_ordered_extent(BTRFS_I(inode), bio,
+                                                       file_offset);
+                       if (status) {
+                               bio_put(bio);
+                               goto out_err;
+                       }
+               }
+
                ASSERT(submit_len >= clone_len);
                submit_len -= clone_len;
 
@@ -8023,19 +8212,24 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                        bio_put(bio);
                        if (submit_len > 0)
                                refcount_dec(&dip->refs);
-                       goto out_err;
+                       goto out_err_em;
                }
 
                dio_data->submitted += clone_len;
                clone_offset += clone_len;
                start_sector += clone_len >> 9;
                file_offset += clone_len;
+
+               free_extent_map(em);
        } while (submit_len > 0);
        return BLK_QC_T_NONE;
 
+out_err_em:
+       free_extent_map(em);
 out_err:
        dip->dio_bio->bi_status = status;
        btrfs_dio_private_put(dip);
+
        return BLK_QC_T_NONE;
 }
 
@@ -8117,7 +8311,7 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 {
        int ret = try_release_extent_mapping(page, gfp_flags);
        if (ret == 1)
-               detach_page_private(page);
+               clear_page_extent_mapped(page);
        return ret;
 }
 
@@ -8186,8 +8380,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 
        if (!inode_evicting)
                lock_extent_bits(tree, page_start, page_end, &cached_state);
-again:
+
        start = page_start;
+again:
        ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
        if (ordered) {
                found_ordered = true;
@@ -8276,7 +8471,7 @@ again:
        }
 
        ClearPageChecked(page);
-       detach_page_private(page);
+       clear_page_extent_mapped(page);
 }
 
 /*
@@ -8355,7 +8550,12 @@ again:
        wait_on_page_writeback(page);
 
        lock_extent_bits(io_tree, page_start, page_end, &cached_state);
-       set_page_extent_mapped(page);
+       ret2 = set_page_extent_mapped(page);
+       if (ret2 < 0) {
+               ret = vmf_error(ret2);
+               unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
+               goto out_unlock;
+       }
 
        /*
         * we can't set the delalloc bits if there are pending ordered
@@ -8592,15 +8792,18 @@ out:
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
                             struct btrfs_root *new_root,
-                            struct btrfs_root *parent_root,
-                            u64 new_dirid)
+                            struct btrfs_root *parent_root)
 {
        struct inode *inode;
        int err;
        u64 index = 0;
+       u64 ino;
+
+       err = btrfs_get_free_objectid(new_root, &ino);
+       if (err < 0)
+               return err;
 
-       inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
-                               new_dirid, new_dirid,
+       inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, ino, ino,
                                S_IFDIR | (~current_umask() & S_IRWXUGO),
                                &index);
        if (IS_ERR(inode))
@@ -8815,7 +9018,8 @@ fail:
        return -ENOMEM;
 }
 
-static int btrfs_getattr(const struct path *path, struct kstat *stat,
+static int btrfs_getattr(struct user_namespace *mnt_userns,
+                        const struct path *path, struct kstat *stat,
                         u32 request_mask, unsigned int flags)
 {
        u64 delalloc_bytes;
@@ -8841,7 +9045,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
                                  STATX_ATTR_IMMUTABLE |
                                  STATX_ATTR_NODUMP);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->dev = BTRFS_I(inode)->root->anon_dev;
 
        spin_lock(&BTRFS_I(inode)->lock);
@@ -9079,7 +9283,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
        u64 objectid;
        u64 index;
 
-       ret = btrfs_find_free_objectid(root, &objectid);
+       ret = btrfs_get_free_objectid(root, &objectid);
        if (ret)
                return ret;
 
@@ -9332,9 +9536,9 @@ out_notrans:
        return ret;
 }
 
-static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
-                        struct inode *new_dir, struct dentry *new_dentry,
-                        unsigned int flags)
+static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+                        struct dentry *old_dentry, struct inode *new_dir,
+                        struct dentry *new_dentry, unsigned int flags)
 {
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
@@ -9486,11 +9690,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
        return start_delalloc_inodes(root, &wbc, true, false);
 }
 
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
                               bool in_reclaim_context)
 {
        struct writeback_control wbc = {
-               .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr,
+               .nr_to_write = nr,
                .sync_mode = WB_SYNC_NONE,
                .range_start = 0,
                .range_end = LLONG_MAX,
@@ -9507,12 +9711,12 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
        mutex_lock(&fs_info->delalloc_root_mutex);
        spin_lock(&fs_info->delalloc_root_lock);
        list_splice_init(&fs_info->delalloc_roots, &splice);
-       while (!list_empty(&splice) && nr) {
+       while (!list_empty(&splice)) {
                /*
                 * Reset nr_to_write here so we know that we're doing a full
                 * flush.
                 */
-               if (nr == U64_MAX)
+               if (nr == LONG_MAX)
                        wbc.nr_to_write = LONG_MAX;
 
                root = list_first_entry(&splice, struct btrfs_root,
@@ -9542,8 +9746,8 @@ out:
        return ret;
 }
 
-static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
-                        const char *symname)
+static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -9575,7 +9779,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       err = btrfs_find_free_objectid(root, &objectid);
+       err = btrfs_get_free_objectid(root, &objectid);
        if (err)
                goto out_unlock;
 
@@ -9877,7 +10081,8 @@ static int btrfs_set_page_dirty(struct page *page)
        return __set_page_dirty_nobuffers(page);
 }
 
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission(struct user_namespace *mnt_userns,
+                           struct inode *inode, int mask)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        umode_t mode = inode->i_mode;
@@ -9889,10 +10094,11 @@ static int btrfs_permission(struct inode *inode, int mask)
                if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
                        return -EACCES;
        }
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
-static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -9909,7 +10115,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       ret = btrfs_find_free_objectid(root, &objectid);
+       ret = btrfs_get_free_objectid(root, &objectid);
        if (ret)
                goto out;
 
@@ -9992,6 +10198,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
        sp->ptr = ptr;
        sp->inode = inode;
        sp->is_block_group = is_block_group;
+       sp->bg_extent_count = 1;
 
        spin_lock(&fs_info->swapfile_pins_lock);
        p = &fs_info->swapfile_pins.rb_node;
@@ -10005,6 +10212,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
                           (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
                        p = &(*p)->rb_right;
                } else {
+                       if (is_block_group)
+                               entry->bg_extent_count++;
                        spin_unlock(&fs_info->swapfile_pins_lock);
                        kfree(sp);
                        return 1;
@@ -10030,8 +10239,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
                sp = rb_entry(node, struct btrfs_swapfile_pin, node);
                if (sp->inode == inode) {
                        rb_erase(&sp->node, &fs_info->swapfile_pins);
-                       if (sp->is_block_group)
+                       if (sp->is_block_group) {
+                               btrfs_dec_block_group_swap_extents(sp->ptr,
+                                                          sp->bg_extent_count);
                                btrfs_put_block_group(sp->ptr);
+                       }
                        kfree(sp);
                }
                node = next;
@@ -10092,7 +10304,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                               sector_t *span)
 {
        struct inode *inode = file_inode(file);
-       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_state *cached_state = NULL;
        struct extent_map *em = NULL;
@@ -10143,13 +10356,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
           "cannot activate swapfile while exclusive operation is running");
                return -EBUSY;
        }
+
+       /*
+        * Prevent snapshot creation while we are activating the swap file.
+        * We do not want to race with snapshot creation. If snapshot creation
+        * already started before we bumped nr_swapfiles from 0 to 1 and
+        * completes before the first write into the swap file after it is
+        * activated, than that write would fallback to COW.
+        */
+       if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
+               btrfs_exclop_finish(fs_info);
+               btrfs_warn(fs_info,
+          "cannot activate swapfile because snapshot creation is in progress");
+               return -EINVAL;
+       }
        /*
         * Snapshots can create extents which require COW even if NODATACOW is
         * set. We use this counter to prevent snapshots. We must increment it
         * before walking the extents because we don't want a concurrent
         * snapshot to run after we've already checked the extents.
         */
-       atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
+       atomic_inc(&root->nr_swapfiles);
 
        isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
@@ -10246,6 +10473,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                        goto out;
                }
 
+               if (!btrfs_inc_block_group_swap_extents(bg)) {
+                       btrfs_warn(fs_info,
+                          "block group for swapfile at %llu is read-only%s",
+                          bg->start,
+                          atomic_read(&fs_info->scrubs_running) ?
+                                      " (scrub running)" : "");
+                       btrfs_put_block_group(bg);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                ret = btrfs_add_swapfile_pin(inode, bg, true);
                if (ret) {
                        btrfs_put_block_group(bg);
@@ -10284,6 +10522,8 @@ out:
        if (ret)
                btrfs_swap_deactivate(file);
 
+       btrfs_drew_write_unlock(&root->snapshot_lock);
+
        btrfs_exclop_finish(fs_info);
 
        if (ret)