Merge tag 'folio-5.19' of git://git.infradead.org/users/willy/pagecache

[linux-2.6-microblaze.git] / fs / btrfs / extent_io.c
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index e7a6e87..8f6b544 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -6,6 +6,7 @@
  #include <linux/mm.h>
  #include <linux/pagemap.h>
  #include <linux/page-flags.h>
+#include <linux/sched/mm.h>
  #include <linux/spinlock.h>
  #include <linux/blkdev.h>
  #include <linux/swap.h>
@@ -28,6 +29,7 @@
  #include "subpage.h"
  #include "zoned.h"
  #include "block-group.h"
+#include "compression.h"
  
  static struct kmem_cache *extent_state_cache;
  static struct kmem_cache *extent_buffer_cache;
@@ -75,6 +77,7 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
         if (!fs_info->allocated_ebs.next)
                 return;
  
+       WARN_ON(!list_empty(&fs_info->allocated_ebs));
         spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
         while (!list_empty(&fs_info->allocated_ebs)) {
                 eb = list_first_entry(&fs_info->allocated_ebs,
@@ -135,6 +138,17 @@ struct tree_entry {
         struct rb_node rb_node;
  };
  
+/*
+ * State of the bio currently being assembled: the bio itself, its compression
+ * type, and how many bytes remain before the stripe/ordered extent boundary.
+ */
+struct btrfs_bio_ctrl {
+       struct bio *bio;                /* bio being assembled */
+       enum btrfs_compression_type compress_type; /* must match to merge */
+       u32 len_to_stripe_boundary;     /* U32_MAX for a compressed bio */
+       u32 len_to_oe_boundary;         /* U32_MAX for a compressed bio */
+};
+
  struct extent_page_data {
         struct btrfs_bio_ctrl bio_ctrl;
         /* tells writepage not to lock the state bits for this range
@@ -164,24 +178,27 @@ static int add_extent_changeset(struct extent_state *state, u32 bits,
         return ret;
  }
  
-int __must_check submit_one_bio(struct bio *bio, int mirror_num,
-                               unsigned long bio_flags)
+static void submit_one_bio(struct bio *bio, int mirror_num,
+                          enum btrfs_compression_type compress_type)
  {
-       blk_status_t ret = 0;
         struct extent_io_tree *tree = bio->bi_private;
  
         bio->bi_private = NULL;
  
         /* Caller should ensure the bio has at least some range added */
         ASSERT(bio->bi_iter.bi_size);
+
         if (is_data_inode(tree->private_data))
-               ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
-                                           bio_flags);
+               btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
+                                           compress_type);
         else
-               ret = btrfs_submit_metadata_bio(tree->private_data, bio,
-                                               mirror_num, bio_flags);
-
-       return blk_status_to_errno(ret);
+               btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+       /*
+        * The submission hooks above handle any error by ending the bio,
+        * which will do the cleanup properly.  So nothing is returned from
+        * here; otherwise the caller of submit_extent_page() would do the
+        * cleanup a second time, causing problems.
+        */
  }
  
  /* Cleanup unsubmitted bios */
@@ -202,13 +219,12 @@ static void end_write_bio(struct extent_page_data *epd, int ret)
   * Return 0 if everything is OK.
   * Return <0 for error.
   */
-static int __must_check flush_write_bio(struct extent_page_data *epd)
+static void flush_write_bio(struct extent_page_data *epd)
  {
-       int ret = 0;
         struct bio *bio = epd->bio_ctrl.bio;
  
         if (bio) {
-               ret = submit_one_bio(bio, 0, 0);
+               submit_one_bio(bio, 0, 0);
                 /*
                  * Clean up of epd->bio is handled by its endio function.
                  * And endio is either triggered by successful bio execution
@@ -218,7 +234,6 @@ static int __must_check flush_write_bio(struct extent_page_data *epd)
                  */
                 epd->bio_ctrl.bio = NULL;
         }
-       return ret;
  }
  
  int __init extent_state_cache_init(void)
@@ -2303,12 +2318,13 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
                              u64 length, u64 logical, struct page *page,
                              unsigned int pg_offset, int mirror_num)
  {
-       struct bio *bio;
         struct btrfs_device *dev;
+       struct bio_vec bvec;
+       struct bio bio;
         u64 map_length = 0;
         u64 sector;
         struct btrfs_io_context *bioc = NULL;
-       int ret;
+       int ret = 0;
  
         ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
         BUG_ON(!mirror_num);
@@ -2316,8 +2332,6 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
         if (btrfs_repair_one_zone(fs_info, logical))
                 return 0;
  
-       bio = btrfs_bio_alloc(1);
-       bio->bi_iter.bi_size = 0;
         map_length = length;
  
         /*
@@ -2335,52 +2349,50 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
                  */
                 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
                                       &map_length, &bioc, 0);
-               if (ret) {
-                       btrfs_bio_counter_dec(fs_info);
-                       bio_put(bio);
-                       return -EIO;
-               }
+               if (ret)
+                       goto out_counter_dec;
                 ASSERT(bioc->mirror_num == 1);
         } else {
                 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
                                       &map_length, &bioc, mirror_num);
-               if (ret) {
-                       btrfs_bio_counter_dec(fs_info);
-                       bio_put(bio);
-                       return -EIO;
-               }
+               if (ret)
+                       goto out_counter_dec;
                 BUG_ON(mirror_num != bioc->mirror_num);
         }
  
         sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
-       bio->bi_iter.bi_sector = sector;
         dev = bioc->stripes[bioc->mirror_num - 1].dev;
         btrfs_put_bioc(bioc);
+
         if (!dev || !dev->bdev ||
             !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
-               btrfs_bio_counter_dec(fs_info);
-               bio_put(bio);
-               return -EIO;
+               ret = -EIO;
+               goto out_counter_dec;
         }
-       bio_set_dev(bio, dev->bdev);
-       bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-       bio_add_page(bio, page, length, pg_offset);
  
-       if (btrfsic_submit_bio_wait(bio)) {
+       bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
+       bio.bi_iter.bi_sector = sector;
+       __bio_add_page(&bio, page, length, pg_offset);
+
+       btrfsic_check_bio(&bio);
+       ret = submit_bio_wait(&bio);
+       if (ret) {
                 /* try to remap that extent elsewhere? */
-               btrfs_bio_counter_dec(fs_info);
-               bio_put(bio);
                 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
-               return -EIO;
+               goto out_bio_uninit;
         }
  
         btrfs_info_rl_in_rcu(fs_info,
                 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
                                   ino, start,
                                   rcu_str_deref(dev->name), sector);
+       ret = 0;
+
+out_bio_uninit:
+       bio_uninit(&bio);
+out_counter_dec:
         btrfs_bio_counter_dec(fs_info);
-       bio_put(bio);
-       return 0;
+       return ret;
  }
  
  int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
@@ -2527,7 +2539,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
         failrec->start = start;
         failrec->len = sectorsize;
         failrec->this_mirror = 0;
-       failrec->bio_flags = 0;
+       failrec->compress_type = BTRFS_COMPRESS_NONE;
  
         read_lock(&em_tree->lock);
         em = lookup_extent_mapping(em_tree, start, failrec->len);
@@ -2551,8 +2563,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
         logical = em->block_start + logical;
         if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
                 logical = em->block_start;
-               failrec->bio_flags = EXTENT_BIO_COMPRESSED;
-               extent_set_compress_type(&failrec->bio_flags, em->compress_type);
+               failrec->compress_type = em->compress_type;
         }
  
         btrfs_debug(fs_info,
@@ -2684,7 +2695,7 @@ int btrfs_repair_one_sector(struct inode *inode,
          * will be handled by the endio on the repair_bio, so we can't return an
          * error here.
          */
-       submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
+       submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->compress_type);
         return BLK_STS_OK;
  }
  
@@ -2710,18 +2721,19 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
                 btrfs_page_set_error(fs_info, page, start, len);
         }
  
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                 unlock_page(page);
         else
                 btrfs_subpage_end_reader(fs_info, page, start, len);
  }
  
-static blk_status_t submit_read_repair(struct inode *inode,
-                                     struct bio *failed_bio, u32 bio_offset,
-                                     struct page *page, unsigned int pgoff,
-                                     u64 start, u64 end, int failed_mirror,
-                                     unsigned int error_bitmap,
-                                     submit_bio_hook_t *submit_bio_hook)
+static blk_status_t submit_data_read_repair(struct inode *inode,
+                                           struct bio *failed_bio,
+                                           u32 bio_offset, struct page *page,
+                                           unsigned int pgoff,
+                                           u64 start, u64 end,
+                                           int failed_mirror,
+                                           unsigned int error_bitmap)
  {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
         const u32 sectorsize = fs_info->sectorsize;
@@ -2731,6 +2743,9 @@ static blk_status_t submit_read_repair(struct inode *inode,
  
         BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
  
+       /* This repair is only for data */
+       ASSERT(is_data_inode(inode));
+
         /* We're here because we had some read errors or csum mismatch */
         ASSERT(error_bitmap);
  
@@ -2759,7 +2774,7 @@ static blk_status_t submit_read_repair(struct inode *inode,
                 ret = btrfs_repair_one_sector(inode, failed_bio,
                                 bio_offset + offset,
                                 page, pgoff + offset, start + offset,
-                               failed_mirror, submit_bio_hook);
+                               failed_mirror, btrfs_submit_data_bio);
                 if (!ret) {
                         /*
                          * We have submitted the read repair, the page release
@@ -2943,7 +2958,7 @@ update:
  static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
  {
         ASSERT(PageLocked(page));
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                 return;
  
         ASSERT(PagePrivate(page));
@@ -2951,7 +2966,7 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
  }
  
  /*
- * Find extent buffer for a givne bytenr.
+ * Find extent buffer for a given bytenr.
   *
   * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
   * in endio context.
@@ -2965,16 +2980,14 @@ static struct extent_buffer *find_extent_buffer_readpage(
          * For regular sectorsize, we can use page->private to grab extent
          * buffer
          */
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (fs_info->nodesize >= PAGE_SIZE) {
                 ASSERT(PagePrivate(page) && page->private);
                 return (struct extent_buffer *)page->private;
         }
  
-       /* For subpage case, we need to lookup buffer radix tree */
-       rcu_read_lock();
-       eb = radix_tree_lookup(&fs_info->buffer_radix,
-                              bytenr >> fs_info->sectorsize_bits);
-       rcu_read_unlock();
+       /* For subpage case, we need to lookup extent buffer xarray */
+       eb = xa_load(&fs_info->extent_buffers,
+                    bytenr >> fs_info->sectorsize_bits);
         ASSERT(eb);
         return eb;
  }
@@ -3077,13 +3090,13 @@ static void end_bio_extent_readpage(struct bio *bio)
                                 goto readpage_ok;
  
                         /*
-                        * btrfs_submit_read_repair() will handle all the good
+                        * submit_data_read_repair() will handle all the good
                          * and bad sectors, we just continue to the next bvec.
                          */
-                       submit_read_repair(inode, bio, bio_offset, page,
-                                          start - page_offset(page), start,
-                                          end, mirror, error_bitmap,
-                                          btrfs_submit_data_bio);
+                       submit_data_read_repair(inode, bio, bio_offset, page,
+                                               start - page_offset(page),
+                                               start, end, mirror,
+                                               error_bitmap);
  
                         ASSERT(bio_offset + len > bio_offset);
                         bio_offset += len;
@@ -3132,6 +3145,42 @@ readpage_ok:
         bio_put(bio);
  }
  
+/**
+ * btrfs_alloc_page_array - populate every free slot in an array with pages
+ *
+ * @nr_pages:   number of pages to allocate
+ * @page_array: the array to fill with pages; any existing non-null entries in
+ *             the array will be skipped
+ *
+ * Return: 0        if all pages were able to be allocated;
+ *         -ENOMEM  otherwise, and the caller is responsible for freeing all
+ *                  non-null page pointers in the array.
+ */
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
+{
+       unsigned int allocated;
+
+       for (allocated = 0; allocated < nr_pages;) {
+               unsigned int last = allocated;  /* count before this attempt */
+
+               allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array);
+
+               if (allocated == nr_pages)
+                       return 0;
+
+               /*
+                * No progress was made during this iteration, even though
+                * alloc_pages_bulk_array() falls back to alloc_page() if it
+                * could not bulk-allocate. So we must be out of memory.
+                */
+               if (allocated == last)
+                       return -ENOMEM;
+
+               memalloc_retry_wait(GFP_NOFS);
+       }
+       return 0;       /* loop condition failed, e.g. nr_pages == 0 */
+}
+
  /*
   * Initialize the members up to but not including 'bio'. Use after allocating a
   * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
@@ -3157,13 +3206,13 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
         return bio;
  }
  
-struct bio *btrfs_bio_clone(struct bio *bio)
+struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio)
  {
         struct btrfs_bio *bbio;
         struct bio *new;
  
         /* Bio allocation backed by a bioset does not fail */
-       new = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOFS, &btrfs_bioset);
+       new = bio_alloc_clone(bdev, bio, GFP_NOFS, &btrfs_bioset);
         bbio = btrfs_bio(new);
         btrfs_bio_init(bbio);
         bbio->iter = bio->bi_iter;
@@ -3198,7 +3247,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
   *                a contiguous page to the previous one
   * @size:      portion of page that we want to write
   * @pg_offset: starting offset in the page
- * @bio_flags: flags of the current bio to see if we can merge them
+ * @compress_type:   compression type of the current bio to see if we can merge them
   *
   * Attempt to add a page to bio considering stripe alignment etc.
   *
@@ -3210,7 +3259,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
                               struct page *page,
                               u64 disk_bytenr, unsigned int size,
                               unsigned int pg_offset,
-                             unsigned long bio_flags)
+                             enum btrfs_compression_type compress_type)
  {
         struct bio *bio = bio_ctrl->bio;
         u32 bio_size = bio->bi_iter.bi_size;
@@ -3222,10 +3271,10 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
         ASSERT(bio);
         /* The limit should be calculated when bio_ctrl->bio is allocated */
         ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
-       if (bio_ctrl->bio_flags != bio_flags)
+       if (bio_ctrl->compress_type != compress_type)
                 return 0;
  
-       if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
+       if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
                 contig = bio->bi_iter.bi_sector == sector;
         else
                 contig = bio_end_sector(bio) == sector;
@@ -3268,7 +3317,7 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
          * The split happens for real compressed bio, which happens in
          * btrfs_submit_compressed_read/write().
          */
-       if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED) {
+       if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) {
                 bio_ctrl->len_to_oe_boundary = U32_MAX;
                 bio_ctrl->len_to_stripe_boundary = U32_MAX;
                 return 0;
@@ -3311,7 +3360,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
                          unsigned int opf,
                          bio_end_io_t end_io_func,
                          u64 disk_bytenr, u32 offset, u64 file_offset,
-                        unsigned long bio_flags)
+                        enum btrfs_compression_type compress_type)
  {
         struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct bio *bio;
@@ -3322,12 +3371,12 @@ static int alloc_new_bio(struct btrfs_inode *inode,
          * For compressed page range, its disk_bytenr is always @disk_bytenr
          * passed in, no matter if we have added any range into previous bio.
          */
-       if (bio_flags & EXTENT_BIO_COMPRESSED)
+       if (compress_type != BTRFS_COMPRESS_NONE)
                 bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
         else
                 bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
         bio_ctrl->bio = bio;
-       bio_ctrl->bio_flags = bio_flags;
+       bio_ctrl->compress_type = compress_type;
         bio->bi_end_io = end_io_func;
         bio->bi_private = &inode->io_tree;
         bio->bi_opf = opf;
@@ -3386,7 +3435,7 @@ error:
   * @end_io_func:     end_io callback for new bio
   * @mirror_num:             desired mirror to read/write
   * @prev_bio_flags:  flags of previous bio to see if we can merge the current one
- * @bio_flags: flags of the current bio to see if we can merge them
+ * @compress_type:   compress type for current bio
   */
  static int submit_extent_page(unsigned int opf,
                               struct writeback_control *wbc,
@@ -3395,7 +3444,7 @@ static int submit_extent_page(unsigned int opf,
                               size_t size, unsigned long pg_offset,
                               bio_end_io_t end_io_func,
                               int mirror_num,
-                             unsigned long bio_flags,
+                             enum btrfs_compression_type compress_type,
                               bool force_bio_submit)
  {
         int ret = 0;
@@ -3407,10 +3456,8 @@ static int submit_extent_page(unsigned int opf,
         ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
                pg_offset + size <= PAGE_SIZE);
         if (force_bio_submit && bio_ctrl->bio) {
-               ret = submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->bio_flags);
+               submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->compress_type);
                 bio_ctrl->bio = NULL;
-               if (ret < 0)
-                       return ret;
         }
  
         while (cur < pg_offset + size) {
@@ -3422,7 +3469,7 @@ static int submit_extent_page(unsigned int opf,
                         ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
                                             end_io_func, disk_bytenr, offset,
                                             page_offset(page) + cur,
-                                           bio_flags);
+                                           compress_type);
                         if (ret < 0)
                                 return ret;
                 }
@@ -3430,14 +3477,14 @@ static int submit_extent_page(unsigned int opf,
                  * We must go through btrfs_bio_add_page() to ensure each
                  * page range won't cross various boundaries.
                  */
-               if (bio_flags & EXTENT_BIO_COMPRESSED)
+               if (compress_type != BTRFS_COMPRESS_NONE)
                         added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
                                         size - offset, pg_offset + offset,
-                                       bio_flags);
+                                       compress_type);
                 else
                         added = btrfs_bio_add_page(bio_ctrl, page,
                                         disk_bytenr + offset, size - offset,
-                                       pg_offset + offset, bio_flags);
+                                       pg_offset + offset, compress_type);
  
                 /* Metadata page range should never be split */
                 if (!is_data_inode(&inode->vfs_inode))
@@ -3451,11 +3498,8 @@ static int submit_extent_page(unsigned int opf,
                 if (added < size - offset) {
                         /* The bio should contain some page(s) */
                         ASSERT(bio_ctrl->bio->bi_iter.bi_size);
-                       ret = submit_one_bio(bio_ctrl->bio, mirror_num,
-                                       bio_ctrl->bio_flags);
+                       submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->compress_type);
                         bio_ctrl->bio = NULL;
-                       if (ret < 0)
-                               return ret;
                 }
                 cur += added;
         }
@@ -3478,7 +3522,7 @@ static int attach_extent_buffer_page(struct extent_buffer *eb,
         if (page->mapping)
                 lockdep_assert_held(&page->mapping->private_lock);
  
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (fs_info->nodesize >= PAGE_SIZE) {
                 if (!PagePrivate(page))
                         attach_page_private(page, eb);
                 else
@@ -3513,7 +3557,7 @@ int set_page_extent_mapped(struct page *page)
  
         fs_info = btrfs_sb(page->mapping->host->i_sb);
  
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (btrfs_is_subpage(fs_info, page))
                 return btrfs_attach_subpage(fs_info, page, BTRFS_SUBPAGE_DATA);
  
         attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
@@ -3530,7 +3574,7 @@ void clear_page_extent_mapped(struct page *page)
                 return;
  
         fs_info = btrfs_sb(page->mapping->host->i_sb);
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (btrfs_is_subpage(fs_info, page))
                 return btrfs_detach_subpage(fs_info, page);
  
         detach_page_private(page);
@@ -3569,7 +3613,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
   * XXX JDM: This needs looking at to ensure proper page locking
   * return 0 on success, otherwise return error
   */
-int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
+static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                       struct btrfs_bio_ctrl *bio_ctrl,
                       unsigned int read_flags, u64 *prev_em_start)
  {
@@ -3638,16 +3682,13 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                 BUG_ON(extent_map_end(em) <= cur);
                 BUG_ON(end < cur);
  
-               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-                       this_bio_flag |= EXTENT_BIO_COMPRESSED;
-                       extent_set_compress_type(&this_bio_flag,
-                                                em->compress_type);
-               }
+               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+                       this_bio_flag = em->compress_type;
  
                 iosize = min(extent_map_end(em) - cur, end - cur + 1);
                 cur_end = min(extent_map_end(em) - 1, end);
                 iosize = ALIGN(iosize, blocksize);
-               if (this_bio_flag & EXTENT_BIO_COMPRESSED)
+               if (this_bio_flag != BTRFS_COMPRESS_NONE)
                         disk_bytenr = em->block_start;
                 else
                         disk_bytenr = em->block_start + extent_offset;
@@ -3743,8 +3784,12 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                                          this_bio_flag,
                                          force_bio_submit);
                 if (ret) {
-                       unlock_extent(tree, cur, cur + iosize - 1);
-                       end_page_read(page, false, cur, iosize);
+                       /*
+                        * We have to unlock the remaining range, or the page
+                        * will never be unlocked.
+                        */
+                       unlock_extent(tree, cur, end);
+                       end_page_read(page, false, cur, end + 1 - cur);
                         goto out;
                 }
                 cur = cur + iosize;
@@ -3754,6 +3799,27 @@ out:
         return ret;
  }
  
+int btrfs_read_folio(struct file *file, struct folio *folio)
+{
+       struct page *page = &folio->page;
+       struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+       u64 start = page_offset(page);
+       u64 end = start + PAGE_SIZE - 1;        /* inclusive end of the page */
+       struct btrfs_bio_ctrl bio_ctrl = { 0 };
+       int ret;
+
+       btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
+
+       ret = btrfs_do_readpage(page, NULL, &bio_ctrl, 0, NULL);
+       /*
+        * Submit any assembled bio whether or not btrfs_do_readpage() failed;
+        * on failure the submission is what does the cleanup.
+        */
+       if (bio_ctrl.bio)
+               submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.compress_type);
+       return ret;
+}
+
  static inline void contiguous_readpages(struct page *pages[], int nr_pages,
                                         u64 start, u64 end,
                                         struct extent_map **em_cached,
@@ -3772,12 +3838,6 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
         }
  }
  
-static void update_nr_written(struct writeback_control *wbc,
-                             unsigned long nr_written)
-{
-       wbc->nr_to_write -= nr_written;
-}
-
  /*
   * helper for __extent_writepage, doing all of the delayed allocation setup.
   *
@@ -3877,7 +3937,7 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
          * For regular sector size == page size case, since one page only
          * contains one sector, we return the page offset directly.
          */
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (!btrfs_is_subpage(fs_info, page)) {
                 *start = page_offset(page);
                 *end = page_offset(page) + PAGE_SIZE;
                 return;
@@ -3920,10 +3980,12 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
         u64 extent_offset;
         u64 block_start;
         struct extent_map *em;
+       int saved_ret = 0;
         int ret = 0;
         int nr = 0;
         u32 opf = REQ_OP_WRITE;
         const unsigned int write_flags = wbc_to_write_flags(wbc);
+       bool has_error = false;
         bool compressed;
  
         ret = btrfs_writepage_cow_fixup(page);
@@ -3938,7 +4000,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
          * we don't want to touch the inode after unlocking the page,
          * so we update the mapping writeback index now
          */
-       update_nr_written(wbc, 1);
+       wbc->nr_to_write--;
  
         while (cur <= end) {
                 u64 disk_bytenr;
@@ -3973,6 +4035,9 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                 if (IS_ERR(em)) {
                         btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
                         ret = PTR_ERR_OR_ZERO(em);
+                       has_error = true;
+                       if (!saved_ret)
+                               saved_ret = ret;
                         break;
                 }
  
@@ -4036,6 +4101,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                                          end_bio_extent_writepage,
                                          0, 0, false);
                 if (ret) {
+                       has_error = true;
+                       if (!saved_ret)
+                               saved_ret = ret;
+
                         btrfs_page_set_error(fs_info, page, cur, iosize);
                         if (PageWriteback(page))
                                 btrfs_page_clear_writeback(fs_info, page, cur,
@@ -4049,8 +4118,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
          * If we finish without problem, we should not only clear page dirty,
          * but also empty subpage dirty bits
          */
-       if (!ret)
+       if (!has_error)
                 btrfs_page_assert_not_dirty(fs_info, page);
+       else
+               ret = saved_ret;
         *nr_ret = nr;
         return ret;
  }
@@ -4181,9 +4252,6 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
  
  static void end_extent_buffer_writeback(struct extent_buffer *eb)
  {
-       if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
-               btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
-
         clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
         smp_mb__after_atomic();
         wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4203,14 +4271,12 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
                           struct extent_page_data *epd)
  {
         struct btrfs_fs_info *fs_info = eb->fs_info;
-       int i, num_pages, failed_page_nr;
+       int i, num_pages;
         int flush = 0;
         int ret = 0;
  
         if (!btrfs_try_tree_write_lock(eb)) {
-               ret = flush_write_bio(epd);
-               if (ret < 0)
-                       return ret;
+               flush_write_bio(epd);
                 flush = 1;
                 btrfs_tree_lock(eb);
         }
@@ -4220,9 +4286,7 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
                 if (!epd->sync_io)
                         return 0;
                 if (!flush) {
-                       ret = flush_write_bio(epd);
-                       if (ret < 0)
-                               return ret;
+                       flush_write_bio(epd);
                         flush = 1;
                 }
                 while (1) {
@@ -4260,7 +4324,7 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
          * Subpage metadata doesn't use page locking at all, so we can skip
          * the page locking.
          */
-       if (!ret || fs_info->sectorsize < PAGE_SIZE)
+       if (!ret || fs_info->nodesize < PAGE_SIZE)
                 return ret;
  
         num_pages = num_extent_pages(eb);
@@ -4269,39 +4333,13 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
  
                 if (!trylock_page(p)) {
                         if (!flush) {
-                               int err;
-
-                               err = flush_write_bio(epd);
-                               if (err < 0) {
-                                       ret = err;
-                                       failed_page_nr = i;
-                                       goto err_unlock;
-                               }
+                               flush_write_bio(epd);
                                 flush = 1;
                         }
                         lock_page(p);
                 }
         }
  
-       return ret;
-err_unlock:
-       /* Unlock already locked pages */
-       for (i = 0; i < failed_page_nr; i++)
-               unlock_page(eb->pages[i]);
-       /*
-        * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it.
-        * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can
-        * be made and undo everything done before.
-        */
-       btrfs_tree_lock(eb);
-       spin_lock(&eb->refs_lock);
-       set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
-       end_extent_buffer_writeback(eb);
-       spin_unlock(&eb->refs_lock);
-       percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
-                                fs_info->dirty_metadata_batch);
-       btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-       btrfs_tree_unlock(eb);
         return ret;
  }
  
@@ -4397,8 +4435,8 @@ static struct extent_buffer *find_extent_buffer_nolock(
         struct extent_buffer *eb;
  
         rcu_read_lock();
-       eb = radix_tree_lookup(&fs_info->buffer_radix,
-                              start >> fs_info->sectorsize_bits);
+       eb = xa_load(&fs_info->extent_buffers,
+                    start >> fs_info->sectorsize_bits);
         if (eb && atomic_inc_not_zero(&eb->refs)) {
                 rcu_read_unlock();
                 return eb;
@@ -4420,7 +4458,7 @@ static void end_bio_subpage_eb_writepage(struct bio *bio)
         struct bvec_iter_all iter_all;
  
         fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
-       ASSERT(fs_info->sectorsize < PAGE_SIZE);
+       ASSERT(fs_info->nodesize < PAGE_SIZE);
  
         ASSERT(!bio_flagged(bio, BIO_CLONED));
         bio_for_each_segment_all(bvec, bio, iter_all) {
@@ -4572,7 +4610,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
          * dirty anymore, we have submitted a page.  Update nr_written in wbc.
          */
         if (no_dirty_ebs)
-               update_nr_written(wbc, 1);
+               wbc->nr_to_write--;
         return ret;
  }
  
@@ -4608,7 +4646,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
                         break;
                 }
                 disk_bytenr += PAGE_SIZE;
-               update_nr_written(wbc, 1);
+               wbc->nr_to_write--;
                 unlock_page(p);
         }
  
@@ -4747,7 +4785,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
         if (!PagePrivate(page))
                 return 0;
  
-       if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+       if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
                 return submit_eb_subpage(page, wbc, epd);
  
         spin_lock(&mapping->private_lock);
@@ -4803,8 +4841,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
                 /*
                  * Implies write in zoned mode. Mark the last eb in a block group.
                  */
-               if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
-                       set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+               btrfs_schedule_zone_finish_bg(cache, eb);
                 btrfs_put_block_group(cache);
         }
         ret = write_one_eb(eb, wbc, epd);
@@ -4923,13 +4960,19 @@ retry:
          *   if the fs already has error.
          */
         if (!BTRFS_FS_ERROR(fs_info)) {
-               ret = flush_write_bio(&epd);
+               flush_write_bio(&epd);
         } else {
                 ret = -EROFS;
                 end_write_bio(&epd, ret);
         }
  out:
         btrfs_zoned_meta_io_unlock(fs_info);
+       /*
+        * We can get ret > 0 from submit_extent_page() indicating how many ebs
+        * were submitted. Reset it to 0 to avoid false alerts for the caller.
+        */
+       if (ret > 0)
+               ret = 0;
         return ret;
  }
  
@@ -5031,8 +5074,7 @@ retry:
                          * tmpfs file mapping
                          */
                         if (!trylock_page(page)) {
-                               ret = flush_write_bio(epd);
-                               BUG_ON(ret < 0);
+                               flush_write_bio(epd);
                                 lock_page(page);
                         }
  
@@ -5042,10 +5084,8 @@ retry:
                         }
  
                         if (wbc->sync_mode != WB_SYNC_NONE) {
-                               if (PageWriteback(page)) {
-                                       ret = flush_write_bio(epd);
-                                       BUG_ON(ret < 0);
-                               }
+                               if (PageWriteback(page))
+                                       flush_write_bio(epd);
                                 wait_on_page_writeback(page);
                         }
  
@@ -5085,9 +5125,8 @@ retry:
                  * page in our current bio, and thus deadlock, so flush the
                  * write bio here.
                  */
-               ret = flush_write_bio(epd);
-               if (!ret)
-                       goto retry;
+               flush_write_bio(epd);
+               goto retry;
         }
  
         if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
@@ -5113,8 +5152,7 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
                 return ret;
         }
  
-       ret = flush_write_bio(&epd);
-       ASSERT(ret <= 0);
+       flush_write_bio(&epd);
         return ret;
  }
  
@@ -5176,7 +5214,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
         }
  
         if (!found_error)
-               ret = flush_write_bio(&epd);
+               flush_write_bio(&epd);
         else
                 end_write_bio(&epd, ret);
  
@@ -5209,7 +5247,7 @@ int extent_writepages(struct address_space *mapping,
                 end_write_bio(&epd, ret);
                 return ret;
         }
-       ret = flush_write_bio(&epd);
+       flush_write_bio(&epd);
         return ret;
  }
  
@@ -5232,10 +5270,8 @@ void extent_readahead(struct readahead_control *rac)
         if (em_cached)
                 free_extent_map(em_cached);
  
-       if (bio_ctrl.bio) {
-               if (submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags))
-                       return;
-       }
+       if (bio_ctrl.bio)
+               submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.compress_type);
  }
  
  /*
@@ -5804,7 +5840,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
                 return;
         }
  
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (fs_info->nodesize >= PAGE_SIZE) {
                 /*
                  * We do this since we'll remove the pages after we've
                  * removed the eb from the radix tree, so we could race
@@ -5911,9 +5947,9 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
  struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
  {
         int i;
-       struct page *p;
         struct extent_buffer *new;
         int num_pages = num_extent_pages(src);
+       int ret;
  
         new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
         if (new == NULL)
@@ -5926,22 +5962,23 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
          */
         set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
  
+       memset(new->pages, 0, sizeof(*new->pages) * num_pages);
+       ret = btrfs_alloc_page_array(num_pages, new->pages);
+       if (ret) {
+               btrfs_release_extent_buffer(new);
+               return NULL;
+       }
+
         for (i = 0; i < num_pages; i++) {
                 int ret;
+               struct page *p = new->pages[i];
  
-               p = alloc_page(GFP_NOFS);
-               if (!p) {
-                       btrfs_release_extent_buffer(new);
-                       return NULL;
-               }
                 ret = attach_extent_buffer_page(new, p, NULL);
                 if (ret < 0) {
-                       put_page(p);
                         btrfs_release_extent_buffer(new);
                         return NULL;
                 }
                 WARN_ON(PageDirty(p));
-               new->pages[i] = p;
                 copy_page(page_address(p), page_address(src->pages[i]));
         }
         set_extent_buffer_uptodate(new);
@@ -5955,31 +5992,36 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
         struct extent_buffer *eb;
         int num_pages;
         int i;
+       int ret;
  
         eb = __alloc_extent_buffer(fs_info, start, len);
         if (!eb)
                 return NULL;
  
         num_pages = num_extent_pages(eb);
+       ret = btrfs_alloc_page_array(num_pages, eb->pages);
+       if (ret)
+               goto err;
+
         for (i = 0; i < num_pages; i++) {
-               int ret;
+               struct page *p = eb->pages[i];
  
-               eb->pages[i] = alloc_page(GFP_NOFS);
-               if (!eb->pages[i])
-                       goto err;
-               ret = attach_extent_buffer_page(eb, eb->pages[i], NULL);
+               ret = attach_extent_buffer_page(eb, p, NULL);
                 if (ret < 0)
                         goto err;
         }
+
         set_extent_buffer_uptodate(eb);
         btrfs_set_header_nritems(eb, 0);
         set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
  
         return eb;
  err:
-       for (; i > 0; i--) {
-               detach_extent_buffer_page(eb, eb->pages[i - 1]);
-               __free_page(eb->pages[i - 1]);
+       for (i = 0; i < num_pages; i++) {
+               if (eb->pages[i]) {
+                       detach_extent_buffer_page(eb, eb->pages[i]);
+                       __free_page(eb->pages[i]);
+               }
         }
         __free_extent_buffer(eb);
         return NULL;
@@ -6086,24 +6128,22 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
         if (!eb)
                 return ERR_PTR(-ENOMEM);
         eb->fs_info = fs_info;
-again:
-       ret = radix_tree_preload(GFP_NOFS);
-       if (ret) {
-               exists = ERR_PTR(ret);
-               goto free_eb;
-       }
-       spin_lock(&fs_info->buffer_lock);
-       ret = radix_tree_insert(&fs_info->buffer_radix,
-                               start >> fs_info->sectorsize_bits, eb);
-       spin_unlock(&fs_info->buffer_lock);
-       radix_tree_preload_end();
-       if (ret == -EEXIST) {
-               exists = find_extent_buffer(fs_info, start);
-               if (exists)
+
+       do {
+               ret = xa_insert(&fs_info->extent_buffers,
+                               start >> fs_info->sectorsize_bits,
+                               eb, GFP_NOFS);
+               if (ret == -ENOMEM) {
+                       exists = ERR_PTR(ret);
                         goto free_eb;
-               else
-                       goto again;
-       }
+               }
+               if (ret == -EBUSY) {
+                       exists = find_extent_buffer(fs_info, start);
+                       if (exists)
+                               goto free_eb;
+               }
+       } while (ret);
+
         check_buffer_tree_ref(eb);
         set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
  
@@ -6124,7 +6164,7 @@ static struct extent_buffer *grab_extent_buffer(
          * don't try to insert two ebs for the same bytenr.  So here we always
          * return NULL and just continue.
          */
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (fs_info->nodesize < PAGE_SIZE)
                 return NULL;
  
         /* Page not yet attached to an extent buffer */
@@ -6146,6 +6186,30 @@ static struct extent_buffer *grab_extent_buffer(
         return NULL;
  }
  
+static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
+{
+       if (!IS_ALIGNED(start, fs_info->sectorsize)) {
+               btrfs_err(fs_info, "bad tree block start %llu", start);
+               return -EINVAL;
+       }
+
+       if (fs_info->nodesize < PAGE_SIZE &&
+           offset_in_page(start) + fs_info->nodesize > PAGE_SIZE) {
+               btrfs_err(fs_info,
+               "tree block crosses page boundary, start %llu nodesize %u",
+                         start, fs_info->nodesize);
+               return -EINVAL;
+       }
+       if (fs_info->nodesize >= PAGE_SIZE &&
+           !IS_ALIGNED(start, PAGE_SIZE)) {
+               btrfs_err(fs_info,
+               "tree block is not page aligned, start %llu nodesize %u",
+                         start, fs_info->nodesize);
+               return -EINVAL;
+       }
+       return 0;
+}
+
  struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                           u64 start, u64 owner_root, int level)
  {
@@ -6160,10 +6224,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
         int uptodate = 1;
         int ret;
  
-       if (!IS_ALIGNED(start, fs_info->sectorsize)) {
-               btrfs_err(fs_info, "bad tree block start %llu", start);
+       if (check_eb_alignment(fs_info, start))
                 return ERR_PTR(-EINVAL);
-       }
  
  #if BITS_PER_LONG == 32
         if (start >= MAX_LFS_FILESIZE) {
@@ -6176,14 +6238,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 btrfs_warn_32bit_limit(fs_info);
  #endif
  
-       if (fs_info->sectorsize < PAGE_SIZE &&
-           offset_in_page(start) + len > PAGE_SIZE) {
-               btrfs_err(fs_info,
-               "tree block crosses page boundary, start %llu nodesize %lu",
-                         start, len);
-               return ERR_PTR(-EINVAL);
-       }
-
         eb = find_extent_buffer(fs_info, start);
         if (eb)
                 return eb;
@@ -6213,7 +6267,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                  * page, but it may change in the future for 16K page size
                  * support, so we still preallocate the memory in the loop.
                  */
-               if (fs_info->sectorsize < PAGE_SIZE) {
+               if (fs_info->nodesize < PAGE_SIZE) {
                         prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
                         if (IS_ERR(prealloc)) {
                                 ret = PTR_ERR(prealloc);
@@ -6264,25 +6318,22 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
         }
         if (uptodate)
                 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-again:
-       ret = radix_tree_preload(GFP_NOFS);
-       if (ret) {
-               exists = ERR_PTR(ret);
-               goto free_eb;
-       }
-
-       spin_lock(&fs_info->buffer_lock);
-       ret = radix_tree_insert(&fs_info->buffer_radix,
-                               start >> fs_info->sectorsize_bits, eb);
-       spin_unlock(&fs_info->buffer_lock);
-       radix_tree_preload_end();
-       if (ret == -EEXIST) {
-               exists = find_extent_buffer(fs_info, start);
-               if (exists)
+
+       do {
+               ret = xa_insert(&fs_info->extent_buffers,
+                               start >> fs_info->sectorsize_bits,
+                               eb, GFP_NOFS);
+               if (ret == -ENOMEM) {
+                       exists = ERR_PTR(ret);
                         goto free_eb;
-               else
-                       goto again;
-       }
+               }
+               if (ret == -EBUSY) {
+                       exists = find_extent_buffer(fs_info, start);
+                       if (exists)
+                               goto free_eb;
+               }
+       } while (ret);
+
         /* add one reference for the tree */
         check_buffer_tree_ref(eb);
         set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
@@ -6327,10 +6378,8 @@ static int release_extent_buffer(struct extent_buffer *eb)
  
                         spin_unlock(&eb->refs_lock);
  
-                       spin_lock(&fs_info->buffer_lock);
-                       radix_tree_delete(&fs_info->buffer_radix,
-                                         eb->start >> fs_info->sectorsize_bits);
-                       spin_unlock(&fs_info->buffer_lock);
+                       xa_erase(&fs_info->extent_buffers,
+                                eb->start >> fs_info->sectorsize_bits);
                 } else {
                         spin_unlock(&eb->refs_lock);
                 }
@@ -6432,7 +6481,7 @@ void clear_extent_buffer_dirty(const struct extent_buffer *eb)
         int num_pages;
         struct page *page;
  
-       if (eb->fs_info->sectorsize < PAGE_SIZE)
+       if (eb->fs_info->nodesize < PAGE_SIZE)
                 return clear_subpage_extent_buffer_dirty(eb);
  
         num_pages = num_extent_pages(eb);
@@ -6464,7 +6513,7 @@ bool set_extent_buffer_dirty(struct extent_buffer *eb)
         WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
  
         if (!was_dirty) {
-               bool subpage = eb->fs_info->sectorsize < PAGE_SIZE;
+               bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
  
                 /*
                  * For subpage case, we can have other extent buffers in the
@@ -6504,9 +6553,18 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
         num_pages = num_extent_pages(eb);
         for (i = 0; i < num_pages; i++) {
                 page = eb->pages[i];
-               if (page)
-                       btrfs_page_clear_uptodate(fs_info, page,
-                                                 eb->start, eb->len);
+               if (!page)
+                       continue;
+
+               /*
+                * This is special handling for metadata subpage, as regular
+                * btrfs_is_subpage() can not handle cloned/dummy metadata.
+                */
+               if (fs_info->nodesize >= PAGE_SIZE)
+                       ClearPageUptodate(page);
+               else
+                       btrfs_subpage_clear_uptodate(fs_info, page, eb->start,
+                                                    eb->len);
         }
  }
  
@@ -6521,7 +6579,16 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
         num_pages = num_extent_pages(eb);
         for (i = 0; i < num_pages; i++) {
                 page = eb->pages[i];
-               btrfs_page_set_uptodate(fs_info, page, eb->start, eb->len);
+
+               /*
+                * This is special handling for metadata subpage, as regular
+                * btrfs_is_subpage() can not handle cloned/dummy metadata.
+                */
+               if (fs_info->nodesize >= PAGE_SIZE)
+                       SetPageUptodate(page);
+               else
+                       btrfs_subpage_set_uptodate(fs_info, page, eb->start,
+                                                  eb->len);
         }
  }
  
@@ -6577,12 +6644,8 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
                 atomic_dec(&eb->io_pages);
         }
         if (bio_ctrl.bio) {
-               int tmp;
-
-               tmp = submit_one_bio(bio_ctrl.bio, mirror_num, 0);
+               submit_one_bio(bio_ctrl.bio, mirror_num, 0);
                 bio_ctrl.bio = NULL;
-               if (tmp < 0)
-                       return tmp;
         }
         if (ret || wait != WAIT_COMPLETE)
                 return ret;
@@ -6616,7 +6679,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
         if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
                 return -EIO;
  
-       if (eb->fs_info->sectorsize < PAGE_SIZE)
+       if (eb->fs_info->nodesize < PAGE_SIZE)
                 return read_extent_buffer_subpage(eb, wait, mirror_num);
  
         num_pages = num_extent_pages(eb);
@@ -6695,10 +6758,8 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
         }
  
         if (bio_ctrl.bio) {
-               err = submit_one_bio(bio_ctrl.bio, mirror_num, bio_ctrl.bio_flags);
+               submit_one_bio(bio_ctrl.bio, mirror_num, bio_ctrl.compress_type);
                 bio_ctrl.bio = NULL;
-               if (err)
-                       return err;
         }
  
         if (ret || wait != WAIT_COMPLETE)
@@ -6871,7 +6932,7 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
          * would have !PageUptodate && !PageError, as we clear PageError before
          * reading.
          */
-       if (fs_info->sectorsize < PAGE_SIZE) {
+       if (fs_info->nodesize < PAGE_SIZE) {
                 bool uptodate, error;
  
                 uptodate = btrfs_subpage_test_uptodate(fs_info, page,
@@ -6973,7 +7034,7 @@ void copy_extent_buffer_full(const struct extent_buffer *dst,
  
         ASSERT(dst->len == src->len);
  
-       if (dst->fs_info->sectorsize == PAGE_SIZE) {
+       if (dst->fs_info->nodesize >= PAGE_SIZE) {
                 num_pages = num_extent_pages(dst);
                 for (i = 0; i < num_pages; i++)
                         copy_page(page_address(dst->pages[i]),
@@ -6982,7 +7043,7 @@ void copy_extent_buffer_full(const struct extent_buffer *dst,
                 size_t src_offset = get_eb_offset_in_page(src, 0);
                 size_t dst_offset = get_eb_offset_in_page(dst, 0);
  
-               ASSERT(src->fs_info->sectorsize < PAGE_SIZE);
+               ASSERT(src->fs_info->nodesize < PAGE_SIZE);
                 memcpy(page_address(dst->pages[0]) + dst_offset,
                        page_address(src->pages[0]) + src_offset,
                        src->len);
@@ -7263,42 +7324,25 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
         }
  }
  
-#define GANG_LOOKUP_SIZE       16
  static struct extent_buffer *get_next_extent_buffer(
                 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
  {
-       struct extent_buffer *gang[GANG_LOOKUP_SIZE];
-       struct extent_buffer *found = NULL;
+       struct extent_buffer *eb;
+       unsigned long index;
         u64 page_start = page_offset(page);
-       u64 cur = page_start;
  
         ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
         lockdep_assert_held(&fs_info->buffer_lock);
  
-       while (cur < page_start + PAGE_SIZE) {
-               int ret;
-               int i;
-
-               ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
-                               (void **)gang, cur >> fs_info->sectorsize_bits,
-                               min_t(unsigned int, GANG_LOOKUP_SIZE,
-                                     PAGE_SIZE / fs_info->nodesize));
-               if (ret == 0)
-                       goto out;
-               for (i = 0; i < ret; i++) {
-                       /* Already beyond page end */
-                       if (gang[i]->start >= page_start + PAGE_SIZE)
-                               goto out;
-                       /* Found one */
-                       if (gang[i]->start >= bytenr) {
-                               found = gang[i];
-                               goto out;
-                       }
-               }
-               cur = gang[ret - 1]->start + gang[ret - 1]->len;
+       xa_for_each_start(&fs_info->extent_buffers, index, eb,
+                         page_start >> fs_info->sectorsize_bits) {
+               if (in_range(eb->start, page_start, PAGE_SIZE))
+                       return eb;
+               else if (eb->start >= page_start + PAGE_SIZE)
+                       /* Already beyond page end */
+                       return NULL;
         }
-out:
-       return found;
+       return NULL;
  }
  
  static int try_release_subpage_extent_buffer(struct page *page)
@@ -7375,7 +7419,7 @@ int try_release_extent_buffer(struct page *page)
  {
         struct extent_buffer *eb;
  
-       if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+       if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
                 return try_release_subpage_extent_buffer(page);
  
         /*