btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()
Author:     Qu Wenruo <wqu@suse.com>
AuthorDate: Thu, 27 May 2021 12:33:22 +0000 (20:33 +0800)
Commit:     David Sterba <dsterba@suse.com>
CommitDate: Tue, 26 Oct 2021 17:07:58 +0000 (19:07 +0200)
The function defrag_one_cluster() is able to defrag one range well
enough; we only need to do the preparation for it, including:

- Clamp and align the defrag range
- Exclude invalid cases
- Proper inode locking

The old infrastructure is not removed in this patch, as that would make
the diff too noisy to review.
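
For review convenience, the core of the rewritten btrfs_defrag_file()
boils down to the loop below. This is a condensed sketch, not the
literal patch body: the cluster end is written in its arithmetically
equivalent round_down() form, and the compression setup is omitted.

    cur = round_down(range->start, fs_info->sectorsize);
    while (cur < last_byte) {
            /* Cluster ends at a page boundary, at most 256K past cur */
            u64 cluster_end = min(round_down(cur, PAGE_SIZE) + SZ_256K - 1,
                                  last_byte);

            btrfs_inode_lock(inode, 0);
            /* No defrag on swapfiles or a filesystem being unmounted */
            if (IS_SWAPFILE(inode)) {
                    ret = -ETXTBSY;
                    btrfs_inode_unlock(inode, 0);
                    break;
            }
            if (!(inode->i_sb->s_flags & SB_ACTIVE)) {
                    btrfs_inode_unlock(inode, 0);
                    break;
            }
            ret = defrag_one_cluster(BTRFS_I(inode), ra, cur,
                            cluster_end + 1 - cur, extent_thresh,
                            newer_than, do_compress,
                            &sectors_defragged, max_to_defrag);
            btrfs_inode_unlock(inode, 0);
            if (ret < 0)
                    break;
            cur = cluster_end + 1;
    }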

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ioctl.c

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 777cb3a..d66fc02 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1760,25 +1760,15 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
                      u64 newer_than, unsigned long max_to_defrag)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       unsigned long last_index;
+       unsigned long sectors_defragged = 0;
        u64 isize = i_size_read(inode);
-       u64 last_len = 0;
-       u64 skip = 0;
-       u64 defrag_end = 0;
-       u64 newer_off = range->start;
-       unsigned long i;
-       unsigned long ra_index = 0;
-       int ret;
-       int defrag_count = 0;
-       int compress_type = BTRFS_COMPRESS_ZLIB;
-       u32 extent_thresh = range->extent_thresh;
-       unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
-       unsigned long cluster = max_cluster;
-       u64 new_align = ~((u64)SZ_128K - 1);
-       struct page **pages = NULL;
+       u64 cur;
+       u64 last_byte;
        bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;
        bool ra_allocated = false;
+       int compress_type = BTRFS_COMPRESS_ZLIB;
+       int ret = 0;
+       u32 extent_thresh = range->extent_thresh;
 
        if (isize == 0)
                return 0;
@@ -1796,6 +1786,14 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
        if (extent_thresh == 0)
                extent_thresh = SZ_256K;
 
+       if (range->start + range->len > range->start) {
+               /* Got a specific range */
+               last_byte = min(isize, range->start + range->len) - 1;
+       } else {
+               /* Defrag until file end */
+               last_byte = isize - 1;
+       }
+
        /*
         * If we were not given a ra, allocate a readahead context. As
         * readahead is just an optimization, defrag will work without it so
@@ -1808,159 +1806,67 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
                        file_ra_state_init(ra, inode->i_mapping);
        }
 
-       pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
-       if (!pages) {
-               ret = -ENOMEM;
-               goto out_ra;
-       }
+       /* Align the range */
+       cur = round_down(range->start, fs_info->sectorsize);
+       last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
 
-       /* find the last page to defrag */
-       if (range->start + range->len > range->start) {
-               last_index = min_t(u64, isize - 1,
-                        range->start + range->len - 1) >> PAGE_SHIFT;
-       } else {
-               last_index = (isize - 1) >> PAGE_SHIFT;
-       }
-
-       if (newer_than) {
-               ret = find_new_extents(root, inode, newer_than,
-                                      &newer_off, SZ_64K);
-               if (!ret) {
-                       range->start = newer_off;
-                       /*
-                        * we always align our defrag to help keep
-                        * the extents in the file evenly spaced
-                        */
-                       i = (newer_off & new_align) >> PAGE_SHIFT;
-               } else
-                       goto out_ra;
-       } else {
-               i = range->start >> PAGE_SHIFT;
-       }
-       if (!max_to_defrag)
-               max_to_defrag = last_index - i + 1;
-
-       /*
-        * make writeback starts from i, so the defrag range can be
-        * written sequentially.
-        */
-       if (i < inode->i_mapping->writeback_index)
-               inode->i_mapping->writeback_index = i;
-
-       while (i <= last_index && defrag_count < max_to_defrag &&
-              (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
-               /*
-                * make sure we stop running if someone unmounts
-                * the FS
-                */
-               if (!(inode->i_sb->s_flags & SB_ACTIVE))
-                       break;
-
-               if (btrfs_defrag_cancelled(fs_info)) {
-                       btrfs_debug(fs_info, "defrag_file cancelled");
-                       ret = -EAGAIN;
-                       goto error;
-               }
-
-               if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
-                                        extent_thresh, &last_len, &skip,
-                                        &defrag_end, do_compress)){
-                       unsigned long next;
-                       /*
-                        * the should_defrag function tells us how much to skip
-                        * bump our counter by the suggested amount
-                        */
-                       next = DIV_ROUND_UP(skip, PAGE_SIZE);
-                       i = max(i + 1, next);
-                       continue;
-               }
+       while (cur < last_byte) {
+               u64 cluster_end;
 
-               if (!newer_than) {
-                       cluster = (PAGE_ALIGN(defrag_end) >>
-                                  PAGE_SHIFT) - i;
-                       cluster = min(cluster, max_cluster);
-               } else {
-                       cluster = max_cluster;
-               }
+               /* The cluster size 256K should always be page aligned */
+               BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 
-               if (i + cluster > ra_index) {
-                       ra_index = max(i, ra_index);
-                       if (ra)
-                               page_cache_sync_readahead(inode->i_mapping, ra,
-                                               NULL, ra_index, cluster);
-                       ra_index += cluster;
-               }
+               /* We want the cluster end at page boundary when possible */
+               cluster_end = (((cur >> PAGE_SHIFT) +
+                              (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
+               cluster_end = min(cluster_end, last_byte);
 
                btrfs_inode_lock(inode, 0);
                if (IS_SWAPFILE(inode)) {
                        ret = -ETXTBSY;
-               } else {
-                       if (do_compress)
-                               BTRFS_I(inode)->defrag_compress = compress_type;
-                       ret = cluster_pages_for_defrag(inode, pages, i, cluster);
+                       btrfs_inode_unlock(inode, 0);
+                       break;
                }
-               if (ret < 0) {
+               if (!(inode->i_sb->s_flags & SB_ACTIVE)) {
                        btrfs_inode_unlock(inode, 0);
-                       goto out_ra;
+                       break;
                }
-
-               defrag_count += ret;
-               balance_dirty_pages_ratelimited(inode->i_mapping);
+               if (do_compress)
+                       BTRFS_I(inode)->defrag_compress = compress_type;
+               ret = defrag_one_cluster(BTRFS_I(inode), ra, cur,
+                               cluster_end + 1 - cur, extent_thresh,
+                               newer_than, do_compress,
+                               &sectors_defragged, max_to_defrag);
                btrfs_inode_unlock(inode, 0);
-
-               if (newer_than) {
-                       if (newer_off == (u64)-1)
-                               break;
-
-                       if (ret > 0)
-                               i += ret;
-
-                       newer_off = max(newer_off + 1,
-                                       (u64)i << PAGE_SHIFT);
-
-                       ret = find_new_extents(root, inode, newer_than,
-                                              &newer_off, SZ_64K);
-                       if (!ret) {
-                               range->start = newer_off;
-                               i = (newer_off & new_align) >> PAGE_SHIFT;
-                       } else {
-                               break;
-                       }
-               } else {
-                       if (ret > 0) {
-                               i += ret;
-                               last_len += ret << PAGE_SHIFT;
-                       } else {
-                               i++;
-                               last_len = 0;
-                       }
-               }
+               if (ret < 0)
+                       break;
+               cur = cluster_end + 1;
        }
 
-       ret = defrag_count;
-error:
-       if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
-               filemap_flush(inode->i_mapping);
-               if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-                            &BTRFS_I(inode)->runtime_flags))
+       if (ra_allocated)
+               kfree(ra);
+       if (sectors_defragged) {
+               /*
+                * We have defragged some sectors, for compression case they
+                * need to be written back immediately.
+                */
+               if (range->flags & BTRFS_DEFRAG_RANGE_START_IO) {
                        filemap_flush(inode->i_mapping);
+                       if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                                    &BTRFS_I(inode)->runtime_flags))
+                               filemap_flush(inode->i_mapping);
+               }
+               if (range->compress_type == BTRFS_COMPRESS_LZO)
+                       btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+               else if (range->compress_type == BTRFS_COMPRESS_ZSTD)
+                       btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+               ret = sectors_defragged;
        }
-
-       if (range->compress_type == BTRFS_COMPRESS_LZO) {
-               btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
-       } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
-               btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
-       }
-
-out_ra:
        if (do_compress) {
                btrfs_inode_lock(inode, 0);
                BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
                btrfs_inode_unlock(inode, 0);
        }
-       if (ra_allocated)
-               kfree(ra);
-       kfree(pages);
        return ret;
 }