btrfs: send: send compressed extents with encoded writes
author Omar Sandoval <osandov@fb.com>
Thu, 17 Mar 2022 17:25:42 +0000 (10:25 -0700)
committer David Sterba <dsterba@suse.com>
Mon, 25 Jul 2022 15:45:32 +0000 (17:45 +0200)
Now that all of the pieces are in place, we can use the ENCODED_WRITE
command to send compressed extents when appropriate.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/inode.c
fs/btrfs/send.c

index 613f46b..9a50da8 100644 (file)
@@ -3363,6 +3363,12 @@ int btrfs_writepage_cow_fixup(struct page *page);
 void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
                                          struct page *page, u64 start,
                                          u64 end, bool uptodate);
+int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
+                                            int compress_type); /* no longer static in inode.c; send.c calls it and treats a negative return as an error */
+int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+                                         u64 file_offset, u64 disk_bytenr,
+                                         u64 disk_io_size,
+                                         struct page **pages); /* no longer static in inode.c; send.c uses it to read extent bytes into its send buffer pages */
 ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
                           struct btrfs_ioctl_encoded_io_args *encoded);
 ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
index 7329a03..1ac43ae 100644 (file)
@@ -10196,9 +10196,8 @@ void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
        }
 }
 
-static int btrfs_encoded_io_compression_from_extent(
-                               struct btrfs_fs_info *fs_info,
-                               int compress_type)
+int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
+                                            int compress_type)
 {
        switch (compress_type) {
        case BTRFS_COMPRESS_NONE:
@@ -10403,11 +10402,9 @@ static void btrfs_encoded_read_endio(struct bio *bio)
        bio_put(bio);
 }
 
-static int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
-                                                u64 file_offset,
-                                                u64 disk_bytenr,
-                                                u64 disk_io_size,
-                                                struct page **pages)
+int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+                                         u64 file_offset, u64 disk_bytenr,
+                                         u64 disk_io_size, struct page **pages)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_encoded_read_private priv = {
index 57052fe..bc00393 100644 (file)
@@ -625,6 +625,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
                return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));      \
        }
 
+TLV_PUT_DEFINE_INT(32) /* adds the 32-bit variant; presumably backs the TLV_PUT_U32() uses added below - confirm against the macro */
 TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
@@ -5161,17 +5162,214 @@ tlv_put_failure:
        return ret;
 }
 
-static int send_extent_data(struct send_ctx *sctx,
-                           const u64 offset,
-                           const u64 len)
+static int send_encoded_inline_extent(struct send_ctx *sctx,
+                                     struct btrfs_path *path, u64 offset,
+                                     u64 len)
+{ /* Send the inline extent item at path->slots[0] as one ENCODED_WRITE command, copying its inline bytes as stored in the leaf. */
+       struct btrfs_root *root = sctx->send_root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct inode *inode;
+       struct fs_path *fspath;
+       struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_key key;
+       struct btrfs_file_extent_item *ei;
+       u64 ram_bytes;
+       size_t inline_size;
+       int ret;
+
+       inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); /* NOTE(review): inode is only released via iput() below, never read here - confirm the lookup is needed */
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
+
+       fspath = fs_path_alloc();
+       if (!fspath) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
+       if (ret < 0)
+               goto out;
+
+       ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+       if (ret < 0)
+               goto out;
+
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); /* key.offset serves as the extent's start in the length/offset math below */
+       ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+       ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
+       inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]); /* number of bytes copied out of the leaf as the command's data */
+
+       TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
+                   min(key.offset + ram_bytes - offset, len)); /* bytes from offset up to key.offset + ram_bytes, capped at len */
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, ram_bytes);
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET, offset - key.offset); /* distance of offset from the start of the extent */
+       ret = btrfs_encoded_io_compression_from_extent(fs_info,
+                               btrfs_file_extent_compression(leaf, ei));
+       if (ret < 0)
+               goto out;
+       TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret); /* ret is the non-negative mapping result; NOTE(review): unlike send_encoded_extent(), no BTRFS_SEND_A_ENCRYPTION attribute is sent - confirm intended */
+
+       ret = put_data_header(sctx, inline_size); /* presumably emits the data attribute header and checks buffer space - confirm */
+       if (ret < 0)
+               goto out;
+       read_extent_buffer(leaf, sctx->send_buf + sctx->send_size,
+                          btrfs_file_extent_inline_start(ei), inline_size); /* copy the inline bytes to the current end of send_buf */
+       sctx->send_size += inline_size; /* account for the bytes just copied */
+
+       ret = send_cmd(sctx);
+
+tlv_put_failure: /* no explicit goto targets this label; presumably the TLV_PUT_* macros jump here on failure - confirm */
+out:
+       fs_path_free(fspath); /* also reached with fspath == NULL when fs_path_alloc() failed, so this must accept NULL - confirm */
+       iput(inode);
+       return ret; /* error from the first failing step, otherwise send_cmd()'s result */
+}
+
+static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
+                              u64 offset, u64 len)
+{ /* Send the extent item at path->slots[0] as one ENCODED_WRITE command, reading its on-disk bytes directly into the send buffer. */
+       struct btrfs_root *root = sctx->send_root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct inode *inode;
+       struct fs_path *fspath;
+       struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_key key;
+       struct btrfs_file_extent_item *ei;
+       u64 disk_bytenr, disk_num_bytes;
+       u32 data_offset; /* page-aligned offset in send_buf where the extent bytes are read to */
+       struct btrfs_cmd_header *hdr;
+       u32 crc;
+       int ret;
+
+       inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
+
+       fspath = fs_path_alloc();
+       if (!fspath) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
+       if (ret < 0)
+               goto out;
+
+       ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+       if (ret < 0)
+               goto out;
+
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); /* key.offset serves as the extent's start in the length/offset math below */
+       ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+       disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
+       disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, ei); /* size of the data payload sent with this command */
+
+       TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
+                   min(key.offset + btrfs_file_extent_num_bytes(leaf, ei) - offset,
+                       len)); /* bytes from offset to the end of this file extent item, capped at len */
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN,
+                   btrfs_file_extent_ram_bytes(leaf, ei));
+       TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
+                   offset - key.offset + btrfs_file_extent_offset(leaf, ei)); /* position within the item plus the item's own extent offset */
+       ret = btrfs_encoded_io_compression_from_extent(fs_info,
+                               btrfs_file_extent_compression(leaf, ei));
+       if (ret < 0)
+               goto out;
+       TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret); /* ret is the non-negative mapping result from the call above */
+       TLV_PUT_U32(sctx, BTRFS_SEND_A_ENCRYPTION, 0);
+
+       ret = put_data_header(sctx, disk_num_bytes); /* presumably emits the data attribute header and checks buffer space - confirm */
+       if (ret < 0)
+               goto out;
+
+       /*
+        * We want to do I/O directly into the send buffer, so get the next page
+        * boundary in the send buffer. This means that there may be a gap
+        * between the beginning of the command and the file data.
+        */
+       data_offset = ALIGN(sctx->send_size, PAGE_SIZE);
+       if (data_offset > sctx->send_max_size ||
+           sctx->send_max_size - data_offset < disk_num_bytes) {
+               ret = -EOVERFLOW; /* the page-aligned data would not fit within send_max_size */
+               goto out;
+       }
+
+       /*
+        * Note that send_buf is a mapping of send_buf_pages, so this is really
+        * reading into send_buf.
+        */
+       ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), offset,
+                                                   disk_bytenr, disk_num_bytes,
+                                                   sctx->send_buf_pages +
+                                                   (data_offset >> PAGE_SHIFT)); /* start at the page that corresponds to data_offset */
+       if (ret)
+               goto out;
+
+       hdr = (struct btrfs_cmd_header *)sctx->send_buf;
+       hdr->len = cpu_to_le32(sctx->send_size + disk_num_bytes - sizeof(*hdr)); /* counts attributes plus data; the alignment gap before data_offset is not included */
+       hdr->crc = 0; /* the crc field is zero while the checksum is computed */
+       crc = btrfs_crc32c(0, sctx->send_buf, sctx->send_size);
+       crc = btrfs_crc32c(crc, sctx->send_buf + data_offset, disk_num_bytes); /* continue over the data, skipping the gap */
+       hdr->crc = cpu_to_le32(crc);
+
+       ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
+                       &sctx->send_off); /* first write: header and attributes, bytes [0, send_size) */
+       if (!ret) {
+               ret = write_buf(sctx->send_filp, sctx->send_buf + data_offset,
+                               disk_num_bytes, &sctx->send_off); /* second write: the extent bytes, only if the first write returned 0 */
+       }
+       sctx->send_size = 0; /* reset runs even when a write_buf() call failed */
+       sctx->put_data = false;
+
+tlv_put_failure: /* no explicit goto targets this label; presumably the TLV_PUT_* macros jump here on failure - confirm */
+out:
+       fs_path_free(fspath); /* also reached with fspath == NULL when fs_path_alloc() failed, so this must accept NULL - confirm */
+       iput(inode);
+       return ret; /* error from the first failing step, otherwise the result of the last write_buf() */
+}
+
+static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
+                           const u64 offset, const u64 len)
 {
        const u64 end = offset + len;
+       struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_file_extent_item *ei;
        u64 read_size = max_send_read_size(sctx);
        u64 sent = 0;
 
        if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
                return send_update_extent(sctx, offset, len);
 
+       ei = btrfs_item_ptr(leaf, path->slots[0],
+                           struct btrfs_file_extent_item);
+       if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
+           btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
+               bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
+                                 BTRFS_FILE_EXTENT_INLINE);
+
+               /*
+                * Send the compressed extent unless the compressed data is
+                * larger than the decompressed data. This can happen if we're
+                * not sending the entire extent, either because it has been
+                * partially overwritten/truncated or because this is a part of
+                * the extent that we couldn't clone in clone_range().
+                */
+               if (is_inline &&
+                   btrfs_file_extent_inline_item_len(leaf,
+                                                     path->slots[0]) <= len) {
+                       return send_encoded_inline_extent(sctx, path, offset,
+                                                         len);
+               } else if (!is_inline &&
+                          btrfs_file_extent_disk_num_bytes(leaf, ei) <= len) {
+                       return send_encoded_extent(sctx, path, offset, len);
+               }
+       }
+
        if (sctx->cur_inode == NULL) {
                struct btrfs_root *root = sctx->send_root;
 
@@ -5309,12 +5507,9 @@ out:
        return ret;
 }
 
-static int clone_range(struct send_ctx *sctx,
-                      struct clone_root *clone_root,
-                      const u64 disk_byte,
-                      u64 data_offset,
-                      u64 offset,
-                      u64 len)
+static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
+                      struct clone_root *clone_root, const u64 disk_byte,
+                      u64 data_offset, u64 offset, u64 len)
 {
        struct btrfs_path *path;
        struct btrfs_key key;
@@ -5338,7 +5533,7 @@ static int clone_range(struct send_ctx *sctx,
         */
        if (clone_root->offset == 0 &&
            len == sctx->send_root->fs_info->sectorsize)
-               return send_extent_data(sctx, offset, len);
+               return send_extent_data(sctx, dst_path, offset, len);
 
        path = alloc_path_for_send();
        if (!path)
@@ -5435,7 +5630,8 @@ static int clone_range(struct send_ctx *sctx,
 
                        if (hole_len > len)
                                hole_len = len;
-                       ret = send_extent_data(sctx, offset, hole_len);
+                       ret = send_extent_data(sctx, dst_path, offset,
+                                              hole_len);
                        if (ret < 0)
                                goto out;
 
@@ -5508,14 +5704,16 @@ static int clone_range(struct send_ctx *sctx,
                                        if (ret < 0)
                                                goto out;
                                }
-                               ret = send_extent_data(sctx, offset + slen,
+                               ret = send_extent_data(sctx, dst_path,
+                                                      offset + slen,
                                                       clone_len - slen);
                        } else {
                                ret = send_clone(sctx, offset, clone_len,
                                                 clone_root);
                        }
                } else {
-                       ret = send_extent_data(sctx, offset, clone_len);
+                       ret = send_extent_data(sctx, dst_path, offset,
+                                              clone_len);
                }
 
                if (ret < 0)
@@ -5547,7 +5745,7 @@ next:
        }
 
        if (len > 0)
-               ret = send_extent_data(sctx, offset, len);
+               ret = send_extent_data(sctx, dst_path, offset, len);
        else
                ret = 0;
 out:
@@ -5578,10 +5776,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
                                    struct btrfs_file_extent_item);
                disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
                data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
-               ret = clone_range(sctx, clone_root, disk_byte, data_offset,
-                                 offset, end - offset);
+               ret = clone_range(sctx, path, clone_root, disk_byte,
+                                 data_offset, offset, end - offset);
        } else {
-               ret = send_extent_data(sctx, offset, end - offset);
+               ret = send_extent_data(sctx, path, offset, end - offset);
        }
        sctx->cur_inode_next_write_offset = end;
        return ret;