Merge branch 'topic/ppc-kvm' of https://git.kernel.org/pub/scm/linux/kernel/git/power...
[linux-2.6-microblaze.git] / fs / btrfs / file-item.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2007 Oracle.  All rights reserved.
4  */
5
6 #include <linux/bio.h>
7 #include <linux/slab.h>
8 #include <linux/pagemap.h>
9 #include <linux/highmem.h>
10 #include <linux/sched/mm.h>
11 #include <crypto/hash.h>
12 #include "misc.h"
13 #include "ctree.h"
14 #include "disk-io.h"
15 #include "transaction.h"
16 #include "volumes.h"
17 #include "print-tree.h"
18 #include "compression.h"
19
/*
 * Number of checksums of @size bytes that fit in the data area of a leaf of
 * root @r once room for two item headers has been set aside, minus one.
 *
 * @size is parenthesized so that an expression argument (e.g. "a + b") is
 * divided as a whole instead of being split by operator precedence.
 */
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
				   sizeof(struct btrfs_item) * 2) / \
				  (size)) - 1))

/* Same as __MAX_CSUM_ITEMS(), but never larger than PAGE_SIZE. */
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
				       PAGE_SIZE))
26
27 /**
28  * Set inode's size according to filesystem options
29  *
30  * @inode:      inode we want to update the disk_i_size for
31  * @new_i_size: i_size we want to set to, 0 if we use i_size
32  *
33  * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read()
34  * returns as it is perfectly fine with a file that has holes without hole file
35  * extent items.
36  *
37  * However without NO_HOLES we need to only return the area that is contiguous
38  * from the 0 offset of the file.  Otherwise we could end up adjust i_size up
39  * to an extent that has a gap in between.
40  *
41  * Finally new_i_size should only be set in the case of truncate where we're not
42  * ready to use i_size_read() as the limiter yet.
43  */
44 void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
45 {
46         struct btrfs_fs_info *fs_info = inode->root->fs_info;
47         u64 start, end, i_size;
48         int ret;
49
50         i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
51         if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
52                 inode->disk_i_size = i_size;
53                 return;
54         }
55
56         spin_lock(&inode->lock);
57         ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
58                                          &end, EXTENT_DIRTY);
59         if (!ret && start == 0)
60                 i_size = min(i_size, end + 1);
61         else
62                 i_size = 0;
63         inode->disk_i_size = i_size;
64         spin_unlock(&inode->lock);
65 }
66
67 /**
68  * Mark range within a file as having a new extent inserted
69  *
70  * @inode: inode being modified
71  * @start: start file offset of the file extent we've inserted
72  * @len:   logical length of the file extent item
73  *
74  * Call when we are inserting a new file extent where there was none before.
75  * Does not need to call this in the case where we're replacing an existing file
76  * extent, however if not sure it's fine to call this multiple times.
77  *
78  * The start and len must match the file extent item, so thus must be sectorsize
79  * aligned.
80  */
81 int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
82                                       u64 len)
83 {
84         if (len == 0)
85                 return 0;
86
87         ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
88
89         if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
90                 return 0;
91         return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
92                                EXTENT_DIRTY);
93 }
94
95 /**
96  * Marks an inode range as not having a backing extent
97  *
98  * @inode: inode being modified
99  * @start: start file offset of the file extent we've inserted
100  * @len:   logical length of the file extent item
101  *
102  * Called when we drop a file extent, for example when we truncate.  Doesn't
103  * need to be called for cases where we're replacing a file extent, like when
104  * we've COWed a file extent.
105  *
106  * The start and len must match the file extent item, so thus must be sectorsize
107  * aligned.
108  */
109 int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
110                                         u64 len)
111 {
112         if (len == 0)
113                 return 0;
114
115         ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
116                len == (u64)-1);
117
118         if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
119                 return 0;
120         return clear_extent_bit(&inode->file_extent_tree, start,
121                                 start + len - 1, EXTENT_DIRTY, 0, 0, NULL);
122 }
123
124 static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
125                                         u16 csum_size)
126 {
127         u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size;
128
129         return ncsums * fs_info->sectorsize;
130 }
131
132 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
133                              struct btrfs_root *root,
134                              u64 objectid, u64 pos,
135                              u64 disk_offset, u64 disk_num_bytes,
136                              u64 num_bytes, u64 offset, u64 ram_bytes,
137                              u8 compression, u8 encryption, u16 other_encoding)
138 {
139         int ret = 0;
140         struct btrfs_file_extent_item *item;
141         struct btrfs_key file_key;
142         struct btrfs_path *path;
143         struct extent_buffer *leaf;
144
145         path = btrfs_alloc_path();
146         if (!path)
147                 return -ENOMEM;
148         file_key.objectid = objectid;
149         file_key.offset = pos;
150         file_key.type = BTRFS_EXTENT_DATA_KEY;
151
152         ret = btrfs_insert_empty_item(trans, root, path, &file_key,
153                                       sizeof(*item));
154         if (ret < 0)
155                 goto out;
156         BUG_ON(ret); /* Can't happen */
157         leaf = path->nodes[0];
158         item = btrfs_item_ptr(leaf, path->slots[0],
159                               struct btrfs_file_extent_item);
160         btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
161         btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
162         btrfs_set_file_extent_offset(leaf, item, offset);
163         btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
164         btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
165         btrfs_set_file_extent_generation(leaf, item, trans->transid);
166         btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
167         btrfs_set_file_extent_compression(leaf, item, compression);
168         btrfs_set_file_extent_encryption(leaf, item, encryption);
169         btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
170
171         btrfs_mark_buffer_dirty(leaf);
172 out:
173         btrfs_free_path(path);
174         return ret;
175 }
176
177 static struct btrfs_csum_item *
178 btrfs_lookup_csum(struct btrfs_trans_handle *trans,
179                   struct btrfs_root *root,
180                   struct btrfs_path *path,
181                   u64 bytenr, int cow)
182 {
183         struct btrfs_fs_info *fs_info = root->fs_info;
184         int ret;
185         struct btrfs_key file_key;
186         struct btrfs_key found_key;
187         struct btrfs_csum_item *item;
188         struct extent_buffer *leaf;
189         u64 csum_offset = 0;
190         const u32 csum_size = fs_info->csum_size;
191         int csums_in_item;
192
193         file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
194         file_key.offset = bytenr;
195         file_key.type = BTRFS_EXTENT_CSUM_KEY;
196         ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
197         if (ret < 0)
198                 goto fail;
199         leaf = path->nodes[0];
200         if (ret > 0) {
201                 ret = 1;
202                 if (path->slots[0] == 0)
203                         goto fail;
204                 path->slots[0]--;
205                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
206                 if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
207                         goto fail;
208
209                 csum_offset = (bytenr - found_key.offset) >>
210                                 fs_info->sectorsize_bits;
211                 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
212                 csums_in_item /= csum_size;
213
214                 if (csum_offset == csums_in_item) {
215                         ret = -EFBIG;
216                         goto fail;
217                 } else if (csum_offset > csums_in_item) {
218                         goto fail;
219                 }
220         }
221         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
222         item = (struct btrfs_csum_item *)((unsigned char *)item +
223                                           csum_offset * csum_size);
224         return item;
225 fail:
226         if (ret > 0)
227                 ret = -ENOENT;
228         return ERR_PTR(ret);
229 }
230
231 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
232                              struct btrfs_root *root,
233                              struct btrfs_path *path, u64 objectid,
234                              u64 offset, int mod)
235 {
236         int ret;
237         struct btrfs_key file_key;
238         int ins_len = mod < 0 ? -1 : 0;
239         int cow = mod != 0;
240
241         file_key.objectid = objectid;
242         file_key.offset = offset;
243         file_key.type = BTRFS_EXTENT_DATA_KEY;
244         ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
245         return ret;
246 }
247
248 /*
249  * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and
250  * estore the result to @dst.
251  *
252  * Return >0 for the number of sectors we found.
253  * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum
254  * for it. Caller may want to try next sector until one range is hit.
255  * Return <0 for fatal error.
256  */
257 static int search_csum_tree(struct btrfs_fs_info *fs_info,
258                             struct btrfs_path *path, u64 disk_bytenr,
259                             u64 len, u8 *dst)
260 {
261         struct btrfs_csum_item *item = NULL;
262         struct btrfs_key key;
263         const u32 sectorsize = fs_info->sectorsize;
264         const u32 csum_size = fs_info->csum_size;
265         u32 itemsize;
266         int ret;
267         u64 csum_start;
268         u64 csum_len;
269
270         ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
271                IS_ALIGNED(len, sectorsize));
272
273         /* Check if the current csum item covers disk_bytenr */
274         if (path->nodes[0]) {
275                 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
276                                       struct btrfs_csum_item);
277                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
278                 itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
279
280                 csum_start = key.offset;
281                 csum_len = (itemsize / csum_size) * sectorsize;
282
283                 if (in_range(disk_bytenr, csum_start, csum_len))
284                         goto found;
285         }
286
287         /* Current item doesn't contain the desired range, search again */
288         btrfs_release_path(path);
289         item = btrfs_lookup_csum(NULL, fs_info->csum_root, path, disk_bytenr, 0);
290         if (IS_ERR(item)) {
291                 ret = PTR_ERR(item);
292                 goto out;
293         }
294         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
295         itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
296
297         csum_start = key.offset;
298         csum_len = (itemsize / csum_size) * sectorsize;
299         ASSERT(in_range(disk_bytenr, csum_start, csum_len));
300
301 found:
302         ret = (min(csum_start + csum_len, disk_bytenr + len) -
303                    disk_bytenr) >> fs_info->sectorsize_bits;
304         read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
305                         ret * csum_size);
306 out:
307         if (ret == -ENOENT)
308                 ret = 0;
309         return ret;
310 }
311
312 /*
313  * Locate the file_offset of @cur_disk_bytenr of a @bio.
314  *
315  * Bio of btrfs represents read range of
316  * [bi_sector << 9, bi_sector << 9 + bi_size).
317  * Knowing this, we can iterate through each bvec to locate the page belong to
318  * @cur_disk_bytenr and get the file offset.
319  *
320  * @inode is used to determine if the bvec page really belongs to @inode.
321  *
322  * Return 0 if we can't find the file offset
323  * Return >0 if we find the file offset and restore it to @file_offset_ret
324  */
325 static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
326                                      u64 disk_bytenr, u64 *file_offset_ret)
327 {
328         struct bvec_iter iter;
329         struct bio_vec bvec;
330         u64 cur = bio->bi_iter.bi_sector << SECTOR_SHIFT;
331         int ret = 0;
332
333         bio_for_each_segment(bvec, bio, iter) {
334                 struct page *page = bvec.bv_page;
335
336                 if (cur > disk_bytenr)
337                         break;
338                 if (cur + bvec.bv_len <= disk_bytenr) {
339                         cur += bvec.bv_len;
340                         continue;
341                 }
342                 ASSERT(in_range(disk_bytenr, cur, bvec.bv_len));
343                 if (page->mapping && page->mapping->host &&
344                     page->mapping->host == inode) {
345                         ret = 1;
346                         *file_offset_ret = page_offset(page) + bvec.bv_offset +
347                                            disk_bytenr - cur;
348                         break;
349                 }
350         }
351         return ret;
352 }
353
354 /**
355  * Lookup the checksum for the read bio in csum tree.
356  *
357  * @inode: inode that the bio is for.
358  * @bio: bio to look up.
359  * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
360  *       checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
361  *       NULL, the checksum buffer is allocated and returned in
362  *       btrfs_io_bio(bio)->csum instead.
363  *
364  * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
365  */
366 blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst)
367 {
368         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
369         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
370         struct btrfs_path *path;
371         const u32 sectorsize = fs_info->sectorsize;
372         const u32 csum_size = fs_info->csum_size;
373         u32 orig_len = bio->bi_iter.bi_size;
374         u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
375         u64 cur_disk_bytenr;
376         u8 *csum;
377         const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
378         int count = 0;
379
380         if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
381                 return BLK_STS_OK;
382
383         /*
384          * This function is only called for read bio.
385          *
386          * This means two things:
387          * - All our csums should only be in csum tree
388          *   No ordered extents csums, as ordered extents are only for write
389          *   path.
390          * - No need to bother any other info from bvec
391          *   Since we're looking up csums, the only important info is the
392          *   disk_bytenr and the length, which can be extracted from bi_iter
393          *   directly.
394          */
395         ASSERT(bio_op(bio) == REQ_OP_READ);
396         path = btrfs_alloc_path();
397         if (!path)
398                 return BLK_STS_RESOURCE;
399
400         if (!dst) {
401                 struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
402
403                 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
404                         btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
405                                                         GFP_NOFS);
406                         if (!btrfs_bio->csum) {
407                                 btrfs_free_path(path);
408                                 return BLK_STS_RESOURCE;
409                         }
410                 } else {
411                         btrfs_bio->csum = btrfs_bio->csum_inline;
412                 }
413                 csum = btrfs_bio->csum;
414         } else {
415                 csum = dst;
416         }
417
418         /*
419          * If requested number of sectors is larger than one leaf can contain,
420          * kick the readahead for csum tree.
421          */
422         if (nblocks > fs_info->csums_per_leaf)
423                 path->reada = READA_FORWARD;
424
425         /*
426          * the free space stuff is only read when it hasn't been
427          * updated in the current transaction.  So, we can safely
428          * read from the commit root and sidestep a nasty deadlock
429          * between reading the free space cache and updating the csum tree.
430          */
431         if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
432                 path->search_commit_root = 1;
433                 path->skip_locking = 1;
434         }
435
436         for (cur_disk_bytenr = orig_disk_bytenr;
437              cur_disk_bytenr < orig_disk_bytenr + orig_len;
438              cur_disk_bytenr += (count * sectorsize)) {
439                 u64 search_len = orig_disk_bytenr + orig_len - cur_disk_bytenr;
440                 unsigned int sector_offset;
441                 u8 *csum_dst;
442
443                 /*
444                  * Although both cur_disk_bytenr and orig_disk_bytenr is u64,
445                  * we're calculating the offset to the bio start.
446                  *
447                  * Bio size is limited to UINT_MAX, thus unsigned int is large
448                  * enough to contain the raw result, not to mention the right
449                  * shifted result.
450                  */
451                 ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX);
452                 sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >>
453                                 fs_info->sectorsize_bits;
454                 csum_dst = csum + sector_offset * csum_size;
455
456                 count = search_csum_tree(fs_info, path, cur_disk_bytenr,
457                                          search_len, csum_dst);
458                 if (count <= 0) {
459                         /*
460                          * Either we hit a critical error or we didn't find
461                          * the csum.
462                          * Either way, we put zero into the csums dst, and skip
463                          * to the next sector.
464                          */
465                         memset(csum_dst, 0, csum_size);
466                         count = 1;
467
468                         /*
469                          * For data reloc inode, we need to mark the range
470                          * NODATASUM so that balance won't report false csum
471                          * error.
472                          */
473                         if (BTRFS_I(inode)->root->root_key.objectid ==
474                             BTRFS_DATA_RELOC_TREE_OBJECTID) {
475                                 u64 file_offset;
476                                 int ret;
477
478                                 ret = search_file_offset_in_bio(bio, inode,
479                                                 cur_disk_bytenr, &file_offset);
480                                 if (ret)
481                                         set_extent_bits(io_tree, file_offset,
482                                                 file_offset + sectorsize - 1,
483                                                 EXTENT_NODATASUM);
484                         } else {
485                                 btrfs_warn_rl(fs_info,
486                         "csum hole found for disk bytenr range [%llu, %llu)",
487                                 cur_disk_bytenr, cur_disk_bytenr + sectorsize);
488                         }
489                 }
490         }
491
492         btrfs_free_path(path);
493         return BLK_STS_OK;
494 }
495
496 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
497                              struct list_head *list, int search_commit)
498 {
499         struct btrfs_fs_info *fs_info = root->fs_info;
500         struct btrfs_key key;
501         struct btrfs_path *path;
502         struct extent_buffer *leaf;
503         struct btrfs_ordered_sum *sums;
504         struct btrfs_csum_item *item;
505         LIST_HEAD(tmplist);
506         unsigned long offset;
507         int ret;
508         size_t size;
509         u64 csum_end;
510         const u32 csum_size = fs_info->csum_size;
511
512         ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
513                IS_ALIGNED(end + 1, fs_info->sectorsize));
514
515         path = btrfs_alloc_path();
516         if (!path)
517                 return -ENOMEM;
518
519         if (search_commit) {
520                 path->skip_locking = 1;
521                 path->reada = READA_FORWARD;
522                 path->search_commit_root = 1;
523         }
524
525         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
526         key.offset = start;
527         key.type = BTRFS_EXTENT_CSUM_KEY;
528
529         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
530         if (ret < 0)
531                 goto fail;
532         if (ret > 0 && path->slots[0] > 0) {
533                 leaf = path->nodes[0];
534                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
535                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
536                     key.type == BTRFS_EXTENT_CSUM_KEY) {
537                         offset = (start - key.offset) >> fs_info->sectorsize_bits;
538                         if (offset * csum_size <
539                             btrfs_item_size_nr(leaf, path->slots[0] - 1))
540                                 path->slots[0]--;
541                 }
542         }
543
544         while (start <= end) {
545                 leaf = path->nodes[0];
546                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
547                         ret = btrfs_next_leaf(root, path);
548                         if (ret < 0)
549                                 goto fail;
550                         if (ret > 0)
551                                 break;
552                         leaf = path->nodes[0];
553                 }
554
555                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
556                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
557                     key.type != BTRFS_EXTENT_CSUM_KEY ||
558                     key.offset > end)
559                         break;
560
561                 if (key.offset > start)
562                         start = key.offset;
563
564                 size = btrfs_item_size_nr(leaf, path->slots[0]);
565                 csum_end = key.offset + (size / csum_size) * fs_info->sectorsize;
566                 if (csum_end <= start) {
567                         path->slots[0]++;
568                         continue;
569                 }
570
571                 csum_end = min(csum_end, end + 1);
572                 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
573                                       struct btrfs_csum_item);
574                 while (start < csum_end) {
575                         size = min_t(size_t, csum_end - start,
576                                      max_ordered_sum_bytes(fs_info, csum_size));
577                         sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
578                                        GFP_NOFS);
579                         if (!sums) {
580                                 ret = -ENOMEM;
581                                 goto fail;
582                         }
583
584                         sums->bytenr = start;
585                         sums->len = (int)size;
586
587                         offset = (start - key.offset) >> fs_info->sectorsize_bits;
588                         offset *= csum_size;
589                         size >>= fs_info->sectorsize_bits;
590
591                         read_extent_buffer(path->nodes[0],
592                                            sums->sums,
593                                            ((unsigned long)item) + offset,
594                                            csum_size * size);
595
596                         start += fs_info->sectorsize * size;
597                         list_add_tail(&sums->list, &tmplist);
598                 }
599                 path->slots[0]++;
600         }
601         ret = 0;
602 fail:
603         while (ret < 0 && !list_empty(&tmplist)) {
604                 sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
605                 list_del(&sums->list);
606                 kfree(sums);
607         }
608         list_splice_tail(&tmplist, list);
609
610         btrfs_free_path(path);
611         return ret;
612 }
613
614 /*
615  * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
616  * @inode:       Owner of the data inside the bio
617  * @bio:         Contains the data to be checksummed
618  * @file_start:  offset in file this bio begins to describe
619  * @contig:      Boolean. If true/1 means all bio vecs in this bio are
620  *               contiguous and they begin at @file_start in the file. False/0
621  *               means this bio can contains potentially discontigous bio vecs
622  *               so the logical offset of each should be calculated separately.
623  */
624 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
625                        u64 file_start, int contig)
626 {
627         struct btrfs_fs_info *fs_info = inode->root->fs_info;
628         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
629         struct btrfs_ordered_sum *sums;
630         struct btrfs_ordered_extent *ordered = NULL;
631         char *data;
632         struct bvec_iter iter;
633         struct bio_vec bvec;
634         int index;
635         int nr_sectors;
636         unsigned long total_bytes = 0;
637         unsigned long this_sum_bytes = 0;
638         int i;
639         u64 offset;
640         unsigned nofs_flag;
641
642         nofs_flag = memalloc_nofs_save();
643         sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
644                        GFP_KERNEL);
645         memalloc_nofs_restore(nofs_flag);
646
647         if (!sums)
648                 return BLK_STS_RESOURCE;
649
650         sums->len = bio->bi_iter.bi_size;
651         INIT_LIST_HEAD(&sums->list);
652
653         if (contig)
654                 offset = file_start;
655         else
656                 offset = 0; /* shut up gcc */
657
658         sums->bytenr = bio->bi_iter.bi_sector << 9;
659         index = 0;
660
661         shash->tfm = fs_info->csum_shash;
662
663         bio_for_each_segment(bvec, bio, iter) {
664                 if (!contig)
665                         offset = page_offset(bvec.bv_page) + bvec.bv_offset;
666
667                 if (!ordered) {
668                         ordered = btrfs_lookup_ordered_extent(inode, offset);
669                         BUG_ON(!ordered); /* Logic error */
670                 }
671
672                 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
673                                                  bvec.bv_len + fs_info->sectorsize
674                                                  - 1);
675
676                 for (i = 0; i < nr_sectors; i++) {
677                         if (offset >= ordered->file_offset + ordered->num_bytes ||
678                             offset < ordered->file_offset) {
679                                 unsigned long bytes_left;
680
681                                 sums->len = this_sum_bytes;
682                                 this_sum_bytes = 0;
683                                 btrfs_add_ordered_sum(ordered, sums);
684                                 btrfs_put_ordered_extent(ordered);
685
686                                 bytes_left = bio->bi_iter.bi_size - total_bytes;
687
688                                 nofs_flag = memalloc_nofs_save();
689                                 sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
690                                                       bytes_left), GFP_KERNEL);
691                                 memalloc_nofs_restore(nofs_flag);
692                                 BUG_ON(!sums); /* -ENOMEM */
693                                 sums->len = bytes_left;
694                                 ordered = btrfs_lookup_ordered_extent(inode,
695                                                                 offset);
696                                 ASSERT(ordered); /* Logic error */
697                                 sums->bytenr = (bio->bi_iter.bi_sector << 9)
698                                         + total_bytes;
699                                 index = 0;
700                         }
701
702                         data = kmap_atomic(bvec.bv_page);
703                         crypto_shash_digest(shash, data + bvec.bv_offset
704                                             + (i * fs_info->sectorsize),
705                                             fs_info->sectorsize,
706                                             sums->sums + index);
707                         kunmap_atomic(data);
708                         index += fs_info->csum_size;
709                         offset += fs_info->sectorsize;
710                         this_sum_bytes += fs_info->sectorsize;
711                         total_bytes += fs_info->sectorsize;
712                 }
713
714         }
715         this_sum_bytes = 0;
716         btrfs_add_ordered_sum(ordered, sums);
717         btrfs_put_ordered_extent(ordered);
718         return 0;
719 }
720
721 /*
722  * helper function for csum removal, this expects the
723  * key to describe the csum pointed to by the path, and it expects
724  * the csum to overlap the range [bytenr, len]
725  *
726  * The csum should not be entirely contained in the range and the
727  * range should not be entirely contained in the csum.
728  *
729  * This calls btrfs_truncate_item with the correct args based on the
730  * overlap, and fixes up the key as required.
731  */
732 static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
733                                        struct btrfs_path *path,
734                                        struct btrfs_key *key,
735                                        u64 bytenr, u64 len)
736 {
737         struct extent_buffer *leaf;
738         const u32 csum_size = fs_info->csum_size;
739         u64 csum_end;
740         u64 end_byte = bytenr + len;
741         u32 blocksize_bits = fs_info->sectorsize_bits;
742
743         leaf = path->nodes[0];
744         csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
745         csum_end <<= blocksize_bits;
746         csum_end += key->offset;
747
748         if (key->offset < bytenr && csum_end <= end_byte) {
749                 /*
750                  *         [ bytenr - len ]
751                  *         [   ]
752                  *   [csum     ]
753                  *   A simple truncate off the end of the item
754                  */
755                 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
756                 new_size *= csum_size;
757                 btrfs_truncate_item(path, new_size, 1);
758         } else if (key->offset >= bytenr && csum_end > end_byte &&
759                    end_byte > key->offset) {
760                 /*
761                  *         [ bytenr - len ]
762                  *                 [ ]
763                  *                 [csum     ]
764                  * we need to truncate from the beginning of the csum
765                  */
766                 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
767                 new_size *= csum_size;
768
769                 btrfs_truncate_item(path, new_size, 0);
770
771                 key->offset = end_byte;
772                 btrfs_set_item_key_safe(fs_info, path, key);
773         } else {
774                 BUG();
775         }
776 }
777
778 /*
779  * deletes the csum items from the csum tree for a given
780  * range of bytes.
781  */
782 int btrfs_del_csums(struct btrfs_trans_handle *trans,
783                     struct btrfs_root *root, u64 bytenr, u64 len)
784 {
785         struct btrfs_fs_info *fs_info = trans->fs_info;
786         struct btrfs_path *path;
787         struct btrfs_key key;
788         u64 end_byte = bytenr + len;
789         u64 csum_end;
790         struct extent_buffer *leaf;
791         int ret;
792         const u32 csum_size = fs_info->csum_size;
793         u32 blocksize_bits = fs_info->sectorsize_bits;
794
795         ASSERT(root == fs_info->csum_root ||
796                root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
797
798         path = btrfs_alloc_path();
799         if (!path)
800                 return -ENOMEM;
801
802         while (1) {
803                 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
804                 key.offset = end_byte - 1;
805                 key.type = BTRFS_EXTENT_CSUM_KEY;
806
807                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
808                 if (ret > 0) {
809                         if (path->slots[0] == 0)
810                                 break;
811                         path->slots[0]--;
812                 } else if (ret < 0) {
813                         break;
814                 }
815
816                 leaf = path->nodes[0];
817                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
818
819                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
820                     key.type != BTRFS_EXTENT_CSUM_KEY) {
821                         break;
822                 }
823
824                 if (key.offset >= end_byte)
825                         break;
826
827                 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
828                 csum_end <<= blocksize_bits;
829                 csum_end += key.offset;
830
831                 /* this csum ends before we start, we're done */
832                 if (csum_end <= bytenr)
833                         break;
834
835                 /* delete the entire item, it is inside our range */
836                 if (key.offset >= bytenr && csum_end <= end_byte) {
837                         int del_nr = 1;
838
839                         /*
840                          * Check how many csum items preceding this one in this
841                          * leaf correspond to our range and then delete them all
842                          * at once.
843                          */
844                         if (key.offset > bytenr && path->slots[0] > 0) {
845                                 int slot = path->slots[0] - 1;
846
847                                 while (slot >= 0) {
848                                         struct btrfs_key pk;
849
850                                         btrfs_item_key_to_cpu(leaf, &pk, slot);
851                                         if (pk.offset < bytenr ||
852                                             pk.type != BTRFS_EXTENT_CSUM_KEY ||
853                                             pk.objectid !=
854                                             BTRFS_EXTENT_CSUM_OBJECTID)
855                                                 break;
856                                         path->slots[0] = slot;
857                                         del_nr++;
858                                         key.offset = pk.offset;
859                                         slot--;
860                                 }
861                         }
862                         ret = btrfs_del_items(trans, root, path,
863                                               path->slots[0], del_nr);
864                         if (ret)
865                                 goto out;
866                         if (key.offset == bytenr)
867                                 break;
868                 } else if (key.offset < bytenr && csum_end > end_byte) {
869                         unsigned long offset;
870                         unsigned long shift_len;
871                         unsigned long item_offset;
872                         /*
873                          *        [ bytenr - len ]
874                          *     [csum                ]
875                          *
876                          * Our bytes are in the middle of the csum,
877                          * we need to split this item and insert a new one.
878                          *
879                          * But we can't drop the path because the
880                          * csum could change, get removed, extended etc.
881                          *
882                          * The trick here is the max size of a csum item leaves
883                          * enough room in the tree block for a single
884                          * item header.  So, we split the item in place,
885                          * adding a new header pointing to the existing
886                          * bytes.  Then we loop around again and we have
887                          * a nicely formed csum item that we can neatly
888                          * truncate.
889                          */
890                         offset = (bytenr - key.offset) >> blocksize_bits;
891                         offset *= csum_size;
892
893                         shift_len = (len >> blocksize_bits) * csum_size;
894
895                         item_offset = btrfs_item_ptr_offset(leaf,
896                                                             path->slots[0]);
897
898                         memzero_extent_buffer(leaf, item_offset + offset,
899                                              shift_len);
900                         key.offset = bytenr;
901
902                         /*
903                          * btrfs_split_item returns -EAGAIN when the
904                          * item changed size or key
905                          */
906                         ret = btrfs_split_item(trans, root, path, &key, offset);
907                         if (ret && ret != -EAGAIN) {
908                                 btrfs_abort_transaction(trans, ret);
909                                 goto out;
910                         }
911
912                         key.offset = end_byte - 1;
913                 } else {
914                         truncate_one_csum(fs_info, path, &key, bytenr, len);
915                         if (key.offset < bytenr)
916                                 break;
917                 }
918                 btrfs_release_path(path);
919         }
920         ret = 0;
921 out:
922         btrfs_free_path(path);
923         return ret;
924 }
925
926 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
927                            struct btrfs_root *root,
928                            struct btrfs_ordered_sum *sums)
929 {
930         struct btrfs_fs_info *fs_info = root->fs_info;
931         struct btrfs_key file_key;
932         struct btrfs_key found_key;
933         struct btrfs_path *path;
934         struct btrfs_csum_item *item;
935         struct btrfs_csum_item *item_end;
936         struct extent_buffer *leaf = NULL;
937         u64 next_offset;
938         u64 total_bytes = 0;
939         u64 csum_offset;
940         u64 bytenr;
941         u32 nritems;
942         u32 ins_size;
943         int index = 0;
944         int found_next;
945         int ret;
946         const u32 csum_size = fs_info->csum_size;
947
948         path = btrfs_alloc_path();
949         if (!path)
950                 return -ENOMEM;
951 again:
952         next_offset = (u64)-1;
953         found_next = 0;
954         bytenr = sums->bytenr + total_bytes;
955         file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
956         file_key.offset = bytenr;
957         file_key.type = BTRFS_EXTENT_CSUM_KEY;
958
959         item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
960         if (!IS_ERR(item)) {
961                 ret = 0;
962                 leaf = path->nodes[0];
963                 item_end = btrfs_item_ptr(leaf, path->slots[0],
964                                           struct btrfs_csum_item);
965                 item_end = (struct btrfs_csum_item *)((char *)item_end +
966                            btrfs_item_size_nr(leaf, path->slots[0]));
967                 goto found;
968         }
969         ret = PTR_ERR(item);
970         if (ret != -EFBIG && ret != -ENOENT)
971                 goto out;
972
973         if (ret == -EFBIG) {
974                 u32 item_size;
975                 /* we found one, but it isn't big enough yet */
976                 leaf = path->nodes[0];
977                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
978                 if ((item_size / csum_size) >=
979                     MAX_CSUM_ITEMS(fs_info, csum_size)) {
980                         /* already at max size, make a new one */
981                         goto insert;
982                 }
983         } else {
984                 int slot = path->slots[0] + 1;
985                 /* we didn't find a csum item, insert one */
986                 nritems = btrfs_header_nritems(path->nodes[0]);
987                 if (!nritems || (path->slots[0] >= nritems - 1)) {
988                         ret = btrfs_next_leaf(root, path);
989                         if (ret < 0) {
990                                 goto out;
991                         } else if (ret > 0) {
992                                 found_next = 1;
993                                 goto insert;
994                         }
995                         slot = path->slots[0];
996                 }
997                 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
998                 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
999                     found_key.type != BTRFS_EXTENT_CSUM_KEY) {
1000                         found_next = 1;
1001                         goto insert;
1002                 }
1003                 next_offset = found_key.offset;
1004                 found_next = 1;
1005                 goto insert;
1006         }
1007
1008         /*
1009          * At this point, we know the tree has a checksum item that ends at an
1010          * offset matching the start of the checksum range we want to insert.
1011          * We try to extend that item as much as possible and then add as many
1012          * checksums to it as they fit.
1013          *
1014          * First check if the leaf has enough free space for at least one
1015          * checksum. If it has go directly to the item extension code, otherwise
1016          * release the path and do a search for insertion before the extension.
1017          */
1018         if (btrfs_leaf_free_space(leaf) >= csum_size) {
1019                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1020                 csum_offset = (bytenr - found_key.offset) >>
1021                         fs_info->sectorsize_bits;
1022                 goto extend_csum;
1023         }
1024
1025         btrfs_release_path(path);
1026         path->search_for_extension = 1;
1027         ret = btrfs_search_slot(trans, root, &file_key, path,
1028                                 csum_size, 1);
1029         path->search_for_extension = 0;
1030         if (ret < 0)
1031                 goto out;
1032
1033         if (ret > 0) {
1034                 if (path->slots[0] == 0)
1035                         goto insert;
1036                 path->slots[0]--;
1037         }
1038
1039         leaf = path->nodes[0];
1040         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1041         csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;
1042
1043         if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
1044             found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1045             csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) {
1046                 goto insert;
1047         }
1048
1049 extend_csum:
1050         if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
1051             csum_size) {
1052                 int extend_nr;
1053                 u64 tmp;
1054                 u32 diff;
1055
1056                 tmp = sums->len - total_bytes;
1057                 tmp >>= fs_info->sectorsize_bits;
1058                 WARN_ON(tmp < 1);
1059
1060                 extend_nr = max_t(int, 1, (int)tmp);
1061                 diff = (csum_offset + extend_nr) * csum_size;
1062                 diff = min(diff,
1063                            MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
1064
1065                 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
1066                 diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
1067                 diff /= csum_size;
1068                 diff *= csum_size;
1069
1070                 btrfs_extend_item(path, diff);
1071                 ret = 0;
1072                 goto csum;
1073         }
1074
1075 insert:
1076         btrfs_release_path(path);
1077         csum_offset = 0;
1078         if (found_next) {
1079                 u64 tmp;
1080
1081                 tmp = sums->len - total_bytes;
1082                 tmp >>= fs_info->sectorsize_bits;
1083                 tmp = min(tmp, (next_offset - file_key.offset) >>
1084                                          fs_info->sectorsize_bits);
1085
1086                 tmp = max_t(u64, 1, tmp);
1087                 tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
1088                 ins_size = csum_size * tmp;
1089         } else {
1090                 ins_size = csum_size;
1091         }
1092         ret = btrfs_insert_empty_item(trans, root, path, &file_key,
1093                                       ins_size);
1094         if (ret < 0)
1095                 goto out;
1096         if (WARN_ON(ret != 0))
1097                 goto out;
1098         leaf = path->nodes[0];
1099 csum:
1100         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
1101         item_end = (struct btrfs_csum_item *)((unsigned char *)item +
1102                                       btrfs_item_size_nr(leaf, path->slots[0]));
1103         item = (struct btrfs_csum_item *)((unsigned char *)item +
1104                                           csum_offset * csum_size);
1105 found:
1106         ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
1107         ins_size *= csum_size;
1108         ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
1109                               ins_size);
1110         write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
1111                             ins_size);
1112
1113         index += ins_size;
1114         ins_size /= csum_size;
1115         total_bytes += ins_size * fs_info->sectorsize;
1116
1117         btrfs_mark_buffer_dirty(path->nodes[0]);
1118         if (total_bytes < sums->len) {
1119                 btrfs_release_path(path);
1120                 cond_resched();
1121                 goto again;
1122         }
1123 out:
1124         btrfs_free_path(path);
1125         return ret;
1126 }
1127
1128 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
1129                                      const struct btrfs_path *path,
1130                                      struct btrfs_file_extent_item *fi,
1131                                      const bool new_inline,
1132                                      struct extent_map *em)
1133 {
1134         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1135         struct btrfs_root *root = inode->root;
1136         struct extent_buffer *leaf = path->nodes[0];
1137         const int slot = path->slots[0];
1138         struct btrfs_key key;
1139         u64 extent_start, extent_end;
1140         u64 bytenr;
1141         u8 type = btrfs_file_extent_type(leaf, fi);
1142         int compress_type = btrfs_file_extent_compression(leaf, fi);
1143
1144         btrfs_item_key_to_cpu(leaf, &key, slot);
1145         extent_start = key.offset;
1146         extent_end = btrfs_file_extent_end(path);
1147         em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1148         if (type == BTRFS_FILE_EXTENT_REG ||
1149             type == BTRFS_FILE_EXTENT_PREALLOC) {
1150                 em->start = extent_start;
1151                 em->len = extent_end - extent_start;
1152                 em->orig_start = extent_start -
1153                         btrfs_file_extent_offset(leaf, fi);
1154                 em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
1155                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1156                 if (bytenr == 0) {
1157                         em->block_start = EXTENT_MAP_HOLE;
1158                         return;
1159                 }
1160                 if (compress_type != BTRFS_COMPRESS_NONE) {
1161                         set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
1162                         em->compress_type = compress_type;
1163                         em->block_start = bytenr;
1164                         em->block_len = em->orig_block_len;
1165                 } else {
1166                         bytenr += btrfs_file_extent_offset(leaf, fi);
1167                         em->block_start = bytenr;
1168                         em->block_len = em->len;
1169                         if (type == BTRFS_FILE_EXTENT_PREALLOC)
1170                                 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
1171                 }
1172         } else if (type == BTRFS_FILE_EXTENT_INLINE) {
1173                 em->block_start = EXTENT_MAP_INLINE;
1174                 em->start = extent_start;
1175                 em->len = extent_end - extent_start;
1176                 /*
1177                  * Initialize orig_start and block_len with the same values
1178                  * as in inode.c:btrfs_get_extent().
1179                  */
1180                 em->orig_start = EXTENT_MAP_HOLE;
1181                 em->block_len = (u64)-1;
1182                 if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
1183                         set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
1184                         em->compress_type = compress_type;
1185                 }
1186         } else {
1187                 btrfs_err(fs_info,
1188                           "unknown file extent item type %d, inode %llu, offset %llu, "
1189                           "root %llu", type, btrfs_ino(inode), extent_start,
1190                           root->root_key.objectid);
1191         }
1192 }
1193
1194 /*
1195  * Returns the end offset (non inclusive) of the file extent item the given path
1196  * points to. If it points to an inline extent, the returned offset is rounded
1197  * up to the sector size.
1198  */
1199 u64 btrfs_file_extent_end(const struct btrfs_path *path)
1200 {
1201         const struct extent_buffer *leaf = path->nodes[0];
1202         const int slot = path->slots[0];
1203         struct btrfs_file_extent_item *fi;
1204         struct btrfs_key key;
1205         u64 end;
1206
1207         btrfs_item_key_to_cpu(leaf, &key, slot);
1208         ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
1209         fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
1210
1211         if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
1212                 end = btrfs_file_extent_ram_bytes(leaf, fi);
1213                 end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
1214         } else {
1215                 end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
1216         }
1217
1218         return end;
1219 }