fs/btrfs/free-space-cache.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"

#define BITS_PER_BITMAP         (PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG SZ_64K
#define FORCE_EXTENT_THRESHOLD  SZ_1M

struct btrfs_trim_range {
        u64 start;
        u64 bytes;
        struct list_head list;
};

static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
                                struct btrfs_free_space *bitmap_info);
static int link_free_space(struct btrfs_free_space_ctl *ctl,
                           struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
                              struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_io_ctl *io_ctl,
                                    struct btrfs_path *path);

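/*
 * Look up the free space header item for @offset in the root tree, pull the
 * cache inode's key out of that header and iget the cache inode itself.
 */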
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
                                               struct btrfs_path *path,
                                               u64 offset)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
        struct btrfs_key location;
        struct btrfs_disk_key disk_key;
        struct btrfs_free_space_header *header;
        struct extent_buffer *leaf;
        struct inode *inode = NULL;
        unsigned nofs_flag;
        int ret;

        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
        key.offset = offset;
        key.type = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                return ERR_PTR(ret);
        if (ret > 0) {
                btrfs_release_path(path);
                return ERR_PTR(-ENOENT);
        }

        leaf = path->nodes[0];
        header = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_free_space_header);
        btrfs_free_space_key(leaf, header, &disk_key);
        btrfs_disk_key_to_cpu(&location, &disk_key);
        btrfs_release_path(path);

        /*
         * We are often under a trans handle at this point, so we need to make
         * sure NOFS is set to keep us from deadlocking.
         */
        nofs_flag = memalloc_nofs_save();
        inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
        btrfs_release_path(path);
        memalloc_nofs_restore(nofs_flag);
        if (IS_ERR(inode))
                return inode;

        mapping_set_gfp_mask(inode->i_mapping,
                        mapping_gfp_constraint(inode->i_mapping,
                        ~(__GFP_FS | __GFP_HIGHMEM)));

        return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
                struct btrfs_path *path)
{
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct inode *inode = NULL;
        u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

        spin_lock(&block_group->lock);
        if (block_group->inode)
                inode = igrab(block_group->inode);
        spin_unlock(&block_group->lock);
        if (inode)
                return inode;

        inode = __lookup_free_space_inode(fs_info->tree_root, path,
                                          block_group->start);
        if (IS_ERR(inode))
                return inode;

        spin_lock(&block_group->lock);
        if (!((BTRFS_I(inode)->flags & flags) == flags)) {
                btrfs_info(fs_info, "Old style space inode found, converting.");
                BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
                        BTRFS_INODE_NODATACOW;
                block_group->disk_cache_state = BTRFS_DC_CLEAR;
        }

        if (!block_group->iref) {
                block_group->inode = igrab(inode);
                block_group->iref = 1;
        }
        spin_unlock(&block_group->lock);

        return inode;
}

static int __create_free_space_inode(struct btrfs_root *root,
                                     struct btrfs_trans_handle *trans,
                                     struct btrfs_path *path,
                                     u64 ino, u64 offset)
{
        struct btrfs_key key;
        struct btrfs_disk_key disk_key;
        struct btrfs_free_space_header *header;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
        u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
        int ret;

        ret = btrfs_insert_empty_inode(trans, root, path, ino);
        if (ret)
                return ret;

        /* We inline crcs for the free disk space cache */
        if (ino != BTRFS_FREE_INO_OBJECTID)
                flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
        btrfs_item_key(leaf, &disk_key, path->slots[0]);
        memzero_extent_buffer(leaf, (unsigned long)inode_item,
                             sizeof(*inode_item));
        btrfs_set_inode_generation(leaf, inode_item, trans->transid);
        btrfs_set_inode_size(leaf, inode_item, 0);
        btrfs_set_inode_nbytes(leaf, inode_item, 0);
        btrfs_set_inode_uid(leaf, inode_item, 0);
        btrfs_set_inode_gid(leaf, inode_item, 0);
        btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
        btrfs_set_inode_flags(leaf, inode_item, flags);
        btrfs_set_inode_nlink(leaf, inode_item, 1);
        btrfs_set_inode_transid(leaf, inode_item, trans->transid);
        btrfs_set_inode_block_group(leaf, inode_item, offset);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);

        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
        key.offset = offset;
        key.type = 0;
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(struct btrfs_free_space_header));
        if (ret < 0) {
                btrfs_release_path(path);
                return ret;
        }

        leaf = path->nodes[0];
        header = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_free_space_header);
        memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
        btrfs_set_free_space_key(leaf, header, &disk_key);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);

        return 0;
}

int create_free_space_inode(struct btrfs_trans_handle *trans,
                            struct btrfs_block_group *block_group,
                            struct btrfs_path *path)
{
        int ret;
        u64 ino;

        ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
        if (ret < 0)
                return ret;

        return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
                                         ino, block_group->start);
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
                                       struct btrfs_block_rsv *rsv)
{
        u64 needed_bytes;
        int ret;

        /* 1 for slack space, 1 for updating the inode */
        needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
                btrfs_calc_metadata_size(fs_info, 1);

        spin_lock(&rsv->lock);
        if (rsv->reserved < needed_bytes)
                ret = -ENOSPC;
        else
                ret = 0;
        spin_unlock(&rsv->lock);
        return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
                                    struct btrfs_block_group *block_group,
                                    struct inode *inode)
{
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
        bool locked = false;

        if (block_group) {
                struct btrfs_path *path = btrfs_alloc_path();

                if (!path) {
                        ret = -ENOMEM;
                        goto fail;
                }
                locked = true;
                mutex_lock(&trans->transaction->cache_write_mutex);
                if (!list_empty(&block_group->io_list)) {
                        list_del_init(&block_group->io_list);

                        btrfs_wait_cache_io(trans, block_group, path);
                        btrfs_put_block_group(block_group);
                }

                /*
                 * Now that we've truncated the cache away, it's no longer
                 * set up or written.
                 */
                spin_lock(&block_group->lock);
                block_group->disk_cache_state = BTRFS_DC_CLEAR;
                spin_unlock(&block_group->lock);
                btrfs_free_path(path);
        }

        btrfs_i_size_write(BTRFS_I(inode), 0);
        truncate_pagecache(inode, 0);

        /*
         * We skip the throttling logic for free space cache inodes, so we don't
         * need to check for -EAGAIN.
         */
        ret = btrfs_truncate_inode_items(trans, root, inode,
                                         0, BTRFS_EXTENT_DATA_KEY);
        if (ret)
                goto fail;

        ret = btrfs_update_inode(trans, root, inode);

fail:
        if (locked)
                mutex_unlock(&trans->transaction->cache_write_mutex);
        if (ret)
                btrfs_abort_transaction(trans, ret);

        return ret;
}

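/*
 * Kick off readahead for the whole cache file so its pages are likely to
 * already be in the page cache by the time we walk them.
 */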
static void readahead_cache(struct inode *inode)
{
        struct file_ra_state *ra;
        unsigned long last_index;

        ra = kzalloc(sizeof(*ra), GFP_NOFS);
        if (!ra)
                return;

        file_ra_state_init(ra, inode->i_mapping);
        last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

        page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

        kfree(ra);
}

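/*
 * Size the io_ctl for the cache file: one page pointer per page of the
 * inode, with inline crcs enabled for everything except the free inode
 * cache.  On writes the crc array plus the generation must fit into the
 * first page.
 */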
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
                       int write)
{
        int num_pages;
        int check_crcs = 0;

        num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

        if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
                check_crcs = 1;

        /* Make sure we can fit our crcs and generation into the first page */
        if (write && check_crcs &&
            (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
                return -ENOSPC;

        memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

        io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
        if (!io_ctl->pages)
                return -ENOMEM;

        io_ctl->num_pages = num_pages;
        io_ctl->fs_info = btrfs_sb(inode->i_sb);
        io_ctl->check_crcs = check_crcs;
        io_ctl->inode = inode;

        return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
        kfree(io_ctl->pages);
        io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
        if (io_ctl->cur) {
                io_ctl->cur = NULL;
                io_ctl->orig = NULL;
        }
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
        ASSERT(io_ctl->index < io_ctl->num_pages);
        io_ctl->page = io_ctl->pages[io_ctl->index++];
        io_ctl->cur = page_address(io_ctl->page);
        io_ctl->orig = io_ctl->cur;
        io_ctl->size = PAGE_SIZE;
        if (clear)
                clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
        int i;

        io_ctl_unmap_page(io_ctl);

        for (i = 0; i < io_ctl->num_pages; i++) {
                if (io_ctl->pages[i]) {
                        ClearPageChecked(io_ctl->pages[i]);
                        unlock_page(io_ctl->pages[i]);
                        put_page(io_ctl->pages[i]);
                }
        }
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
{
        struct page *page;
        struct inode *inode = io_ctl->inode;
        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
        int i;

        for (i = 0; i < io_ctl->num_pages; i++) {
                page = find_or_create_page(inode->i_mapping, i, mask);
                if (!page) {
                        io_ctl_drop_pages(io_ctl);
                        return -ENOMEM;
                }
                io_ctl->pages[i] = page;
                if (uptodate && !PageUptodate(page)) {
                        btrfs_readpage(NULL, page);
                        lock_page(page);
                        if (page->mapping != inode->i_mapping) {
                                btrfs_err(BTRFS_I(inode)->root->fs_info,
                                          "free space cache page truncated");
                                io_ctl_drop_pages(io_ctl);
                                return -EIO;
                        }
                        if (!PageUptodate(page)) {
                                btrfs_err(BTRFS_I(inode)->root->fs_info,
                                          "error reading free space cache");
                                io_ctl_drop_pages(io_ctl);
                                return -EIO;
                        }
                }
        }

        for (i = 0; i < io_ctl->num_pages; i++) {
                clear_page_dirty_for_io(io_ctl->pages[i]);
                set_page_extent_mapped(io_ctl->pages[i]);
        }

        return 0;
}

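/*
 * Layout of the first page of the cache file: the crc of each page,
 * followed by the generation, followed by the free space entries:
 *
 *   [u32 crc] * num_pages | u64 generation | entries ...
 *
 * Without crcs a single 64bit chunk takes the place of the crc array.
 */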
static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
        io_ctl_map_page(io_ctl, 1);

        /*
         * Skip the csum areas.  If we don't check crcs then we just have a
         * 64bit chunk at the front of the first page.
         */
        if (io_ctl->check_crcs) {
                io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
                io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
        } else {
                io_ctl->cur += sizeof(u64);
                io_ctl->size -= sizeof(u64) * 2;
        }

        put_unaligned_le64(generation, io_ctl->cur);
        io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
        u64 cache_gen;

        /*
         * Skip the crc area.  If we don't check crcs then we just have a 64bit
         * chunk at the front of the first page.
         */
        if (io_ctl->check_crcs) {
                io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
                io_ctl->size -= sizeof(u64) +
                        (sizeof(u32) * io_ctl->num_pages);
        } else {
                io_ctl->cur += sizeof(u64);
                io_ctl->size -= sizeof(u64) * 2;
        }

        cache_gen = get_unaligned_le64(io_ctl->cur);
        if (cache_gen != generation) {
                btrfs_err_rl(io_ctl->fs_info,
                        "space cache generation (%llu) does not match inode (%llu)",
                                cache_gen, generation);
                io_ctl_unmap_page(io_ctl);
                return -EIO;
        }
        io_ctl->cur += sizeof(u64);
        return 0;
}

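/*
 * Crc the page that was just filled (skipping the crc array itself on the
 * first page) and store the result in slot 'index' of the crc array at the
 * front of the first page.
 */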
static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
        u32 *tmp;
        u32 crc = ~(u32)0;
        unsigned offset = 0;

        if (!io_ctl->check_crcs) {
                io_ctl_unmap_page(io_ctl);
                return;
        }

        if (index == 0)
                offset = sizeof(u32) * io_ctl->num_pages;

        crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
        btrfs_crc32c_final(crc, (u8 *)&crc);
        io_ctl_unmap_page(io_ctl);
        tmp = page_address(io_ctl->pages[0]);
        tmp += index;
        *tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
        u32 *tmp, val;
        u32 crc = ~(u32)0;
        unsigned offset = 0;

        if (!io_ctl->check_crcs) {
                io_ctl_map_page(io_ctl, 0);
                return 0;
        }

        if (index == 0)
                offset = sizeof(u32) * io_ctl->num_pages;

        tmp = page_address(io_ctl->pages[0]);
        tmp += index;
        val = *tmp;

        io_ctl_map_page(io_ctl, 0);
        crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
        btrfs_crc32c_final(crc, (u8 *)&crc);
        if (val != crc) {
                btrfs_err_rl(io_ctl->fs_info,
                        "csum mismatch on free space cache");
                io_ctl_unmap_page(io_ctl);
                return -EIO;
        }

        return 0;
}

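/*
 * Write one extent/bitmap entry into the stream, crc'ing the current page
 * and advancing to the next one when there is no room left for another
 * entry.
 */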
static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
                            void *bitmap)
{
        struct btrfs_free_space_entry *entry;

        if (!io_ctl->cur)
                return -ENOSPC;

        entry = io_ctl->cur;
        put_unaligned_le64(offset, &entry->offset);
        put_unaligned_le64(bytes, &entry->bytes);
        entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
                BTRFS_FREE_SPACE_EXTENT;
        io_ctl->cur += sizeof(struct btrfs_free_space_entry);
        io_ctl->size -= sizeof(struct btrfs_free_space_entry);

        if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
                return 0;

        io_ctl_set_crc(io_ctl, io_ctl->index - 1);

        /* No more pages to map */
        if (io_ctl->index >= io_ctl->num_pages)
                return 0;

        /* map the next page */
        io_ctl_map_page(io_ctl, 1);
        return 0;
}

static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
        if (!io_ctl->cur)
                return -ENOSPC;

        /*
         * If we aren't at the start of the current page, unmap this one and
         * map the next one if there is any left.
         */
        if (io_ctl->cur != io_ctl->orig) {
                io_ctl_set_crc(io_ctl, io_ctl->index - 1);
                if (io_ctl->index >= io_ctl->num_pages)
                        return -ENOSPC;
                io_ctl_map_page(io_ctl, 0);
        }

        copy_page(io_ctl->cur, bitmap);
        io_ctl_set_crc(io_ctl, io_ctl->index - 1);
        if (io_ctl->index < io_ctl->num_pages)
                io_ctl_map_page(io_ctl, 0);
        return 0;
}

static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
        /*
         * If we're not on the boundary we know we've modified the page and we
         * need to crc the page.
         */
        if (io_ctl->cur != io_ctl->orig)
                io_ctl_set_crc(io_ctl, io_ctl->index - 1);
        else
                io_ctl_unmap_page(io_ctl);

        while (io_ctl->index < io_ctl->num_pages) {
                io_ctl_map_page(io_ctl, 1);
                io_ctl_set_crc(io_ctl, io_ctl->index - 1);
        }
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
                            struct btrfs_free_space *entry, u8 *type)
{
        struct btrfs_free_space_entry *e;
        int ret;

        if (!io_ctl->cur) {
                ret = io_ctl_check_crc(io_ctl, io_ctl->index);
                if (ret)
                        return ret;
        }

        e = io_ctl->cur;
        entry->offset = get_unaligned_le64(&e->offset);
        entry->bytes = get_unaligned_le64(&e->bytes);
        *type = e->type;
        io_ctl->cur += sizeof(struct btrfs_free_space_entry);
        io_ctl->size -= sizeof(struct btrfs_free_space_entry);

        if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
                return 0;

        io_ctl_unmap_page(io_ctl);

        return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
                              struct btrfs_free_space *entry)
{
        int ret;

        ret = io_ctl_check_crc(io_ctl, io_ctl->index);
        if (ret)
                return ret;

        copy_page(entry->bitmap, io_ctl->cur);
        io_ctl_unmap_page(io_ctl);

        return 0;
}

/*
 * Since we attach pinned extents after the fact, we can end up with
 * contiguous sections of free space that are split up in entries.  This
 * poses a problem for tree logging: it may have allocated across what now
 * appears to be two separate entries, because the entries would have been
 * merged when the pinned extents were added back to the free space cache.
 * So run through the space cache we just loaded and merge contiguous
 * entries.  This keeps log replay from blowing up and makes for nicer
 * allocator behavior.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
        struct btrfs_free_space *e, *prev = NULL;
        struct rb_node *n;

again:
        spin_lock(&ctl->tree_lock);
        for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
                e = rb_entry(n, struct btrfs_free_space, offset_index);
                if (!prev)
                        goto next;
                if (e->bitmap || prev->bitmap)
                        goto next;
                if (prev->offset + prev->bytes == e->offset) {
                        unlink_free_space(ctl, prev);
                        unlink_free_space(ctl, e);
                        prev->bytes += e->bytes;
                        kmem_cache_free(btrfs_free_space_cachep, e);
                        link_free_space(ctl, prev);
                        prev = NULL;
                        spin_unlock(&ctl->tree_lock);
                        goto again;
                }
next:
                prev = e;
        }
        spin_unlock(&ctl->tree_lock);
}

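/*
 * Load a free space cache inode: validate the generation against the free
 * space header, then parse all extent and bitmap entries out of the cache
 * pages into @ctl.  Returns 1 if the cache was loaded, 0 if there was
 * nothing usable to load, and a negative value on error.
 */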
static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                   struct btrfs_free_space_ctl *ctl,
                                   struct btrfs_path *path, u64 offset)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_header *header;
        struct extent_buffer *leaf;
        struct btrfs_io_ctl io_ctl;
        struct btrfs_key key;
        struct btrfs_free_space *e, *n;
        LIST_HEAD(bitmaps);
        u64 num_entries;
        u64 num_bitmaps;
        u64 generation;
        u8 type;
        int ret = 0;

        /* Nothing in the space cache, goodbye */
        if (!i_size_read(inode))
                return 0;

        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
        key.offset = offset;
        key.type = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                return 0;
        else if (ret > 0) {
                btrfs_release_path(path);
                return 0;
        }

        ret = -1;

        leaf = path->nodes[0];
        header = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_free_space_header);
        num_entries = btrfs_free_space_entries(leaf, header);
        num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
        generation = btrfs_free_space_generation(leaf, header);
        btrfs_release_path(path);

        if (!BTRFS_I(inode)->generation) {
                btrfs_info(fs_info,
                           "the free space cache file (%llu) is invalid, skip it",
                           offset);
                return 0;
        }

        if (BTRFS_I(inode)->generation != generation) {
                btrfs_err(fs_info,
                          "free space inode generation (%llu) did not match free space cache generation (%llu)",
                          BTRFS_I(inode)->generation, generation);
                return 0;
        }

        if (!num_entries)
                return 0;

        ret = io_ctl_init(&io_ctl, inode, 0);
        if (ret)
                return ret;

        readahead_cache(inode);

        ret = io_ctl_prepare_pages(&io_ctl, true);
        if (ret)
                goto out;

        ret = io_ctl_check_crc(&io_ctl, 0);
        if (ret)
                goto free_cache;

        ret = io_ctl_check_generation(&io_ctl, generation);
        if (ret)
                goto free_cache;

        while (num_entries) {
                e = kmem_cache_zalloc(btrfs_free_space_cachep,
                                      GFP_NOFS);
                if (!e)
                        goto free_cache;

                ret = io_ctl_read_entry(&io_ctl, e, &type);
                if (ret) {
                        kmem_cache_free(btrfs_free_space_cachep, e);
                        goto free_cache;
                }

                /*
                 * Sync discard ensures that the free space cache is always
                 * trimmed.  So when reading this in, the state should reflect
                 * that.  We also do this for async as a stop gap for lack of
                 * persistence.
                 */
                if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
                    btrfs_test_opt(fs_info, DISCARD_ASYNC))
                        e->trim_state = BTRFS_TRIM_STATE_TRIMMED;

                if (!e->bytes) {
                        kmem_cache_free(btrfs_free_space_cachep, e);
                        goto free_cache;
                }

                if (type == BTRFS_FREE_SPACE_EXTENT) {
                        spin_lock(&ctl->tree_lock);
                        ret = link_free_space(ctl, e);
                        spin_unlock(&ctl->tree_lock);
                        if (ret) {
                                btrfs_err(fs_info,
                                        "Duplicate entries in free space cache, dumping");
                                kmem_cache_free(btrfs_free_space_cachep, e);
                                goto free_cache;
                        }
                } else {
                        ASSERT(num_bitmaps);
                        num_bitmaps--;
                        e->bitmap = kmem_cache_zalloc(
                                        btrfs_free_space_bitmap_cachep, GFP_NOFS);
                        if (!e->bitmap) {
                                kmem_cache_free(
                                        btrfs_free_space_cachep, e);
                                goto free_cache;
                        }
                        spin_lock(&ctl->tree_lock);
                        ret = link_free_space(ctl, e);
                        ctl->total_bitmaps++;
                        ctl->op->recalc_thresholds(ctl);
                        spin_unlock(&ctl->tree_lock);
                        if (ret) {
                                btrfs_err(fs_info,
                                        "Duplicate entries in free space cache, dumping");
                                kmem_cache_free(btrfs_free_space_cachep, e);
                                goto free_cache;
                        }
                        list_add_tail(&e->list, &bitmaps);
                }

                num_entries--;
        }

        io_ctl_unmap_page(&io_ctl);

        /*
         * The bitmaps were written out after all the entries, so read
         * them back in the same order to attach each bitmap to its entry.
         */
        list_for_each_entry_safe(e, n, &bitmaps, list) {
                list_del_init(&e->list);
                ret = io_ctl_read_bitmap(&io_ctl, e);
                if (ret)
                        goto free_cache;
                e->bitmap_extents = count_bitmap_extents(ctl, e);
                if (!btrfs_free_space_trimmed(e)) {
                        ctl->discardable_extents[BTRFS_STAT_CURR] +=
                                e->bitmap_extents;
                        ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
                }
        }

        io_ctl_drop_pages(&io_ctl);
        merge_space_tree(ctl);
        ret = 1;
out:
        btrfs_discard_update_discardable(ctl->private, ctl);
        io_ctl_free(&io_ctl);
        return ret;
free_cache:
        io_ctl_drop_pages(&io_ctl);
        __btrfs_remove_free_space_cache(ctl);
        goto out;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct inode *inode;
        struct btrfs_path *path;
        int ret = 0;
        bool matched;
        u64 used = block_group->used;

        /*
         * If this block group has been marked to be cleared for one reason or
         * another then we can't trust the on disk cache, so just return.
         */
        spin_lock(&block_group->lock);
        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
                spin_unlock(&block_group->lock);
                return 0;
        }
        spin_unlock(&block_group->lock);

        path = btrfs_alloc_path();
        if (!path)
                return 0;
        path->search_commit_root = 1;
        path->skip_locking = 1;

        /*
         * We must pass a path with search_commit_root set to btrfs_iget in
         * order to avoid a deadlock when allocating extents for the tree root.
         *
         * When we are COWing an extent buffer from the tree root, when looking
         * for a free extent, at extent-tree.c:find_free_extent(), we can find a
         * block group without its free space cache loaded. When we find one
         * we must load its space cache which requires reading its free space
         * cache's inode item from the root tree. If this inode item is located
         * in the same leaf that we started COWing before, then we end up in
         * deadlock on the extent buffer (trying to read lock it when we
         * previously write locked it).
         *
         * It's safe to read the inode item using the commit root because
         * block groups, once loaded, stay in memory forever (until they are
         * removed) as well as their space caches once loaded. New block groups
         * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
         * we will never try to read their inode item while the fs is mounted.
         */
        inode = lookup_free_space_inode(block_group, path);
        if (IS_ERR(inode)) {
                btrfs_free_path(path);
                return 0;
        }

        /* We may have converted the inode and made the cache invalid. */
        spin_lock(&block_group->lock);
        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
                spin_unlock(&block_group->lock);
                btrfs_free_path(path);
                goto out;
        }
        spin_unlock(&block_group->lock);

        ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
                                      path, block_group->start);
        btrfs_free_path(path);
        if (ret <= 0)
                goto out;

        spin_lock(&ctl->tree_lock);
        matched = (ctl->free_space == (block_group->length - used -
                                       block_group->bytes_super));
        spin_unlock(&ctl->tree_lock);

        if (!matched) {
                __btrfs_remove_free_space_cache(ctl);
                btrfs_warn(fs_info,
                           "block group %llu has wrong amount of free space",
                           block_group->start);
                ret = -1;
        }
out:
        if (ret < 0) {
                /* This cache is bogus, make sure it gets cleared */
                spin_lock(&block_group->lock);
                block_group->disk_cache_state = BTRFS_DC_CLEAR;
                spin_unlock(&block_group->lock);
                ret = 0;

                btrfs_warn(fs_info,
                           "failed to load free space cache for block group %llu, rebuilding it now",
                           block_group->start);
        }

        iput(inode);
        return ret;
}

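/*
 * Write out all the extent entries in the free space tree, including any
 * entries that live in a cluster's rbtree, and collect the bitmap entries
 * on @bitmap_list so the bitmap pages can be written after the extents.
 * Ranges that are currently being trimmed are written out as well so they
 * aren't lost across a remount.
 */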
static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
                               struct btrfs_free_space_ctl *ctl,
                               struct btrfs_block_group *block_group,
                               int *entries, int *bitmaps,
                               struct list_head *bitmap_list)
{
        int ret;
        struct btrfs_free_cluster *cluster = NULL;
        struct btrfs_free_cluster *cluster_locked = NULL;
        struct rb_node *node = rb_first(&ctl->free_space_offset);
        struct btrfs_trim_range *trim_entry;

        /* Get the cluster for this block_group if it exists */
        if (block_group && !list_empty(&block_group->cluster_list)) {
                cluster = list_entry(block_group->cluster_list.next,
                                     struct btrfs_free_cluster,
                                     block_group_list);
        }

        if (!node && cluster) {
                cluster_locked = cluster;
                spin_lock(&cluster_locked->lock);
                node = rb_first(&cluster->root);
                cluster = NULL;
        }

        /* Write out the extent entries */
        while (node) {
                struct btrfs_free_space *e;

                e = rb_entry(node, struct btrfs_free_space, offset_index);
                *entries += 1;

                ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
                                       e->bitmap);
                if (ret)
                        goto fail;

                if (e->bitmap) {
                        list_add_tail(&e->list, bitmap_list);
                        *bitmaps += 1;
                }
                node = rb_next(node);
                if (!node && cluster) {
                        node = rb_first(&cluster->root);
                        cluster_locked = cluster;
                        spin_lock(&cluster_locked->lock);
                        cluster = NULL;
                }
        }
        if (cluster_locked) {
                spin_unlock(&cluster_locked->lock);
                cluster_locked = NULL;
        }

        /*
         * Make sure we don't miss any range that was removed from our rbtree
         * because trimming is running. Otherwise after a umount+mount (or crash
         * after committing the transaction) we would leak free space and get
         * an inconsistent free space cache report from fsck.
         */
        list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
                ret = io_ctl_add_entry(io_ctl, trim_entry->start,
                                       trim_entry->bytes, NULL);
                if (ret)
                        goto fail;
                *entries += 1;
        }

        return 0;
fail:
        if (cluster_locked)
                spin_unlock(&cluster_locked->lock);
        return -ENOSPC;
}

static noinline_for_stack int
update_cache_item(struct btrfs_trans_handle *trans,
                  struct btrfs_root *root,
                  struct inode *inode,
                  struct btrfs_path *path, u64 offset,
                  int entries, int bitmaps)
{
        struct btrfs_key key;
        struct btrfs_free_space_header *header;
        struct extent_buffer *leaf;
        int ret;

        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
        key.offset = offset;
        key.type = 0;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret < 0) {
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
                                 EXTENT_DELALLOC, 0, 0, NULL);
                goto fail;
        }
        leaf = path->nodes[0];
        if (ret > 0) {
                struct btrfs_key found_key;
                ASSERT(path->slots[0]);
                path->slots[0]--;
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
                    found_key.offset != offset) {
                        clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
                                         inode->i_size - 1, EXTENT_DELALLOC, 0,
                                         0, NULL);
                        btrfs_release_path(path);
                        goto fail;
                }
        }

        BTRFS_I(inode)->generation = trans->transid;
        header = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_free_space_header);
        btrfs_set_free_space_entries(leaf, header, entries);
        btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
        btrfs_set_free_space_generation(leaf, header, trans->transid);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);

        return 0;

fail:
        return -1;
}

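/*
 * Walk the pinned extents within this block group's range and write each
 * one out as an extent entry, since that space becomes free again once the
 * transaction commits.
 */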
static noinline_for_stack int write_pinned_extent_entries(
                            struct btrfs_trans_handle *trans,
                            struct btrfs_block_group *block_group,
                            struct btrfs_io_ctl *io_ctl,
                            int *entries)
{
        u64 start, extent_start, extent_end, len;
        struct extent_io_tree *unpin = NULL;
        int ret;

        if (!block_group)
                return 0;

        /*
         * We want to add any pinned extents to our free space cache
         * so we don't leak the space
         *
         * We shouldn't have switched the pinned extents yet so this is the
         * right one
         */
        unpin = &trans->transaction->pinned_extents;

        start = block_group->start;

        while (start < block_group->start + block_group->length) {
                ret = find_first_extent_bit(unpin, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY, NULL);
                if (ret)
                        return 0;

                /* This pinned extent is out of our range */
                if (extent_start >= block_group->start + block_group->length)
                        return 0;

                extent_start = max(extent_start, start);
                extent_end = min(block_group->start + block_group->length,
                                 extent_end + 1);
                len = extent_end - extent_start;

                *entries += 1;
                ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
                if (ret)
                        return -ENOSPC;

                start = extent_end;
        }

        return 0;
}

static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
        struct btrfs_free_space *entry, *next;
        int ret;

        /* Write out the bitmaps */
        list_for_each_entry_safe(entry, next, bitmap_list, list) {
                ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
                if (ret)
                        return -ENOSPC;
                list_del_init(&entry->list);
        }

        return 0;
}

static int flush_dirty_cache(struct inode *inode)
{
        int ret;

        ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
        if (ret)
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
                                 EXTENT_DELALLOC, 0, 0, NULL);

        return ret;
}

static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
        struct btrfs_free_space *entry, *next;

        list_for_each_entry_safe(entry, next, bitmap_list, list)
                list_del_init(&entry->list);
}

static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
                           struct btrfs_io_ctl *io_ctl,
                           struct extent_state **cached_state)
{
        io_ctl_drop_pages(io_ctl);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, cached_state);
}

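/*
 * Wait for the cache file writeback started by __btrfs_write_out_cache,
 * update the free space header item so the cache is seen as valid, then
 * update the block group's disk_cache_state to match the result.
 */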
static int __btrfs_wait_cache_io(struct btrfs_root *root,
                                 struct btrfs_trans_handle *trans,
                                 struct btrfs_block_group *block_group,
                                 struct btrfs_io_ctl *io_ctl,
                                 struct btrfs_path *path, u64 offset)
{
        int ret;
        struct inode *inode = io_ctl->inode;

        if (!inode)
                return 0;

        /* Flush the dirty pages in the cache file. */
        ret = flush_dirty_cache(inode);
        if (ret)
                goto out;

        /* Update the cache item to tell everyone this cache file is valid. */
        ret = update_cache_item(trans, root, inode, path, offset,
                                io_ctl->entries, io_ctl->bitmaps);
out:
        if (ret) {
                invalidate_inode_pages2(inode->i_mapping);
                BTRFS_I(inode)->generation = 0;
                if (block_group)
                        btrfs_debug(root->fs_info,
          "failed to write free space cache for block group %llu error %d",
                                  block_group->start, ret);
        }
        btrfs_update_inode(trans, root, inode);

        if (block_group) {
                /* the dirty list is protected by the dirty_bgs_lock */
                spin_lock(&trans->transaction->dirty_bgs_lock);

                /* the disk_cache_state is protected by the block group lock */
                spin_lock(&block_group->lock);

                /*
                 * Only mark this as written if we didn't get put back on
                 * the dirty list while waiting for IO.  Otherwise our
                 * cache state won't be right, and we won't get written again.
                 */
                if (!ret && list_empty(&block_group->dirty_list))
                        block_group->disk_cache_state = BTRFS_DC_WRITTEN;
                else if (ret)
                        block_group->disk_cache_state = BTRFS_DC_ERROR;

                spin_unlock(&block_group->lock);
                spin_unlock(&trans->transaction->dirty_bgs_lock);
                io_ctl->inode = NULL;
                iput(inode);
        }

        return ret;
}

static int btrfs_wait_cache_io_root(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_io_ctl *io_ctl,
                                    struct btrfs_path *path)
{
        return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
}

int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
                        struct btrfs_block_group *block_group,
                        struct btrfs_path *path)
{
        return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
                                     block_group, &block_group->io_ctl,
                                     path, block_group->start);
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root: the root the inode belongs to
 * @inode: the free space inode we are writing out
 * @ctl: the free space cache we are going to write out
 * @block_group: the block_group for this cache if it belongs to a block_group
 * @io_ctl: holds context for the io
 * @trans: the trans handle
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                   struct btrfs_free_space_ctl *ctl,
                                   struct btrfs_block_group *block_group,
                                   struct btrfs_io_ctl *io_ctl,
                                   struct btrfs_trans_handle *trans)
{
        struct extent_state *cached_state = NULL;
        LIST_HEAD(bitmap_list);
        int entries = 0;
        int bitmaps = 0;
        int ret;
        int must_iput = 0;

        if (!i_size_read(inode))
                return -EIO;

        WARN_ON(io_ctl->pages);
        ret = io_ctl_init(io_ctl, inode, 1);
        if (ret)
                return ret;

        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
                down_write(&block_group->data_rwsem);
                spin_lock(&block_group->lock);
                if (block_group->delalloc_bytes) {
                        block_group->disk_cache_state = BTRFS_DC_WRITTEN;
                        spin_unlock(&block_group->lock);
                        up_write(&block_group->data_rwsem);
                        BTRFS_I(inode)->generation = 0;
                        ret = 0;
                        must_iput = 1;
                        goto out;
                }
                spin_unlock(&block_group->lock);
        }

        /* Lock all pages first so we can lock the extent safely. */
        ret = io_ctl_prepare_pages(io_ctl, false);
        if (ret)
                goto out_unlock;

        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
                         &cached_state);

        io_ctl_set_generation(io_ctl, trans->transid);

        mutex_lock(&ctl->cache_writeout_mutex);
        /* Write out the extent entries in the free space cache */
        spin_lock(&ctl->tree_lock);
        ret = write_cache_extent_entries(io_ctl, ctl,
                                         block_group, &entries, &bitmaps,
                                         &bitmap_list);
        if (ret)
                goto out_nospc_locked;

        /*
         * Some spaces that are freed in the current transaction are pinned,
         * they will be added into free space cache after the transaction is
         * committed, we shouldn't lose them.
         *
         * If this changes while we are working we'll get added back to
         * the dirty list and redo it.  No locking needed
         */
        ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
        if (ret)
                goto out_nospc_locked;

        /*
         * At last, we write out all the bitmaps and keep cache_writeout_mutex
         * locked while doing it because a concurrent trim can be manipulating
         * or freeing the bitmap.
         */
        ret = write_bitmap_entries(io_ctl, &bitmap_list);
        spin_unlock(&ctl->tree_lock);
        mutex_unlock(&ctl->cache_writeout_mutex);
        if (ret)
                goto out_nospc;

        /* Zero out the rest of the pages just to make sure */
        io_ctl_zero_remaining_pages(io_ctl);

        /* Everything is written out, now we dirty the pages in the file. */
        ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
                                io_ctl->num_pages, 0, i_size_read(inode),
                                &cached_state);
        if (ret)
                goto out_nospc;

        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                up_write(&block_group->data_rwsem);
        /*
         * Release the pages and unlock the extent, we will flush
         * them out later
         */
        io_ctl_drop_pages(io_ctl);
        io_ctl_free(io_ctl);

        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, &cached_state);

        /*
         * At this point the pages are under IO and we're happy.
         * The caller is responsible for waiting on them and updating
         * the cache and the inode.
         */
        io_ctl->entries = entries;
        io_ctl->bitmaps = bitmaps;

        ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
        if (ret)
                goto out;

        return 0;

out_nospc_locked:
        cleanup_bitmap_list(&bitmap_list);
        spin_unlock(&ctl->tree_lock);
        mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
        cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                up_write(&block_group->data_rwsem);

out:
        io_ctl->inode = NULL;
        io_ctl_free(io_ctl);
        if (ret) {
                invalidate_inode_pages2(inode->i_mapping);
                BTRFS_I(inode)->generation = 0;
        }
        btrfs_update_inode(trans, root, inode);
        if (must_iput)
                iput(inode);
        return ret;
}

int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
                          struct btrfs_block_group *block_group,
                          struct btrfs_path *path)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct inode *inode;
        int ret = 0;

        spin_lock(&block_group->lock);
        if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
                spin_unlock(&block_group->lock);
                return 0;
        }
        spin_unlock(&block_group->lock);

        inode = lookup_free_space_inode(block_group, path);
        if (IS_ERR(inode))
                return 0;

        ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
                                block_group, &block_group->io_ctl, trans);
        if (ret) {
                btrfs_debug(fs_info,
          "failed to write free space cache for block group %llu error %d",
                          block_group->start, ret);
                spin_lock(&block_group->lock);
                block_group->disk_cache_state = BTRFS_DC_ERROR;
                spin_unlock(&block_group->lock);

                block_group->io_ctl.inode = NULL;
                iput(inode);
        }

        /*
         * if ret == 0 the caller is expected to call btrfs_wait_cache_io
         * to wait for IO and put the inode
         */

        return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
                                          u64 offset)
{
        ASSERT(offset >= bitmap_start);
        offset -= bitmap_start;
        return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
        return (unsigned long)(div_u64(bytes, unit));
}

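/*
 * Round @offset down to the start of the bitmap that covers it.  Each
 * bitmap covers BITS_PER_BITMAP * ctl->unit bytes, aligned relative to
 * ctl->start.
 */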
static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
                                   u64 offset)
{
        u64 bitmap_start;
        u64 bytes_per_bitmap;

        bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
        bitmap_start = offset - ctl->start;
        bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
        bitmap_start *= bytes_per_bitmap;
        bitmap_start += ctl->start;

        return bitmap_start;
}

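/*
 * Insert a free space entry into the offset-sorted rbtree.  An extent
 * entry and a bitmap entry may share an offset; in that case the extent
 * entry sorts before the bitmap entry (see the comment below).
 */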
static int tree_insert_offset(struct rb_root *root, u64 offset,
                              struct rb_node *node, int bitmap)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct btrfs_free_space *info;

        while (*p) {
                parent = *p;
                info = rb_entry(parent, struct btrfs_free_space, offset_index);

                if (offset < info->offset) {
                        p = &(*p)->rb_left;
                } else if (offset > info->offset) {
                        p = &(*p)->rb_right;
                } else {
                        /*
                         * we could have a bitmap entry and an extent entry
                         * share the same offset.  If this is the case, we want
                         * the extent entry to always be found first if we do a
                         * linear search through the tree, since we want to have
                         * the quickest allocation time, and allocating from an
                         * extent is faster than allocating from a bitmap.  So
                         * if we're inserting a bitmap and we find an entry at
                         * this offset, we want to go right, or after this entry
                         * logically.  If we are inserting an extent and we've
                         * found a bitmap, we want to go left, or before
                         * logically.
                         */
                        if (bitmap) {
                                if (info->bitmap) {
                                        WARN_ON_ONCE(1);
                                        return -EEXIST;
                                }
                                p = &(*p)->rb_right;
                        } else {
                                if (!info->bitmap) {
                                        WARN_ON_ONCE(1);
                                        return -EEXIST;
                                }
                                p = &(*p)->rb_left;
                        }
                }
        }

        rb_link_node(node, parent, p);
        rb_insert_color(node, root);

        return 0;
}

1511 /*
1512  * searches the tree for the given offset.
1513  *
1514  * fuzzy - If this is set, then we are trying to make an allocation, and we
1515  * just want a section that is at least @bytes in size and comes at or after
1516  * the given offset.
1517  */
1518 static struct btrfs_free_space *
1519 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1520                    u64 offset, int bitmap_only, int fuzzy)
1521 {
1522         struct rb_node *n = ctl->free_space_offset.rb_node;
1523         struct btrfs_free_space *entry, *prev = NULL;
1524
1525         /* find entry that is closest to the 'offset' */
1526         while (1) {
1527                 if (!n) {
1528                         entry = NULL;
1529                         break;
1530                 }
1531
1532                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1533                 prev = entry;
1534
1535                 if (offset < entry->offset)
1536                         n = n->rb_left;
1537                 else if (offset > entry->offset)
1538                         n = n->rb_right;
1539                 else
1540                         break;
1541         }
1542
1543         if (bitmap_only) {
1544                 if (!entry)
1545                         return NULL;
1546                 if (entry->bitmap)
1547                         return entry;
1548
1549                 /*
1550                  * a bitmap entry and an extent entry may share the same
1551                  * offset; in that case, the bitmap entry comes after the
1552                  * extent entry.
1552                  */
1553                 n = rb_next(n);
1554                 if (!n)
1555                         return NULL;
1556                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1557                 if (entry->offset != offset)
1558                         return NULL;
1559
1560                 WARN_ON(!entry->bitmap);
1561                 return entry;
1562         } else if (entry) {
1563                 if (entry->bitmap) {
1564                         /*
1565                          * if previous extent entry covers the offset,
1566                          * we should return it instead of the bitmap entry
1567                          */
1568                         n = rb_prev(&entry->offset_index);
1569                         if (n) {
1570                                 prev = rb_entry(n, struct btrfs_free_space,
1571                                                 offset_index);
1572                                 if (!prev->bitmap &&
1573                                     prev->offset + prev->bytes > offset)
1574                                         entry = prev;
1575                         }
1576                 }
1577                 return entry;
1578         }
1579
1580         if (!prev)
1581                 return NULL;
1582
1583         /* find last entry before the 'offset' */
1584         entry = prev;
1585         if (entry->offset > offset) {
1586                 n = rb_prev(&entry->offset_index);
1587                 if (n) {
1588                         entry = rb_entry(n, struct btrfs_free_space,
1589                                         offset_index);
1590                         ASSERT(entry->offset <= offset);
1591                 } else {
1592                         if (fuzzy)
1593                                 return entry;
1594                         else
1595                                 return NULL;
1596                 }
1597         }
1598
1599         if (entry->bitmap) {
1600                 n = rb_prev(&entry->offset_index);
1601                 if (n) {
1602                         prev = rb_entry(n, struct btrfs_free_space,
1603                                         offset_index);
1604                         if (!prev->bitmap &&
1605                             prev->offset + prev->bytes > offset)
1606                                 return prev;
1607                 }
1608                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1609                         return entry;
1610         } else if (entry->offset + entry->bytes > offset)
1611                 return entry;
1612
1613         if (!fuzzy)
1614                 return NULL;
1615
1616         while (1) {
1617                 if (entry->bitmap) {
1618                         if (entry->offset + BITS_PER_BITMAP *
1619                             ctl->unit > offset)
1620                                 break;
1621                 } else {
1622                         if (entry->offset + entry->bytes > offset)
1623                                 break;
1624                 }
1625
1626                 n = rb_next(&entry->offset_index);
1627                 if (!n)
1628                         return NULL;
1629                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1630         }
1631         return entry;
1632 }
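/*
 * The lookups used throughout this file map onto the two flags as follows:
 * tree_search_offset(ctl, offset, 0, 0) finds the extent (or covering
 * bitmap) entry at an exact offset, (offset_to_bitmap(...), 1, 0) finds
 * the bitmap entry owning that bitmap-aligned slot, and (..., 0, 1) is the
 * fuzzy lookup used by the allocation path, which may return the closest
 * entry at or after the requested offset.
 */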
1633
1634 static inline void
1635 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1636                     struct btrfs_free_space *info)
1637 {
1638         rb_erase(&info->offset_index, &ctl->free_space_offset);
1639         ctl->free_extents--;
1640
1641         if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1642                 ctl->discardable_extents[BTRFS_STAT_CURR]--;
1643                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1644         }
1645 }
1646
1647 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1648                               struct btrfs_free_space *info)
1649 {
1650         __unlink_free_space(ctl, info);
1651         ctl->free_space -= info->bytes;
1652 }
1653
1654 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1655                            struct btrfs_free_space *info)
1656 {
1657         int ret = 0;
1658
1659         ASSERT(info->bytes || info->bitmap);
1660         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1661                                  &info->offset_index, (info->bitmap != NULL));
1662         if (ret)
1663                 return ret;
1664
1665         if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1666                 ctl->discardable_extents[BTRFS_STAT_CURR]++;
1667                 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1668         }
1669
1670         ctl->free_space += info->bytes;
1671         ctl->free_extents++;
1672         return ret;
1673 }
1674
1675 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1676 {
1677         struct btrfs_block_group *block_group = ctl->private;
1678         u64 max_bytes;
1679         u64 bitmap_bytes;
1680         u64 extent_bytes;
1681         u64 size = block_group->length;
1682         u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1683         u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1684
1685         max_bitmaps = max_t(u64, max_bitmaps, 1);
1686
1687         ASSERT(ctl->total_bitmaps <= max_bitmaps);
1688
1689         /*
1690          * We are trying to keep the total amount of memory used per 1GiB of
1691          * space to be MAX_CACHE_BYTES_PER_GIG.  However, with a reclamation
1692          * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
1693          * bitmaps, we may end up using more memory than this.
1694          */
1695         if (size < SZ_1G)
1696                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1697         else
1698                 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
1699
1700         bitmap_bytes = ctl->total_bitmaps * ctl->unit;
1701
1702         /*
1703          * we want the extent entry threshold to be at most half of max_bytes,
1704          * and no larger than whatever the bitmaps leave us room for.
1705          */
1706         extent_bytes = max_bytes - bitmap_bytes;
1707         extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
1708
1709         ctl->extents_thresh =
1710                 div_u64(extent_bytes, sizeof(struct btrfs_free_space));
1711 }
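/*
 * Worked example (illustrative, assuming a 4KiB ->unit so each bitmap
 * costs about one page of memory): a 1GiB block group gets max_bytes =
 * 64KiB.  If 4 bitmaps are in use, bitmap_bytes = 16KiB, extent_bytes =
 * min(64KiB - 16KiB, 64KiB / 2) = 32KiB, and extents_thresh becomes
 * 32KiB / sizeof(struct btrfs_free_space) extent entries.
 */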
1712
1713 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1714                                        struct btrfs_free_space *info,
1715                                        u64 offset, u64 bytes)
1716 {
1717         unsigned long start, count, end;
1718         int extent_delta = -1;
1719
1720         start = offset_to_bit(info->offset, ctl->unit, offset);
1721         count = bytes_to_bits(bytes, ctl->unit);
1722         end = start + count;
1723         ASSERT(end <= BITS_PER_BITMAP);
1724
1725         bitmap_clear(info->bitmap, start, count);
1726
1727         info->bytes -= bytes;
1728         if (info->max_extent_size > ctl->unit)
1729                 info->max_extent_size = 0;
1730
1731         if (start && test_bit(start - 1, info->bitmap))
1732                 extent_delta++;
1733
1734         if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1735                 extent_delta++;
1736
1737         info->bitmap_extents += extent_delta;
1738         if (!btrfs_free_space_trimmed(info)) {
1739                 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1740                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1741         }
1742 }
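/*
 * The extent_delta accounting above, by example: clearing bits from the
 * middle of a set run (111111 -> 110011) splits one free region into two,
 * so both neighbour tests fire and the delta ends up at +1; clearing an
 * isolated run entirely (0110 -> 0000) leaves both tests false and the
 * delta stays at -1.  bitmap_set_bits() below mirrors this with the signs
 * flipped.
 */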
1743
1744 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1745                               struct btrfs_free_space *info, u64 offset,
1746                               u64 bytes)
1747 {
1748         __bitmap_clear_bits(ctl, info, offset, bytes);
1749         ctl->free_space -= bytes;
1750 }
1751
1752 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1753                             struct btrfs_free_space *info, u64 offset,
1754                             u64 bytes)
1755 {
1756         unsigned long start, count, end;
1757         int extent_delta = 1;
1758
1759         start = offset_to_bit(info->offset, ctl->unit, offset);
1760         count = bytes_to_bits(bytes, ctl->unit);
1761         end = start + count;
1762         ASSERT(end <= BITS_PER_BITMAP);
1763
1764         bitmap_set(info->bitmap, start, count);
1765
1766         info->bytes += bytes;
1767         ctl->free_space += bytes;
1768
1769         if (start && test_bit(start - 1, info->bitmap))
1770                 extent_delta--;
1771
1772         if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1773                 extent_delta--;
1774
1775         info->bitmap_extents += extent_delta;
1776         if (!btrfs_free_space_trimmed(info)) {
1777                 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1778                 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1779         }
1780 }
1781
1782 /*
1783  * If we cannot find a suitable extent, we will use *bytes to record
1784  * the size of the largest free extent we did find.
1785  */
1786 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1787                          struct btrfs_free_space *bitmap_info, u64 *offset,
1788                          u64 *bytes, bool for_alloc)
1789 {
1790         unsigned long found_bits = 0;
1791         unsigned long max_bits = 0;
1792         unsigned long bits, i;
1793         unsigned long next_zero;
1794         unsigned long extent_bits;
1795
1796         /*
1797          * Skip searching the bitmap if we don't have a contiguous section that
1798          * is large enough for this allocation.
1799          */
1800         if (for_alloc &&
1801             bitmap_info->max_extent_size &&
1802             bitmap_info->max_extent_size < *bytes) {
1803                 *bytes = bitmap_info->max_extent_size;
1804                 return -1;
1805         }
1806
1807         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1808                           max_t(u64, *offset, bitmap_info->offset));
1809         bits = bytes_to_bits(*bytes, ctl->unit);
1810
1811         for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1812                 if (for_alloc && bits == 1) {
1813                         found_bits = 1;
1814                         break;
1815                 }
1816                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1817                                                BITS_PER_BITMAP, i);
1818                 extent_bits = next_zero - i;
1819                 if (extent_bits >= bits) {
1820                         found_bits = extent_bits;
1821                         break;
1822                 } else if (extent_bits > max_bits) {
1823                         max_bits = extent_bits;
1824                 }
1825                 i = next_zero;
1826         }
1827
1828         if (found_bits) {
1829                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1830                 *bytes = (u64)(found_bits) * ctl->unit;
1831                 return 0;
1832         }
1833
1834         *bytes = (u64)(max_bits) * ctl->unit;
1835         bitmap_info->max_extent_size = *bytes;
1836         return -1;
1837 }
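/*
 * Callers use search_bitmap() in two modes: remove_from_bitmap() passes
 * for_alloc == false to locate exactly the run backing *offset, while the
 * allocation paths pass for_alloc == true, which lets a one-bit request
 * take the first set bit found and lets a too-small cached
 * ->max_extent_size short-circuit the scan entirely.  On failure (-1),
 * *bytes holds the largest run found so callers can report a meaningful
 * max extent size.
 */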
1838
1839 static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1840 {
1841         if (entry->bitmap)
1842                 return entry->max_extent_size;
1843         return entry->bytes;
1844 }
1845
1846 /* On failure, cache the size of the largest extent found in *max_extent_size */
1847 static struct btrfs_free_space *
1848 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1849                 unsigned long align, u64 *max_extent_size)
1850 {
1851         struct btrfs_free_space *entry;
1852         struct rb_node *node;
1853         u64 tmp;
1854         u64 align_off;
1855         int ret;
1856
1857         if (!ctl->free_space_offset.rb_node)
1858                 goto out;
1859
1860         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1861         if (!entry)
1862                 goto out;
1863
1864         for (node = &entry->offset_index; node; node = rb_next(node)) {
1865                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1866                 if (entry->bytes < *bytes) {
1867                         *max_extent_size = max(get_max_extent_size(entry),
1868                                                *max_extent_size);
1869                         continue;
1870                 }
1871
1872                 /* make sure the space returned is big enough
1873                  * to match our requested alignment
1874                  */
1875                 if (*bytes >= align) {
1876                         tmp = entry->offset - ctl->start + align - 1;
1877                         tmp = div64_u64(tmp, align);
1878                         tmp = tmp * align + ctl->start;
1879                         align_off = tmp - entry->offset;
1880                 } else {
1881                         align_off = 0;
1882                         tmp = entry->offset;
1883                 }
1884
1885                 if (entry->bytes < *bytes + align_off) {
1886                         *max_extent_size = max(get_max_extent_size(entry),
1887                                                *max_extent_size);
1888                         continue;
1889                 }
1890
1891                 if (entry->bitmap) {
1892                         u64 size = *bytes;
1893
1894                         ret = search_bitmap(ctl, entry, &tmp, &size, true);
1895                         if (!ret) {
1896                                 *offset = tmp;
1897                                 *bytes = size;
1898                                 return entry;
1899                         } else {
1900                                 *max_extent_size =
1901                                         max(get_max_extent_size(entry),
1902                                             *max_extent_size);
1903                         }
1904                         continue;
1905                 }
1906
1907                 *offset = tmp;
1908                 *bytes = entry->bytes - align_off;
1909                 return entry;
1910         }
1911 out:
1912         return NULL;
1913 }
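/*
 * The alignment arithmetic above, by example (illustrative numbers): with
 * ctl->start == 0, align == 64KiB and an entry at offset 100KiB, tmp
 * rounds up to 128KiB, so align_off == 28KiB and the entry is only usable
 * if it holds at least *bytes + 28KiB.
 */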
1914
1915 static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
1916                                 struct btrfs_free_space *bitmap_info)
1917 {
1918         struct btrfs_block_group *block_group = ctl->private;
1919         u64 bytes = bitmap_info->bytes;
1920         unsigned int rs, re;
1921         int count = 0;
1922
1923         if (!block_group || !bytes)
1924                 return count;
1925
1926         bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
1927                                    BITS_PER_BITMAP) {
1928                 bytes -= (re - rs) * ctl->unit; /* set region is [rs, re) */
1929                 count++;
1930
1931                 if (!bytes)
1932                         break;
1933         }
1934
1935         return count;
1936 }
1937
1938 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1939                            struct btrfs_free_space *info, u64 offset)
1940 {
1941         info->offset = offset_to_bitmap(ctl, offset);
1942         info->bytes = 0;
1943         info->bitmap_extents = 0;
1944         INIT_LIST_HEAD(&info->list);
1945         link_free_space(ctl, info);
1946         ctl->total_bitmaps++;
1947
1948         ctl->op->recalc_thresholds(ctl);
1949 }
1950
1951 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1952                         struct btrfs_free_space *bitmap_info)
1953 {
1954         /*
1955          * Normally when this is called, the bitmap is completely empty. However,
1956          * if we are blowing up the free space cache for one reason or another
1957          * via __btrfs_remove_free_space_cache(), then it may not be empty, and
1958          * without this adjustment we would leave stale discard stats behind.
1959          */
1960         if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1961                 ctl->discardable_extents[BTRFS_STAT_CURR] -=
1962                         bitmap_info->bitmap_extents;
1963                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1964
1965         }
1966         unlink_free_space(ctl, bitmap_info);
1967         kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1968         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1969         ctl->total_bitmaps--;
1970         ctl->op->recalc_thresholds(ctl);
1971 }
1972
1973 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1974                               struct btrfs_free_space *bitmap_info,
1975                               u64 *offset, u64 *bytes)
1976 {
1977         u64 end;
1978         u64 search_start, search_bytes;
1979         int ret;
1980
1981 again:
1982         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1983
1984         /*
1985          * We need to search for bits in this bitmap.  We could only cover some
1986          * of the extent in this bitmap thanks to how we add space, so we need
1987          * to search for as much of it as we can and clear that amount, and then
1988          * go searching for the next bit.
1989          */
1990         search_start = *offset;
1991         search_bytes = ctl->unit;
1992         search_bytes = min(search_bytes, end - search_start + 1);
1993         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
1994                             false);
1995         if (ret < 0 || search_start != *offset)
1996                 return -EINVAL;
1997
1998         /* We may have found more bits than what we need */
1999         search_bytes = min(search_bytes, *bytes);
2000
2001         /* Cannot clear past the end of the bitmap */
2002         search_bytes = min(search_bytes, end - search_start + 1);
2003
2004         bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2005         *offset += search_bytes;
2006         *bytes -= search_bytes;
2007
2008         if (*bytes) {
2009                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
2010                 if (!bitmap_info->bytes)
2011                         free_bitmap(ctl, bitmap_info);
2012
2013                 /*
2014                  * no entry after this bitmap, but we still have bytes to
2015                  * remove, so something has gone wrong.
2016                  */
2017                 if (!next)
2018                         return -EINVAL;
2019
2020                 bitmap_info = rb_entry(next, struct btrfs_free_space,
2021                                        offset_index);
2022
2023                 /*
2024                  * if the next entry isn't a bitmap we need to return to let the
2025                  * extent stuff do its work.
2026                  */
2027                 if (!bitmap_info->bitmap)
2028                         return -EAGAIN;
2029
2030                 /*
2031                  * Ok the next item is a bitmap, but it may not actually hold
2032                  * the information for the rest of this free space stuff, so
2033                  * look for it, and if we don't find it return so we can try
2034                  * everything over again.
2035                  */
2036                 search_start = *offset;
2037                 search_bytes = ctl->unit;
2038                 ret = search_bitmap(ctl, bitmap_info, &search_start,
2039                                     &search_bytes, false);
2040                 if (ret < 0 || search_start != *offset)
2041                         return -EAGAIN;
2042
2043                 goto again;
2044         } else if (!bitmap_info->bytes)
2045                 free_bitmap(ctl, bitmap_info);
2046
2047         return 0;
2048 }
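/*
 * remove_from_bitmap() may loop: a range being removed can span several
 * bitmap entries, and can even run into an extent entry (hence the
 * -EAGAIN, which makes btrfs_remove_free_space() restart its search), so
 * each pass clears what the current bitmap covers and advances *offset
 * and *bytes before walking to the next tree entry.
 */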
2049
2050 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2051                                struct btrfs_free_space *info, u64 offset,
2052                                u64 bytes, enum btrfs_trim_state trim_state)
2053 {
2054         u64 bytes_to_set = 0;
2055         u64 end;
2056
2057         /*
2058          * This is a tradeoff to make bitmap trim state minimal.  We mark the
2059          * whole bitmap untrimmed if at any point we add untrimmed regions.
2060          */
2061         if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2062                 if (btrfs_free_space_trimmed(info)) {
2063                         ctl->discardable_extents[BTRFS_STAT_CURR] +=
2064                                 info->bitmap_extents;
2065                         ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2066                 }
2067                 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2068         }
2069
2070         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2071
2072         bytes_to_set = min(end - offset, bytes);
2073
2074         bitmap_set_bits(ctl, info, offset, bytes_to_set);
2075
2076         /*
2077          * We set some bytes, we have no idea what the max extent size is
2078          * anymore.
2079          */
2080         info->max_extent_size = 0;
2081
2082         return bytes_to_set;
2084 }
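/*
 * add_bytes_to_bitmap() deliberately sets only the bytes that fit within
 * this one bitmap (capped at the bitmap's end); insert_into_bitmap() below
 * loops, advancing the offset by the returned byte count, until the whole
 * range has been spread across as many bitmaps as it needs.
 */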
2085
2086 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2087                       struct btrfs_free_space *info)
2088 {
2089         struct btrfs_block_group *block_group = ctl->private;
2090         struct btrfs_fs_info *fs_info = block_group->fs_info;
2091         bool forced = false;
2092
2093 #ifdef CONFIG_BTRFS_DEBUG
2094         if (btrfs_should_fragment_free_space(block_group))
2095                 forced = true;
2096 #endif
2097
2098         /* This is a way to reclaim large regions from the bitmaps. */
2099         if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2100                 return false;
2101
2102         /*
2103          * If we are below the extents threshold then we can add this as an
2104          * extent, and don't have to deal with the bitmap
2105          */
2106         if (!forced && ctl->free_extents < ctl->extents_thresh) {
2107                 /*
2108                  * If this block group has some small extents we don't want to
2109                  * use up all of our free slots in the cache with them, we want
2110                  * to reserve them for larger extents. However, if we have plenty
2111                  * of cache left then go ahead and add them, no sense in adding
2112                  * the overhead of a bitmap if we don't have to.
2113                  */
2114                 if (info->bytes <= fs_info->sectorsize * 8) {
2115                         if (ctl->free_extents * 3 <= ctl->extents_thresh)
2116                                 return false;
2117                 } else {
2118                         return false;
2119                 }
2120         }
2121
2122         /*
2123          * The original block groups from mkfs can be really small, like 8
2124          * megabytes, so don't bother with a bitmap for those entries.  However,
2125          * some block groups can be smaller than what a bitmap would cover but
2126          * are still large enough that they could overflow the 32k memory limit,
2127          * so those block groups are still allowed to have a bitmap
2128          * entry.
2129          */
2130         if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2131                 return false;
2132
2133         return true;
2134 }
2135
2136 static const struct btrfs_free_space_op free_space_op = {
2137         .recalc_thresholds      = recalculate_thresholds,
2138         .use_bitmap             = use_bitmap,
2139 };
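/*
 * insert_into_bitmap() returns 1 when all of @info's bytes were folded
 * into one or more bitmap entries, 0 when use_bitmap() decided the space
 * should stay an extent entry (the caller then links @info itself), and
 * -ENOMEM on allocation failure.  Except in the 0 case, @info and any
 * bitmap it still holds are consumed or freed before returning.
 */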
2140
2141 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2142                               struct btrfs_free_space *info)
2143 {
2144         struct btrfs_free_space *bitmap_info;
2145         struct btrfs_block_group *block_group = NULL;
2146         int added = 0;
2147         u64 bytes, offset, bytes_added;
2148         enum btrfs_trim_state trim_state;
2149         int ret;
2150
2151         bytes = info->bytes;
2152         offset = info->offset;
2153         trim_state = info->trim_state;
2154
2155         if (!ctl->op->use_bitmap(ctl, info))
2156                 return 0;
2157
2158         if (ctl->op == &free_space_op)
2159                 block_group = ctl->private;
2160 again:
2161         /*
2162          * Since we link bitmaps right into the cluster we need to see if we
2163          * have a cluster here, and if so and it has our bitmap we need to add
2164          * the free space to that bitmap.
2165          */
2166         if (block_group && !list_empty(&block_group->cluster_list)) {
2167                 struct btrfs_free_cluster *cluster;
2168                 struct rb_node *node;
2169                 struct btrfs_free_space *entry;
2170
2171                 cluster = list_entry(block_group->cluster_list.next,
2172                                      struct btrfs_free_cluster,
2173                                      block_group_list);
2174                 spin_lock(&cluster->lock);
2175                 node = rb_first(&cluster->root);
2176                 if (!node) {
2177                         spin_unlock(&cluster->lock);
2178                         goto no_cluster_bitmap;
2179                 }
2180
2181                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2182                 if (!entry->bitmap) {
2183                         spin_unlock(&cluster->lock);
2184                         goto no_cluster_bitmap;
2185                 }
2186
2187                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
2188                         bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2189                                                           bytes, trim_state);
2190                         bytes -= bytes_added;
2191                         offset += bytes_added;
2192                 }
2193                 spin_unlock(&cluster->lock);
2194                 if (!bytes) {
2195                         ret = 1;
2196                         goto out;
2197                 }
2198         }
2199
2200 no_cluster_bitmap:
2201         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2202                                          1, 0);
2203         if (!bitmap_info) {
2204                 ASSERT(added == 0);
2205                 goto new_bitmap;
2206         }
2207
2208         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2209                                           trim_state);
2210         bytes -= bytes_added;
2211         offset += bytes_added;
2212         added = 0;
2213
2214         if (!bytes) {
2215                 ret = 1;
2216                 goto out;
2217         } else
2218                 goto again;
2219
2220 new_bitmap:
2221         if (info && info->bitmap) {
2222                 add_new_bitmap(ctl, info, offset);
2223                 added = 1;
2224                 info = NULL;
2225                 goto again;
2226         } else {
2227                 spin_unlock(&ctl->tree_lock);
2228
2229                 /* no pre-allocated info, allocate a new one */
2230                 if (!info) {
2231                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
2232                                                  GFP_NOFS);
2233                         if (!info) {
2234                                 spin_lock(&ctl->tree_lock);
2235                                 ret = -ENOMEM;
2236                                 goto out;
2237                         }
2238                 }
2239
2240                 /* allocate the bitmap */
2241                 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2242                                                  GFP_NOFS);
2243                 info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2244                 spin_lock(&ctl->tree_lock);
2245                 if (!info->bitmap) {
2246                         ret = -ENOMEM;
2247                         goto out;
2248                 }
2249                 goto again;
2250         }
2251
2252 out:
2253         if (info) {
2254                 if (info->bitmap)
2255                         kmem_cache_free(btrfs_free_space_bitmap_cachep,
2256                                         info->bitmap);
2257                 kmem_cache_free(btrfs_free_space_cachep, info);
2258         }
2259
2260         return ret;
2261 }
2262
2263 /*
2264  * Free space merging rules:
2265  *  1) Merge trimmed areas together
2266  *  2) Let untrimmed areas coalesce with trimmed areas
2267  *  3) Always pull neighboring regions from bitmaps
2268  *
2269  * The above rules are for when we merge free space based on btrfs_trim_state.
2270  * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2271  * same reason: to promote larger extent regions which makes life easier for
2272  * find_free_extent().  Rule 2 enables coalescing on the common path of free
2273  * space being returned from btrfs_finish_extent_commit().  The asymmetry is
2274  * that a newly added region which is already trimmed will not merge with
2275  * untrimmed neighbors in the rb_tree.  Rule 3 is purely to obtain larger extents
2276  * and provide find_free_extent() with the largest extents possible hoping for
2277  * the reuse path.
2278  */
2279 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2280                           struct btrfs_free_space *info, bool update_stat)
2281 {
2282         struct btrfs_free_space *left_info = NULL;
2283         struct btrfs_free_space *right_info;
2284         bool merged = false;
2285         u64 offset = info->offset;
2286         u64 bytes = info->bytes;
2287         const bool is_trimmed = btrfs_free_space_trimmed(info);
2288
2289         /*
2290          * first we want to see if there is free space adjacent to the range we
2291          * are adding, if there is remove that struct and add a new one to
2292          * cover the entire range
2293          */
2294         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
2295         if (right_info && rb_prev(&right_info->offset_index))
2296                 left_info = rb_entry(rb_prev(&right_info->offset_index),
2297                                      struct btrfs_free_space, offset_index);
2298         else if (!right_info)
2299                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2300
2301         /* See try_merge_free_space() comment. */
2302         if (right_info && !right_info->bitmap &&
2303             (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2304                 if (update_stat)
2305                         unlink_free_space(ctl, right_info);
2306                 else
2307                         __unlink_free_space(ctl, right_info);
2308                 info->bytes += right_info->bytes;
2309                 kmem_cache_free(btrfs_free_space_cachep, right_info);
2310                 merged = true;
2311         }
2312
2313         /* See try_merge_free_space() comment. */
2314         if (left_info && !left_info->bitmap &&
2315             left_info->offset + left_info->bytes == offset &&
2316             (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2317                 if (update_stat)
2318                         unlink_free_space(ctl, left_info);
2319                 else
2320                         __unlink_free_space(ctl, left_info);
2321                 info->offset = left_info->offset;
2322                 info->bytes += left_info->bytes;
2323                 kmem_cache_free(btrfs_free_space_cachep, left_info);
2324                 merged = true;
2325         }
2326
2327         return merged;
2328 }
2329
2330 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2331                                      struct btrfs_free_space *info,
2332                                      bool update_stat)
2333 {
2334         struct btrfs_free_space *bitmap;
2335         unsigned long i;
2336         unsigned long j;
2337         const u64 end = info->offset + info->bytes;
2338         const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2339         u64 bytes;
2340
2341         bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2342         if (!bitmap)
2343                 return false;
2344
2345         i = offset_to_bit(bitmap->offset, ctl->unit, end);
2346         j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2347         if (j == i)
2348                 return false;
2349         bytes = (j - i) * ctl->unit;
2350         info->bytes += bytes;
2351
2352         /* See try_merge_free_space() comment. */
2353         if (!btrfs_free_space_trimmed(bitmap))
2354                 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2355
2356         if (update_stat)
2357                 bitmap_clear_bits(ctl, bitmap, end, bytes);
2358         else
2359                 __bitmap_clear_bits(ctl, bitmap, end, bytes);
2360
2361         if (!bitmap->bytes)
2362                 free_bitmap(ctl, bitmap);
2363
2364         return true;
2365 }
2366
2367 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2368                                        struct btrfs_free_space *info,
2369                                        bool update_stat)
2370 {
2371         struct btrfs_free_space *bitmap;
2372         u64 bitmap_offset;
2373         unsigned long i;
2374         unsigned long j;
2375         unsigned long prev_j;
2376         u64 bytes;
2377
2378         bitmap_offset = offset_to_bitmap(ctl, info->offset);
2379         /* If we're on a boundary, try the previous logical bitmap. */
2380         if (bitmap_offset == info->offset) {
2381                 if (info->offset == 0)
2382                         return false;
2383                 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2384         }
2385
2386         bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2387         if (!bitmap)
2388                 return false;
2389
2390         i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2391         j = 0;
2392         prev_j = (unsigned long)-1;
2393         for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2394                 if (j > i)
2395                         break;
2396                 prev_j = j;
2397         }
2398         if (prev_j == i)
2399                 return false;
2400
2401         if (prev_j == (unsigned long)-1)
2402                 bytes = (i + 1) * ctl->unit;
2403         else
2404                 bytes = (i - prev_j) * ctl->unit;
2405
2406         info->offset -= bytes;
2407         info->bytes += bytes;
2408
2409         /* See try_merge_free_space() comment. */
2410         if (!btrfs_free_space_trimmed(bitmap))
2411                 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2412
2413         if (update_stat)
2414                 bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2415         else
2416                 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2417
2418         if (!bitmap->bytes)
2419                 free_bitmap(ctl, bitmap);
2420
2421         return true;
2422 }
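/*
 * Example for the scan above (illustrative bit positions): say i == 9,
 * the bit just in front of @info, bits 7..9 are set and bit 6 is the last
 * clear bit at or below i.  Then prev_j == 6, bytes == (9 - 6) * ctl->unit,
 * and those three trailing set bits are cleared from the bitmap and
 * prepended to @info.
 */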
2423
2424 /*
2425  * We prefer always to allocate from extent entries, both for clustered and
2426  * non-clustered allocation requests. So when attempting to add a new extent
2427  * entry, try to see if there's adjacent free space in bitmap entries, and if
2428  * there is, migrate that space from the bitmaps to the extent.
2429  * Like this we get better chances of satisfying space allocation requests
2430  * because we attempt to satisfy them based on a single cache entry, and never
2431  * on 2 or more entries - even if the entries represent a contiguous free space
2432  * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2433  * ends).
2434  */
2435 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2436                               struct btrfs_free_space *info,
2437                               bool update_stat)
2438 {
2439         /*
2440          * Only work with disconnected entries, as we can change their offset,
2441          * and they must be extent entries.
2442          */
2443         ASSERT(!info->bitmap);
2444         ASSERT(RB_EMPTY_NODE(&info->offset_index));
2445
2446         if (ctl->total_bitmaps > 0) {
2447                 bool stole_end;
2448                 bool stole_front = false;
2449
2450                 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2451                 if (ctl->total_bitmaps > 0)
2452                         stole_front = steal_from_bitmap_to_front(ctl, info,
2453                                                                  update_stat);
2454
2455                 if (stole_end || stole_front)
2456                         try_merge_free_space(ctl, info, update_stat);
2457         }
2458 }
2459
2460 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2461                            struct btrfs_free_space_ctl *ctl,
2462                            u64 offset, u64 bytes,
2463                            enum btrfs_trim_state trim_state)
2464 {
2465         struct btrfs_block_group *block_group = ctl->private;
2466         struct btrfs_free_space *info;
2467         int ret = 0;
2468         u64 filter_bytes = bytes;
2469
2470         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2471         if (!info)
2472                 return -ENOMEM;
2473
2474         info->offset = offset;
2475         info->bytes = bytes;
2476         info->trim_state = trim_state;
2477         RB_CLEAR_NODE(&info->offset_index);
2478
2479         spin_lock(&ctl->tree_lock);
2480
2481         if (try_merge_free_space(ctl, info, true))
2482                 goto link;
2483
2484         /*
2485          * There was no extent directly to the left or right of this new
2486          * extent, so we know we're going to have to allocate a new extent;
2487          * before we do that, see if we need to drop this into a bitmap.
2488          */
2489         ret = insert_into_bitmap(ctl, info);
2490         if (ret < 0) {
2491                 goto out;
2492         } else if (ret) {
2493                 ret = 0;
2494                 goto out;
2495         }
2496 link:
2497         /*
2498          * Only steal free space from adjacent bitmaps if we're sure we're not
2499          * going to add the new free space to existing bitmap entries - because
2500          * that would mean unnecessary work that would be reverted. Therefore
2501          * attempt to steal space from bitmaps if we're adding an extent entry.
2502          */
2503         steal_from_bitmap(ctl, info, true);
2504
2505         filter_bytes = max(filter_bytes, info->bytes);
2506
2507         ret = link_free_space(ctl, info);
2508         if (ret)
2509                 kmem_cache_free(btrfs_free_space_cachep, info);
2510 out:
2511         btrfs_discard_update_discardable(block_group, ctl);
2512         spin_unlock(&ctl->tree_lock);
2513
2514         if (ret) {
2515                 btrfs_crit(fs_info, "unable to add free space: %d", ret);
2516                 ASSERT(ret != -EEXIST);
2517         }
2518
2519         if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2520                 btrfs_discard_check_filter(block_group, filter_bytes);
2521                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2522         }
2523
2524         return ret;
2525 }
2526
2527 int btrfs_add_free_space(struct btrfs_block_group *block_group,
2528                          u64 bytenr, u64 size)
2529 {
2530         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2531
2532         if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2533                 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2534
2535         return __btrfs_add_free_space(block_group->fs_info,
2536                                       block_group->free_space_ctl,
2537                                       bytenr, size, trim_state);
2538 }
2539
2540 /*
2541  * This is a subtle distinction because when adding free space back in general,
2542  * we want it to be added as untrimmed so async discard will process it. But
2543  * when we add it on loading of a block group, we want to consider it trimmed.
2544  */
2545 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2546                                        u64 bytenr, u64 size)
2547 {
2548         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2549
2550         if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2551             btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2552                 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2553
2554         return __btrfs_add_free_space(block_group->fs_info,
2555                                       block_group->free_space_ctl,
2556                                       bytenr, size, trim_state);
2557 }
2558
2559 int btrfs_remove_free_space(struct btrfs_block_group *block_group,
2560                             u64 offset, u64 bytes)
2561 {
2562         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2563         struct btrfs_free_space *info;
2564         int ret;
2565         bool re_search = false;
2566
2567         spin_lock(&ctl->tree_lock);
2568
2569 again:
2570         ret = 0;
2571         if (!bytes)
2572                 goto out_lock;
2573
2574         info = tree_search_offset(ctl, offset, 0, 0);
2575         if (!info) {
2576                 /*
2577                  * oops didn't find an extent that matched the space we wanted
2578                  * to remove, look for a bitmap instead
2579                  */
2580                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2581                                           1, 0);
2582                 if (!info) {
2583                         /*
2584                          * If we found a partial bit of our free space in a
2585                          * bitmap but then couldn't find the other part this may
2586                          * be a problem, so WARN about it.
2587                          */
2588                         WARN_ON(re_search);
2589                         goto out_lock;
2590                 }
2591         }
2592
2593         re_search = false;
2594         if (!info->bitmap) {
2595                 unlink_free_space(ctl, info);
2596                 if (offset == info->offset) {
2597                         u64 to_free = min(bytes, info->bytes);
2598
2599                         info->bytes -= to_free;
2600                         info->offset += to_free;
2601                         if (info->bytes) {
2602                                 ret = link_free_space(ctl, info);
2603                                 WARN_ON(ret);
2604                         } else {
2605                                 kmem_cache_free(btrfs_free_space_cachep, info);
2606                         }
2607
2608                         offset += to_free;
2609                         bytes -= to_free;
2610                         goto again;
2611                 } else {
2612                         u64 old_end = info->bytes + info->offset;
2613
2614                         info->bytes = offset - info->offset;
2615                         ret = link_free_space(ctl, info);
2616                         WARN_ON(ret);
2617                         if (ret)
2618                                 goto out_lock;
2619
2620                         /* Not enough bytes in this entry to satisfy us */
2621                         if (old_end < offset + bytes) {
2622                                 bytes -= old_end - offset;
2623                                 offset = old_end;
2624                                 goto again;
2625                         } else if (old_end == offset + bytes) {
2626                                 /* all done */
2627                                 goto out_lock;
2628                         }
2629                         spin_unlock(&ctl->tree_lock);
2630
2631                         ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2632                                                      offset + bytes,
2633                                                      old_end - (offset + bytes),
2634                                                      info->trim_state);
2635                         WARN_ON(ret);
2636                         goto out;
2637                 }
2638         }
2639
2640         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
2641         if (ret == -EAGAIN) {
2642                 re_search = true;
2643                 goto again;
2644         }
2645 out_lock:
2646         btrfs_discard_update_discardable(block_group, ctl);
2647         spin_unlock(&ctl->tree_lock);
2648 out:
2649         return ret;
2650 }
2651
2652 void btrfs_dump_free_space(struct btrfs_block_group *block_group,
2653                            u64 bytes)
2654 {
2655         struct btrfs_fs_info *fs_info = block_group->fs_info;
2656         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2657         struct btrfs_free_space *info;
2658         struct rb_node *n;
2659         int count = 0;
2660
2661         spin_lock(&ctl->tree_lock);
2662         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2663                 info = rb_entry(n, struct btrfs_free_space, offset_index);
2664                 if (info->bytes >= bytes && !block_group->ro)
2665                         count++;
2666                 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
2667                            info->offset, info->bytes,
2668                        (info->bitmap) ? "yes" : "no");
2669         }
2670         spin_unlock(&ctl->tree_lock);
2671         btrfs_info(fs_info, "block group has cluster?: %s",
2672                list_empty(&block_group->cluster_list) ? "no" : "yes");
2673         btrfs_info(fs_info,
2674                    "%d blocks of free space at or bigger than %llu bytes", count, bytes);
2675 }
2676
2677 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
2678 {
2679         struct btrfs_fs_info *fs_info = block_group->fs_info;
2680         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2681
2682         spin_lock_init(&ctl->tree_lock);
2683         ctl->unit = fs_info->sectorsize;
2684         ctl->start = block_group->start;
2685         ctl->private = block_group;
2686         ctl->op = &free_space_op;
2687         INIT_LIST_HEAD(&ctl->trimming_ranges);
2688         mutex_init(&ctl->cache_writeout_mutex);
2689
2690         /*
2691          * we only want to have 32k of ram per block group for keeping
2692          * track of free space, and if we pass 1/2 of that we want to
2693          * start converting things over to using bitmaps
2694          */
2695         ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
2696 }
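/*
 * Illustrative sizing: with the 16KiB initial budget above and struct
 * btrfs_free_space weighing in at several tens of bytes, a block group
 * starts off with room for roughly a couple hundred extent entries;
 * recalculate_thresholds() then rescales this as bitmaps are added.
 */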
2697
2698 /*
2699  * for a given cluster, put all of its extents back into the free
2700  * space cache.  If the block group passed doesn't match the block group
2701  * pointed to by the cluster, someone else raced in and freed the
2702  * cluster already.  In that case, we just return without changing anything
2703  */
2704 static void __btrfs_return_cluster_to_free_space(
2705                              struct btrfs_block_group *block_group,
2706                              struct btrfs_free_cluster *cluster)
2707 {
2708         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2709         struct btrfs_free_space *entry;
2710         struct rb_node *node;
2711
2712         spin_lock(&cluster->lock);
2713         if (cluster->block_group != block_group)
2714                 goto out;
2715
2716         cluster->block_group = NULL;
2717         cluster->window_start = 0;
2718         list_del_init(&cluster->block_group_list);
2719
2720         node = rb_first(&cluster->root);
2721         while (node) {
2722                 bool bitmap;
2723
2724                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2725                 node = rb_next(&entry->offset_index);
2726                 rb_erase(&entry->offset_index, &cluster->root);
2727                 RB_CLEAR_NODE(&entry->offset_index);
2728
2729                 bitmap = (entry->bitmap != NULL);
2730                 if (!bitmap) {
2731                         /* Merging treats extents as if they were new */
2732                         if (!btrfs_free_space_trimmed(entry)) {
2733                                 ctl->discardable_extents[BTRFS_STAT_CURR]--;
2734                                 ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2735                                         entry->bytes;
2736                         }
2737
2738                         try_merge_free_space(ctl, entry, false);
2739                         steal_from_bitmap(ctl, entry, false);
2740
2741                         /* As we insert directly, update these statistics */
2742                         if (!btrfs_free_space_trimmed(entry)) {
2743                                 ctl->discardable_extents[BTRFS_STAT_CURR]++;
2744                                 ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2745                                         entry->bytes;
2746                         }
2747                 }
2748                 tree_insert_offset(&ctl->free_space_offset,
2749                                    entry->offset, &entry->offset_index, bitmap);
2750         }
2751         cluster->root = RB_ROOT;
2752
2753 out:
2754         spin_unlock(&cluster->lock);
2755         btrfs_put_block_group(block_group);
2756 }
2757
2758 static void __btrfs_remove_free_space_cache_locked(
2759                                 struct btrfs_free_space_ctl *ctl)
2760 {
2761         struct btrfs_free_space *info;
2762         struct rb_node *node;
2763
2764         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2765                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2766                 if (!info->bitmap) {
2767                         unlink_free_space(ctl, info);
2768                         kmem_cache_free(btrfs_free_space_cachep, info);
2769                 } else {
2770                         free_bitmap(ctl, info);
2771                 }
2772
2773                 cond_resched_lock(&ctl->tree_lock);
2774         }
2775 }
2776
2777 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2778 {
2779         spin_lock(&ctl->tree_lock);
2780         __btrfs_remove_free_space_cache_locked(ctl);
2781         if (ctl->private)
2782                 btrfs_discard_update_discardable(ctl->private, ctl);
2783         spin_unlock(&ctl->tree_lock);
2784 }
2785
2786 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
2787 {
2788         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2789         struct btrfs_free_cluster *cluster;
2790         struct list_head *head;
2791
2792         spin_lock(&ctl->tree_lock);
2793         while ((head = block_group->cluster_list.next) !=
2794                &block_group->cluster_list) {
2795                 cluster = list_entry(head, struct btrfs_free_cluster,
2796                                      block_group_list);
2797
2798                 WARN_ON(cluster->block_group != block_group);
2799                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2800
2801                 cond_resched_lock(&ctl->tree_lock);
2802         }
2803         __btrfs_remove_free_space_cache_locked(ctl);
2804         btrfs_discard_update_discardable(block_group, ctl);
2805         spin_unlock(&ctl->tree_lock);
2807 }
2808
2809 /**
2810  * btrfs_is_free_space_trimmed - see if everything is trimmed
2811  * @block_group: block_group of interest
2812  *
2813  * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2814  */
2815 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2816 {
2817         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2818         struct btrfs_free_space *info;
2819         struct rb_node *node;
2820         bool ret = true;
2821
2822         spin_lock(&ctl->tree_lock);
2823         node = rb_first(&ctl->free_space_offset);
2824
2825         while (node) {
2826                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2827
2828                 if (!btrfs_free_space_trimmed(info)) {
2829                         ret = false;
2830                         break;
2831                 }
2832
2833                 node = rb_next(node);
2834         }
2835
2836         spin_unlock(&ctl->tree_lock);
2837         return ret;
2838 }
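
/*
 * Illustrative (hypothetical) caller, not taken from this file: the
 * async discard machinery can use this predicate to decide whether a
 * block group still has untrimmed free space worth queueing, e.g.:
 *
 *	if (!btrfs_is_free_space_trimmed(block_group))
 *		btrfs_discard_queue_work(&block_group->fs_info->discard_ctl,
 *					 block_group);
 */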
2839
2840 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
2841                                u64 offset, u64 bytes, u64 empty_size,
2842                                u64 *max_extent_size)
2843 {
2844         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2845         struct btrfs_discard_ctl *discard_ctl =
2846                                         &block_group->fs_info->discard_ctl;
2847         struct btrfs_free_space *entry = NULL;
2848         u64 bytes_search = bytes + empty_size;
2849         u64 ret = 0;
2850         u64 align_gap = 0;
2851         u64 align_gap_len = 0;
2852         enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2853
2854         spin_lock(&ctl->tree_lock);
2855         entry = find_free_space(ctl, &offset, &bytes_search,
2856                                 block_group->full_stripe_len, max_extent_size);
2857         if (!entry)
2858                 goto out;
2859
2860         ret = offset;
2861         if (entry->bitmap) {
2862                 bitmap_clear_bits(ctl, entry, offset, bytes);
2863
2864                 if (!btrfs_free_space_trimmed(entry))
2865                         atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2866
2867                 if (!entry->bytes)
2868                         free_bitmap(ctl, entry);
2869         } else {
2870                 unlink_free_space(ctl, entry);
2871                 align_gap_len = offset - entry->offset;
2872                 align_gap = entry->offset;
2873                 align_gap_trim_state = entry->trim_state;
2874
2875                 if (!btrfs_free_space_trimmed(entry))
2876                         atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2877
2878                 entry->offset = offset + bytes;
2879                 WARN_ON(entry->bytes < bytes + align_gap_len);
2880
2881                 entry->bytes -= bytes + align_gap_len;
2882                 if (!entry->bytes)
2883                         kmem_cache_free(btrfs_free_space_cachep, entry);
2884                 else
2885                         link_free_space(ctl, entry);
2886         }
2887 out:
2888         btrfs_discard_update_discardable(block_group, ctl);
2889         spin_unlock(&ctl->tree_lock);
2890
2891         if (align_gap_len)
2892                 __btrfs_add_free_space(block_group->fs_info, ctl,
2893                                        align_gap, align_gap_len,
2894                                        align_gap_trim_state);
2895         return ret;
2896 }
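
/*
 * Worked example of the align gap handling above (illustrative
 * numbers): if the backing extent entry starts at 0x10000 but
 * find_free_space() returns a stripe-aligned offset of 0x14000, then
 * align_gap = 0x10000 and align_gap_len = 0x4000; that leading gap is
 * handed back via __btrfs_add_free_space() once the locks are dropped,
 * preserving its original trim state.
 */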
2897
2898 /*
2899  * given a cluster, put all of its extents back into the free space
2900  * cache.  If a block group is passed, this function will only free
2901  * a cluster that belongs to the passed block group.
2902  *
2903  * Otherwise, it'll get a reference on the block group pointed to by the
2904  * cluster and remove the cluster from it.
2905  */
2906 void btrfs_return_cluster_to_free_space(
2907                                struct btrfs_block_group *block_group,
2908                                struct btrfs_free_cluster *cluster)
2909 {
2910         struct btrfs_free_space_ctl *ctl;
2911
2912         /* first, get a safe pointer to the block group */
2913         spin_lock(&cluster->lock);
2914         if (!block_group) {
2915                 block_group = cluster->block_group;
2916                 if (!block_group) {
2917                         spin_unlock(&cluster->lock);
2918                         return;
2919                 }
2920         } else if (cluster->block_group != block_group) {
2921                 /* someone else has already freed it, don't redo their work */
2922                 spin_unlock(&cluster->lock);
2923                 return;
2924         }
2925         btrfs_get_block_group(block_group);
2926         spin_unlock(&cluster->lock);
2927
2928         ctl = block_group->free_space_ctl;
2929
2930         /* now return any extents the cluster had on it */
2931         spin_lock(&ctl->tree_lock);
2932         __btrfs_return_cluster_to_free_space(block_group, cluster);
2933         spin_unlock(&ctl->tree_lock);
2934
2935         btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
2936
2937         /* finally drop our ref */
2938         btrfs_put_block_group(block_group);
2939 }
2940
2941 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
2942                                    struct btrfs_free_cluster *cluster,
2943                                    struct btrfs_free_space *entry,
2944                                    u64 bytes, u64 min_start,
2945                                    u64 *max_extent_size)
2946 {
2947         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2948         int err;
2949         u64 search_start = min_start;
2950         u64 search_bytes = bytes;
2951         u64 ret = 0;
2955
2956         err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
2957         if (err) {
2958                 *max_extent_size = max(get_max_extent_size(entry),
2959                                        *max_extent_size);
2960                 return 0;
2961         }
2962
2963         ret = search_start;
2964         __bitmap_clear_bits(ctl, entry, ret, bytes);
2965
2966         return ret;
2967 }
2968
2969 /*
2970  * given a cluster, try to allocate 'bytes' from it; returns 0
2971  * if it couldn't find anything suitably large, or a logical disk offset
2972  * if things worked out
2973  */
2974 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
2975                              struct btrfs_free_cluster *cluster, u64 bytes,
2976                              u64 min_start, u64 *max_extent_size)
2977 {
2978         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2979         struct btrfs_discard_ctl *discard_ctl =
2980                                         &block_group->fs_info->discard_ctl;
2981         struct btrfs_free_space *entry = NULL;
2982         struct rb_node *node;
2983         u64 ret = 0;
2984
2985         spin_lock(&cluster->lock);
2986         if (bytes > cluster->max_size)
2987                 goto out;
2988
2989         if (cluster->block_group != block_group)
2990                 goto out;
2991
2992         node = rb_first(&cluster->root);
2993         if (!node)
2994                 goto out;
2995
2996         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2997         while (1) {
2998                 if (entry->bytes < bytes)
2999                         *max_extent_size = max(get_max_extent_size(entry),
3000                                                *max_extent_size);
3001
3002                 if (entry->bytes < bytes ||
3003                     (!entry->bitmap && entry->offset < min_start)) {
3004                         node = rb_next(&entry->offset_index);
3005                         if (!node)
3006                                 break;
3007                         entry = rb_entry(node, struct btrfs_free_space,
3008                                          offset_index);
3009                         continue;
3010                 }
3011
3012                 if (entry->bitmap) {
3013                         ret = btrfs_alloc_from_bitmap(block_group,
3014                                                       cluster, entry, bytes,
3015                                                       cluster->window_start,
3016                                                       max_extent_size);
3017                         if (ret == 0) {
3018                                 node = rb_next(&entry->offset_index);
3019                                 if (!node)
3020                                         break;
3021                                 entry = rb_entry(node, struct btrfs_free_space,
3022                                                  offset_index);
3023                                 continue;
3024                         }
3025                         cluster->window_start += bytes;
3026                 } else {
3027                         ret = entry->offset;
3028
3029                         entry->offset += bytes;
3030                         entry->bytes -= bytes;
3031                 }
3032
3033                 if (entry->bytes == 0)
3034                         rb_erase(&entry->offset_index, &cluster->root);
3035                 break;
3036         }
3037 out:
3038         spin_unlock(&cluster->lock);
3039
3040         if (!ret)
3041                 return 0;
3042
3043         spin_lock(&ctl->tree_lock);
3044
3045         if (!btrfs_free_space_trimmed(entry))
3046                 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3047
3048         ctl->free_space -= bytes;
3049         if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3050                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3051         if (entry->bytes == 0) {
3052                 ctl->free_extents--;
3053                 if (entry->bitmap) {
3054                         kmem_cache_free(btrfs_free_space_bitmap_cachep,
3055                                         entry->bitmap);
3056                         ctl->total_bitmaps--;
3057                         ctl->op->recalc_thresholds(ctl);
3058                 } else if (!btrfs_free_space_trimmed(entry)) {
3059                         ctl->discardable_extents[BTRFS_STAT_CURR]--;
3060                 }
3061                 kmem_cache_free(btrfs_free_space_cachep, entry);
3062         }
3063
3064         spin_unlock(&ctl->tree_lock);
3065
3066         return ret;
3067 }
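
/*
 * Locking note for the above: cluster->lock protects the cluster's
 * private rbtree while the allocation is carved out of an entry;
 * ctl->tree_lock is only taken afterwards, to update the free space
 * accounting (free_space, free_extents and the discardable stats) and
 * to free an entry that was drained to zero bytes.
 */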
3068
3069 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
3070                                 struct btrfs_free_space *entry,
3071                                 struct btrfs_free_cluster *cluster,
3072                                 u64 offset, u64 bytes,
3073                                 u64 cont1_bytes, u64 min_bytes)
3074 {
3075         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3076         unsigned long next_zero;
3077         unsigned long i;
3078         unsigned long want_bits;
3079         unsigned long min_bits;
3080         unsigned long found_bits;
3081         unsigned long max_bits = 0;
3082         unsigned long start = 0;
3083         unsigned long total_found = 0;
3084         int ret;
3085
3086         i = offset_to_bit(entry->offset, ctl->unit,
3087                           max_t(u64, offset, entry->offset));
3088         want_bits = bytes_to_bits(bytes, ctl->unit);
3089         min_bits = bytes_to_bits(min_bytes, ctl->unit);
3090
3091         /*
3092          * Don't bother looking for a cluster in this bitmap if it's heavily
3093          * fragmented.
3094          */
3095         if (entry->max_extent_size &&
3096             entry->max_extent_size < cont1_bytes)
3097                 return -ENOSPC;
3098 again:
3099         found_bits = 0;
3100         for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
3101                 next_zero = find_next_zero_bit(entry->bitmap,
3102                                                BITS_PER_BITMAP, i);
3103                 if (next_zero - i >= min_bits) {
3104                         found_bits = next_zero - i;
3105                         if (found_bits > max_bits)
3106                                 max_bits = found_bits;
3107                         break;
3108                 }
3109                 if (next_zero - i > max_bits)
3110                         max_bits = next_zero - i;
3111                 i = next_zero;
3112         }
3113
3114         if (!found_bits) {
3115                 entry->max_extent_size = (u64)max_bits * ctl->unit;
3116                 return -ENOSPC;
3117         }
3118
3119         if (!total_found) {
3120                 start = i;
3121                 cluster->max_size = 0;
3122         }
3123
3124         total_found += found_bits;
3125
3126         if (cluster->max_size < found_bits * ctl->unit)
3127                 cluster->max_size = found_bits * ctl->unit;
3128
3129         if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3130                 i = next_zero + 1;
3131                 goto again;
3132         }
3133
3134         cluster->window_start = start * ctl->unit + entry->offset;
3135         rb_erase(&entry->offset_index, &ctl->free_space_offset);
3136         ret = tree_insert_offset(&cluster->root, entry->offset,
3137                                  &entry->offset_index, 1);
3138         ASSERT(!ret); /* -EEXIST; Logic error */
3139
3140         trace_btrfs_setup_cluster(block_group, cluster,
3141                                   total_found * ctl->unit, 1);
3142         return 0;
3143 }
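
/*
 * Bit math sketch for the above, assuming a 4K page and ctl->unit ==
 * 4K: BITS_PER_BITMAP is 32768, so one bitmap covers 128M.  A request
 * of bytes = 1M gives want_bits = 256, and min_bytes = 64K gives
 * min_bits = 16, i.e. any run of at least 16 set bits may join the
 * window, and the scan loops until 256 bits are accumulated and one
 * run satisfies cont1_bytes.
 */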
3144
3145 /*
3146  * This searches the block group for just extents to fill the cluster with.
3147  * Try to find a cluster with at least bytes total bytes, at least one
3148  * extent of cont1_bytes, and other extents of at least min_bytes.
3149  */
3150 static noinline int
3151 setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3152                         struct btrfs_free_cluster *cluster,
3153                         struct list_head *bitmaps, u64 offset, u64 bytes,
3154                         u64 cont1_bytes, u64 min_bytes)
3155 {
3156         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3157         struct btrfs_free_space *first = NULL;
3158         struct btrfs_free_space *entry = NULL;
3159         struct btrfs_free_space *last;
3160         struct rb_node *node;
3161         u64 window_free;
3162         u64 max_extent;
3163         u64 total_size = 0;
3164
3165         entry = tree_search_offset(ctl, offset, 0, 1);
3166         if (!entry)
3167                 return -ENOSPC;
3168
3169         /*
3170          * We don't want bitmaps, so just move along until we find a normal
3171          * extent entry.
3172          */
3173         while (entry->bitmap || entry->bytes < min_bytes) {
3174                 if (entry->bitmap && list_empty(&entry->list))
3175                         list_add_tail(&entry->list, bitmaps);
3176                 node = rb_next(&entry->offset_index);
3177                 if (!node)
3178                         return -ENOSPC;
3179                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3180         }
3181
3182         window_free = entry->bytes;
3183         max_extent = entry->bytes;
3184         first = entry;
3185         last = entry;
3186
3187         for (node = rb_next(&entry->offset_index); node;
3188              node = rb_next(&entry->offset_index)) {
3189                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3190
3191                 if (entry->bitmap) {
3192                         if (list_empty(&entry->list))
3193                                 list_add_tail(&entry->list, bitmaps);
3194                         continue;
3195                 }
3196
3197                 if (entry->bytes < min_bytes)
3198                         continue;
3199
3200                 last = entry;
3201                 window_free += entry->bytes;
3202                 if (entry->bytes > max_extent)
3203                         max_extent = entry->bytes;
3204         }
3205
3206         if (window_free < bytes || max_extent < cont1_bytes)
3207                 return -ENOSPC;
3208
3209         cluster->window_start = first->offset;
3210
3211         node = &first->offset_index;
3212
3213         /*
3214          * now we've found our entries, pull them out of the free space
3215          * cache and put them into the cluster rbtree
3216          */
3217         do {
3218                 int ret;
3219
3220                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3221                 node = rb_next(&entry->offset_index);
3222                 if (entry->bitmap || entry->bytes < min_bytes)
3223                         continue;
3224
3225                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
3226                 ret = tree_insert_offset(&cluster->root, entry->offset,
3227                                          &entry->offset_index, 0);
3228                 total_size += entry->bytes;
3229                 ASSERT(!ret); /* -EEXIST; Logic error */
3230         } while (node && entry != last);
3231
3232         cluster->max_size = max_extent;
3233         trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
3234         return 0;
3235 }
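
/*
 * Note on the window accounting above: extent entries smaller than
 * min_bytes are skipped but deliberately left in the free space tree,
 * so window_free only sums the qualifying extents between first and
 * last, which are then the only ones moved into the cluster rbtree.
 */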
3236
3237 /*
3238  * This specifically looks for bitmaps that may work in the cluster; we assume
3239  * that we have already failed to find extents that will work.
3240  */
3241 static noinline int
3242 setup_cluster_bitmap(struct btrfs_block_group *block_group,
3243                      struct btrfs_free_cluster *cluster,
3244                      struct list_head *bitmaps, u64 offset, u64 bytes,
3245                      u64 cont1_bytes, u64 min_bytes)
3246 {
3247         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3248         struct btrfs_free_space *entry = NULL;
3249         int ret = -ENOSPC;
3250         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
3251
3252         if (ctl->total_bitmaps == 0)
3253                 return -ENOSPC;
3254
3255         /*
3256          * The bitmap that covers offset won't be in the list unless offset
3257          * is just its start offset.
3258          */
3259         if (!list_empty(bitmaps))
3260                 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3261
3262         if (!entry || entry->offset != bitmap_offset) {
3263                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3264                 if (entry && list_empty(&entry->list))
3265                         list_add(&entry->list, bitmaps);
3266         }
3267
3268         list_for_each_entry(entry, bitmaps, list) {
3269                 if (entry->bytes < bytes)
3270                         continue;
3271                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
3272                                            bytes, cont1_bytes, min_bytes);
3273                 if (!ret)
3274                         return 0;
3275         }
3276
3277         /*
3278          * The bitmaps list has all the bitmaps that record free space
3279          * starting after offset, so no more search is required.
3280          */
3281         return -ENOSPC;
3282 }
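
/*
 * Example for the lookup above (assuming 4K pages and sectors): each
 * bitmap spans BITS_PER_BITMAP * ctl->unit = 128M, so an offset that
 * lands, say, 2M into a bitmap's range resolves via offset_to_bitmap()
 * to that bitmap's start, which the earlier extent scan could only
 * have queued on @bitmaps if offset was exactly that start.
 */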
3283
3284 /*
3285  * Here we try to find a cluster of blocks in a block group.  The goal
3286  * is to find at least bytes + empty_size worth of free space; it may not
3287  * all be in one contiguous area.
3288  *
3289  * Returns zero and sets up the cluster if things worked out, otherwise
3290  * it returns -ENOSPC.
3291  */
3292 int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
3293                              struct btrfs_free_cluster *cluster,
3294                              u64 offset, u64 bytes, u64 empty_size)
3295 {
3296         struct btrfs_fs_info *fs_info = block_group->fs_info;
3297         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3298         struct btrfs_free_space *entry, *tmp;
3299         LIST_HEAD(bitmaps);
3300         u64 min_bytes;
3301         u64 cont1_bytes;
3302         int ret;
3303
3304         /*
3305          * Choose the minimum extent size we'll require for this
3306          * cluster.  For SSD_SPREAD, don't allow any fragmentation.
3307          * For metadata, allow allocations with smaller extents.  For
3308          * data, keep it dense.
3309          */
3310         if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
3311                 cont1_bytes = min_bytes = bytes + empty_size;
3312         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
3313                 cont1_bytes = bytes;
3314                 min_bytes = fs_info->sectorsize;
3315         } else {
3316                 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
3317                 min_bytes = fs_info->sectorsize;
3318         }
3319
3320         spin_lock(&ctl->tree_lock);
3321
3322         /*
3323          * If we know we don't have enough space to make a cluster don't even
3324          * bother doing all the work to try and find one.
3325          */
3326         if (ctl->free_space < bytes) {
3327                 spin_unlock(&ctl->tree_lock);
3328                 return -ENOSPC;
3329         }
3330
3331         spin_lock(&cluster->lock);
3332
3333         /* someone already found a cluster, hooray */
3334         if (cluster->block_group) {
3335                 ret = 0;
3336                 goto out;
3337         }
3338
3339         trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3340                                  min_bytes);
3341
3342         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
3343                                       bytes + empty_size,
3344                                       cont1_bytes, min_bytes);
3345         if (ret)
3346                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
3347                                            offset, bytes + empty_size,
3348                                            cont1_bytes, min_bytes);
3349
3350         /* Clear our temporary list */
3351         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3352                 list_del_init(&entry->list);
3353
3354         if (!ret) {
3355                 btrfs_get_block_group(block_group);
3356                 list_add_tail(&cluster->block_group_list,
3357                               &block_group->cluster_list);
3358                 cluster->block_group = block_group;
3359         } else {
3360                 trace_btrfs_failed_cluster_setup(block_group);
3361         }
3362 out:
3363         spin_unlock(&cluster->lock);
3364         spin_unlock(&ctl->tree_lock);
3365
3366         return ret;
3367 }
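
/*
 * Example of the sizing policy above (illustrative): for a data
 * allocation of bytes = 1M with empty_size = 4M and no SSD_SPREAD,
 * cont1_bytes = max(1M, 5M >> 2) = 1.25M and min_bytes = sectorsize,
 * so the cluster must gather 5M of free space overall and include at
 * least one 1.25M contiguous run.
 */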
3368
3369 /*
3370  * simple code to zero out a cluster
3371  */
3372 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3373 {
3374         spin_lock_init(&cluster->lock);
3375         spin_lock_init(&cluster->refill_lock);
3376         cluster->root = RB_ROOT;
3377         cluster->max_size = 0;
3378         cluster->fragmented = false;
3379         INIT_LIST_HEAD(&cluster->block_group_list);
3380         cluster->block_group = NULL;
3381 }
3382
3383 static int do_trimming(struct btrfs_block_group *block_group,
3384                        u64 *total_trimmed, u64 start, u64 bytes,
3385                        u64 reserved_start, u64 reserved_bytes,
3386                        enum btrfs_trim_state reserved_trim_state,
3387                        struct btrfs_trim_range *trim_entry)
3388 {
3389         struct btrfs_space_info *space_info = block_group->space_info;
3390         struct btrfs_fs_info *fs_info = block_group->fs_info;
3391         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3392         int ret;
3393         int update = 0;
3394         const u64 end = start + bytes;
3395         const u64 reserved_end = reserved_start + reserved_bytes;
3396         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3397         u64 trimmed = 0;
3398
3399         spin_lock(&space_info->lock);
3400         spin_lock(&block_group->lock);
3401         if (!block_group->ro) {
3402                 block_group->reserved += reserved_bytes;
3403                 space_info->bytes_reserved += reserved_bytes;
3404                 update = 1;
3405         }
3406         spin_unlock(&block_group->lock);
3407         spin_unlock(&space_info->lock);
3408
3409         ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3410         if (!ret) {
3411                 *total_trimmed += trimmed;
3412                 trim_state = BTRFS_TRIM_STATE_TRIMMED;
3413         }
3414
3415         mutex_lock(&ctl->cache_writeout_mutex);
3416         if (reserved_start < start)
3417                 __btrfs_add_free_space(fs_info, ctl, reserved_start,
3418                                        start - reserved_start,
3419                                        reserved_trim_state);
3420         if (end < reserved_end)
3421                 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3422                                        reserved_trim_state);
3423         __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
3424         list_del(&trim_entry->list);
3425         mutex_unlock(&ctl->cache_writeout_mutex);
3426
3427         if (update) {
3428                 spin_lock(&space_info->lock);
3429                 spin_lock(&block_group->lock);
3430                 if (block_group->ro)
3431                         space_info->bytes_readonly += reserved_bytes;
3432                 block_group->reserved -= reserved_bytes;
3433                 space_info->bytes_reserved -= reserved_bytes;
3434                 spin_unlock(&block_group->lock);
3435                 spin_unlock(&space_info->lock);
3436         }
3437
3438         return ret;
3439 }
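
/*
 * Carve-up example for the above (illustrative numbers): with a
 * reserved range of [0, 128K) and an actual trim of [32K, 96K),
 * do_trimming() re-adds [0, 32K) and [96K, 128K) with the reserved
 * range's prior trim state, while [32K, 96K) comes back as trimmed,
 * or as untrimmed if btrfs_discard_extent() failed.
 */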
3440
3441 /*
3442  * If @async is set, then we will trim 1 region and return.
3443  */
3444 static int trim_no_bitmap(struct btrfs_block_group *block_group,
3445                           u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3446                           bool async)
3447 {
3448         struct btrfs_discard_ctl *discard_ctl =
3449                                         &block_group->fs_info->discard_ctl;
3450         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3451         struct btrfs_free_space *entry;
3452         struct rb_node *node;
3453         int ret = 0;
3454         u64 extent_start;
3455         u64 extent_bytes;
3456         enum btrfs_trim_state extent_trim_state;
3457         u64 bytes;
3458         const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3459
3460         while (start < end) {
3461                 struct btrfs_trim_range trim_entry;
3462
3463                 mutex_lock(&ctl->cache_writeout_mutex);
3464                 spin_lock(&ctl->tree_lock);
3465
3466                 if (ctl->free_space < minlen)
3467                         goto out_unlock;
3468
3469                 entry = tree_search_offset(ctl, start, 0, 1);
3470                 if (!entry)
3471                         goto out_unlock;
3472
3473                 /* Skip bitmaps and, if async, already trimmed entries */
3474                 while (entry->bitmap ||
3475                        (async && btrfs_free_space_trimmed(entry))) {
3476                         node = rb_next(&entry->offset_index);
3477                         if (!node)
3478                                 goto out_unlock;
3479                         entry = rb_entry(node, struct btrfs_free_space,
3480                                          offset_index);
3481                 }
3482
3483                 if (entry->offset >= end)
3484                         goto out_unlock;
3485
3486                 extent_start = entry->offset;
3487                 extent_bytes = entry->bytes;
3488                 extent_trim_state = entry->trim_state;
3489                 if (async) {
3490                         start = entry->offset;
3491                         bytes = entry->bytes;
3492                         if (bytes < minlen) {
3493                                 spin_unlock(&ctl->tree_lock);
3494                                 mutex_unlock(&ctl->cache_writeout_mutex);
3495                                 goto next;
3496                         }
3497                         unlink_free_space(ctl, entry);
3498                         /*
3499                          * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3500                          * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3501                          * X when we come back around.  So trim it now.
3502                          */
3503                         if (max_discard_size &&
3504                             bytes >= (max_discard_size +
3505                                       BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3506                                 bytes = max_discard_size;
3507                                 extent_bytes = max_discard_size;
3508                                 entry->offset += max_discard_size;
3509                                 entry->bytes -= max_discard_size;
3510                                 link_free_space(ctl, entry);
3511                         } else {
3512                                 kmem_cache_free(btrfs_free_space_cachep, entry);
3513                         }
3514                 } else {
3515                         start = max(start, extent_start);
3516                         bytes = min(extent_start + extent_bytes, end) - start;
3517                         if (bytes < minlen) {
3518                                 spin_unlock(&ctl->tree_lock);
3519                                 mutex_unlock(&ctl->cache_writeout_mutex);
3520                                 goto next;
3521                         }
3522
3523                         unlink_free_space(ctl, entry);
3524                         kmem_cache_free(btrfs_free_space_cachep, entry);
3525                 }
3526
3527                 spin_unlock(&ctl->tree_lock);
3528                 trim_entry.start = extent_start;
3529                 trim_entry.bytes = extent_bytes;
3530                 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3531                 mutex_unlock(&ctl->cache_writeout_mutex);
3532
3533                 ret = do_trimming(block_group, total_trimmed, start, bytes,
3534                                   extent_start, extent_bytes, extent_trim_state,
3535                                   &trim_entry);
3536                 if (ret) {
3537                         block_group->discard_cursor = start + bytes;
3538                         break;
3539                 }
3540 next:
3541                 start += bytes;
3542                 block_group->discard_cursor = start;
3543                 if (async && *total_trimmed)
3544                         break;
3545
3546                 if (fatal_signal_pending(current)) {
3547                         ret = -ERESTARTSYS;
3548                         break;
3549                 }
3550
3551                 cond_resched();
3552         }
3553
3554         return ret;
3555
3556 out_unlock:
3557         block_group->discard_cursor = btrfs_block_group_end(block_group);
3558         spin_unlock(&ctl->tree_lock);
3559         mutex_unlock(&ctl->cache_writeout_mutex);
3560
3561         return ret;
3562 }
3563
3564 /*
3565  * If we break out of trimming a bitmap prematurely, we should reset the
3566  * trimming bit.  In a rather contrived case, it's possible to race here so
3567  * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3568  *
3569  * start = start of bitmap
3570  * end = near end of bitmap
3571  *
3572  * Thread 1:                    Thread 2:
3573  * trim_bitmaps(start)
3574  *                              trim_bitmaps(end)
3575  *                              end_trimming_bitmap()
3576  * reset_trimming_bitmap()
3577  */
3578 static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3579 {
3580         struct btrfs_free_space *entry;
3581
3582         spin_lock(&ctl->tree_lock);
3583         entry = tree_search_offset(ctl, offset, 1, 0);
3584         if (entry) {
3585                 if (btrfs_free_space_trimmed(entry)) {
3586                         ctl->discardable_extents[BTRFS_STAT_CURR] +=
3587                                 entry->bitmap_extents;
3588                         ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3589                 }
3590                 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3591         }
3592
3593         spin_unlock(&ctl->tree_lock);
3594 }
3595
3596 static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3597                                 struct btrfs_free_space *entry)
3598 {
3599         if (btrfs_free_space_trimming_bitmap(entry)) {
3600                 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3601                 ctl->discardable_extents[BTRFS_STAT_CURR] -=
3602                         entry->bitmap_extents;
3603                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3604         }
3605 }
3606
3607 /*
3608  * If @async is set, then we will trim 1 region and return.
3609  */
3610 static int trim_bitmaps(struct btrfs_block_group *block_group,
3611                         u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3612                         u64 maxlen, bool async)
3613 {
3614         struct btrfs_discard_ctl *discard_ctl =
3615                                         &block_group->fs_info->discard_ctl;
3616         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3617         struct btrfs_free_space *entry;
3618         int ret = 0;
3619         int ret2;
3620         u64 bytes;
3621         u64 offset = offset_to_bitmap(ctl, start);
3622         const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3623
3624         while (offset < end) {
3625                 bool next_bitmap = false;
3626                 struct btrfs_trim_range trim_entry;
3627
3628                 mutex_lock(&ctl->cache_writeout_mutex);
3629                 spin_lock(&ctl->tree_lock);
3630
3631                 if (ctl->free_space < minlen) {
3632                         block_group->discard_cursor =
3633                                 btrfs_block_group_end(block_group);
3634                         spin_unlock(&ctl->tree_lock);
3635                         mutex_unlock(&ctl->cache_writeout_mutex);
3636                         break;
3637                 }
3638
3639                 entry = tree_search_offset(ctl, offset, 1, 0);
3640                 /*
3641                  * Bitmaps are marked trimmed lossily now to prevent constant
3642                  * discarding of the same bitmap (the reason why we are bound
3643                  * by the filters).  So, retrim the block group bitmaps when we
3644                  * are preparing to punt to the unused_bgs list.  This uses
3645                  * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED,
3646                  * which is the only discard index that sets minlen to 0.
3647                  */
3648                 if (!entry || (async && minlen && start == offset &&
3649                                btrfs_free_space_trimmed(entry))) {
3650                         spin_unlock(&ctl->tree_lock);
3651                         mutex_unlock(&ctl->cache_writeout_mutex);
3652                         next_bitmap = true;
3653                         goto next;
3654                 }
3655
3656                 /*
3657                  * Async discard bitmap trimming begins by setting the start
3658                  * to be key.objectid, and offset_to_bitmap() aligns to the
3659                  * start of the bitmap.  This lets us know we are fully
3660                  * scanning the bitmap rather than only some portion of it.
3661                  */
3662                 if (start == offset)
3663                         entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3664
3665                 bytes = minlen;
3666                 ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3667                 if (ret2 || start >= end) {
3668                         /*
3669                          * We lossily consider a bitmap trimmed if we only skip
3670                          * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3671                          */
3672                         if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3673                                 end_trimming_bitmap(ctl, entry);
3674                         else
3675                                 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3676                         spin_unlock(&ctl->tree_lock);
3677                         mutex_unlock(&ctl->cache_writeout_mutex);
3678                         next_bitmap = true;
3679                         goto next;
3680                 }
3681
3682                 /*
3683                  * We already trimmed a region, but are using the locking above
3684                  * to reset the trim_state.
3685                  */
3686                 if (async && *total_trimmed) {
3687                         spin_unlock(&ctl->tree_lock);
3688                         mutex_unlock(&ctl->cache_writeout_mutex);
3689                         goto out;
3690                 }
3691
3692                 bytes = min(bytes, end - start);
3693                 if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
3694                         spin_unlock(&ctl->tree_lock);
3695                         mutex_unlock(&ctl->cache_writeout_mutex);
3696                         goto next;
3697                 }
3698
3699                 /*
3700                  * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3701                  * If X < @minlen, we won't trim X when we come back around.
3702                  * So trim it now.  We differ here from trimming extents as we
3703                  * don't keep individual state per bit.
3704                  */
3705                 if (async &&
3706                     max_discard_size &&
3707                     bytes > (max_discard_size + minlen))
3708                         bytes = max_discard_size;
3709
3710                 bitmap_clear_bits(ctl, entry, start, bytes);
3711                 if (entry->bytes == 0)
3712                         free_bitmap(ctl, entry);
3713
3714                 spin_unlock(&ctl->tree_lock);
3715                 trim_entry.start = start;
3716                 trim_entry.bytes = bytes;
3717                 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3718                 mutex_unlock(&ctl->cache_writeout_mutex);
3719
3720                 ret = do_trimming(block_group, total_trimmed, start, bytes,
3721                                   start, bytes, BTRFS_TRIM_STATE_UNTRIMMED, &trim_entry);
3722                 if (ret) {
3723                         reset_trimming_bitmap(ctl, offset);
3724                         block_group->discard_cursor =
3725                                 btrfs_block_group_end(block_group);
3726                         break;
3727                 }
3728 next:
3729                 if (next_bitmap) {
3730                         offset += BITS_PER_BITMAP * ctl->unit;
3731                         start = offset;
3732                 } else {
3733                         start += bytes;
3734                 }
3735                 block_group->discard_cursor = start;
3736
3737                 if (fatal_signal_pending(current)) {
3738                         if (start != offset)
3739                                 reset_trimming_bitmap(ctl, offset);
3740                         ret = -ERESTARTSYS;
3741                         break;
3742                 }
3743
3744                 cond_resched();
3745         }
3746
3747         if (offset >= end)
3748                 block_group->discard_cursor = end;
3749
3750 out:
3751         return ret;
3752 }
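
/*
 * Cursor stepping note for the loop above (again assuming 4K pages and
 * sectors): on next_bitmap the scan jumps a whole bitmap, i.e. offset
 * advances by BITS_PER_BITMAP * ctl->unit = 128M; otherwise start
 * advances by the bytes just trimmed.  discard_cursor records that
 * progress so an async pass can resume where the last one stopped.
 */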
3753
3754 int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3755                            u64 *trimmed, u64 start, u64 end, u64 minlen)
3756 {
3757         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3758         int ret;
3759         u64 rem = 0;
3760
3761         *trimmed = 0;
3762
3763         spin_lock(&block_group->lock);
3764         if (block_group->removed) {
3765                 spin_unlock(&block_group->lock);
3766                 return 0;
3767         }
3768         btrfs_freeze_block_group(block_group);
3769         spin_unlock(&block_group->lock);
3770
3771         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3772         if (ret)
3773                 goto out;
3774
3775         ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3776         div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3777         /* If we ended in the middle of a bitmap, reset the trimming flag */
3778         if (rem)
3779                 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3780 out:
3781         btrfs_unfreeze_block_group(block_group);
3782         return ret;
3783 }
3784
3785 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3786                                    u64 *trimmed, u64 start, u64 end, u64 minlen,
3787                                    bool async)
3788 {
3789         int ret;
3790
3791         *trimmed = 0;
3792
3793         spin_lock(&block_group->lock);
3794         if (block_group->removed) {
3795                 spin_unlock(&block_group->lock);
3796                 return 0;
3797         }
3798         btrfs_freeze_block_group(block_group);
3799         spin_unlock(&block_group->lock);
3800
3801         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3802         btrfs_unfreeze_block_group(block_group);
3803
3804         return ret;
3805 }
3806
3807 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3808                                    u64 *trimmed, u64 start, u64 end, u64 minlen,
3809                                    u64 maxlen, bool async)
3810 {
3811         int ret;
3812
3813         *trimmed = 0;
3814
3815         spin_lock(&block_group->lock);
3816         if (block_group->removed) {
3817                 spin_unlock(&block_group->lock);
3818                 return 0;
3819         }
3820         btrfs_freeze_block_group(block_group);
3821         spin_unlock(&block_group->lock);
3822
3823         ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3824                            async);
3825
3826         btrfs_unfreeze_block_group(block_group);
3827
3828         return ret;
3829 }
3830
3831 /*
3832  * Find the left-most item in the cache tree, and then return the
3833  * smallest inode number in the item.
3834  *
3835  * Note: the returned inode number may not be the smallest one in
3836  * the tree, if the left-most item is a bitmap.
3837  */
3838 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
3839 {
3840         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
3841         struct btrfs_free_space *entry = NULL;
3842         u64 ino = 0;
3843
3844         spin_lock(&ctl->tree_lock);
3845
3846         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
3847                 goto out;
3848
3849         entry = rb_entry(rb_first(&ctl->free_space_offset),
3850                          struct btrfs_free_space, offset_index);
3851
3852         if (!entry->bitmap) {
3853                 ino = entry->offset;
3854
3855                 unlink_free_space(ctl, entry);
3856                 entry->offset++;
3857                 entry->bytes--;
3858                 if (!entry->bytes)
3859                         kmem_cache_free(btrfs_free_space_cachep, entry);
3860                 else
3861                         link_free_space(ctl, entry);
3862         } else {
3863                 u64 offset = 0;
3864                 u64 count = 1;
3865                 int ret;
3866
3867                 ret = search_bitmap(ctl, entry, &offset, &count, true);
3868                 /* Logic error; should be empty if it can't find anything */
3869                 ASSERT(!ret);
3870
3871                 ino = offset;
3872                 bitmap_clear_bits(ctl, entry, offset, 1);
3873                 if (entry->bytes == 0)
3874                         free_bitmap(ctl, entry);
3875         }
3876 out:
3877         spin_unlock(&ctl->tree_lock);
3878
3879         return ino;
3880 }
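
/*
 * Example of the extent case above: if the left-most entry is the
 * extent [256, 256 + 10), this returns ino 256 and relinks the entry
 * as [257, 257 + 9); the bitmap case instead clears a single bit with
 * bitmap_clear_bits().
 */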
3881
3882 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
3883                                     struct btrfs_path *path)
3884 {
3885         struct inode *inode = NULL;
3886
3887         spin_lock(&root->ino_cache_lock);
3888         if (root->ino_cache_inode)
3889                 inode = igrab(root->ino_cache_inode);
3890         spin_unlock(&root->ino_cache_lock);
3891         if (inode)
3892                 return inode;
3893
3894         inode = __lookup_free_space_inode(root, path, 0);
3895         if (IS_ERR(inode))
3896                 return inode;
3897
3898         spin_lock(&root->ino_cache_lock);
3899         if (!btrfs_fs_closing(root->fs_info))
3900                 root->ino_cache_inode = igrab(inode);
3901         spin_unlock(&root->ino_cache_lock);
3902
3903         return inode;
3904 }
3905
3906 int create_free_ino_inode(struct btrfs_root *root,
3907                           struct btrfs_trans_handle *trans,
3908                           struct btrfs_path *path)
3909 {
3910         return __create_free_space_inode(root, trans, path,
3911                                          BTRFS_FREE_INO_OBJECTID, 0);
3912 }
3913
3914 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3915 {
3916         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3917         struct btrfs_path *path;
3918         struct inode *inode;
3919         int ret = 0;
3920         u64 root_gen = btrfs_root_generation(&root->root_item);
3921
3922         if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3923                 return 0;
3924
3925         /*
3926          * If we're unmounting then just return, since this does a search on the
3927          * normal root and not the commit root and we could deadlock.
3928          */
3929         if (btrfs_fs_closing(fs_info))
3930                 return 0;
3931
3932         path = btrfs_alloc_path();
3933         if (!path)
3934                 return 0;
3935
3936         inode = lookup_free_ino_inode(root, path);
3937         if (IS_ERR(inode))
3938                 goto out;
3939
3940         if (root_gen != BTRFS_I(inode)->generation)
3941                 goto out_put;
3942
3943         ret = __load_free_space_cache(root, inode, ctl, path, 0);
3944
3945         if (ret < 0)
3946                 btrfs_err(fs_info,
3947                         "failed to load free ino cache for root %llu",
3948                         root->root_key.objectid);
3949 out_put:
3950         iput(inode);
3951 out:
3952         btrfs_free_path(path);
3953         return ret;
3954 }
3955
3956 int btrfs_write_out_ino_cache(struct btrfs_root *root,
3957                               struct btrfs_trans_handle *trans,
3958                               struct btrfs_path *path,
3959                               struct inode *inode)
3960 {
3961         struct btrfs_fs_info *fs_info = root->fs_info;
3962         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3963         int ret;
3964         struct btrfs_io_ctl io_ctl;
3965         bool release_metadata = true;
3966
3967         if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3968                 return 0;
3969
3970         memset(&io_ctl, 0, sizeof(io_ctl));
3971         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
3972         if (!ret) {
3973                 /*
3974                  * At this point writepages() didn't error out, so our metadata
3975                  * reservation is released when the writeback finishes, at
3976                  * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
3977                  * with or without an error.
3978                  */
3979                 release_metadata = false;
3980                 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
3981         }
3982
3983         if (ret) {
3984                 if (release_metadata)
3985                         btrfs_delalloc_release_metadata(BTRFS_I(inode),
3986                                         inode->i_size, true);
3987                 btrfs_debug(fs_info,
3988                           "failed to write free ino cache for root %llu error %d",
3989                           root->root_key.objectid, ret);
3990         }
3991
3992         return ret;
3993 }
3994
3995 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3996 /*
3997  * Use this if you need to make a bitmap or extent entry specifically.  It
3998  * doesn't do any of the merging that add_free_space does, and it acts a lot
3999  * like how the free space cache loading works, so you can get really weird
4000  * configurations.
4001  */
4002 int test_add_free_space_entry(struct btrfs_block_group *cache,
4003                               u64 offset, u64 bytes, bool bitmap)
4004 {
4005         struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4006         struct btrfs_free_space *info = NULL, *bitmap_info;
4007         void *map = NULL;
4008         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
4009         u64 bytes_added;
4010         int ret;
4011
4012 again:
4013         if (!info) {
4014                 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4015                 if (!info)
4016                         return -ENOMEM;
4017         }
4018
4019         if (!bitmap) {
4020                 spin_lock(&ctl->tree_lock);
4021                 info->offset = offset;
4022                 info->bytes = bytes;
4023                 info->max_extent_size = 0;
4024                 ret = link_free_space(ctl, info);
4025                 spin_unlock(&ctl->tree_lock);
4026                 if (ret)
4027                         kmem_cache_free(btrfs_free_space_cachep, info);
4028                 return ret;
4029         }
4030
4031         if (!map) {
4032                 map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
4033                 if (!map) {
4034                         kmem_cache_free(btrfs_free_space_cachep, info);
4035                         return -ENOMEM;
4036                 }
4037         }
4038
4039         spin_lock(&ctl->tree_lock);
4040         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4041                                          1, 0);
4042         if (!bitmap_info) {
4043                 info->bitmap = map;
4044                 map = NULL;
4045                 add_new_bitmap(ctl, info, offset);
4046                 bitmap_info = info;
4047                 info = NULL;
4048         }
4049
4050         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4051                                           trim_state);
4052
4053         bytes -= bytes_added;
4054         offset += bytes_added;
4055         spin_unlock(&ctl->tree_lock);
4056
4057         if (bytes)
4058                 goto again;
4059
4060         if (info)
4061                 kmem_cache_free(btrfs_free_space_cachep, info);
4062         if (map)
4063                 kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
4064         return 0;
4065 }
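
/*
 * Sketch of typical sanity-test usage (hypothetical offsets/sizes):
 * seed an extent entry and a bitmap-backed range, then probe them with
 * test_check_exists() below:
 *
 *	ret = test_add_free_space_entry(cache, 0, SZ_4K, false);
 *	ret = test_add_free_space_entry(cache, SZ_128M, SZ_1M, true);
 *	ASSERT(test_check_exists(cache, SZ_128M, SZ_4K));
 */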
4066
4067 /*
4068  * Checks to see if the given range is in the free space cache.  This is really
4069  * just used to check the absence of space, so if there is free space in the
4070  * range at all we will return 1.
4071  */
4072 int test_check_exists(struct btrfs_block_group *cache,
4073                       u64 offset, u64 bytes)
4074 {
4075         struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4076         struct btrfs_free_space *info;
4077         int ret = 0;
4078
4079         spin_lock(&ctl->tree_lock);
4080         info = tree_search_offset(ctl, offset, 0, 0);
4081         if (!info) {
4082                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4083                                           1, 0);
4084                 if (!info)
4085                         goto out;
4086         }
4087
4088 have_info:
4089         if (info->bitmap) {
4090                 u64 bit_off, bit_bytes;
4091                 struct rb_node *n;
4092                 struct btrfs_free_space *tmp;
4093
4094                 bit_off = offset;
4095                 bit_bytes = ctl->unit;
4096                 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
4097                 if (!ret) {
4098                         if (bit_off == offset) {
4099                                 ret = 1;
4100                                 goto out;
4101                         } else if (bit_off > offset &&
4102                                    offset + bytes > bit_off) {
4103                                 ret = 1;
4104                                 goto out;
4105                         }
4106                 }
4107
4108                 n = rb_prev(&info->offset_index);
4109                 while (n) {
4110                         tmp = rb_entry(n, struct btrfs_free_space,
4111                                        offset_index);
4112                         if (tmp->offset + tmp->bytes < offset)
4113                                 break;
4114                         if (offset + bytes < tmp->offset) {
4115                                 n = rb_prev(&tmp->offset_index);
4116                                 continue;
4117                         }
4118                         info = tmp;
4119                         goto have_info;
4120                 }
4121
4122                 n = rb_next(&info->offset_index);
4123                 while (n) {
4124                         tmp = rb_entry(n, struct btrfs_free_space,
4125                                        offset_index);
4126                         if (offset + bytes < tmp->offset)
4127                                 break;
4128                         if (tmp->offset + tmp->bytes < offset) {
4129                                 n = rb_next(&tmp->offset_index);
4130                                 continue;
4131                         }
4132                         info = tmp;
4133                         goto have_info;
4134                 }
4135
4136                 ret = 0;
4137                 goto out;
4138         }
4139
4140         if (info->offset == offset) {
4141                 ret = 1;
4142                 goto out;
4143         }
4144
4145         if (offset > info->offset && offset < info->offset + info->bytes)
4146                 ret = 1;
4147 out:
4148         spin_unlock(&ctl->tree_lock);
4149         return ret;
4150 }
4151 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */