fs/btrfs/free-space-cache.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2008 Red Hat.  All rights reserved.
4  */
5
6 #include <linux/pagemap.h>
7 #include <linux/sched.h>
8 #include <linux/sched/signal.h>
9 #include <linux/slab.h>
10 #include <linux/math64.h>
11 #include <linux/ratelimit.h>
12 #include <linux/error-injection.h>
13 #include <linux/sched/mm.h>
14 #include "ctree.h"
15 #include "free-space-cache.h"
16 #include "transaction.h"
17 #include "disk-io.h"
18 #include "extent_io.h"
19 #include "inode-map.h"
20 #include "volumes.h"
21 #include "space-info.h"
22 #include "delalloc-space.h"
23 #include "block-group.h"
24 #include "discard.h"
25
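/*
 * BITS_PER_BITMAP is the number of bits held by a single bitmap page; each
 * bit represents ctl->unit bytes of space (see offset_to_bit() and
 * offset_to_bitmap() further down).  MAX_CACHE_BYTES_PER_GIG and
 * FORCE_EXTENT_THRESHOLD are tuning constants used by the extent/bitmap
 * bookkeeping later in this file when deciding how a region of free space
 * is tracked.
 */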
26 #define BITS_PER_BITMAP         (PAGE_SIZE * 8UL)
27 #define MAX_CACHE_BYTES_PER_GIG SZ_64K
28 #define FORCE_EXTENT_THRESHOLD  SZ_1M
29
30 struct btrfs_trim_range {
31         u64 start;
32         u64 bytes;
33         struct list_head list;
34 };
35
36 static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
37                                 struct btrfs_free_space *bitmap_info);
38 static int link_free_space(struct btrfs_free_space_ctl *ctl,
39                            struct btrfs_free_space *info);
40 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
41                               struct btrfs_free_space *info);
42 static int btrfs_wait_cache_io_root(struct btrfs_root *root,
43                              struct btrfs_trans_handle *trans,
44                              struct btrfs_io_ctl *io_ctl,
45                              struct btrfs_path *path);
46
47 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
48                                                struct btrfs_path *path,
49                                                u64 offset)
50 {
51         struct btrfs_fs_info *fs_info = root->fs_info;
52         struct btrfs_key key;
53         struct btrfs_key location;
54         struct btrfs_disk_key disk_key;
55         struct btrfs_free_space_header *header;
56         struct extent_buffer *leaf;
57         struct inode *inode = NULL;
58         unsigned nofs_flag;
59         int ret;
60
61         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
62         key.offset = offset;
63         key.type = 0;
64
65         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
66         if (ret < 0)
67                 return ERR_PTR(ret);
68         if (ret > 0) {
69                 btrfs_release_path(path);
70                 return ERR_PTR(-ENOENT);
71         }
72
73         leaf = path->nodes[0];
74         header = btrfs_item_ptr(leaf, path->slots[0],
75                                 struct btrfs_free_space_header);
76         btrfs_free_space_key(leaf, header, &disk_key);
77         btrfs_disk_key_to_cpu(&location, &disk_key);
78         btrfs_release_path(path);
79
80         /*
81          * We are often under a trans handle at this point, so we need to make
82          * sure NOFS is set to keep us from deadlocking.
83          */
84         nofs_flag = memalloc_nofs_save();
85         inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
86         btrfs_release_path(path);
87         memalloc_nofs_restore(nofs_flag);
88         if (IS_ERR(inode))
89                 return inode;
90
91         mapping_set_gfp_mask(inode->i_mapping,
92                         mapping_gfp_constraint(inode->i_mapping,
93                         ~(__GFP_FS | __GFP_HIGHMEM)));
94
95         return inode;
96 }
97
98 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
99                 struct btrfs_path *path)
100 {
101         struct btrfs_fs_info *fs_info = block_group->fs_info;
102         struct inode *inode = NULL;
103         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
104
105         spin_lock(&block_group->lock);
106         if (block_group->inode)
107                 inode = igrab(block_group->inode);
108         spin_unlock(&block_group->lock);
109         if (inode)
110                 return inode;
111
112         inode = __lookup_free_space_inode(fs_info->tree_root, path,
113                                           block_group->start);
114         if (IS_ERR(inode))
115                 return inode;
116
117         spin_lock(&block_group->lock);
118         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
119                 btrfs_info(fs_info, "Old style space inode found, converting.");
120                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
121                         BTRFS_INODE_NODATACOW;
122                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
123         }
124
125         if (!block_group->iref) {
126                 block_group->inode = igrab(inode);
127                 block_group->iref = 1;
128         }
129         spin_unlock(&block_group->lock);
130
131         return inode;
132 }
133
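/*
 * Create the inode that backs a free space cache along with the free space
 * header item that points at it.  The header lives in the tree root, keyed
 * as (BTRFS_FREE_SPACE_OBJECTID, 0, <block group start>), and later records
 * how many entries and bitmaps were written plus the generation of the
 * transaction that wrote them (see update_cache_item() below).
 */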
134 static int __create_free_space_inode(struct btrfs_root *root,
135                                      struct btrfs_trans_handle *trans,
136                                      struct btrfs_path *path,
137                                      u64 ino, u64 offset)
138 {
139         struct btrfs_key key;
140         struct btrfs_disk_key disk_key;
141         struct btrfs_free_space_header *header;
142         struct btrfs_inode_item *inode_item;
143         struct extent_buffer *leaf;
144         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
145         int ret;
146
147         ret = btrfs_insert_empty_inode(trans, root, path, ino);
148         if (ret)
149                 return ret;
150
151         /* We inline crc's for the free disk space cache */
152         if (ino != BTRFS_FREE_INO_OBJECTID)
153                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
154
155         leaf = path->nodes[0];
156         inode_item = btrfs_item_ptr(leaf, path->slots[0],
157                                     struct btrfs_inode_item);
158         btrfs_item_key(leaf, &disk_key, path->slots[0]);
159         memzero_extent_buffer(leaf, (unsigned long)inode_item,
160                              sizeof(*inode_item));
161         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
162         btrfs_set_inode_size(leaf, inode_item, 0);
163         btrfs_set_inode_nbytes(leaf, inode_item, 0);
164         btrfs_set_inode_uid(leaf, inode_item, 0);
165         btrfs_set_inode_gid(leaf, inode_item, 0);
166         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
167         btrfs_set_inode_flags(leaf, inode_item, flags);
168         btrfs_set_inode_nlink(leaf, inode_item, 1);
169         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
170         btrfs_set_inode_block_group(leaf, inode_item, offset);
171         btrfs_mark_buffer_dirty(leaf);
172         btrfs_release_path(path);
173
174         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
175         key.offset = offset;
176         key.type = 0;
177         ret = btrfs_insert_empty_item(trans, root, path, &key,
178                                       sizeof(struct btrfs_free_space_header));
179         if (ret < 0) {
180                 btrfs_release_path(path);
181                 return ret;
182         }
183
184         leaf = path->nodes[0];
185         header = btrfs_item_ptr(leaf, path->slots[0],
186                                 struct btrfs_free_space_header);
187         memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
188         btrfs_set_free_space_key(leaf, header, &disk_key);
189         btrfs_mark_buffer_dirty(leaf);
190         btrfs_release_path(path);
191
192         return 0;
193 }
194
195 int create_free_space_inode(struct btrfs_trans_handle *trans,
196                             struct btrfs_block_group *block_group,
197                             struct btrfs_path *path)
198 {
199         int ret;
200         u64 ino;
201
202         ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
203         if (ret < 0)
204                 return ret;
205
206         return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
207                                          ino, block_group->start);
208 }
209
210 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
211                                        struct btrfs_block_rsv *rsv)
212 {
213         u64 needed_bytes;
214         int ret;
215
216         /* 1 for slack space, 1 for updating the inode */
217         needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
218                 btrfs_calc_metadata_size(fs_info, 1);
219
220         spin_lock(&rsv->lock);
221         if (rsv->reserved < needed_bytes)
222                 ret = -ENOSPC;
223         else
224                 ret = 0;
225         spin_unlock(&rsv->lock);
226         return ret;
227 }
228
229 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
230                                     struct btrfs_block_group *block_group,
231                                     struct inode *inode)
232 {
233         struct btrfs_root *root = BTRFS_I(inode)->root;
234         int ret = 0;
235         bool locked = false;
236
237         if (block_group) {
238                 struct btrfs_path *path = btrfs_alloc_path();
239
240                 if (!path) {
241                         ret = -ENOMEM;
242                         goto fail;
243                 }
244                 locked = true;
245                 mutex_lock(&trans->transaction->cache_write_mutex);
246                 if (!list_empty(&block_group->io_list)) {
247                         list_del_init(&block_group->io_list);
248
249                         btrfs_wait_cache_io(trans, block_group, path);
250                         btrfs_put_block_group(block_group);
251                 }
252
253                 /*
254          * Now that we've truncated the cache away, it's no longer
255          * set up or written.
256                  */
257                 spin_lock(&block_group->lock);
258                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
259                 spin_unlock(&block_group->lock);
260                 btrfs_free_path(path);
261         }
262
263         btrfs_i_size_write(BTRFS_I(inode), 0);
264         truncate_pagecache(inode, 0);
265
266         /*
267          * We skip the throttling logic for free space cache inodes, so we don't
268          * need to check for -EAGAIN.
269          */
270         ret = btrfs_truncate_inode_items(trans, root, inode,
271                                          0, BTRFS_EXTENT_DATA_KEY);
272         if (ret)
273                 goto fail;
274
275         ret = btrfs_update_inode(trans, root, inode);
276
277 fail:
278         if (locked)
279                 mutex_unlock(&trans->transaction->cache_write_mutex);
280         if (ret)
281                 btrfs_abort_transaction(trans, ret);
282
283         return ret;
284 }
285
286 static void readahead_cache(struct inode *inode)
287 {
288         struct file_ra_state *ra;
289         unsigned long last_index;
290
291         ra = kzalloc(sizeof(*ra), GFP_NOFS);
292         if (!ra)
293                 return;
294
295         file_ra_state_init(ra, inode->i_mapping);
296         last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
297
298         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
299
300         kfree(ra);
301 }
302
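/*
 * Rough sketch of the cache file layout produced and consumed by the io_ctl
 * helpers below (derived from io_ctl_set_generation(), io_ctl_add_entry(),
 * io_ctl_add_bitmap() and their read-side counterparts):
 *
 *   page 0:  [crc32c of page 0][crc32c of page 1]...[crc32c of page N-1]
 *            [u64 generation][btrfs_free_space_entry][entry]...
 *   page 1+: more packed btrfs_free_space_entry records
 *   then:    one full page per bitmap, in the order the bitmap entries were
 *            written out
 *
 * The per-page crc array is only present when check_crcs is set; it is
 * skipped for the free-ino cache (BTRFS_FREE_INO_OBJECTID).
 */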
303 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
304                        int write)
305 {
306         int num_pages;
307         int check_crcs = 0;
308
309         num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
310
311         if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
312                 check_crcs = 1;
313
314         /* Make sure we can fit our crcs and generation into the first page */
315         if (write && check_crcs &&
316             (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
317                 return -ENOSPC;
318
319         memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
320
321         io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
322         if (!io_ctl->pages)
323                 return -ENOMEM;
324
325         io_ctl->num_pages = num_pages;
326         io_ctl->fs_info = btrfs_sb(inode->i_sb);
327         io_ctl->check_crcs = check_crcs;
328         io_ctl->inode = inode;
329
330         return 0;
331 }
332 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
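/*
 * With 4KiB pages, the first-page size check in io_ctl_init() limits a
 * crc-protected cache file to (PAGE_SIZE - sizeof(u64)) / sizeof(u32) = 1022
 * pages, i.e. roughly 4MiB, since all of the per-page checksums plus the
 * generation must fit in the first page.
 */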
333
334 static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
335 {
336         kfree(io_ctl->pages);
337         io_ctl->pages = NULL;
338 }
339
340 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
341 {
342         if (io_ctl->cur) {
343                 io_ctl->cur = NULL;
344                 io_ctl->orig = NULL;
345         }
346 }
347
348 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
349 {
350         ASSERT(io_ctl->index < io_ctl->num_pages);
351         io_ctl->page = io_ctl->pages[io_ctl->index++];
352         io_ctl->cur = page_address(io_ctl->page);
353         io_ctl->orig = io_ctl->cur;
354         io_ctl->size = PAGE_SIZE;
355         if (clear)
356                 clear_page(io_ctl->cur);
357 }
358
359 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
360 {
361         int i;
362
363         io_ctl_unmap_page(io_ctl);
364
365         for (i = 0; i < io_ctl->num_pages; i++) {
366                 if (io_ctl->pages[i]) {
367                         ClearPageChecked(io_ctl->pages[i]);
368                         unlock_page(io_ctl->pages[i]);
369                         put_page(io_ctl->pages[i]);
370                 }
371         }
372 }
373
374 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
375 {
376         struct page *page;
377         struct inode *inode = io_ctl->inode;
378         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
379         int i;
380
381         for (i = 0; i < io_ctl->num_pages; i++) {
382                 page = find_or_create_page(inode->i_mapping, i, mask);
383                 if (!page) {
384                         io_ctl_drop_pages(io_ctl);
385                         return -ENOMEM;
386                 }
387                 io_ctl->pages[i] = page;
388                 if (uptodate && !PageUptodate(page)) {
389                         btrfs_readpage(NULL, page);
390                         lock_page(page);
391                         if (page->mapping != inode->i_mapping) {
392                                 btrfs_err(BTRFS_I(inode)->root->fs_info,
393                                           "free space cache page truncated");
394                                 io_ctl_drop_pages(io_ctl);
395                                 return -EIO;
396                         }
397                         if (!PageUptodate(page)) {
398                                 btrfs_err(BTRFS_I(inode)->root->fs_info,
399                                            "error reading free space cache");
400                                 io_ctl_drop_pages(io_ctl);
401                                 return -EIO;
402                         }
403                 }
404         }
405
406         for (i = 0; i < io_ctl->num_pages; i++) {
407                 clear_page_dirty_for_io(io_ctl->pages[i]);
408                 set_page_extent_mapped(io_ctl->pages[i]);
409         }
410
411         return 0;
412 }
413
414 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
415 {
416         __le64 *val;
417
418         io_ctl_map_page(io_ctl, 1);
419
420         /*
421          * Skip the csum areas.  If we don't check crcs then we just have a
422          * 64bit chunk at the front of the first page.
423          */
424         if (io_ctl->check_crcs) {
425                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
426                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
427         } else {
428                 io_ctl->cur += sizeof(u64);
429                 io_ctl->size -= sizeof(u64) * 2;
430         }
431
432         val = io_ctl->cur;
433         *val = cpu_to_le64(generation);
434         io_ctl->cur += sizeof(u64);
435 }
436
437 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
438 {
439         __le64 *gen;
440
441         /*
442          * Skip the crc area.  If we don't check crcs then we just have a 64bit
443          * chunk at the front of the first page.
444          */
445         if (io_ctl->check_crcs) {
446                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
447                 io_ctl->size -= sizeof(u64) +
448                         (sizeof(u32) * io_ctl->num_pages);
449         } else {
450                 io_ctl->cur += sizeof(u64);
451                 io_ctl->size -= sizeof(u64) * 2;
452         }
453
454         gen = io_ctl->cur;
455         if (le64_to_cpu(*gen) != generation) {
456                 btrfs_err_rl(io_ctl->fs_info,
457                         "space cache generation (%llu) does not match inode (%llu)",
458                                 *gen, generation);
459                 io_ctl_unmap_page(io_ctl);
460                 return -EIO;
461         }
462         io_ctl->cur += sizeof(u64);
463         return 0;
464 }
465
466 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
467 {
468         u32 *tmp;
469         u32 crc = ~(u32)0;
470         unsigned offset = 0;
471
472         if (!io_ctl->check_crcs) {
473                 io_ctl_unmap_page(io_ctl);
474                 return;
475         }
476
477         if (index == 0)
478                 offset = sizeof(u32) * io_ctl->num_pages;
479
480         crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
481         btrfs_crc32c_final(crc, (u8 *)&crc);
482         io_ctl_unmap_page(io_ctl);
483         tmp = page_address(io_ctl->pages[0]);
484         tmp += index;
485         *tmp = crc;
486 }
487
488 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
489 {
490         u32 *tmp, val;
491         u32 crc = ~(u32)0;
492         unsigned offset = 0;
493
494         if (!io_ctl->check_crcs) {
495                 io_ctl_map_page(io_ctl, 0);
496                 return 0;
497         }
498
499         if (index == 0)
500                 offset = sizeof(u32) * io_ctl->num_pages;
501
502         tmp = page_address(io_ctl->pages[0]);
503         tmp += index;
504         val = *tmp;
505
506         io_ctl_map_page(io_ctl, 0);
507         crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
508         btrfs_crc32c_final(crc, (u8 *)&crc);
509         if (val != crc) {
510                 btrfs_err_rl(io_ctl->fs_info,
511                         "csum mismatch on free space cache");
512                 io_ctl_unmap_page(io_ctl);
513                 return -EIO;
514         }
515
516         return 0;
517 }
518
519 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
520                             void *bitmap)
521 {
522         struct btrfs_free_space_entry *entry;
523
524         if (!io_ctl->cur)
525                 return -ENOSPC;
526
527         entry = io_ctl->cur;
528         entry->offset = cpu_to_le64(offset);
529         entry->bytes = cpu_to_le64(bytes);
530         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
531                 BTRFS_FREE_SPACE_EXTENT;
532         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
533         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
534
535         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
536                 return 0;
537
538         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
539
540         /* No more pages to map */
541         if (io_ctl->index >= io_ctl->num_pages)
542                 return 0;
543
544         /* map the next page */
545         io_ctl_map_page(io_ctl, 1);
546         return 0;
547 }
548
549 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
550 {
551         if (!io_ctl->cur)
552                 return -ENOSPC;
553
554         /*
555          * If we aren't at the start of the current page, unmap this one and
556          * map the next one if there is any left.
557          */
558         if (io_ctl->cur != io_ctl->orig) {
559                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
560                 if (io_ctl->index >= io_ctl->num_pages)
561                         return -ENOSPC;
562                 io_ctl_map_page(io_ctl, 0);
563         }
564
565         copy_page(io_ctl->cur, bitmap);
566         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
567         if (io_ctl->index < io_ctl->num_pages)
568                 io_ctl_map_page(io_ctl, 0);
569         return 0;
570 }
571
572 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
573 {
574         /*
575          * If we're not on the boundary we know we've modified the page and we
576          * need to crc the page.
577          */
578         if (io_ctl->cur != io_ctl->orig)
579                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
580         else
581                 io_ctl_unmap_page(io_ctl);
582
583         while (io_ctl->index < io_ctl->num_pages) {
584                 io_ctl_map_page(io_ctl, 1);
585                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
586         }
587 }
588
589 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
590                             struct btrfs_free_space *entry, u8 *type)
591 {
592         struct btrfs_free_space_entry *e;
593         int ret;
594
595         if (!io_ctl->cur) {
596                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
597                 if (ret)
598                         return ret;
599         }
600
601         e = io_ctl->cur;
602         entry->offset = le64_to_cpu(e->offset);
603         entry->bytes = le64_to_cpu(e->bytes);
604         *type = e->type;
605         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
606         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
607
608         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
609                 return 0;
610
611         io_ctl_unmap_page(io_ctl);
612
613         return 0;
614 }
615
616 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
617                               struct btrfs_free_space *entry)
618 {
619         int ret;
620
621         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
622         if (ret)
623                 return ret;
624
625         copy_page(entry->bitmap, io_ctl->cur);
626         io_ctl_unmap_page(io_ctl);
627
628         return 0;
629 }
630
631 /*
632  * Since we attach pinned extents after the fact we can have contiguous sections
633  * of free space that are split up in entries.  This poses a problem for tree
634  * logging: an allocation could span what now looks like two separate entries,
635  * because the entries would have been merged when the pinned extents were
636  * added back to the free space cache.  So run through the space cache that
637  * we just loaded and merge contiguous entries.  This keeps log replay from
638  * blowing up and makes for nicer allocator behavior.
639  */
640 static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
641 {
642         struct btrfs_free_space *e, *prev = NULL;
643         struct rb_node *n;
644
645 again:
646         spin_lock(&ctl->tree_lock);
647         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
648                 e = rb_entry(n, struct btrfs_free_space, offset_index);
649                 if (!prev)
650                         goto next;
651                 if (e->bitmap || prev->bitmap)
652                         goto next;
653                 if (prev->offset + prev->bytes == e->offset) {
654                         unlink_free_space(ctl, prev);
655                         unlink_free_space(ctl, e);
656                         prev->bytes += e->bytes;
657                         kmem_cache_free(btrfs_free_space_cachep, e);
658                         link_free_space(ctl, prev);
659                         prev = NULL;
660                         spin_unlock(&ctl->tree_lock);
661                         goto again;
662                 }
663 next:
664                 prev = e;
665         }
666         spin_unlock(&ctl->tree_lock);
667 }
668
669 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
670                                    struct btrfs_free_space_ctl *ctl,
671                                    struct btrfs_path *path, u64 offset)
672 {
673         struct btrfs_fs_info *fs_info = root->fs_info;
674         struct btrfs_free_space_header *header;
675         struct extent_buffer *leaf;
676         struct btrfs_io_ctl io_ctl;
677         struct btrfs_key key;
678         struct btrfs_free_space *e, *n;
679         LIST_HEAD(bitmaps);
680         u64 num_entries;
681         u64 num_bitmaps;
682         u64 generation;
683         u8 type;
684         int ret = 0;
685
686         /* Nothing in the space cache, goodbye */
687         if (!i_size_read(inode))
688                 return 0;
689
690         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
691         key.offset = offset;
692         key.type = 0;
693
694         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
695         if (ret < 0)
696                 return 0;
697         else if (ret > 0) {
698                 btrfs_release_path(path);
699                 return 0;
700         }
701
702         ret = -1;
703
704         leaf = path->nodes[0];
705         header = btrfs_item_ptr(leaf, path->slots[0],
706                                 struct btrfs_free_space_header);
707         num_entries = btrfs_free_space_entries(leaf, header);
708         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
709         generation = btrfs_free_space_generation(leaf, header);
710         btrfs_release_path(path);
711
712         if (!BTRFS_I(inode)->generation) {
713                 btrfs_info(fs_info,
714                            "the free space cache file (%llu) is invalid, skip it",
715                            offset);
716                 return 0;
717         }
718
719         if (BTRFS_I(inode)->generation != generation) {
720                 btrfs_err(fs_info,
721                           "free space inode generation (%llu) did not match free space cache generation (%llu)",
722                           BTRFS_I(inode)->generation, generation);
723                 return 0;
724         }
725
726         if (!num_entries)
727                 return 0;
728
729         ret = io_ctl_init(&io_ctl, inode, 0);
730         if (ret)
731                 return ret;
732
733         readahead_cache(inode);
734
735         ret = io_ctl_prepare_pages(&io_ctl, true);
736         if (ret)
737                 goto out;
738
739         ret = io_ctl_check_crc(&io_ctl, 0);
740         if (ret)
741                 goto free_cache;
742
743         ret = io_ctl_check_generation(&io_ctl, generation);
744         if (ret)
745                 goto free_cache;
746
747         while (num_entries) {
748                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
749                                       GFP_NOFS);
750                 if (!e)
751                         goto free_cache;
752
753                 ret = io_ctl_read_entry(&io_ctl, e, &type);
754                 if (ret) {
755                         kmem_cache_free(btrfs_free_space_cachep, e);
756                         goto free_cache;
757                 }
758
759                 /*
760                  * Sync discard ensures that the free space cache is always
761                  * trimmed.  So when reading this in, the state should reflect
762                  * that.  We also do this for async as a stop gap for lack of
763                  * persistence.
764                  */
765                 if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
766                     btrfs_test_opt(fs_info, DISCARD_ASYNC))
767                         e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
768
769                 if (!e->bytes) {
770                         kmem_cache_free(btrfs_free_space_cachep, e);
771                         goto free_cache;
772                 }
773
774                 if (type == BTRFS_FREE_SPACE_EXTENT) {
775                         spin_lock(&ctl->tree_lock);
776                         ret = link_free_space(ctl, e);
777                         spin_unlock(&ctl->tree_lock);
778                         if (ret) {
779                                 btrfs_err(fs_info,
780                                         "Duplicate entries in free space cache, dumping");
781                                 kmem_cache_free(btrfs_free_space_cachep, e);
782                                 goto free_cache;
783                         }
784                 } else {
785                         ASSERT(num_bitmaps);
786                         num_bitmaps--;
787                         e->bitmap = kmem_cache_zalloc(
788                                         btrfs_free_space_bitmap_cachep, GFP_NOFS);
789                         if (!e->bitmap) {
790                                 kmem_cache_free(
791                                         btrfs_free_space_cachep, e);
792                                 goto free_cache;
793                         }
794                         spin_lock(&ctl->tree_lock);
795                         ret = link_free_space(ctl, e);
796                         ctl->total_bitmaps++;
797                         ctl->op->recalc_thresholds(ctl);
798                         spin_unlock(&ctl->tree_lock);
799                         if (ret) {
800                                 btrfs_err(fs_info,
801                                         "Duplicate entries in free space cache, dumping");
802                                 kmem_cache_free(btrfs_free_space_cachep, e);
803                                 goto free_cache;
804                         }
805                         list_add_tail(&e->list, &bitmaps);
806                 }
807
808                 num_entries--;
809         }
810
811         io_ctl_unmap_page(&io_ctl);
812
813         /*
814          * The bitmap pages were written after all the entries, in the order
815          * their bitmap entries were added to the cache, so read them back now.
816          */
817         list_for_each_entry_safe(e, n, &bitmaps, list) {
818                 list_del_init(&e->list);
819                 ret = io_ctl_read_bitmap(&io_ctl, e);
820                 if (ret)
821                         goto free_cache;
822                 e->bitmap_extents = count_bitmap_extents(ctl, e);
823                 if (!btrfs_free_space_trimmed(e)) {
824                         ctl->discardable_extents[BTRFS_STAT_CURR] +=
825                                 e->bitmap_extents;
826                         ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
827                 }
828         }
829
830         io_ctl_drop_pages(&io_ctl);
831         merge_space_tree(ctl);
832         ret = 1;
833 out:
834         btrfs_discard_update_discardable(ctl->private, ctl);
835         io_ctl_free(&io_ctl);
836         return ret;
837 free_cache:
838         io_ctl_drop_pages(&io_ctl);
839         __btrfs_remove_free_space_cache(ctl);
840         goto out;
841 }
842
843 int load_free_space_cache(struct btrfs_block_group *block_group)
844 {
845         struct btrfs_fs_info *fs_info = block_group->fs_info;
846         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
847         struct inode *inode;
848         struct btrfs_path *path;
849         int ret = 0;
850         bool matched;
851         u64 used = block_group->used;
852
853         /*
854          * If this block group has been marked to be cleared for one reason or
855          * another then we can't trust the on disk cache, so just return.
856          */
857         spin_lock(&block_group->lock);
858         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
859                 spin_unlock(&block_group->lock);
860                 return 0;
861         }
862         spin_unlock(&block_group->lock);
863
864         path = btrfs_alloc_path();
865         if (!path)
866                 return 0;
867         path->search_commit_root = 1;
868         path->skip_locking = 1;
869
870         /*
871          * We must pass a path with search_commit_root set to btrfs_iget in
872          * order to avoid a deadlock when allocating extents for the tree root.
873          *
874          * When we are COWing an extent buffer from the tree root, when looking
875          * for a free extent, at extent-tree.c:find_free_extent(), we can find a
876          * block group without its free space cache loaded. When we find one
877          * we must load its space cache which requires reading its free space
878          * cache's inode item from the root tree. If this inode item is located
879          * in the same leaf that we started COWing before, then we end up in a
880          * deadlock on the extent buffer (trying to read lock it when we
881          * previously write locked it).
882          *
883          * It's safe to read the inode item using the commit root because
884          * block groups, once loaded, stay in memory forever (until they are
885          * removed) as well as their space caches once loaded. New block groups
886          * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
887          * we will never try to read their inode item while the fs is mounted.
888          */
889         inode = lookup_free_space_inode(block_group, path);
890         if (IS_ERR(inode)) {
891                 btrfs_free_path(path);
892                 return 0;
893         }
894
895         /* We may have converted the inode and made the cache invalid. */
896         spin_lock(&block_group->lock);
897         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
898                 spin_unlock(&block_group->lock);
899                 btrfs_free_path(path);
900                 goto out;
901         }
902         spin_unlock(&block_group->lock);
903
904         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
905                                       path, block_group->start);
906         btrfs_free_path(path);
907         if (ret <= 0)
908                 goto out;
909
910         spin_lock(&ctl->tree_lock);
911         matched = (ctl->free_space == (block_group->length - used -
912                                        block_group->bytes_super));
913         spin_unlock(&ctl->tree_lock);
914
915         if (!matched) {
916                 __btrfs_remove_free_space_cache(ctl);
917                 btrfs_warn(fs_info,
918                            "block group %llu has wrong amount of free space",
919                            block_group->start);
920                 ret = -1;
921         }
922 out:
923         if (ret < 0) {
924                 /* This cache is bogus, make sure it gets cleared */
925                 spin_lock(&block_group->lock);
926                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
927                 spin_unlock(&block_group->lock);
928                 ret = 0;
929
930                 btrfs_warn(fs_info,
931                            "failed to load free space cache for block group %llu, rebuilding it now",
932                            block_group->start);
933         }
934
935         iput(inode);
936         return ret;
937 }
938
939 static noinline_for_stack
940 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
941                               struct btrfs_free_space_ctl *ctl,
942                               struct btrfs_block_group *block_group,
943                               int *entries, int *bitmaps,
944                               struct list_head *bitmap_list)
945 {
946         int ret;
947         struct btrfs_free_cluster *cluster = NULL;
948         struct btrfs_free_cluster *cluster_locked = NULL;
949         struct rb_node *node = rb_first(&ctl->free_space_offset);
950         struct btrfs_trim_range *trim_entry;
951
952         /* Get the cluster for this block_group if it exists */
953         if (block_group && !list_empty(&block_group->cluster_list)) {
954                 cluster = list_entry(block_group->cluster_list.next,
955                                      struct btrfs_free_cluster,
956                                      block_group_list);
957         }
958
959         if (!node && cluster) {
960                 cluster_locked = cluster;
961                 spin_lock(&cluster_locked->lock);
962                 node = rb_first(&cluster->root);
963                 cluster = NULL;
964         }
965
966         /* Write out the extent entries */
967         while (node) {
968                 struct btrfs_free_space *e;
969
970                 e = rb_entry(node, struct btrfs_free_space, offset_index);
971                 *entries += 1;
972
973                 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
974                                        e->bitmap);
975                 if (ret)
976                         goto fail;
977
978                 if (e->bitmap) {
979                         list_add_tail(&e->list, bitmap_list);
980                         *bitmaps += 1;
981                 }
982                 node = rb_next(node);
983                 if (!node && cluster) {
984                         node = rb_first(&cluster->root);
985                         cluster_locked = cluster;
986                         spin_lock(&cluster_locked->lock);
987                         cluster = NULL;
988                 }
989         }
990         if (cluster_locked) {
991                 spin_unlock(&cluster_locked->lock);
992                 cluster_locked = NULL;
993         }
994
995         /*
996          * Make sure we don't miss any range that was removed from our rbtree
997          * because trimming is running. Otherwise after a umount+mount (or crash
998          * after committing the transaction) we would leak free space and get
999          * an inconsistent free space cache report from fsck.
1000          */
1001         list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
1002                 ret = io_ctl_add_entry(io_ctl, trim_entry->start,
1003                                        trim_entry->bytes, NULL);
1004                 if (ret)
1005                         goto fail;
1006                 *entries += 1;
1007         }
1008
1009         return 0;
1010 fail:
1011         if (cluster_locked)
1012                 spin_unlock(&cluster_locked->lock);
1013         return -ENOSPC;
1014 }
1015
1016 static noinline_for_stack int
1017 update_cache_item(struct btrfs_trans_handle *trans,
1018                   struct btrfs_root *root,
1019                   struct inode *inode,
1020                   struct btrfs_path *path, u64 offset,
1021                   int entries, int bitmaps)
1022 {
1023         struct btrfs_key key;
1024         struct btrfs_free_space_header *header;
1025         struct extent_buffer *leaf;
1026         int ret;
1027
1028         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1029         key.offset = offset;
1030         key.type = 0;
1031
1032         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1033         if (ret < 0) {
1034                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1035                                  EXTENT_DELALLOC, 0, 0, NULL);
1036                 goto fail;
1037         }
1038         leaf = path->nodes[0];
1039         if (ret > 0) {
1040                 struct btrfs_key found_key;
1041                 ASSERT(path->slots[0]);
1042                 path->slots[0]--;
1043                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1044                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1045                     found_key.offset != offset) {
1046                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1047                                          inode->i_size - 1, EXTENT_DELALLOC, 0,
1048                                          0, NULL);
1049                         btrfs_release_path(path);
1050                         goto fail;
1051                 }
1052         }
1053
1054         BTRFS_I(inode)->generation = trans->transid;
1055         header = btrfs_item_ptr(leaf, path->slots[0],
1056                                 struct btrfs_free_space_header);
1057         btrfs_set_free_space_entries(leaf, header, entries);
1058         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1059         btrfs_set_free_space_generation(leaf, header, trans->transid);
1060         btrfs_mark_buffer_dirty(leaf);
1061         btrfs_release_path(path);
1062
1063         return 0;
1064
1065 fail:
1066         return -1;
1067 }
1068
1069 static noinline_for_stack int write_pinned_extent_entries(
1070                             struct btrfs_trans_handle *trans,
1071                             struct btrfs_block_group *block_group,
1072                             struct btrfs_io_ctl *io_ctl,
1073                             int *entries)
1074 {
1075         u64 start, extent_start, extent_end, len;
1076         struct extent_io_tree *unpin = NULL;
1077         int ret;
1078
1079         if (!block_group)
1080                 return 0;
1081
1082         /*
1083          * We want to add any pinned extents to our free space cache
1084          * so we don't leak the space
1085          *
1086          * We shouldn't have switched the pinned extents yet so this is the
1087          * right one
1088          */
1089         unpin = &trans->transaction->pinned_extents;
1090
1091         start = block_group->start;
1092
1093         while (start < block_group->start + block_group->length) {
1094                 ret = find_first_extent_bit(unpin, start,
1095                                             &extent_start, &extent_end,
1096                                             EXTENT_DIRTY, NULL);
1097                 if (ret)
1098                         return 0;
1099
1100                 /* This pinned extent is out of our range */
1101                 if (extent_start >= block_group->start + block_group->length)
1102                         return 0;
1103
1104                 extent_start = max(extent_start, start);
1105                 extent_end = min(block_group->start + block_group->length,
1106                                  extent_end + 1);
1107                 len = extent_end - extent_start;
1108
1109                 *entries += 1;
1110                 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
1111                 if (ret)
1112                         return -ENOSPC;
1113
1114                 start = extent_end;
1115         }
1116
1117         return 0;
1118 }
1119
1120 static noinline_for_stack int
1121 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
1122 {
1123         struct btrfs_free_space *entry, *next;
1124         int ret;
1125
1126         /* Write out the bitmaps */
1127         list_for_each_entry_safe(entry, next, bitmap_list, list) {
1128                 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
1129                 if (ret)
1130                         return -ENOSPC;
1131                 list_del_init(&entry->list);
1132         }
1133
1134         return 0;
1135 }
1136
1137 static int flush_dirty_cache(struct inode *inode)
1138 {
1139         int ret;
1140
1141         ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1142         if (ret)
1143                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1144                                  EXTENT_DELALLOC, 0, 0, NULL);
1145
1146         return ret;
1147 }
1148
1149 static void noinline_for_stack
1150 cleanup_bitmap_list(struct list_head *bitmap_list)
1151 {
1152         struct btrfs_free_space *entry, *next;
1153
1154         list_for_each_entry_safe(entry, next, bitmap_list, list)
1155                 list_del_init(&entry->list);
1156 }
1157
1158 static void noinline_for_stack
1159 cleanup_write_cache_enospc(struct inode *inode,
1160                            struct btrfs_io_ctl *io_ctl,
1161                            struct extent_state **cached_state)
1162 {
1163         io_ctl_drop_pages(io_ctl);
1164         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1165                              i_size_read(inode) - 1, cached_state);
1166 }
1167
1168 static int __btrfs_wait_cache_io(struct btrfs_root *root,
1169                                  struct btrfs_trans_handle *trans,
1170                                  struct btrfs_block_group *block_group,
1171                                  struct btrfs_io_ctl *io_ctl,
1172                                  struct btrfs_path *path, u64 offset)
1173 {
1174         int ret;
1175         struct inode *inode = io_ctl->inode;
1176
1177         if (!inode)
1178                 return 0;
1179
1180         /* Flush the dirty pages in the cache file. */
1181         ret = flush_dirty_cache(inode);
1182         if (ret)
1183                 goto out;
1184
1185         /* Update the cache item to tell everyone this cache file is valid. */
1186         ret = update_cache_item(trans, root, inode, path, offset,
1187                                 io_ctl->entries, io_ctl->bitmaps);
1188 out:
1189         io_ctl_free(io_ctl);
1190         if (ret) {
1191                 invalidate_inode_pages2(inode->i_mapping);
1192                 BTRFS_I(inode)->generation = 0;
1193                 if (block_group)
1194                         btrfs_debug(root->fs_info,
1195           "failed to write free space cache for block group %llu error %d",
1196                                   block_group->start, ret);
1197         }
1198         btrfs_update_inode(trans, root, inode);
1199
1200         if (block_group) {
1201                 /* the dirty list is protected by the dirty_bgs_lock */
1202                 spin_lock(&trans->transaction->dirty_bgs_lock);
1203
1204                 /* the disk_cache_state is protected by the block group lock */
1205                 spin_lock(&block_group->lock);
1206
1207                 /*
1208                  * only mark this as written if we didn't get put back on
1209                  * the dirty list while waiting for IO.   Otherwise our
1210                  * cache state won't be right, and we won't get written again
1211                  */
1212                 if (!ret && list_empty(&block_group->dirty_list))
1213                         block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1214                 else if (ret)
1215                         block_group->disk_cache_state = BTRFS_DC_ERROR;
1216
1217                 spin_unlock(&block_group->lock);
1218                 spin_unlock(&trans->transaction->dirty_bgs_lock);
1219                 io_ctl->inode = NULL;
1220                 iput(inode);
1221         }
1222
1223         return ret;
1224
1225 }
1226
1227 static int btrfs_wait_cache_io_root(struct btrfs_root *root,
1228                                     struct btrfs_trans_handle *trans,
1229                                     struct btrfs_io_ctl *io_ctl,
1230                                     struct btrfs_path *path)
1231 {
1232         return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
1233 }
1234
1235 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1236                         struct btrfs_block_group *block_group,
1237                         struct btrfs_path *path)
1238 {
1239         return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1240                                      block_group, &block_group->io_ctl,
1241                                      path, block_group->start);
1242 }
1243
1244 /**
1245  * __btrfs_write_out_cache - write out cached info to an inode
1246  * @root - the root the inode belongs to
1247  * @inode - the inode backing the cache; @ctl - the free space cache to write
1248  * @block_group - the block_group for this cache if it belongs to a block_group
1249  * @io_ctl - the io control struct for this write; @trans - the trans handle
1250  *
1251  * This function writes out a free space cache struct to disk for quick recovery
1252  * on mount.  This will return 0 if it was successful in writing the cache out,
1253  * or an errno if it was not.
1254  */
1255 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1256                                    struct btrfs_free_space_ctl *ctl,
1257                                    struct btrfs_block_group *block_group,
1258                                    struct btrfs_io_ctl *io_ctl,
1259                                    struct btrfs_trans_handle *trans)
1260 {
1261         struct extent_state *cached_state = NULL;
1262         LIST_HEAD(bitmap_list);
1263         int entries = 0;
1264         int bitmaps = 0;
1265         int ret;
1266         int must_iput = 0;
1267
1268         if (!i_size_read(inode))
1269                 return -EIO;
1270
1271         WARN_ON(io_ctl->pages);
1272         ret = io_ctl_init(io_ctl, inode, 1);
1273         if (ret)
1274                 return ret;
1275
1276         if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1277                 down_write(&block_group->data_rwsem);
1278                 spin_lock(&block_group->lock);
1279                 if (block_group->delalloc_bytes) {
1280                         block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1281                         spin_unlock(&block_group->lock);
1282                         up_write(&block_group->data_rwsem);
1283                         BTRFS_I(inode)->generation = 0;
1284                         ret = 0;
1285                         must_iput = 1;
1286                         goto out;
1287                 }
1288                 spin_unlock(&block_group->lock);
1289         }
1290
1291         /* Lock all pages first so we can lock the extent safely. */
1292         ret = io_ctl_prepare_pages(io_ctl, false);
1293         if (ret)
1294                 goto out_unlock;
1295
1296         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1297                          &cached_state);
1298
1299         io_ctl_set_generation(io_ctl, trans->transid);
1300
1301         mutex_lock(&ctl->cache_writeout_mutex);
1302         /* Write out the extent entries in the free space cache */
1303         spin_lock(&ctl->tree_lock);
1304         ret = write_cache_extent_entries(io_ctl, ctl,
1305                                          block_group, &entries, &bitmaps,
1306                                          &bitmap_list);
1307         if (ret)
1308                 goto out_nospc_locked;
1309
1310         /*
1311          * Some space that is freed in the current transaction is pinned; it
1312          * will be added back to the free space cache after the transaction is
1313          * committed, so we must not lose it here.
1314          *
1315          * If this changes while we are working we'll get added back to
1316          * the dirty list and redo it.  No locking needed
1317          */
1318         ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
1319         if (ret)
1320                 goto out_nospc_locked;
1321
1322         /*
1323          * At last, we write out all the bitmaps and keep cache_writeout_mutex
1324          * locked while doing it because a concurrent trim can be manipulating
1325          * or freeing the bitmap.
1326          */
1327         ret = write_bitmap_entries(io_ctl, &bitmap_list);
1328         spin_unlock(&ctl->tree_lock);
1329         mutex_unlock(&ctl->cache_writeout_mutex);
1330         if (ret)
1331                 goto out_nospc;
1332
1333         /* Zero out the rest of the pages just to make sure */
1334         io_ctl_zero_remaining_pages(io_ctl);
1335
1336         /* Everything is written out, now we dirty the pages in the file. */
1337         ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1338                                 io_ctl->num_pages, 0, i_size_read(inode),
1339                                 &cached_state);
1340         if (ret)
1341                 goto out_nospc;
1342
1343         if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1344                 up_write(&block_group->data_rwsem);
1345         /*
1346          * Release the pages and unlock the extent, we will flush
1347          * them out later
1348          */
1349         io_ctl_drop_pages(io_ctl);
1350
1351         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1352                              i_size_read(inode) - 1, &cached_state);
1353
1354         /*
1355          * At this point the pages are under IO and we're happy.  The caller
1356          * is responsible for waiting on them and updating the cache and
1357          * the inode.
1358          */
1359         io_ctl->entries = entries;
1360         io_ctl->bitmaps = bitmaps;
1361
1362         ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1363         if (ret)
1364                 goto out;
1365
1366         return 0;
1367
1368 out_nospc_locked:
1369         cleanup_bitmap_list(&bitmap_list);
1370         spin_unlock(&ctl->tree_lock);
1371         mutex_unlock(&ctl->cache_writeout_mutex);
1372
1373 out_nospc:
1374         cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
1375
1376 out_unlock:
1377         if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1378                 up_write(&block_group->data_rwsem);
1379
1380 out:
1381         io_ctl->inode = NULL;
1382         io_ctl_free(io_ctl);
1383         if (ret) {
1384                 invalidate_inode_pages2(inode->i_mapping);
1385                 BTRFS_I(inode)->generation = 0;
1386         }
1387         btrfs_update_inode(trans, root, inode);
1388         if (must_iput)
1389                 iput(inode);
1390         return ret;
1391 }
1392
1393 int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1394                           struct btrfs_block_group *block_group,
1395                           struct btrfs_path *path)
1396 {
1397         struct btrfs_fs_info *fs_info = trans->fs_info;
1398         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1399         struct inode *inode;
1400         int ret = 0;
1401
1402         spin_lock(&block_group->lock);
1403         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1404                 spin_unlock(&block_group->lock);
1405                 return 0;
1406         }
1407         spin_unlock(&block_group->lock);
1408
1409         inode = lookup_free_space_inode(block_group, path);
1410         if (IS_ERR(inode))
1411                 return 0;
1412
1413         ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1414                                 block_group, &block_group->io_ctl, trans);
1415         if (ret) {
1416                 btrfs_debug(fs_info,
1417           "failed to write free space cache for block group %llu error %d",
1418                           block_group->start, ret);
1419                 spin_lock(&block_group->lock);
1420                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1421                 spin_unlock(&block_group->lock);
1422
1423                 block_group->io_ctl.inode = NULL;
1424                 iput(inode);
1425         }
1426
1427         /*
1428          * If ret == 0 the caller is expected to call btrfs_wait_cache_io
1429          * to wait for IO and put the inode.
1430          */
1431
1432         return ret;
1433 }
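/*
 * A typical caller (the dirty block group writeback done at transaction
 * commit time, see block-group.c) drives this in two phases: first
 * btrfs_write_out_cache() to start the IO for each dirty block group, then
 * btrfs_wait_cache_io() to wait for it and update the cache item and inode.
 * That is why, on success, the inode reference is handed over to
 * btrfs_wait_cache_io() rather than dropped here.
 */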
1434
1435 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1436                                           u64 offset)
1437 {
1438         ASSERT(offset >= bitmap_start);
1439         offset -= bitmap_start;
1440         return (unsigned long)(div_u64(offset, unit));
1441 }
1442
1443 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1444 {
1445         return (unsigned long)(div_u64(bytes, unit));
1446 }
1447
1448 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1449                                    u64 offset)
1450 {
1451         u64 bitmap_start;
1452         u64 bytes_per_bitmap;
1453
1454         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1455         bitmap_start = offset - ctl->start;
1456         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1457         bitmap_start *= bytes_per_bitmap;
1458         bitmap_start += ctl->start;
1459
1460         return bitmap_start;
1461 }
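/*
 * Worked example, assuming 4KiB pages and ctl->unit equal to a 4KiB
 * sectorsize (the common case): one bitmap covers BITS_PER_BITMAP * unit =
 * 32768 * 4KiB = 128MiB, so offset_to_bitmap() rounds an offset down to the
 * start of its 128MiB window relative to ctl->start.
 */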
1462
1463 static int tree_insert_offset(struct rb_root *root, u64 offset,
1464                               struct rb_node *node, int bitmap)
1465 {
1466         struct rb_node **p = &root->rb_node;
1467         struct rb_node *parent = NULL;
1468         struct btrfs_free_space *info;
1469
1470         while (*p) {
1471                 parent = *p;
1472                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1473
1474                 if (offset < info->offset) {
1475                         p = &(*p)->rb_left;
1476                 } else if (offset > info->offset) {
1477                         p = &(*p)->rb_right;
1478                 } else {
1479                         /*
1480                          * we could have a bitmap entry and an extent entry
1481                          * share the same offset.  If this is the case, we want
1482                          * the extent entry to always be found first if we do a
1483                          * linear search through the tree, since we want to have
1484                          * the quickest allocation time, and allocating from an
1485                          * extent is faster than allocating from a bitmap.  So
1486                          * if we're inserting a bitmap and we find an entry at
1487                          * this offset, we want to go right, or after this entry
1488                          * logically.  If we are inserting an extent and we've
1489                          * found a bitmap, we want to go left, or before
1490                          * logically.
1491                          */
1492                         if (bitmap) {
1493                                 if (info->bitmap) {
1494                                         WARN_ON_ONCE(1);
1495                                         return -EEXIST;
1496                                 }
1497                                 p = &(*p)->rb_right;
1498                         } else {
1499                                 if (!info->bitmap) {
1500                                         WARN_ON_ONCE(1);
1501                                         return -EEXIST;
1502                                 }
1503                                 p = &(*p)->rb_left;
1504                         }
1505                 }
1506         }
1507
1508         rb_link_node(node, parent, p);
1509         rb_insert_color(node, root);
1510
1511         return 0;
1512 }
1513
1514 /*
1515  * searches the tree for the given offset.
1516  *
1517  * fuzzy - If this is set, then we are trying to make an allocation, and we
1518  * just want a section that is at least as large as the requested number of
1519  * bytes and comes at or after the given offset.
1520  */
1521 static struct btrfs_free_space *
1522 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1523                    u64 offset, int bitmap_only, int fuzzy)
1524 {
1525         struct rb_node *n = ctl->free_space_offset.rb_node;
1526         struct btrfs_free_space *entry, *prev = NULL;
1527
1528         /* find entry that is closest to the 'offset' */
1529         while (1) {
1530                 if (!n) {
1531                         entry = NULL;
1532                         break;
1533                 }
1534
1535                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1536                 prev = entry;
1537
1538                 if (offset < entry->offset)
1539                         n = n->rb_left;
1540                 else if (offset > entry->offset)
1541                         n = n->rb_right;
1542                 else
1543                         break;
1544         }
1545
1546         if (bitmap_only) {
1547                 if (!entry)
1548                         return NULL;
1549                 if (entry->bitmap)
1550                         return entry;
1551
1552                 /*
1553                  * A bitmap entry and an extent entry may share the same offset;
1554                  * in that case, the bitmap entry comes after the extent entry.
1555                  */
1556                 n = rb_next(n);
1557                 if (!n)
1558                         return NULL;
1559                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1560                 if (entry->offset != offset)
1561                         return NULL;
1562
1563                 WARN_ON(!entry->bitmap);
1564                 return entry;
1565         } else if (entry) {
1566                 if (entry->bitmap) {
1567                         /*
1568                          * if previous extent entry covers the offset,
1569                          * we should return it instead of the bitmap entry
1570                          */
1571                         n = rb_prev(&entry->offset_index);
1572                         if (n) {
1573                                 prev = rb_entry(n, struct btrfs_free_space,
1574                                                 offset_index);
1575                                 if (!prev->bitmap &&
1576                                     prev->offset + prev->bytes > offset)
1577                                         entry = prev;
1578                         }
1579                 }
1580                 return entry;
1581         }
1582
1583         if (!prev)
1584                 return NULL;
1585
1586         /* find last entry before the 'offset' */
1587         entry = prev;
1588         if (entry->offset > offset) {
1589                 n = rb_prev(&entry->offset_index);
1590                 if (n) {
1591                         entry = rb_entry(n, struct btrfs_free_space,
1592                                         offset_index);
1593                         ASSERT(entry->offset <= offset);
1594                 } else {
1595                         if (fuzzy)
1596                                 return entry;
1597                         else
1598                                 return NULL;
1599                 }
1600         }
1601
1602         if (entry->bitmap) {
1603                 n = rb_prev(&entry->offset_index);
1604                 if (n) {
1605                         prev = rb_entry(n, struct btrfs_free_space,
1606                                         offset_index);
1607                         if (!prev->bitmap &&
1608                             prev->offset + prev->bytes > offset)
1609                                 return prev;
1610                 }
1611                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1612                         return entry;
1613         } else if (entry->offset + entry->bytes > offset)
1614                 return entry;
1615
1616         if (!fuzzy)
1617                 return NULL;
1618
1619         while (1) {
1620                 if (entry->bitmap) {
1621                         if (entry->offset + BITS_PER_BITMAP *
1622                             ctl->unit > offset)
1623                                 break;
1624                 } else {
1625                         if (entry->offset + entry->bytes > offset)
1626                                 break;
1627                 }
1628
1629                 n = rb_next(&entry->offset_index);
1630                 if (!n)
1631                         return NULL;
1632                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1633         }
1634         return entry;
1635 }
1636
1637 static inline void
1638 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1639                     struct btrfs_free_space *info)
1640 {
1641         rb_erase(&info->offset_index, &ctl->free_space_offset);
1642         ctl->free_extents--;
1643
1644         if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1645                 ctl->discardable_extents[BTRFS_STAT_CURR]--;
1646                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1647         }
1648 }
1649
1650 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1651                               struct btrfs_free_space *info)
1652 {
1653         __unlink_free_space(ctl, info);
1654         ctl->free_space -= info->bytes;
1655 }
1656
1657 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1658                            struct btrfs_free_space *info)
1659 {
1660         int ret = 0;
1661
1662         ASSERT(info->bytes || info->bitmap);
1663         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1664                                  &info->offset_index, (info->bitmap != NULL));
1665         if (ret)
1666                 return ret;
1667
1668         if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1669                 ctl->discardable_extents[BTRFS_STAT_CURR]++;
1670                 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1671         }
1672
1673         ctl->free_space += info->bytes;
1674         ctl->free_extents++;
1675         return ret;
1676 }
1677
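/*
 * Recompute ctl->extents_thresh from the block group size.  As a worked
 * example, with MAX_CACHE_BYTES_PER_GIG at 64KiB a 2GiB block group gets a
 * 128KiB cache budget; the bytes not consumed by bitmaps, capped at half the
 * budget, are divided by sizeof(struct btrfs_free_space) to get the number
 * of extent entries allowed before we start converting to bitmaps.
 */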
1678 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1679 {
1680         struct btrfs_block_group *block_group = ctl->private;
1681         u64 max_bytes;
1682         u64 bitmap_bytes;
1683         u64 extent_bytes;
1684         u64 size = block_group->length;
1685         u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1686         u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1687
1688         max_bitmaps = max_t(u64, max_bitmaps, 1);
1689
1690         ASSERT(ctl->total_bitmaps <= max_bitmaps);
1691
1692         /*
1693          * We are trying to keep the total amount of memory used per 1GiB of
1694          * space to be MAX_CACHE_BYTES_PER_GIG.  However, with a reclamation
1695          * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
1696          * bitmaps, we may end up using more memory than this.
1697          */
1698         if (size < SZ_1G)
1699                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1700         else
1701                 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
1702
1703         bitmap_bytes = ctl->total_bitmaps * ctl->unit;
1704
1705         /*
1706          * we want the extent entry threshold to always be at most 1/2 the max
1707          * we want the extent entry threshold to be whatever is left over after
1708          * the bitmaps, but never more than 1/2 the max bytes we can have.
1709         extent_bytes = max_bytes - bitmap_bytes;
1710         extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
1711
1712         ctl->extents_thresh =
1713                 div_u64(extent_bytes, sizeof(struct btrfs_free_space));
1714 }
1715
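/*
 * Clear @bytes worth of bits starting at @offset and keep the per-bitmap
 * region count in sync: extent_delta starts at -1 (one region removed) and
 * gains one for each set neighbouring bit, so clearing the middle of a run
 * ends up splitting one region into two (+1 net).
 */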
1716 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1717                                        struct btrfs_free_space *info,
1718                                        u64 offset, u64 bytes)
1719 {
1720         unsigned long start, count, end;
1721         int extent_delta = -1;
1722
1723         start = offset_to_bit(info->offset, ctl->unit, offset);
1724         count = bytes_to_bits(bytes, ctl->unit);
1725         end = start + count;
1726         ASSERT(end <= BITS_PER_BITMAP);
1727
1728         bitmap_clear(info->bitmap, start, count);
1729
1730         info->bytes -= bytes;
1731         if (info->max_extent_size > ctl->unit)
1732                 info->max_extent_size = 0;
1733
1734         if (start && test_bit(start - 1, info->bitmap))
1735                 extent_delta++;
1736
1737         if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1738                 extent_delta++;
1739
1740         info->bitmap_extents += extent_delta;
1741         if (!btrfs_free_space_trimmed(info)) {
1742                 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1743                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1744         }
1745 }
1746
1747 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1748                               struct btrfs_free_space *info, u64 offset,
1749                               u64 bytes)
1750 {
1751         __bitmap_clear_bits(ctl, info, offset, bytes);
1752         ctl->free_space -= bytes;
1753 }
1754
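/*
 * Set @bytes worth of bits starting at @offset; the mirror image of
 * __bitmap_clear_bits().  extent_delta starts at +1 (one region added) and
 * loses one for each set neighbouring bit, so filling the gap between two
 * existing runs merges them (-1 net).
 */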
1755 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1756                             struct btrfs_free_space *info, u64 offset,
1757                             u64 bytes)
1758 {
1759         unsigned long start, count, end;
1760         int extent_delta = 1;
1761
1762         start = offset_to_bit(info->offset, ctl->unit, offset);
1763         count = bytes_to_bits(bytes, ctl->unit);
1764         end = start + count;
1765         ASSERT(end <= BITS_PER_BITMAP);
1766
1767         bitmap_set(info->bitmap, start, count);
1768
1769         info->bytes += bytes;
1770         ctl->free_space += bytes;
1771
1772         if (start && test_bit(start - 1, info->bitmap))
1773                 extent_delta--;
1774
1775         if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1776                 extent_delta--;
1777
1778         info->bitmap_extents += extent_delta;
1779         if (!btrfs_free_space_trimmed(info)) {
1780                 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1781                 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1782         }
1783 }
1784
1785 /*
1786  * If we cannot find a suitable extent, we will use bytes to record
1787  * the size of the max extent.
1788  */
1789 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1790                          struct btrfs_free_space *bitmap_info, u64 *offset,
1791                          u64 *bytes, bool for_alloc)
1792 {
1793         unsigned long found_bits = 0;
1794         unsigned long max_bits = 0;
1795         unsigned long bits, i;
1796         unsigned long next_zero;
1797         unsigned long extent_bits;
1798
1799         /*
1800          * Skip searching the bitmap if we don't have a contiguous section that
1801          * is large enough for this allocation.
1802          */
1803         if (for_alloc &&
1804             bitmap_info->max_extent_size &&
1805             bitmap_info->max_extent_size < *bytes) {
1806                 *bytes = bitmap_info->max_extent_size;
1807                 return -1;
1808         }
1809
1810         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1811                           max_t(u64, *offset, bitmap_info->offset));
1812         bits = bytes_to_bits(*bytes, ctl->unit);
1813
1814         for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1815                 if (for_alloc && bits == 1) {
1816                         found_bits = 1;
1817                         break;
1818                 }
1819                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1820                                                BITS_PER_BITMAP, i);
1821                 extent_bits = next_zero - i;
1822                 if (extent_bits >= bits) {
1823                         found_bits = extent_bits;
1824                         break;
1825                 } else if (extent_bits > max_bits) {
1826                         max_bits = extent_bits;
1827                 }
1828                 i = next_zero;
1829         }
1830
1831         if (found_bits) {
1832                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1833                 *bytes = (u64)(found_bits) * ctl->unit;
1834                 return 0;
1835         }
1836
1837         *bytes = (u64)(max_bits) * ctl->unit;
1838         bitmap_info->max_extent_size = *bytes;
1839         return -1;
1840 }
1841
1842 static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1843 {
1844         if (entry->bitmap)
1845                 return entry->max_extent_size;
1846         return entry->bytes;
1847 }
1848
1849 /* Cache the size of the max extent in bytes */
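/*
 * Starting near the bitmap-aligned position of @offset, walk the entries in
 * offset order and return the first one that can satisfy @bytes once the
 * requested alignment is applied; bitmap entries are probed bit by bit via
 * search_bitmap().  On failure, *max_extent_size records the largest free
 * chunk seen so callers know how much contiguous space is actually available.
 */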
1850 static struct btrfs_free_space *
1851 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1852                 unsigned long align, u64 *max_extent_size)
1853 {
1854         struct btrfs_free_space *entry;
1855         struct rb_node *node;
1856         u64 tmp;
1857         u64 align_off;
1858         int ret;
1859
1860         if (!ctl->free_space_offset.rb_node)
1861                 goto out;
1862
1863         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1864         if (!entry)
1865                 goto out;
1866
1867         for (node = &entry->offset_index; node; node = rb_next(node)) {
1868                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1869                 if (entry->bytes < *bytes) {
1870                         *max_extent_size = max(get_max_extent_size(entry),
1871                                                *max_extent_size);
1872                         continue;
1873                 }
1874
1875                 /* make sure the space returned is big enough
1876                  * to match our requested alignment
1877                  */
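                /*
                 * Round entry->offset up to the next multiple of @align,
                 * measured from ctl->start; align_off is the slack skipped
                 * at the front of this entry to reach that boundary.
                 */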
1878                 if (*bytes >= align) {
1879                         tmp = entry->offset - ctl->start + align - 1;
1880                         tmp = div64_u64(tmp, align);
1881                         tmp = tmp * align + ctl->start;
1882                         align_off = tmp - entry->offset;
1883                 } else {
1884                         align_off = 0;
1885                         tmp = entry->offset;
1886                 }
1887
1888                 if (entry->bytes < *bytes + align_off) {
1889                         *max_extent_size = max(get_max_extent_size(entry),
1890                                                *max_extent_size);
1891                         continue;
1892                 }
1893
1894                 if (entry->bitmap) {
1895                         u64 size = *bytes;
1896
1897                         ret = search_bitmap(ctl, entry, &tmp, &size, true);
1898                         if (!ret) {
1899                                 *offset = tmp;
1900                                 *bytes = size;
1901                                 return entry;
1902                         } else {
1903                                 *max_extent_size =
1904                                         max(get_max_extent_size(entry),
1905                                             *max_extent_size);
1906                         }
1907                         continue;
1908                 }
1909
1910                 *offset = tmp;
1911                 *bytes = entry->bytes - align_off;
1912                 return entry;
1913         }
1914 out:
1915         return NULL;
1916 }
1917
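/*
 * Count the contiguous runs of set bits (free extents) inside a bitmap
 * entry, stopping once all of the entry's bytes have been accounted for.
 */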
1918 static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
1919                                 struct btrfs_free_space *bitmap_info)
1920 {
1921         struct btrfs_block_group *block_group = ctl->private;
1922         u64 bytes = bitmap_info->bytes;
1923         unsigned int rs, re;
1924         int count = 0;
1925
1926         if (!block_group || !bytes)
1927                 return count;
1928
1929         bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
1930                                    BITS_PER_BITMAP) {
1931                 bytes -= (re - rs) * ctl->unit;
1932                 count++;
1933
1934                 if (!bytes)
1935                         break;
1936         }
1937
1938         return count;
1939 }
1940
1941 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1942                            struct btrfs_free_space *info, u64 offset)
1943 {
1944         info->offset = offset_to_bitmap(ctl, offset);
1945         info->bytes = 0;
1946         info->bitmap_extents = 0;
1947         INIT_LIST_HEAD(&info->list);
1948         link_free_space(ctl, info);
1949         ctl->total_bitmaps++;
1950
1951         ctl->op->recalc_thresholds(ctl);
1952 }
1953
1954 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1955                         struct btrfs_free_space *bitmap_info)
1956 {
1957         /*
1958          * Normally when this is called, the bitmap is completely empty. However,
1959          * if we are blowing up the free space cache for one reason or another
1960          * via __btrfs_remove_free_space_cache(), then it may not be freed and
1961          * we may leave stats on the table.
1962          */
1963         if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1964                 ctl->discardable_extents[BTRFS_STAT_CURR] -=
1965                         bitmap_info->bitmap_extents;
1966                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1967
1968         }
1969         unlink_free_space(ctl, bitmap_info);
1970         kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1971         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1972         ctl->total_bitmaps--;
1973         ctl->op->recalc_thresholds(ctl);
1974 }
1975
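/*
 * Remove [*offset, *offset + *bytes) from a bitmap entry.  The range may run
 * past the end of this bitmap, in which case we clear what we can, advance
 * *offset and *bytes and continue with the next entry in the tree, returning
 * -EAGAIN if that next entry turns out to be an extent rather than a bitmap.
 */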
1976 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1977                               struct btrfs_free_space *bitmap_info,
1978                               u64 *offset, u64 *bytes)
1979 {
1980         u64 end;
1981         u64 search_start, search_bytes;
1982         int ret;
1983
1984 again:
1985         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1986
1987         /*
1988          * We need to search for bits in this bitmap.  We could only cover some
1989          * of the extent in this bitmap thanks to how we add space, so we need
1990          * to search for as much of it as we can and clear that amount, and then
1991          * go searching for the next bit.
1992          */
1993         search_start = *offset;
1994         search_bytes = ctl->unit;
1995         search_bytes = min(search_bytes, end - search_start + 1);
1996         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
1997                             false);
1998         if (ret < 0 || search_start != *offset)
1999                 return -EINVAL;
2000
2001         /* We may have found more bits than what we need */
2002         search_bytes = min(search_bytes, *bytes);
2003
2004         /* Cannot clear past the end of the bitmap */
2005         search_bytes = min(search_bytes, end - search_start + 1);
2006
2007         bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2008         *offset += search_bytes;
2009         *bytes -= search_bytes;
2010
2011         if (*bytes) {
2012                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
2013                 if (!bitmap_info->bytes)
2014                         free_bitmap(ctl, bitmap_info);
2015
2016                 /*
2017                  * no entry after this bitmap, but we still have bytes to
2018                  * remove, so something has gone wrong.
2019                  */
2020                 if (!next)
2021                         return -EINVAL;
2022
2023                 bitmap_info = rb_entry(next, struct btrfs_free_space,
2024                                        offset_index);
2025
2026                 /*
2027                  * if the next entry isn't a bitmap we need to return to let the
2028                  * extent stuff do its work.
2029                  */
2030                 if (!bitmap_info->bitmap)
2031                         return -EAGAIN;
2032
2033                 /*
2034                  * Ok the next item is a bitmap, but it may not actually hold
2035                  * the information for the rest of this free space stuff, so
2036                  * look for it, and if we don't find it return so we can try
2037                  * everything over again.
2038                  */
2039                 search_start = *offset;
2040                 search_bytes = ctl->unit;
2041                 ret = search_bitmap(ctl, bitmap_info, &search_start,
2042                                     &search_bytes, false);
2043                 if (ret < 0 || search_start != *offset)
2044                         return -EAGAIN;
2045
2046                 goto again;
2047         } else if (!bitmap_info->bytes)
2048                 free_bitmap(ctl, bitmap_info);
2049
2050         return 0;
2051 }
2052
2053 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2054                                struct btrfs_free_space *info, u64 offset,
2055                                u64 bytes, enum btrfs_trim_state trim_state)
2056 {
2057         u64 bytes_to_set = 0;
2058         u64 end;
2059
2060         /*
2061          * This is a tradeoff to make bitmap trim state minimal.  We mark the
2062          * whole bitmap untrimmed if at any point we add untrimmed regions.
2063          */
2064         if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2065                 if (btrfs_free_space_trimmed(info)) {
2066                         ctl->discardable_extents[BTRFS_STAT_CURR] +=
2067                                 info->bitmap_extents;
2068                         ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2069                 }
2070                 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2071         }
2072
2073         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2074
2075         bytes_to_set = min(end - offset, bytes);
2076
2077         bitmap_set_bits(ctl, info, offset, bytes_to_set);
2078
2079         /*
2080          * We set some bytes, we have no idea what the max extent size is
2081          * anymore.
2082          */
2083         info->max_extent_size = 0;
2084
2085         return bytes_to_set;
2086
2087 }
2088
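/*
 * Decide whether a chunk of free space should be tracked in a bitmap or as
 * an extent entry.  Large regions (>= FORCE_EXTENT_THRESHOLD) and block
 * groups smaller than half of one bitmap's coverage always get extent
 * entries; otherwise bitmaps are used once the number of extent entries
 * approaches ctl->extents_thresh.
 */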
2089 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2090                       struct btrfs_free_space *info)
2091 {
2092         struct btrfs_block_group *block_group = ctl->private;
2093         struct btrfs_fs_info *fs_info = block_group->fs_info;
2094         bool forced = false;
2095
2096 #ifdef CONFIG_BTRFS_DEBUG
2097         if (btrfs_should_fragment_free_space(block_group))
2098                 forced = true;
2099 #endif
2100
2101         /* This is a way to reclaim large regions from the bitmaps. */
2102         if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2103                 return false;
2104
2105         /*
2106          * If we are below the extents threshold then we can add this as an
2107          * extent, and don't have to deal with the bitmap
2108          */
2109         if (!forced && ctl->free_extents < ctl->extents_thresh) {
2110                 /*
2111                  * If this block group has some small extents we don't want to
2112                  * use up all of our free slots in the cache with them, we want
2113                  * to reserve them for larger extents, however if we have plenty
2114                  * of cache left then go ahead and add them, no sense in adding
2115                  * the overhead of a bitmap if we don't have to.
2116                  */
2117                 if (info->bytes <= fs_info->sectorsize * 8) {
2118                         if (ctl->free_extents * 3 <= ctl->extents_thresh)
2119                                 return false;
2120                 } else {
2121                         return false;
2122                 }
2123         }
2124
2125         /*
2126          * The original block groups from mkfs can be really small, like 8
2127          * megabytes, so don't bother with a bitmap for those entries.  However
2128          * some block groups can be smaller than what a bitmap would cover but
2129          * are still large enough that they could overflow the 32k memory limit,
2130          * so allow those block groups to still have a bitmap entry.
2132          */
2133         if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2134                 return false;
2135
2136         return true;
2137 }
2138
2139 static const struct btrfs_free_space_op free_space_op = {
2140         .recalc_thresholds      = recalculate_thresholds,
2141         .use_bitmap             = use_bitmap,
2142 };
2143
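/*
 * Try to account the free space described by @info in bitmaps: first the
 * bitmap linked into the block group's cluster (if any), then whatever
 * bitmap covers the offset, allocating a new bitmap entry when none does.
 * Returns 1 if the space was fully absorbed, 0 if the caller should keep it
 * as an extent entry, or a negative errno.
 */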
2144 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2145                               struct btrfs_free_space *info)
2146 {
2147         struct btrfs_free_space *bitmap_info;
2148         struct btrfs_block_group *block_group = NULL;
2149         int added = 0;
2150         u64 bytes, offset, bytes_added;
2151         enum btrfs_trim_state trim_state;
2152         int ret;
2153
2154         bytes = info->bytes;
2155         offset = info->offset;
2156         trim_state = info->trim_state;
2157
2158         if (!ctl->op->use_bitmap(ctl, info))
2159                 return 0;
2160
2161         if (ctl->op == &free_space_op)
2162                 block_group = ctl->private;
2163 again:
2164         /*
2165          * Since we link bitmaps right into the cluster we need to see if we
2166          * have a cluster here, and if so and it has our bitmap we need to add
2167          * the free space to that bitmap.
2168          */
2169         if (block_group && !list_empty(&block_group->cluster_list)) {
2170                 struct btrfs_free_cluster *cluster;
2171                 struct rb_node *node;
2172                 struct btrfs_free_space *entry;
2173
2174                 cluster = list_entry(block_group->cluster_list.next,
2175                                      struct btrfs_free_cluster,
2176                                      block_group_list);
2177                 spin_lock(&cluster->lock);
2178                 node = rb_first(&cluster->root);
2179                 if (!node) {
2180                         spin_unlock(&cluster->lock);
2181                         goto no_cluster_bitmap;
2182                 }
2183
2184                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2185                 if (!entry->bitmap) {
2186                         spin_unlock(&cluster->lock);
2187                         goto no_cluster_bitmap;
2188                 }
2189
2190                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
2191                         bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2192                                                           bytes, trim_state);
2193                         bytes -= bytes_added;
2194                         offset += bytes_added;
2195                 }
2196                 spin_unlock(&cluster->lock);
2197                 if (!bytes) {
2198                         ret = 1;
2199                         goto out;
2200                 }
2201         }
2202
2203 no_cluster_bitmap:
2204         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2205                                          1, 0);
2206         if (!bitmap_info) {
2207                 ASSERT(added == 0);
2208                 goto new_bitmap;
2209         }
2210
2211         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2212                                           trim_state);
2213         bytes -= bytes_added;
2214         offset += bytes_added;
2215         added = 0;
2216
2217         if (!bytes) {
2218                 ret = 1;
2219                 goto out;
2220         } else
2221                 goto again;
2222
2223 new_bitmap:
2224         if (info && info->bitmap) {
2225                 add_new_bitmap(ctl, info, offset);
2226                 added = 1;
2227                 info = NULL;
2228                 goto again;
2229         } else {
2230                 spin_unlock(&ctl->tree_lock);
2231
2232                 /* no pre-allocated info, allocate a new one */
2233                 if (!info) {
2234                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
2235                                                  GFP_NOFS);
2236                         if (!info) {
2237                                 spin_lock(&ctl->tree_lock);
2238                                 ret = -ENOMEM;
2239                                 goto out;
2240                         }
2241                 }
2242
2243                 /* allocate the bitmap */
2244                 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2245                                                  GFP_NOFS);
2246                 info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2247                 spin_lock(&ctl->tree_lock);
2248                 if (!info->bitmap) {
2249                         ret = -ENOMEM;
2250                         goto out;
2251                 }
2252                 goto again;
2253         }
2254
2255 out:
2256         if (info) {
2257                 if (info->bitmap)
2258                         kmem_cache_free(btrfs_free_space_bitmap_cachep,
2259                                         info->bitmap);
2260                 kmem_cache_free(btrfs_free_space_cachep, info);
2261         }
2262
2263         return ret;
2264 }
2265
2266 /*
2267  * Free space merging rules:
2268  *  1) Merge trimmed areas together
2269  *  2) Let untrimmed areas coalesce with trimmed areas
2270  *  3) Always pull neighboring regions from bitmaps
2271  *
2272  * The above rules are for when we merge free space based on btrfs_trim_state.
2273  * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2274  * same reason: to promote larger extent regions which makes life easier for
2275  * find_free_extent().  Rule 2 enables coalescing based on the common path
2276  * being returning free space from btrfs_finish_extent_commit().  So when free
2277  * space is trimmed, it will prevent aggregating trimmed new region and
2278  * untrimmed regions in the rb_tree.  Rule 3 is purely to obtain larger extents
2279  * and provide find_free_extent() with the largest extents possible hoping for
2280  * the reuse path.
2281  */
2282 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2283                           struct btrfs_free_space *info, bool update_stat)
2284 {
2285         struct btrfs_free_space *left_info = NULL;
2286         struct btrfs_free_space *right_info;
2287         bool merged = false;
2288         u64 offset = info->offset;
2289         u64 bytes = info->bytes;
2290         const bool is_trimmed = btrfs_free_space_trimmed(info);
2291
2292         /*
2293          * first we want to see if there is free space adjacent to the range we
2294          * are adding; if there is, remove that struct and add a new one to
2295          * cover the entire range.
2296          */
2297         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
2298         if (right_info && rb_prev(&right_info->offset_index))
2299                 left_info = rb_entry(rb_prev(&right_info->offset_index),
2300                                      struct btrfs_free_space, offset_index);
2301         else if (!right_info)
2302                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2303
2304         /* See try_merge_free_space() comment. */
2305         if (right_info && !right_info->bitmap &&
2306             (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2307                 if (update_stat)
2308                         unlink_free_space(ctl, right_info);
2309                 else
2310                         __unlink_free_space(ctl, right_info);
2311                 info->bytes += right_info->bytes;
2312                 kmem_cache_free(btrfs_free_space_cachep, right_info);
2313                 merged = true;
2314         }
2315
2316         /* See try_merge_free_space() comment. */
2317         if (left_info && !left_info->bitmap &&
2318             left_info->offset + left_info->bytes == offset &&
2319             (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2320                 if (update_stat)
2321                         unlink_free_space(ctl, left_info);
2322                 else
2323                         __unlink_free_space(ctl, left_info);
2324                 info->offset = left_info->offset;
2325                 info->bytes += left_info->bytes;
2326                 kmem_cache_free(btrfs_free_space_cachep, left_info);
2327                 merged = true;
2328         }
2329
2330         return merged;
2331 }
2332
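/*
 * Try to grow @info forward by absorbing the run of set bits that begins at
 * its end inside the covering bitmap entry.  Returns true if any bytes were
 * migrated from the bitmap into the extent entry.
 */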
2333 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2334                                      struct btrfs_free_space *info,
2335                                      bool update_stat)
2336 {
2337         struct btrfs_free_space *bitmap;
2338         unsigned long i;
2339         unsigned long j;
2340         const u64 end = info->offset + info->bytes;
2341         const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2342         u64 bytes;
2343
2344         bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2345         if (!bitmap)
2346                 return false;
2347
2348         i = offset_to_bit(bitmap->offset, ctl->unit, end);
2349         j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2350         if (j == i)
2351                 return false;
2352         bytes = (j - i) * ctl->unit;
2353         info->bytes += bytes;
2354
2355         /* See try_merge_free_space() comment. */
2356         if (!btrfs_free_space_trimmed(bitmap))
2357                 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2358
2359         if (update_stat)
2360                 bitmap_clear_bits(ctl, bitmap, end, bytes);
2361         else
2362                 __bitmap_clear_bits(ctl, bitmap, end, bytes);
2363
2364         if (!bitmap->bytes)
2365                 free_bitmap(ctl, bitmap);
2366
2367         return true;
2368 }
2369
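/*
 * As above, but grow @info backwards: pull in the run of set bits that ends
 * right at @info's start, looking at the previous bitmap when @info begins
 * exactly on a bitmap boundary.
 */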
2370 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2371                                        struct btrfs_free_space *info,
2372                                        bool update_stat)
2373 {
2374         struct btrfs_free_space *bitmap;
2375         u64 bitmap_offset;
2376         unsigned long i;
2377         unsigned long j;
2378         unsigned long prev_j;
2379         u64 bytes;
2380
2381         bitmap_offset = offset_to_bitmap(ctl, info->offset);
2382         /* If we're on a boundary, try the previous logical bitmap. */
2383         if (bitmap_offset == info->offset) {
2384                 if (info->offset == 0)
2385                         return false;
2386                 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2387         }
2388
2389         bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2390         if (!bitmap)
2391                 return false;
2392
2393         i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2394         j = 0;
2395         prev_j = (unsigned long)-1;
2396         for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2397                 if (j > i)
2398                         break;
2399                 prev_j = j;
2400         }
2401         if (prev_j == i)
2402                 return false;
2403
2404         if (prev_j == (unsigned long)-1)
2405                 bytes = (i + 1) * ctl->unit;
2406         else
2407                 bytes = (i - prev_j) * ctl->unit;
2408
2409         info->offset -= bytes;
2410         info->bytes += bytes;
2411
2412         /* See try_merge_free_space() comment. */
2413         if (!btrfs_free_space_trimmed(bitmap))
2414                 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2415
2416         if (update_stat)
2417                 bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2418         else
2419                 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2420
2421         if (!bitmap->bytes)
2422                 free_bitmap(ctl, bitmap);
2423
2424         return true;
2425 }
2426
2427 /*
2428  * We prefer always to allocate from extent entries, both for clustered and
2429  * non-clustered allocation requests. So when attempting to add a new extent
2430  * entry, try to see if there's adjacent free space in bitmap entries, and if
2431  * there is, migrate that space from the bitmaps to the extent.
2432  * Like this we get better chances of satisfying space allocation requests
2433  * because we attempt to satisfy them based on a single cache entry, and never
2434  * on 2 or more entries - even if the entries represent a contiguous free space
2435  * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2436  * ends).
2437  */
2438 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2439                               struct btrfs_free_space *info,
2440                               bool update_stat)
2441 {
2442         /*
2443          * Only work with disconnected entries, as we can change their offset,
2444          * and they must be extent entries.
2445          */
2446         ASSERT(!info->bitmap);
2447         ASSERT(RB_EMPTY_NODE(&info->offset_index));
2448
2449         if (ctl->total_bitmaps > 0) {
2450                 bool stole_end;
2451                 bool stole_front = false;
2452
2453                 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2454                 if (ctl->total_bitmaps > 0)
2455                         stole_front = steal_from_bitmap_to_front(ctl, info,
2456                                                                  update_stat);
2457
2458                 if (stole_end || stole_front)
2459                         try_merge_free_space(ctl, info, update_stat);
2460         }
2461 }
2462
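/*
 * Core insertion path for free space: merge with adjacent extent entries,
 * let insert_into_bitmap() absorb the space when bitmaps are preferred,
 * steal back neighbouring bitmap bits, and finally link the (possibly
 * enlarged) extent entry into the tree.
 */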
2463 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2464                            struct btrfs_free_space_ctl *ctl,
2465                            u64 offset, u64 bytes,
2466                            enum btrfs_trim_state trim_state)
2467 {
2468         struct btrfs_block_group *block_group = ctl->private;
2469         struct btrfs_free_space *info;
2470         int ret = 0;
2471         u64 filter_bytes = bytes;
2472
2473         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2474         if (!info)
2475                 return -ENOMEM;
2476
2477         info->offset = offset;
2478         info->bytes = bytes;
2479         info->trim_state = trim_state;
2480         RB_CLEAR_NODE(&info->offset_index);
2481
2482         spin_lock(&ctl->tree_lock);
2483
2484         if (try_merge_free_space(ctl, info, true))
2485                 goto link;
2486
2487         /*
2488          * If there was no extent directly to the left or right of this new
2489          * extent then we know we're going to have to allocate a new extent, so
2490          * before we do that see if we need to drop this into a bitmap.
2491          */
2492         ret = insert_into_bitmap(ctl, info);
2493         if (ret < 0) {
2494                 goto out;
2495         } else if (ret) {
2496                 ret = 0;
2497                 goto out;
2498         }
2499 link:
2500         /*
2501          * Only steal free space from adjacent bitmaps if we're sure we're not
2502          * going to add the new free space to existing bitmap entries - because
2503          * that would mean unnecessary work that would be reverted. Therefore
2504          * attempt to steal space from bitmaps if we're adding an extent entry.
2505          */
2506         steal_from_bitmap(ctl, info, true);
2507
2508         filter_bytes = max(filter_bytes, info->bytes);
2509
2510         ret = link_free_space(ctl, info);
2511         if (ret)
2512                 kmem_cache_free(btrfs_free_space_cachep, info);
2513 out:
2514         btrfs_discard_update_discardable(block_group, ctl);
2515         spin_unlock(&ctl->tree_lock);
2516
2517         if (ret) {
2518                 btrfs_crit(fs_info, "unable to add free space: %d", ret);
2519                 ASSERT(ret != -EEXIST);
2520         }
2521
2522         if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2523                 btrfs_discard_check_filter(block_group, filter_bytes);
2524                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2525         }
2526
2527         return ret;
2528 }
2529
2530 int btrfs_add_free_space(struct btrfs_block_group *block_group,
2531                          u64 bytenr, u64 size)
2532 {
2533         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2534
2535         if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2536                 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2537
2538         return __btrfs_add_free_space(block_group->fs_info,
2539                                       block_group->free_space_ctl,
2540                                       bytenr, size, trim_state);
2541 }
2542
2543 /*
2544  * This is a subtle distinction because when adding free space back in general,
2545  * we want it to be added as untrimmed for async. But in the case where we add
2546  * it on loading of a block group, we want to consider it trimmed.
2547  */
2548 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2549                                        u64 bytenr, u64 size)
2550 {
2551         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2552
2553         if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2554             btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2555                 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2556
2557         return __btrfs_add_free_space(block_group->fs_info,
2558                                       block_group->free_space_ctl,
2559                                       bytenr, size, trim_state);
2560 }
2561
2562 int btrfs_remove_free_space(struct btrfs_block_group *block_group,
2563                             u64 offset, u64 bytes)
2564 {
2565         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2566         struct btrfs_free_space *info;
2567         int ret;
2568         bool re_search = false;
2569
2570         spin_lock(&ctl->tree_lock);
2571
2572 again:
2573         ret = 0;
2574         if (!bytes)
2575                 goto out_lock;
2576
2577         info = tree_search_offset(ctl, offset, 0, 0);
2578         if (!info) {
2579                 /*
2580                  * oops didn't find an extent that matched the space we wanted
2581                  * to remove, look for a bitmap instead
2582                  */
2583                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2584                                           1, 0);
2585                 if (!info) {
2586                         /*
2587                          * If we found a partial bit of our free space in a
2588                          * bitmap but then couldn't find the other part this may
2589                          * be a problem, so WARN about it.
2590                          */
2591                         WARN_ON(re_search);
2592                         goto out_lock;
2593                 }
2594         }
2595
2596         re_search = false;
2597         if (!info->bitmap) {
2598                 unlink_free_space(ctl, info);
2599                 if (offset == info->offset) {
2600                         u64 to_free = min(bytes, info->bytes);
2601
2602                         info->bytes -= to_free;
2603                         info->offset += to_free;
2604                         if (info->bytes) {
2605                                 ret = link_free_space(ctl, info);
2606                                 WARN_ON(ret);
2607                         } else {
2608                                 kmem_cache_free(btrfs_free_space_cachep, info);
2609                         }
2610
2611                         offset += to_free;
2612                         bytes -= to_free;
2613                         goto again;
2614                 } else {
2615                         u64 old_end = info->bytes + info->offset;
2616
2617                         info->bytes = offset - info->offset;
2618                         ret = link_free_space(ctl, info);
2619                         WARN_ON(ret);
2620                         if (ret)
2621                                 goto out_lock;
2622
2623                         /* Not enough bytes in this entry to satisfy us */
2624                         if (old_end < offset + bytes) {
2625                                 bytes -= old_end - offset;
2626                                 offset = old_end;
2627                                 goto again;
2628                         } else if (old_end == offset + bytes) {
2629                                 /* all done */
2630                                 goto out_lock;
2631                         }
2632                         spin_unlock(&ctl->tree_lock);
2633
2634                         ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2635                                                      offset + bytes,
2636                                                      old_end - (offset + bytes),
2637                                                      info->trim_state);
2638                         WARN_ON(ret);
2639                         goto out;
2640                 }
2641         }
2642
2643         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
2644         if (ret == -EAGAIN) {
2645                 re_search = true;
2646                 goto again;
2647         }
2648 out_lock:
2649         btrfs_discard_update_discardable(block_group, ctl);
2650         spin_unlock(&ctl->tree_lock);
2651 out:
2652         return ret;
2653 }
2654
2655 void btrfs_dump_free_space(struct btrfs_block_group *block_group,
2656                            u64 bytes)
2657 {
2658         struct btrfs_fs_info *fs_info = block_group->fs_info;
2659         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2660         struct btrfs_free_space *info;
2661         struct rb_node *n;
2662         int count = 0;
2663
2664         spin_lock(&ctl->tree_lock);
2665         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2666                 info = rb_entry(n, struct btrfs_free_space, offset_index);
2667                 if (info->bytes >= bytes && !block_group->ro)
2668                         count++;
2669                 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
2670                            info->offset, info->bytes,
2671                        (info->bitmap) ? "yes" : "no");
2672         }
2673         spin_unlock(&ctl->tree_lock);
2674         btrfs_info(fs_info, "block group has cluster?: %s",
2675                list_empty(&block_group->cluster_list) ? "no" : "yes");
2676         btrfs_info(fs_info,
2677                    "%d free space entries at or bigger than %llu bytes", count, bytes);
2678 }
2679
2680 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
2681 {
2682         struct btrfs_fs_info *fs_info = block_group->fs_info;
2683         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2684
2685         spin_lock_init(&ctl->tree_lock);
2686         ctl->unit = fs_info->sectorsize;
2687         ctl->start = block_group->start;
2688         ctl->private = block_group;
2689         ctl->op = &free_space_op;
2690         INIT_LIST_HEAD(&ctl->trimming_ranges);
2691         mutex_init(&ctl->cache_writeout_mutex);
2692
2693         /*
2694          * we only want to have 32k of ram per block group for keeping
2695          * track of free space, and if we pass 1/2 of that we want to
2696          * start converting things over to using bitmaps
2697          */
2698         ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
2699 }
2700
2701 /*
2702  * for a given cluster, put all of its extents back into the free
2703  * space cache.  If the block group passed doesn't match the block group
2704  * pointed to by the cluster, someone else raced in and freed the
2705  * cluster already.  In that case, we just return without changing anything
2706  */
2707 static void __btrfs_return_cluster_to_free_space(
2708                              struct btrfs_block_group *block_group,
2709                              struct btrfs_free_cluster *cluster)
2710 {
2711         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2712         struct btrfs_free_space *entry;
2713         struct rb_node *node;
2714
2715         spin_lock(&cluster->lock);
2716         if (cluster->block_group != block_group)
2717                 goto out;
2718
2719         cluster->block_group = NULL;
2720         cluster->window_start = 0;
2721         list_del_init(&cluster->block_group_list);
2722
2723         node = rb_first(&cluster->root);
2724         while (node) {
2725                 bool bitmap;
2726
2727                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2728                 node = rb_next(&entry->offset_index);
2729                 rb_erase(&entry->offset_index, &cluster->root);
2730                 RB_CLEAR_NODE(&entry->offset_index);
2731
2732                 bitmap = (entry->bitmap != NULL);
2733                 if (!bitmap) {
2734                         /* Merging treats extents as if they were new */
2735                         if (!btrfs_free_space_trimmed(entry)) {
2736                                 ctl->discardable_extents[BTRFS_STAT_CURR]--;
2737                                 ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2738                                         entry->bytes;
2739                         }
2740
2741                         try_merge_free_space(ctl, entry, false);
2742                         steal_from_bitmap(ctl, entry, false);
2743
2744                         /* As we insert directly, update these statistics */
2745                         if (!btrfs_free_space_trimmed(entry)) {
2746                                 ctl->discardable_extents[BTRFS_STAT_CURR]++;
2747                                 ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2748                                         entry->bytes;
2749                         }
2750                 }
2751                 tree_insert_offset(&ctl->free_space_offset,
2752                                    entry->offset, &entry->offset_index, bitmap);
2753         }
2754         cluster->root = RB_ROOT;
2755
2756 out:
2757         spin_unlock(&cluster->lock);
2758         btrfs_put_block_group(block_group);
2759 }
2760
2761 static void __btrfs_remove_free_space_cache_locked(
2762                                 struct btrfs_free_space_ctl *ctl)
2763 {
2764         struct btrfs_free_space *info;
2765         struct rb_node *node;
2766
2767         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2768                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2769                 if (!info->bitmap) {
2770                         unlink_free_space(ctl, info);
2771                         kmem_cache_free(btrfs_free_space_cachep, info);
2772                 } else {
2773                         free_bitmap(ctl, info);
2774                 }
2775
2776                 cond_resched_lock(&ctl->tree_lock);
2777         }
2778 }
2779
2780 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2781 {
2782         spin_lock(&ctl->tree_lock);
2783         __btrfs_remove_free_space_cache_locked(ctl);
2784         if (ctl->private)
2785                 btrfs_discard_update_discardable(ctl->private, ctl);
2786         spin_unlock(&ctl->tree_lock);
2787 }
2788
2789 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
2790 {
2791         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2792         struct btrfs_free_cluster *cluster;
2793         struct list_head *head;
2794
2795         spin_lock(&ctl->tree_lock);
2796         while ((head = block_group->cluster_list.next) !=
2797                &block_group->cluster_list) {
2798                 cluster = list_entry(head, struct btrfs_free_cluster,
2799                                      block_group_list);
2800
2801                 WARN_ON(cluster->block_group != block_group);
2802                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2803
2804                 cond_resched_lock(&ctl->tree_lock);
2805         }
2806         __btrfs_remove_free_space_cache_locked(ctl);
2807         btrfs_discard_update_discardable(block_group, ctl);
2808         spin_unlock(&ctl->tree_lock);
2809
2810 }
2811
2812 /**
2813  * btrfs_is_free_space_trimmed - see if everything is trimmed
2814  * @block_group: block_group of interest
2815  *
2816  * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2817  */
2818 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2819 {
2820         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2821         struct btrfs_free_space *info;
2822         struct rb_node *node;
2823         bool ret = true;
2824
2825         spin_lock(&ctl->tree_lock);
2826         node = rb_first(&ctl->free_space_offset);
2827
2828         while (node) {
2829                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2830
2831                 if (!btrfs_free_space_trimmed(info)) {
2832                         ret = false;
2833                         break;
2834                 }
2835
2836                 node = rb_next(node);
2837         }
2838
2839         spin_unlock(&ctl->tree_lock);
2840         return ret;
2841 }
2842
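/*
 * Find and remove @bytes (plus @empty_size of slack) of free space at or
 * after @offset, honoring the block group's full stripe alignment.
 * Returns the start of the allocation, or 0 if nothing suitable was found.
 * Any alignment gap carved off the front of an extent entry is re-added to
 * the free space cache.
 */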
2843 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
2844                                u64 offset, u64 bytes, u64 empty_size,
2845                                u64 *max_extent_size)
2846 {
2847         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2848         struct btrfs_discard_ctl *discard_ctl =
2849                                         &block_group->fs_info->discard_ctl;
2850         struct btrfs_free_space *entry = NULL;
2851         u64 bytes_search = bytes + empty_size;
2852         u64 ret = 0;
2853         u64 align_gap = 0;
2854         u64 align_gap_len = 0;
2855         enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2856
2857         spin_lock(&ctl->tree_lock);
2858         entry = find_free_space(ctl, &offset, &bytes_search,
2859                                 block_group->full_stripe_len, max_extent_size);
2860         if (!entry)
2861                 goto out;
2862
2863         ret = offset;
2864         if (entry->bitmap) {
2865                 bitmap_clear_bits(ctl, entry, offset, bytes);
2866
2867                 if (!btrfs_free_space_trimmed(entry))
2868                         atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2869
2870                 if (!entry->bytes)
2871                         free_bitmap(ctl, entry);
2872         } else {
2873                 unlink_free_space(ctl, entry);
2874                 align_gap_len = offset - entry->offset;
2875                 align_gap = entry->offset;
2876                 align_gap_trim_state = entry->trim_state;
2877
2878                 if (!btrfs_free_space_trimmed(entry))
2879                         atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2880
2881                 entry->offset = offset + bytes;
2882                 WARN_ON(entry->bytes < bytes + align_gap_len);
2883
2884                 entry->bytes -= bytes + align_gap_len;
2885                 if (!entry->bytes)
2886                         kmem_cache_free(btrfs_free_space_cachep, entry);
2887                 else
2888                         link_free_space(ctl, entry);
2889         }
2890 out:
2891         btrfs_discard_update_discardable(block_group, ctl);
2892         spin_unlock(&ctl->tree_lock);
2893
2894         if (align_gap_len)
2895                 __btrfs_add_free_space(block_group->fs_info, ctl,
2896                                        align_gap, align_gap_len,
2897                                        align_gap_trim_state);
2898         return ret;
2899 }
2900
2901 /*
2902  * given a cluster, put all of its extents back into the free space
2903  * cache.  If a block group is passed, this function will only free
2904  * a cluster that belongs to the passed block group.
2905  *
2906  * Otherwise, it'll get a reference on the block group pointed to by the
2907  * cluster and remove the cluster from it.
2908  */
2909 void btrfs_return_cluster_to_free_space(
2910                                struct btrfs_block_group *block_group,
2911                                struct btrfs_free_cluster *cluster)
2912 {
2913         struct btrfs_free_space_ctl *ctl;
2914
2915         /* first, get a safe pointer to the block group */
2916         spin_lock(&cluster->lock);
2917         if (!block_group) {
2918                 block_group = cluster->block_group;
2919                 if (!block_group) {
2920                         spin_unlock(&cluster->lock);
2921                         return;
2922                 }
2923         } else if (cluster->block_group != block_group) {
2924                 /* someone else has already freed it, don't redo their work */
2925                 spin_unlock(&cluster->lock);
2926                 return;
2927         }
2928         btrfs_get_block_group(block_group);
2929         spin_unlock(&cluster->lock);
2930
2931         ctl = block_group->free_space_ctl;
2932
2933         /* now return any extents the cluster had on it */
2934         spin_lock(&ctl->tree_lock);
2935         __btrfs_return_cluster_to_free_space(block_group, cluster);
2936         spin_unlock(&ctl->tree_lock);
2937
2938         btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
2939
2940         /* finally drop our ref */
2941         btrfs_put_block_group(block_group);
2942 }
2943
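/*
 * Carve @bytes out of a bitmap entry that belongs to @cluster, searching
 * from @min_start.  Returns the start offset of the allocation, or 0 (and
 * updates @max_extent_size) if no large enough run of set bits was found.
 */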
2944 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
2945                                    struct btrfs_free_cluster *cluster,
2946                                    struct btrfs_free_space *entry,
2947                                    u64 bytes, u64 min_start,
2948                                    u64 *max_extent_size)
2949 {
2950         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2951         int err;
2952         u64 search_start = cluster->window_start;
2953         u64 search_bytes = bytes;
2954         u64 ret = 0;
2955
2956         search_start = min_start;
2957         search_bytes = bytes;
2958
2959         err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
2960         if (err) {
2961                 *max_extent_size = max(get_max_extent_size(entry),
2962                                        *max_extent_size);
2963                 return 0;
2964         }
2965
2966         ret = search_start;
2967         __bitmap_clear_bits(ctl, entry, ret, bytes);
2968
2969         return ret;
2970 }
2971
2972 /*
2973  * given a cluster, try to allocate 'bytes' from it, returns 0
2974  * if it couldn't find anything suitably large, or a logical disk offset
2975  * if things worked out
2976  */
2977 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
2978                              struct btrfs_free_cluster *cluster, u64 bytes,
2979                              u64 min_start, u64 *max_extent_size)
2980 {
2981         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2982         struct btrfs_discard_ctl *discard_ctl =
2983                                         &block_group->fs_info->discard_ctl;
2984         struct btrfs_free_space *entry = NULL;
2985         struct rb_node *node;
2986         u64 ret = 0;
2987
2988         spin_lock(&cluster->lock);
2989         if (bytes > cluster->max_size)
2990                 goto out;
2991
2992         if (cluster->block_group != block_group)
2993                 goto out;
2994
2995         node = rb_first(&cluster->root);
2996         if (!node)
2997                 goto out;
2998
2999         entry = rb_entry(node, struct btrfs_free_space, offset_index);
3000         while (1) {
3001                 if (entry->bytes < bytes)
3002                         *max_extent_size = max(get_max_extent_size(entry),
3003                                                *max_extent_size);
3004
3005                 if (entry->bytes < bytes ||
3006                     (!entry->bitmap && entry->offset < min_start)) {
3007                         node = rb_next(&entry->offset_index);
3008                         if (!node)
3009                                 break;
3010                         entry = rb_entry(node, struct btrfs_free_space,
3011                                          offset_index);
3012                         continue;
3013                 }
3014
3015                 if (entry->bitmap) {
3016                         ret = btrfs_alloc_from_bitmap(block_group,
3017                                                       cluster, entry, bytes,
3018                                                       cluster->window_start,
3019                                                       max_extent_size);
3020                         if (ret == 0) {
3021                                 node = rb_next(&entry->offset_index);
3022                                 if (!node)
3023                                         break;
3024                                 entry = rb_entry(node, struct btrfs_free_space,
3025                                                  offset_index);
3026                                 continue;
3027                         }
3028                         cluster->window_start += bytes;
3029                 } else {
3030                         ret = entry->offset;
3031
3032                         entry->offset += bytes;
3033                         entry->bytes -= bytes;
3034                 }
3035
3036                 if (entry->bytes == 0)
3037                         rb_erase(&entry->offset_index, &cluster->root);
3038                 break;
3039         }
3040 out:
3041         spin_unlock(&cluster->lock);
3042
3043         if (!ret)
3044                 return 0;
3045
3046         spin_lock(&ctl->tree_lock);
3047
3048         if (!btrfs_free_space_trimmed(entry))
3049                 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3050
3051         ctl->free_space -= bytes;
3052         if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3053                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3054         if (entry->bytes == 0) {
3055                 ctl->free_extents--;
3056                 if (entry->bitmap) {
3057                         kmem_cache_free(btrfs_free_space_bitmap_cachep,
3058                                         entry->bitmap);
3059                         ctl->total_bitmaps--;
3060                         ctl->op->recalc_thresholds(ctl);
3061                 } else if (!btrfs_free_space_trimmed(entry)) {
3062                         ctl->discardable_extents[BTRFS_STAT_CURR]--;
3063                 }
3064                 kmem_cache_free(btrfs_free_space_cachep, entry);
3065         }
3066
3067         spin_unlock(&ctl->tree_lock);
3068
3069         return ret;
3070 }
3071
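/*
 * Try to build a cluster out of a single bitmap entry: scan for runs of set
 * bits of at least @min_bytes that total @bytes, with at least one run of
 * @cont1_bytes, then move the entry from the free space tree into the
 * cluster's rbtree.  Returns 0 on success or -ENOSPC otherwise.
 */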
3072 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
3073                                 struct btrfs_free_space *entry,
3074                                 struct btrfs_free_cluster *cluster,
3075                                 u64 offset, u64 bytes,
3076                                 u64 cont1_bytes, u64 min_bytes)
3077 {
3078         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3079         unsigned long next_zero;
3080         unsigned long i;
3081         unsigned long want_bits;
3082         unsigned long min_bits;
3083         unsigned long found_bits;
3084         unsigned long max_bits = 0;
3085         unsigned long start = 0;
3086         unsigned long total_found = 0;
3087         int ret;
3088
3089         i = offset_to_bit(entry->offset, ctl->unit,
3090                           max_t(u64, offset, entry->offset));
3091         want_bits = bytes_to_bits(bytes, ctl->unit);
3092         min_bits = bytes_to_bits(min_bytes, ctl->unit);
3093
3094         /*
3095          * Don't bother looking for a cluster in this bitmap if it's heavily
3096          * fragmented.
3097          */
3098         if (entry->max_extent_size &&
3099             entry->max_extent_size < cont1_bytes)
3100                 return -ENOSPC;
3101 again:
3102         found_bits = 0;
3103         for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
3104                 next_zero = find_next_zero_bit(entry->bitmap,
3105                                                BITS_PER_BITMAP, i);
3106                 if (next_zero - i >= min_bits) {
3107                         found_bits = next_zero - i;
3108                         if (found_bits > max_bits)
3109                                 max_bits = found_bits;
3110                         break;
3111                 }
3112                 if (next_zero - i > max_bits)
3113                         max_bits = next_zero - i;
3114                 i = next_zero;
3115         }
3116
3117         if (!found_bits) {
3118                 entry->max_extent_size = (u64)max_bits * ctl->unit;
3119                 return -ENOSPC;
3120         }
3121
3122         if (!total_found) {
3123                 start = i;
3124                 cluster->max_size = 0;
3125         }
3126
3127         total_found += found_bits;
3128
3129         if (cluster->max_size < found_bits * ctl->unit)
3130                 cluster->max_size = found_bits * ctl->unit;
3131
3132         if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3133                 i = next_zero + 1;
3134                 goto again;
3135         }
3136
3137         cluster->window_start = start * ctl->unit + entry->offset;
3138         rb_erase(&entry->offset_index, &ctl->free_space_offset);
3139         ret = tree_insert_offset(&cluster->root, entry->offset,
3140                                  &entry->offset_index, 1);
3141         ASSERT(!ret); /* -EEXIST; Logic error */
3142
3143         trace_btrfs_setup_cluster(block_group, cluster,
3144                                   total_found * ctl->unit, 1);
3145         return 0;
3146 }
3147
3148 /*
3149  * This searches the block group for just extents to fill the cluster with.
3150  * Try to find a cluster with at least bytes total bytes, at least one
3151  * extent of cont1_bytes, and other extents of at least min_bytes.
3152  */
3153 static noinline int
3154 setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3155                         struct btrfs_free_cluster *cluster,
3156                         struct list_head *bitmaps, u64 offset, u64 bytes,
3157                         u64 cont1_bytes, u64 min_bytes)
3158 {
3159         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3160         struct btrfs_free_space *first = NULL;
3161         struct btrfs_free_space *entry = NULL;
3162         struct btrfs_free_space *last;
3163         struct rb_node *node;
3164         u64 window_free;
3165         u64 max_extent;
3166         u64 total_size = 0;
3167
3168         entry = tree_search_offset(ctl, offset, 0, 1);
3169         if (!entry)
3170                 return -ENOSPC;
3171
3172         /*
3173          * We don't want bitmaps, so just move along until we find a normal
3174          * extent entry.
3175          */
3176         while (entry->bitmap || entry->bytes < min_bytes) {
3177                 if (entry->bitmap && list_empty(&entry->list))
3178                         list_add_tail(&entry->list, bitmaps);
3179                 node = rb_next(&entry->offset_index);
3180                 if (!node)
3181                         return -ENOSPC;
3182                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3183         }
3184
3185         window_free = entry->bytes;
3186         max_extent = entry->bytes;
3187         first = entry;
3188         last = entry;
3189
3190         for (node = rb_next(&entry->offset_index); node;
3191              node = rb_next(&entry->offset_index)) {
3192                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3193
3194                 if (entry->bitmap) {
3195                         if (list_empty(&entry->list))
3196                                 list_add_tail(&entry->list, bitmaps);
3197                         continue;
3198                 }
3199
3200                 if (entry->bytes < min_bytes)
3201                         continue;
3202
3203                 last = entry;
3204                 window_free += entry->bytes;
3205                 if (entry->bytes > max_extent)
3206                         max_extent = entry->bytes;
3207         }
3208
3209         if (window_free < bytes || max_extent < cont1_bytes)
3210                 return -ENOSPC;
3211
3212         cluster->window_start = first->offset;
3213
3214         node = &first->offset_index;
3215
3216         /*
3217          * now we've found our entries, pull them out of the free space
3218          * cache and put them into the cluster rbtree
3219          */
3220         do {
3221                 int ret;
3222
3223                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3224                 node = rb_next(&entry->offset_index);
3225                 if (entry->bitmap || entry->bytes < min_bytes)
3226                         continue;
3227
3228                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
3229                 ret = tree_insert_offset(&cluster->root, entry->offset,
3230                                          &entry->offset_index, 0);
3231                 total_size += entry->bytes;
3232                 ASSERT(!ret); /* -EEXIST; Logic error */
3233         } while (node && entry != last);
3234
3235         cluster->max_size = max_extent;
3236         trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
3237         return 0;
3238 }
3239
3240 /*
3241  * This specifically looks for bitmaps that may work in the cluster, we assume
3242  * that we have already failed to find extents that will work.
3243  */
3244 static noinline int
3245 setup_cluster_bitmap(struct btrfs_block_group *block_group,
3246                      struct btrfs_free_cluster *cluster,
3247                      struct list_head *bitmaps, u64 offset, u64 bytes,
3248                      u64 cont1_bytes, u64 min_bytes)
3249 {
3250         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3251         struct btrfs_free_space *entry = NULL;
3252         int ret = -ENOSPC;
3253         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
3254
3255         if (ctl->total_bitmaps == 0)
3256                 return -ENOSPC;
3257
3258         /*
3259          * The bitmap that covers offset won't be in the list unless offset
3260          * is just its start offset.
3261          */
3262         if (!list_empty(bitmaps))
3263                 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3264
3265         if (!entry || entry->offset != bitmap_offset) {
3266                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3267                 if (entry && list_empty(&entry->list))
3268                         list_add(&entry->list, bitmaps);
3269         }
3270
3271         list_for_each_entry(entry, bitmaps, list) {
3272                 if (entry->bytes < bytes)
3273                         continue;
3274                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
3275                                            bytes, cont1_bytes, min_bytes);
3276                 if (!ret)
3277                         return 0;
3278         }
3279
3280         /*
3281          * The bitmaps list has all the bitmaps that record free space
3282          * starting after offset, so no more search is required.
3283          */
3284         return -ENOSPC;
3285 }
3286
3287 /*
3288  * here we try to find a cluster of blocks in a block group.  The goal
3289  * is to find at least bytes+empty_size.
3290  * We might not find them all in one contiguous area.
3291  *
3292  * returns zero and sets up cluster if things worked out, otherwise
3293  * it returns -ENOSPC
3294  */
3295 int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
3296                              struct btrfs_free_cluster *cluster,
3297                              u64 offset, u64 bytes, u64 empty_size)
3298 {
3299         struct btrfs_fs_info *fs_info = block_group->fs_info;
3300         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3301         struct btrfs_free_space *entry, *tmp;
3302         LIST_HEAD(bitmaps);
3303         u64 min_bytes;
3304         u64 cont1_bytes;
3305         int ret;
3306
3307         /*
3308          * Choose the minimum extent size we'll require for this
3309          * cluster.  For SSD_SPREAD, don't allow any fragmentation.
3310          * For metadata, allow allocations with smaller extents.  For
3311          * data, keep it dense.
3312          */
3313         if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
3314                 cont1_bytes = min_bytes = bytes + empty_size;
3315         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
3316                 cont1_bytes = bytes;
3317                 min_bytes = fs_info->sectorsize;
3318         } else {
3319                 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
3320                 min_bytes = fs_info->sectorsize;
3321         }
3322
3323         spin_lock(&ctl->tree_lock);
3324
3325         /*
3326          * If we know we don't have enough space to make a cluster don't even
3327          * bother doing all the work to try and find one.
3328          */
3329         if (ctl->free_space < bytes) {
3330                 spin_unlock(&ctl->tree_lock);
3331                 return -ENOSPC;
3332         }
3333
3334         spin_lock(&cluster->lock);
3335
3336         /* someone already found a cluster, hooray */
3337         if (cluster->block_group) {
3338                 ret = 0;
3339                 goto out;
3340         }
3341
3342         trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3343                                  min_bytes);
3344
3345         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
3346                                       bytes + empty_size,
3347                                       cont1_bytes, min_bytes);
3348         if (ret)
3349                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
3350                                            offset, bytes + empty_size,
3351                                            cont1_bytes, min_bytes);
3352
3353         /* Clear our temporary list */
3354         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3355                 list_del_init(&entry->list);
3356
3357         if (!ret) {
3358                 btrfs_get_block_group(block_group);
3359                 list_add_tail(&cluster->block_group_list,
3360                               &block_group->cluster_list);
3361                 cluster->block_group = block_group;
3362         } else {
3363                 trace_btrfs_failed_cluster_setup(block_group);
3364         }
3365 out:
3366         spin_unlock(&cluster->lock);
3367         spin_unlock(&ctl->tree_lock);
3368
3369         return ret;
3370 }
3371
3372 /*
3373  * simple code to zero out a cluster
3374  */
3375 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3376 {
3377         spin_lock_init(&cluster->lock);
3378         spin_lock_init(&cluster->refill_lock);
3379         cluster->root = RB_ROOT;
3380         cluster->max_size = 0;
3381         cluster->fragmented = false;
3382         INIT_LIST_HEAD(&cluster->block_group_list);
3383         cluster->block_group = NULL;
3384 }
3385
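/*
 * Issue the discard for [start, start + bytes) while the surrounding
 * [reserved_start, reserved_start + reserved_bytes) range is reserved, then
 * return the reserved space to the free space cache (marking the discarded
 * part trimmed on success) and drop the reservation.
 */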
3386 static int do_trimming(struct btrfs_block_group *block_group,
3387                        u64 *total_trimmed, u64 start, u64 bytes,
3388                        u64 reserved_start, u64 reserved_bytes,
3389                        enum btrfs_trim_state reserved_trim_state,
3390                        struct btrfs_trim_range *trim_entry)
3391 {
3392         struct btrfs_space_info *space_info = block_group->space_info;
3393         struct btrfs_fs_info *fs_info = block_group->fs_info;
3394         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3395         int ret;
3396         int update = 0;
3397         const u64 end = start + bytes;
3398         const u64 reserved_end = reserved_start + reserved_bytes;
3399         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3400         u64 trimmed = 0;
3401
3402         spin_lock(&space_info->lock);
3403         spin_lock(&block_group->lock);
3404         if (!block_group->ro) {
3405                 block_group->reserved += reserved_bytes;
3406                 space_info->bytes_reserved += reserved_bytes;
3407                 update = 1;
3408         }
3409         spin_unlock(&block_group->lock);
3410         spin_unlock(&space_info->lock);
3411
3412         ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3413         if (!ret) {
3414                 *total_trimmed += trimmed;
3415                 trim_state = BTRFS_TRIM_STATE_TRIMMED;
3416         }
3417
3418         mutex_lock(&ctl->cache_writeout_mutex);
3419         if (reserved_start < start)
3420                 __btrfs_add_free_space(fs_info, ctl, reserved_start,
3421                                        start - reserved_start,
3422                                        reserved_trim_state);
3423         if (start + bytes < reserved_start + reserved_bytes)
3424                 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3425                                        reserved_trim_state);
3426         __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
3427         list_del(&trim_entry->list);
3428         mutex_unlock(&ctl->cache_writeout_mutex);
3429
3430         if (update) {
3431                 spin_lock(&space_info->lock);
3432                 spin_lock(&block_group->lock);
3433                 if (block_group->ro)
3434                         space_info->bytes_readonly += reserved_bytes;
3435                 block_group->reserved -= reserved_bytes;
3436                 space_info->bytes_reserved -= reserved_bytes;
3437                 spin_unlock(&block_group->lock);
3438                 spin_unlock(&space_info->lock);
3439         }
3440
3441         return ret;
3442 }
3443
3444 /*
3445  * If @async is set, then we will trim 1 region and return.
3446  */
3447 static int trim_no_bitmap(struct btrfs_block_group *block_group,
3448                           u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3449                           bool async)
3450 {
3451         struct btrfs_discard_ctl *discard_ctl =
3452                                         &block_group->fs_info->discard_ctl;
3453         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3454         struct btrfs_free_space *entry;
3455         struct rb_node *node;
3456         int ret = 0;
3457         u64 extent_start;
3458         u64 extent_bytes;
3459         enum btrfs_trim_state extent_trim_state;
3460         u64 bytes;
3461         const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3462
3463         while (start < end) {
3464                 struct btrfs_trim_range trim_entry;
3465
3466                 mutex_lock(&ctl->cache_writeout_mutex);
3467                 spin_lock(&ctl->tree_lock);
3468
3469                 if (ctl->free_space < minlen)
3470                         goto out_unlock;
3471
3472                 entry = tree_search_offset(ctl, start, 0, 1);
3473                 if (!entry)
3474                         goto out_unlock;
3475
3476                 /* Skip bitmaps and if async, already trimmed entries */
3477                 while (entry->bitmap ||
3478                        (async && btrfs_free_space_trimmed(entry))) {
3479                         node = rb_next(&entry->offset_index);
3480                         if (!node)
3481                                 goto out_unlock;
3482                         entry = rb_entry(node, struct btrfs_free_space,
3483                                          offset_index);
3484                 }
3485
3486                 if (entry->offset >= end)
3487                         goto out_unlock;
3488
3489                 extent_start = entry->offset;
3490                 extent_bytes = entry->bytes;
3491                 extent_trim_state = entry->trim_state;
3492                 if (async) {
3493                         start = entry->offset;
3494                         bytes = entry->bytes;
3495                         if (bytes < minlen) {
3496                                 spin_unlock(&ctl->tree_lock);
3497                                 mutex_unlock(&ctl->cache_writeout_mutex);
3498                                 goto next;
3499                         }
3500                         unlink_free_space(ctl, entry);
3501                         /*
3502                          * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3503                          * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3504                          * X when we come back around.  So trim it now.
3505                          */
3506                         if (max_discard_size &&
3507                             bytes >= (max_discard_size +
3508                                       BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3509                                 bytes = max_discard_size;
3510                                 extent_bytes = max_discard_size;
3511                                 entry->offset += max_discard_size;
3512                                 entry->bytes -= max_discard_size;
3513                                 link_free_space(ctl, entry);
3514                         } else {
3515                                 kmem_cache_free(btrfs_free_space_cachep, entry);
3516                         }
3517                 } else {
3518                         start = max(start, extent_start);
3519                         bytes = min(extent_start + extent_bytes, end) - start;
3520                         if (bytes < minlen) {
3521                                 spin_unlock(&ctl->tree_lock);
3522                                 mutex_unlock(&ctl->cache_writeout_mutex);
3523                                 goto next;
3524                         }
3525
3526                         unlink_free_space(ctl, entry);
3527                         kmem_cache_free(btrfs_free_space_cachep, entry);
3528                 }
3529
3530                 spin_unlock(&ctl->tree_lock);
3531                 trim_entry.start = extent_start;
3532                 trim_entry.bytes = extent_bytes;
3533                 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3534                 mutex_unlock(&ctl->cache_writeout_mutex);
3535
3536                 ret = do_trimming(block_group, total_trimmed, start, bytes,
3537                                   extent_start, extent_bytes, extent_trim_state,
3538                                   &trim_entry);
3539                 if (ret) {
3540                         block_group->discard_cursor = start + bytes;
3541                         break;
3542                 }
3543 next:
3544                 start += bytes;
3545                 block_group->discard_cursor = start;
3546                 if (async && *total_trimmed)
3547                         break;
3548
3549                 if (fatal_signal_pending(current)) {
3550                         ret = -ERESTARTSYS;
3551                         break;
3552                 }
3553
3554                 cond_resched();
3555         }
3556
3557         return ret;
3558
3559 out_unlock:
3560         block_group->discard_cursor = btrfs_block_group_end(block_group);
3561         spin_unlock(&ctl->tree_lock);
3562         mutex_unlock(&ctl->cache_writeout_mutex);
3563
3564         return ret;
3565 }
3566
3567 /*
3568  * If we break out of trimming a bitmap prematurely, we should reset the
3569  * trimming bit.  In a rather contrived case, it's possible to race here so
3570  * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3571  *
3572  * start = start of bitmap
3573  * end = near end of bitmap
3574  *
3575  * Thread 1:                    Thread 2:
3576  * trim_bitmaps(start)
3577  *                              trim_bitmaps(end)
3578  *                              end_trimming_bitmap()
3579  * reset_trimming_bitmap()
3580  */
3581 static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3582 {
3583         struct btrfs_free_space *entry;
3584
3585         spin_lock(&ctl->tree_lock);
3586         entry = tree_search_offset(ctl, offset, 1, 0);
3587         if (entry) {
3588                 if (btrfs_free_space_trimmed(entry)) {
3589                         ctl->discardable_extents[BTRFS_STAT_CURR] +=
3590                                 entry->bitmap_extents;
3591                         ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3592                 }
3593                 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3594         }
3595
3596         spin_unlock(&ctl->tree_lock);
3597 }
3598
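/*
 * Mark a bitmap that was being trimmed as trimmed and remove its extents
 * and bytes from the discardable counters.
 */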
3599 static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3600                                 struct btrfs_free_space *entry)
3601 {
3602         if (btrfs_free_space_trimming_bitmap(entry)) {
3603                 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3604                 ctl->discardable_extents[BTRFS_STAT_CURR] -=
3605                         entry->bitmap_extents;
3606                 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3607         }
3608 }
3609
3610 /*
3611  * If @async is set, then we will trim 1 region and return.
3612  */
3613 static int trim_bitmaps(struct btrfs_block_group *block_group,
3614                         u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3615                         u64 maxlen, bool async)
3616 {
3617         struct btrfs_discard_ctl *discard_ctl =
3618                                         &block_group->fs_info->discard_ctl;
3619         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3620         struct btrfs_free_space *entry;
3621         int ret = 0;
3622         int ret2;
3623         u64 bytes;
3624         u64 offset = offset_to_bitmap(ctl, start);
3625         const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3626
3627         while (offset < end) {
3628                 bool next_bitmap = false;
3629                 struct btrfs_trim_range trim_entry;
3630
3631                 mutex_lock(&ctl->cache_writeout_mutex);
3632                 spin_lock(&ctl->tree_lock);
3633
3634                 if (ctl->free_space < minlen) {
3635                         block_group->discard_cursor =
3636                                 btrfs_block_group_end(block_group);
3637                         spin_unlock(&ctl->tree_lock);
3638                         mutex_unlock(&ctl->cache_writeout_mutex);
3639                         break;
3640                 }
3641
3642                 entry = tree_search_offset(ctl, offset, 1, 0);
3643                 /*
3644                  * Bitmaps are marked trimmed lossily now to prevent constant
3645                  * discarding of the same bitmap (the reason why we are bound
3646                  * by the filters).  So, retrim the block group bitmaps when we
3647                  * are preparing to punt to the unused_bgs list.  This uses
3648                  * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3649                  * which is the only discard index which sets minlen to 0.
3650                  */
3651                 if (!entry || (async && minlen && start == offset &&
3652                                btrfs_free_space_trimmed(entry))) {
3653                         spin_unlock(&ctl->tree_lock);
3654                         mutex_unlock(&ctl->cache_writeout_mutex);
3655                         next_bitmap = true;
3656                         goto next;
3657                 }
3658
3659                 /*
3660                  * Async discard bitmap trimming begins by setting the start
3661                  * to be key.objectid and the offset_to_bitmap() aligns to the
3662                  * start of the bitmap.  This lets us know we are fully
3663                  * scanning the bitmap rather than only some portion of it.
3664                  */
3665                 if (start == offset)
3666                         entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3667
3668                 bytes = minlen;
3669                 ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3670                 if (ret2 || start >= end) {
3671                         /*
3672                          * We lossily consider a bitmap trimmed if we only skip
3673                          * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3674                          */
3675                         if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3676                                 end_trimming_bitmap(ctl, entry);
3677                         else
3678                                 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3679                         spin_unlock(&ctl->tree_lock);
3680                         mutex_unlock(&ctl->cache_writeout_mutex);
3681                         next_bitmap = true;
3682                         goto next;
3683                 }
3684
3685                 /*
3686                  * We already trimmed a region, but are using the locking above
3687                  * to reset the trim_state.
3688                  */
3689                 if (async && *total_trimmed) {
3690                         spin_unlock(&ctl->tree_lock);
3691                         mutex_unlock(&ctl->cache_writeout_mutex);
3692                         goto out;
3693                 }
3694
3695                 bytes = min(bytes, end - start);
3696                 if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
3697                         spin_unlock(&ctl->tree_lock);
3698                         mutex_unlock(&ctl->cache_writeout_mutex);
3699                         goto next;
3700                 }
3701
3702                 /*
3703                  * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3704                  * If X < @minlen, we won't trim X when we come back around.
3705                  * So trim it now.  We differ here from trimming extents as we
3706                  * don't keep individual state per bit.
3707                  */
3708                 if (async &&
3709                     max_discard_size &&
3710                     bytes > (max_discard_size + minlen))
3711                         bytes = max_discard_size;
3712
3713                 bitmap_clear_bits(ctl, entry, start, bytes);
3714                 if (entry->bytes == 0)
3715                         free_bitmap(ctl, entry);
3716
3717                 spin_unlock(&ctl->tree_lock);
3718                 trim_entry.start = start;
3719                 trim_entry.bytes = bytes;
3720                 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3721                 mutex_unlock(&ctl->cache_writeout_mutex);
3722
3723                 ret = do_trimming(block_group, total_trimmed, start, bytes,
3724                                   start, bytes, 0, &trim_entry);
3725                 if (ret) {
3726                         reset_trimming_bitmap(ctl, offset);
3727                         block_group->discard_cursor =
3728                                 btrfs_block_group_end(block_group);
3729                         break;
3730                 }
3731 next:
3732                 if (next_bitmap) {
3733                         offset += BITS_PER_BITMAP * ctl->unit;
3734                         start = offset;
3735                 } else {
3736                         start += bytes;
3737                 }
3738                 block_group->discard_cursor = start;
3739
3740                 if (fatal_signal_pending(current)) {
3741                         if (start != offset)
3742                                 reset_trimming_bitmap(ctl, offset);
3743                         ret = -ERESTARTSYS;
3744                         break;
3745                 }
3746
3747                 cond_resched();
3748         }
3749
3750         if (offset >= end)
3751                 block_group->discard_cursor = end;
3752
3753 out:
3754         return ret;
3755 }
3756
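/*
 * Trim both the extent entries and the bitmaps in [start, end).  The block
 * group is frozen so it can't be removed while discards are in flight; if
 * we stop in the middle of a bitmap its trimming flag is reset.
 */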
3757 int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3758                            u64 *trimmed, u64 start, u64 end, u64 minlen)
3759 {
3760         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3761         int ret;
3762         u64 rem = 0;
3763
3764         *trimmed = 0;
3765
3766         spin_lock(&block_group->lock);
3767         if (block_group->removed) {
3768                 spin_unlock(&block_group->lock);
3769                 return 0;
3770         }
3771         btrfs_freeze_block_group(block_group);
3772         spin_unlock(&block_group->lock);
3773
3774         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3775         if (ret)
3776                 goto out;
3777
3778         ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3779         div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3780         /* If we ended in the middle of a bitmap, reset the trimming flag */
3781         if (rem)
3782                 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3783 out:
3784         btrfs_unfreeze_block_group(block_group);
3785         return ret;
3786 }
3787
3788 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3789                                    u64 *trimmed, u64 start, u64 end, u64 minlen,
3790                                    bool async)
3791 {
3792         int ret;
3793
3794         *trimmed = 0;
3795
3796         spin_lock(&block_group->lock);
3797         if (block_group->removed) {
3798                 spin_unlock(&block_group->lock);
3799                 return 0;
3800         }
3801         btrfs_freeze_block_group(block_group);
3802         spin_unlock(&block_group->lock);
3803
3804         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3805         btrfs_unfreeze_block_group(block_group);
3806
3807         return ret;
3808 }
3809
3810 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3811                                    u64 *trimmed, u64 start, u64 end, u64 minlen,
3812                                    u64 maxlen, bool async)
3813 {
3814         int ret;
3815
3816         *trimmed = 0;
3817
3818         spin_lock(&block_group->lock);
3819         if (block_group->removed) {
3820                 spin_unlock(&block_group->lock);
3821                 return 0;
3822         }
3823         btrfs_freeze_block_group(block_group);
3824         spin_unlock(&block_group->lock);
3825
3826         ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3827                            async);
3828
3829         btrfs_unfreeze_block_group(block_group);
3830
3831         return ret;
3832 }
3833
3834 /*
3835  * Find the left-most item in the cache tree, and then return the
3836  * smallest inode number in the item.
3837  *
3838  * Note: the returned inode number may not be the smallest one in
3839  * the tree, if the left-most item is a bitmap.
3840  */
3841 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
3842 {
3843         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
3844         struct btrfs_free_space *entry = NULL;
3845         u64 ino = 0;
3846
3847         spin_lock(&ctl->tree_lock);
3848
3849         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
3850                 goto out;
3851
3852         entry = rb_entry(rb_first(&ctl->free_space_offset),
3853                          struct btrfs_free_space, offset_index);
3854
3855         if (!entry->bitmap) {
3856                 ino = entry->offset;
3857
3858                 unlink_free_space(ctl, entry);
3859                 entry->offset++;
3860                 entry->bytes--;
3861                 if (!entry->bytes)
3862                         kmem_cache_free(btrfs_free_space_cachep, entry);
3863                 else
3864                         link_free_space(ctl, entry);
3865         } else {
3866                 u64 offset = 0;
3867                 u64 count = 1;
3868                 int ret;
3869
3870                 ret = search_bitmap(ctl, entry, &offset, &count, true);
3871                 /* Logic error; Should be empty if it can't find anything */
3872                 ASSERT(!ret);
3873
3874                 ino = offset;
3875                 bitmap_clear_bits(ctl, entry, offset, 1);
3876                 if (entry->bytes == 0)
3877                         free_bitmap(ctl, entry);
3878         }
3879 out:
3880         spin_unlock(&ctl->tree_lock);
3881
3882         return ino;
3883 }
3884
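/*
 * Return the cached free-ino inode if we already have one, otherwise look
 * it up on disk and cache it unless the filesystem is being unmounted.
 */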
3885 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
3886                                     struct btrfs_path *path)
3887 {
3888         struct inode *inode = NULL;
3889
3890         spin_lock(&root->ino_cache_lock);
3891         if (root->ino_cache_inode)
3892                 inode = igrab(root->ino_cache_inode);
3893         spin_unlock(&root->ino_cache_lock);
3894         if (inode)
3895                 return inode;
3896
3897         inode = __lookup_free_space_inode(root, path, 0);
3898         if (IS_ERR(inode))
3899                 return inode;
3900
3901         spin_lock(&root->ino_cache_lock);
3902         if (!btrfs_fs_closing(root->fs_info))
3903                 root->ino_cache_inode = igrab(inode);
3904         spin_unlock(&root->ino_cache_lock);
3905
3906         return inode;
3907 }
3908
3909 int create_free_ino_inode(struct btrfs_root *root,
3910                           struct btrfs_trans_handle *trans,
3911                           struct btrfs_path *path)
3912 {
3913         return __create_free_space_inode(root, trans, path,
3914                                          BTRFS_FREE_INO_OBJECTID, 0);
3915 }
3916
3917 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3918 {
3919         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3920         struct btrfs_path *path;
3921         struct inode *inode;
3922         int ret = 0;
3923         u64 root_gen = btrfs_root_generation(&root->root_item);
3924
3925         if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3926                 return 0;
3927
3928         /*
3929          * If we're unmounting then just return, since this does a search on the
3930          * normal root and not the commit root and we could deadlock.
3931          */
3932         if (btrfs_fs_closing(fs_info))
3933                 return 0;
3934
3935         path = btrfs_alloc_path();
3936         if (!path)
3937                 return 0;
3938
3939         inode = lookup_free_ino_inode(root, path);
3940         if (IS_ERR(inode))
3941                 goto out;
3942
3943         if (root_gen != BTRFS_I(inode)->generation)
3944                 goto out_put;
3945
3946         ret = __load_free_space_cache(root, inode, ctl, path, 0);
3947
3948         if (ret < 0)
3949                 btrfs_err(fs_info,
3950                         "failed to load free ino cache for root %llu",
3951                         root->root_key.objectid);
3952 out_put:
3953         iput(inode);
3954 out:
3955         btrfs_free_path(path);
3956         return ret;
3957 }
3958
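/*
 * Write the free inode number cache out to disk.  Once writepages() has
 * started successfully, the metadata reservation is released by ordered
 * I/O completion rather than here.
 */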
3959 int btrfs_write_out_ino_cache(struct btrfs_root *root,
3960                               struct btrfs_trans_handle *trans,
3961                               struct btrfs_path *path,
3962                               struct inode *inode)
3963 {
3964         struct btrfs_fs_info *fs_info = root->fs_info;
3965         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3966         int ret;
3967         struct btrfs_io_ctl io_ctl;
3968         bool release_metadata = true;
3969
3970         if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3971                 return 0;
3972
3973         memset(&io_ctl, 0, sizeof(io_ctl));
3974         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
3975         if (!ret) {
3976                 /*
3977                  * At this point writepages() didn't error out, so our metadata
3978                  * reservation is released when the writeback finishes, at
3979                  * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
3980                  * with or without an error.
3981                  */
3982                 release_metadata = false;
3983                 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
3984         }
3985
3986         if (ret) {
3987                 if (release_metadata)
3988                         btrfs_delalloc_release_metadata(BTRFS_I(inode),
3989                                         inode->i_size, true);
3990                 btrfs_debug(fs_info,
3991                           "failed to write free ino cache for root %llu error %d",
3992                           root->root_key.objectid, ret);
3993         }
3994
3995         return ret;
3996 }
3997
3998 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3999 /*
4000  * Use this if you need to make a bitmap or extent entry specifically, it
4001  * doesn't do any of the merging that add_free_space does, this acts a lot like
4002  * how the free space cache loading stuff works, so you can get really weird
4003  * configurations.
4004  */
4005 int test_add_free_space_entry(struct btrfs_block_group *cache,
4006                               u64 offset, u64 bytes, bool bitmap)
4007 {
4008         struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4009         struct btrfs_free_space *info = NULL, *bitmap_info;
4010         void *map = NULL;
4011         enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
4012         u64 bytes_added;
4013         int ret;
4014
4015 again:
4016         if (!info) {
4017                 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4018                 if (!info)
4019                         return -ENOMEM;
4020         }
4021
4022         if (!bitmap) {
4023                 spin_lock(&ctl->tree_lock);
4024                 info->offset = offset;
4025                 info->bytes = bytes;
4026                 info->max_extent_size = 0;
4027                 ret = link_free_space(ctl, info);
4028                 spin_unlock(&ctl->tree_lock);
4029                 if (ret)
4030                         kmem_cache_free(btrfs_free_space_cachep, info);
4031                 return ret;
4032         }
4033
4034         if (!map) {
4035                 map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
4036                 if (!map) {
4037                         kmem_cache_free(btrfs_free_space_cachep, info);
4038                         return -ENOMEM;
4039                 }
4040         }
4041
4042         spin_lock(&ctl->tree_lock);
4043         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4044                                          1, 0);
4045         if (!bitmap_info) {
4046                 info->bitmap = map;
4047                 map = NULL;
4048                 add_new_bitmap(ctl, info, offset);
4049                 bitmap_info = info;
4050                 info = NULL;
4051         }
4052
4053         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4054                                           trim_state);
4055
4056         bytes -= bytes_added;
4057         offset += bytes_added;
4058         spin_unlock(&ctl->tree_lock);
4059
4060         if (bytes)
4061                 goto again;
4062
4063         if (info)
4064                 kmem_cache_free(btrfs_free_space_cachep, info);
4065         if (map)
4066                 kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
4067         return 0;
4068 }
4069
4070 /*
4071  * Checks to see if the given range is in the free space cache.  This is really
4072  * just used to check the absence of space, so if there is free space in the
4073  * range at all we will return 1.
4074  */
4075 int test_check_exists(struct btrfs_block_group *cache,
4076                       u64 offset, u64 bytes)
4077 {
4078         struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4079         struct btrfs_free_space *info;
4080         int ret = 0;
4081
4082         spin_lock(&ctl->tree_lock);
4083         info = tree_search_offset(ctl, offset, 0, 0);
4084         if (!info) {
4085                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4086                                           1, 0);
4087                 if (!info)
4088                         goto out;
4089         }
4090
4091 have_info:
4092         if (info->bitmap) {
4093                 u64 bit_off, bit_bytes;
4094                 struct rb_node *n;
4095                 struct btrfs_free_space *tmp;
4096
4097                 bit_off = offset;
4098                 bit_bytes = ctl->unit;
4099                 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
4100                 if (!ret) {
4101                         if (bit_off == offset) {
4102                                 ret = 1;
4103                                 goto out;
4104                         } else if (bit_off > offset &&
4105                                    offset + bytes > bit_off) {
4106                                 ret = 1;
4107                                 goto out;
4108                         }
4109                 }
4110
4111                 n = rb_prev(&info->offset_index);
4112                 while (n) {
4113                         tmp = rb_entry(n, struct btrfs_free_space,
4114                                        offset_index);
4115                         if (tmp->offset + tmp->bytes < offset)
4116                                 break;
4117                         if (offset + bytes < tmp->offset) {
4118                                 n = rb_prev(&tmp->offset_index);
4119                                 continue;
4120                         }
4121                         info = tmp;
4122                         goto have_info;
4123                 }
4124
4125                 n = rb_next(&info->offset_index);
4126                 while (n) {
4127                         tmp = rb_entry(n, struct btrfs_free_space,
4128                                        offset_index);
4129                         if (offset + bytes < tmp->offset)
4130                                 break;
4131                         if (tmp->offset + tmp->bytes < offset) {
4132                                 n = rb_next(&tmp->offset_index);
4133                                 continue;
4134                         }
4135                         info = tmp;
4136                         goto have_info;
4137                 }
4138
4139                 ret = 0;
4140                 goto out;
4141         }
4142
4143         if (info->offset == offset) {
4144                 ret = 1;
4145                 goto out;
4146         }
4147
4148         if (offset > info->offset && offset < info->offset + info->bytes)
4149                 ret = 1;
4150 out:
4151         spin_unlock(&ctl->tree_lock);
4152         return ret;
4153 }
4154 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */