/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr.  This only tracks the number of direct references, not
 * shared references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These entries represent what should exist in the extent tree.  They can be
 * used to verify that the extent tree is consistent, since they should all
 * match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these exists for every block we reference; it holds the roots and
 * references to it as well as all of the ref actions that have occurred on it.
 * We never free it until we unmount the file system, in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};

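/*
 * Insert @be into the rb-tree keyed by bytenr.  If an entry for the same
 * bytenr already exists it is returned and @be is left untouched, otherwise
 * @be is linked in and NULL is returned.
 */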
static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

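/* Find the block_entry for an exact bytenr match, or NULL if none exists. */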
static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

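/*
 * Insert @re into a block entry's roots tree, keyed by root objectid.
 * Returns the existing entry if one is already present, otherwise NULL.
 */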
static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

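/*
 * Order two refs by root objectid, then parent, then owner, then offset.
 * Returns -1, 0 or 1 in the usual comparator style.
 */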
static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

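/*
 * Insert @ref into a block entry's refs tree, ordered by comp_refs().
 * Returns the existing entry on a duplicate, otherwise NULL.
 */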
static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

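/* Find the root_entry for @objectid in a block entry's roots tree. */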
static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, "  ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, "  ref-verify: no stacktrace support");
}
#endif

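/* Free a block entry along with all of its roots, refs and recorded actions. */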
static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

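/*
 * Find or create the block_entry for @bytenr, adding a root_entry for
 * @root_objectid if one is given.  On success this returns with
 * fs_info->ref_verify_lock held; the caller is responsible for dropping it.
 */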
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

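/*
 * Record a metadata (tree block) ref found during the initial scan, either a
 * full backref keyed on @ref_root or a shared one keyed on @parent.
 */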
static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

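/* Record a shared data ref (keyed on the parent bytenr) found on disk. */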
static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

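/* Record a normal data ref (root/objectid/offset) found on disk. */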
static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

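/*
 * Walk all of the inline refs in an extent or metadata item, recording each
 * one in the ref cache and picking up the tree block level along the way.
 */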
static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

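/* Process every item in an extent tree leaf, recording any refs we find. */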
static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			/* fall through */
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level - 1] = eb;
			path->slots[level - 1] = 0;
			path->locks[level - 1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_path *path, int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
"  Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
"  ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, "  root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing, this is the same as the delayed ref
 *	action.
 *
 * This will add an action item to the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new allocation
 * and this block entry has history we will delete all previous actions as long
 * as our sanity checks pass, since they are no longer needed.
 */
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * This is an allocation, preallocate the block_entry in case we haven't
	 * used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent root
		 * is and the new root objectid, so let's not treat the passed
		 * in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
			"re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to look up below when we modify the
			 * re->num_refs.
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
"dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
"attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
"dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we look up the be with !parent, but just
			 * in case catch this case so we don't panic because I
			 * didn't think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

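/*
 * Drop all cached block entries that fall inside the block group starting at
 * @start and spanning @len bytes, complaining about any entry that straddles
 * the block group boundaries.
 */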
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group, maybe have something to check for
	 * this case to verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				"block entry overlaps a block group [%llu,%llu]!",
				start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				"block entry overlaps a block group [%llu,%llu]!",
				start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	eb = btrfs_read_lock_root_node(fs_info->extent_root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear on
		 * a different leaf from the original extent item.
		 */
		ret = walk_down_tree(fs_info->extent_root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}