Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[linux-2.6-microblaze.git] / fs / btrfs / delayed-inode.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2011 Fujitsu.  All rights reserved.
4  * Written by Miao Xie <miaox@cn.fujitsu.com>
5  */
6
7 #include <linux/slab.h>
8 #include <linux/iversion.h>
9 #include <linux/sched/mm.h>
10 #include "misc.h"
11 #include "delayed-inode.h"
12 #include "disk-io.h"
13 #include "transaction.h"
14 #include "ctree.h"
15 #include "qgroup.h"
16 #include "locking.h"
17
18 #define BTRFS_DELAYED_WRITEBACK         512
19 #define BTRFS_DELAYED_BACKGROUND        128
20 #define BTRFS_DELAYED_BATCH             16
21
22 static struct kmem_cache *delayed_node_cache;
23
24 int __init btrfs_delayed_inode_init(void)
25 {
26         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
27                                         sizeof(struct btrfs_delayed_node),
28                                         0,
29                                         SLAB_MEM_SPREAD,
30                                         NULL);
31         if (!delayed_node_cache)
32                 return -ENOMEM;
33         return 0;
34 }
35
/*
 * Destroy the delayed node slab cache (delayed_node_cache), the counterpart
 * of btrfs_delayed_inode_init().
 */
36 void __cold btrfs_delayed_inode_exit(void)
37 {
38         kmem_cache_destroy(delayed_node_cache);
39 }
40
41 static inline void btrfs_init_delayed_node(
42                                 struct btrfs_delayed_node *delayed_node,
43                                 struct btrfs_root *root, u64 inode_id)
44 {
45         delayed_node->root = root;
46         delayed_node->inode_id = inode_id;
47         refcount_set(&delayed_node->refs, 0);
48         delayed_node->ins_root = RB_ROOT_CACHED;
49         delayed_node->del_root = RB_ROOT_CACHED;
50         mutex_init(&delayed_node->mutex);
51         INIT_LIST_HEAD(&delayed_node->n_list);
52         INIT_LIST_HEAD(&delayed_node->p_list);
53 }
54
55 static inline int btrfs_is_continuous_delayed_item(
56                                         struct btrfs_delayed_item *item1,
57                                         struct btrfs_delayed_item *item2)
58 {
59         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
60             item1->key.objectid == item2->key.objectid &&
61             item1->key.type == item2->key.type &&
62             item1->key.offset + 1 == item2->key.offset)
63                 return 1;
64         return 0;
65 }
66
67 static struct btrfs_delayed_node *btrfs_get_delayed_node(
68                 struct btrfs_inode *btrfs_inode)
69 {
70         struct btrfs_root *root = btrfs_inode->root;
71         u64 ino = btrfs_ino(btrfs_inode);
72         struct btrfs_delayed_node *node;
73
74         node = READ_ONCE(btrfs_inode->delayed_node);
75         if (node) {
76                 refcount_inc(&node->refs);
77                 return node;
78         }
79
80         spin_lock(&root->inode_lock);
81         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
82
83         if (node) {
84                 if (btrfs_inode->delayed_node) {
85                         refcount_inc(&node->refs);      /* can be accessed */
86                         BUG_ON(btrfs_inode->delayed_node != node);
87                         spin_unlock(&root->inode_lock);
88                         return node;
89                 }
90
91                 /*
92                  * It's possible that we're racing into the middle of removing
93                  * this node from the radix tree.  In this case, the refcount
94                  * was zero and it should never go back to one.  Just return
95                  * NULL like it was never in the radix at all; our release
96                  * function is in the process of removing it.
97                  *
98                  * Some implementations of refcount_inc refuse to bump the
99                  * refcount once it has hit zero.  If we don't do this dance
100                  * here, refcount_inc() may decide to just WARN_ONCE() instead
101                  * of actually bumping the refcount.
102                  *
103                  * If this node is properly in the radix, we want to bump the
104                  * refcount twice, once for the inode and once for this get
105                  * operation.
106                  */
107                 if (refcount_inc_not_zero(&node->refs)) {
108                         refcount_inc(&node->refs);
109                         btrfs_inode->delayed_node = node;
110                 } else {
111                         node = NULL;
112                 }
113
114                 spin_unlock(&root->inode_lock);
115                 return node;
116         }
117         spin_unlock(&root->inode_lock);
118
119         return NULL;
120 }
121
122 /* Will return either the node or PTR_ERR(-ENOMEM) */
123 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
124                 struct btrfs_inode *btrfs_inode)
125 {
126         struct btrfs_delayed_node *node;
127         struct btrfs_root *root = btrfs_inode->root;
128         u64 ino = btrfs_ino(btrfs_inode);
129         int ret;
130
131 again:
132         node = btrfs_get_delayed_node(btrfs_inode);
133         if (node)
134                 return node;
135
136         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
137         if (!node)
138                 return ERR_PTR(-ENOMEM);
139         btrfs_init_delayed_node(node, root, ino);
140
141         /* cached in the btrfs inode and can be accessed */
142         refcount_set(&node->refs, 2);
143
144         ret = radix_tree_preload(GFP_NOFS);
145         if (ret) {
146                 kmem_cache_free(delayed_node_cache, node);
147                 return ERR_PTR(ret);
148         }
149
150         spin_lock(&root->inode_lock);
151         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
152         if (ret == -EEXIST) {
153                 spin_unlock(&root->inode_lock);
154                 kmem_cache_free(delayed_node_cache, node);
155                 radix_tree_preload_end();
156                 goto again;
157         }
158         btrfs_inode->delayed_node = node;
159         spin_unlock(&root->inode_lock);
160         radix_tree_preload_end();
161
162         return node;
163 }
164
165 /*
166  * Call it when holding delayed_node->mutex
167  *
168  * If mod = 1, add this node into the prepared list.
169  */
170 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
171                                      struct btrfs_delayed_node *node,
172                                      int mod)
173 {
174         spin_lock(&root->lock);
175         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
176                 if (!list_empty(&node->p_list))
177                         list_move_tail(&node->p_list, &root->prepare_list);
178                 else if (mod)
179                         list_add_tail(&node->p_list, &root->prepare_list);
180         } else {
181                 list_add_tail(&node->n_list, &root->node_list);
182                 list_add_tail(&node->p_list, &root->prepare_list);
183                 refcount_inc(&node->refs);      /* inserted into list */
184                 root->nodes++;
185                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
186         }
187         spin_unlock(&root->lock);
188 }
189
190 /* Call it when holding delayed_node->mutex */
191 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
192                                        struct btrfs_delayed_node *node)
193 {
194         spin_lock(&root->lock);
195         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
196                 root->nodes--;
197                 refcount_dec(&node->refs);      /* not in the list */
198                 list_del_init(&node->n_list);
199                 if (!list_empty(&node->p_list))
200                         list_del_init(&node->p_list);
201                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
202         }
203         spin_unlock(&root->lock);
204 }
205
206 static struct btrfs_delayed_node *btrfs_first_delayed_node(
207                         struct btrfs_delayed_root *delayed_root)
208 {
209         struct list_head *p;
210         struct btrfs_delayed_node *node = NULL;
211
212         spin_lock(&delayed_root->lock);
213         if (list_empty(&delayed_root->node_list))
214                 goto out;
215
216         p = delayed_root->node_list.next;
217         node = list_entry(p, struct btrfs_delayed_node, n_list);
218         refcount_inc(&node->refs);
219 out:
220         spin_unlock(&delayed_root->lock);
221
222         return node;
223 }
224
225 static struct btrfs_delayed_node *btrfs_next_delayed_node(
226                                                 struct btrfs_delayed_node *node)
227 {
228         struct btrfs_delayed_root *delayed_root;
229         struct list_head *p;
230         struct btrfs_delayed_node *next = NULL;
231
232         delayed_root = node->root->fs_info->delayed_root;
233         spin_lock(&delayed_root->lock);
234         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
235                 /* not in the list */
236                 if (list_empty(&delayed_root->node_list))
237                         goto out;
238                 p = delayed_root->node_list.next;
239         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
240                 goto out;
241         else
242                 p = node->n_list.next;
243
244         next = list_entry(p, struct btrfs_delayed_node, n_list);
245         refcount_inc(&next->refs);
246 out:
247         spin_unlock(&delayed_root->lock);
248
249         return next;
250 }
251
252 static void __btrfs_release_delayed_node(
253                                 struct btrfs_delayed_node *delayed_node,
254                                 int mod)
255 {
256         struct btrfs_delayed_root *delayed_root;
257
258         if (!delayed_node)
259                 return;
260
261         delayed_root = delayed_node->root->fs_info->delayed_root;
262
263         mutex_lock(&delayed_node->mutex);
264         if (delayed_node->count)
265                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
266         else
267                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
268         mutex_unlock(&delayed_node->mutex);
269
270         if (refcount_dec_and_test(&delayed_node->refs)) {
271                 struct btrfs_root *root = delayed_node->root;
272
273                 spin_lock(&root->inode_lock);
274                 /*
275                  * Once our refcount goes to zero, nobody is allowed to bump it
276                  * back up.  We can delete it now.
277                  */
278                 ASSERT(refcount_read(&delayed_node->refs) == 0);
279                 radix_tree_delete(&root->delayed_nodes_tree,
280                                   delayed_node->inode_id);
281                 spin_unlock(&root->inode_lock);
282                 kmem_cache_free(delayed_node_cache, delayed_node);
283         }
284 }
285
/*
 * Drop a reference on @node via __btrfs_release_delayed_node() with mod = 0.
 */
286 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
287 {
288         __btrfs_release_delayed_node(node, 0);
289 }
290
291 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
292                                         struct btrfs_delayed_root *delayed_root)
293 {
294         struct list_head *p;
295         struct btrfs_delayed_node *node = NULL;
296
297         spin_lock(&delayed_root->lock);
298         if (list_empty(&delayed_root->prepare_list))
299                 goto out;
300
301         p = delayed_root->prepare_list.next;
302         list_del_init(p);
303         node = list_entry(p, struct btrfs_delayed_node, p_list);
304         refcount_inc(&node->refs);
305 out:
306         spin_unlock(&delayed_root->lock);
307
308         return node;
309 }
310
/*
 * Drop a reference on @node via __btrfs_release_delayed_node() with mod = 1.
 */
311 static inline void btrfs_release_prepared_delayed_node(
312                                         struct btrfs_delayed_node *node)
313 {
314         __btrfs_release_delayed_node(node, 1);
315 }
316
317 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
318 {
319         struct btrfs_delayed_item *item;
320         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
321         if (item) {
322                 item->data_len = data_len;
323                 item->ins_or_del = 0;
324                 item->bytes_reserved = 0;
325                 item->delayed_node = NULL;
326                 refcount_set(&item->refs, 1);
327         }
328         return item;
329 }
330
331 /*
332  * __btrfs_lookup_delayed_item - look up the delayed item by key
333  * @delayed_node: pointer to the delayed node
334  * @key:          the key to look up
335  * @prev:         used to store the prev item if the right item isn't found
336  * @next:         used to store the next item if the right item isn't found
337  *
338  * Note: if we don't find the right item, we will return the prev item and
339  * the next item.
340  */
341 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
342                                 struct rb_root *root,
343                                 struct btrfs_key *key,
344                                 struct btrfs_delayed_item **prev,
345                                 struct btrfs_delayed_item **next)
346 {
347         struct rb_node *node, *prev_node = NULL;
348         struct btrfs_delayed_item *delayed_item = NULL;
349         int ret = 0;
350
351         node = root->rb_node;
352
353         while (node) {
354                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
355                                         rb_node);
356                 prev_node = node;
357                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
358                 if (ret < 0)
359                         node = node->rb_right;
360                 else if (ret > 0)
361                         node = node->rb_left;
362                 else
363                         return delayed_item;
364         }
365
366         if (prev) {
367                 if (!prev_node)
368                         *prev = NULL;
369                 else if (ret < 0)
370                         *prev = delayed_item;
371                 else if ((node = rb_prev(prev_node)) != NULL) {
372                         *prev = rb_entry(node, struct btrfs_delayed_item,
373                                          rb_node);
374                 } else
375                         *prev = NULL;
376         }
377
378         if (next) {
379                 if (!prev_node)
380                         *next = NULL;
381                 else if (ret > 0)
382                         *next = delayed_item;
383                 else if ((node = rb_next(prev_node)) != NULL) {
384                         *next = rb_entry(node, struct btrfs_delayed_item,
385                                          rb_node);
386                 } else
387                         *next = NULL;
388         }
389         return NULL;
390 }
391
/*
 * Look up @key in the insertion tree (ins_root) of @delayed_node.
 *
 * Returns what __btrfs_lookup_delayed_item() returns; no prev/next
 * neighbours are requested.
 */
392 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
393                                         struct btrfs_delayed_node *delayed_node,
394                                         struct btrfs_key *key)
395 {
396         return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
397                                            NULL, NULL);
398 }
399
400 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
401                                     struct btrfs_delayed_item *ins,
402                                     int action)
403 {
404         struct rb_node **p, *node;
405         struct rb_node *parent_node = NULL;
406         struct rb_root_cached *root;
407         struct btrfs_delayed_item *item;
408         int cmp;
409         bool leftmost = true;
410
411         if (action == BTRFS_DELAYED_INSERTION_ITEM)
412                 root = &delayed_node->ins_root;
413         else if (action == BTRFS_DELAYED_DELETION_ITEM)
414                 root = &delayed_node->del_root;
415         else
416                 BUG();
417         p = &root->rb_root.rb_node;
418         node = &ins->rb_node;
419
420         while (*p) {
421                 parent_node = *p;
422                 item = rb_entry(parent_node, struct btrfs_delayed_item,
423                                  rb_node);
424
425                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
426                 if (cmp < 0) {
427                         p = &(*p)->rb_right;
428                         leftmost = false;
429                 } else if (cmp > 0) {
430                         p = &(*p)->rb_left;
431                 } else {
432                         return -EEXIST;
433                 }
434         }
435
436         rb_link_node(node, parent_node, p);
437         rb_insert_color_cached(node, root, leftmost);
438         ins->delayed_node = delayed_node;
439         ins->ins_or_del = action;
440
441         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
442             action == BTRFS_DELAYED_INSERTION_ITEM &&
443             ins->key.offset >= delayed_node->index_cnt)
444                         delayed_node->index_cnt = ins->key.offset + 1;
445
446         delayed_node->count++;
447         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
448         return 0;
449 }
450
/*
 * Add @item to @node as a BTRFS_DELAYED_INSERTION_ITEM.
 *
 * Returns what __btrfs_add_delayed_item() returns.
 */
451 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
452                                               struct btrfs_delayed_item *item)
453 {
454         return __btrfs_add_delayed_item(node, item,
455                                         BTRFS_DELAYED_INSERTION_ITEM);
456 }
457
/*
 * Add @item to @node as a BTRFS_DELAYED_DELETION_ITEM.
 *
 * Returns what __btrfs_add_delayed_item() returns.
 */
458 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
459                                              struct btrfs_delayed_item *item)
460 {
461         return __btrfs_add_delayed_item(node, item,
462                                         BTRFS_DELAYED_DELETION_ITEM);
463 }
464
465 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
466 {
467         int seq = atomic_inc_return(&delayed_root->items_seq);
468
469         /* atomic_dec_return implies a barrier */
470         if ((atomic_dec_return(&delayed_root->items) <
471             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
472                 cond_wake_up_nomb(&delayed_root->wait);
473 }
474
475 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
476 {
477         struct rb_root_cached *root;
478         struct btrfs_delayed_root *delayed_root;
479
480         /* Not associated with any delayed_node */
481         if (!delayed_item->delayed_node)
482                 return;
483         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
484
485         BUG_ON(!delayed_root);
486         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
487                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
488
489         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
490                 root = &delayed_item->delayed_node->ins_root;
491         else
492                 root = &delayed_item->delayed_node->del_root;
493
494         rb_erase_cached(&delayed_item->rb_node, root);
495         delayed_item->delayed_node->count--;
496
497         finish_one_item(delayed_root);
498 }
499
500 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
501 {
502         if (item) {
503                 __btrfs_remove_delayed_item(item);
504                 if (refcount_dec_and_test(&item->refs))
505                         kfree(item);
506         }
507 }
508
509 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
510                                         struct btrfs_delayed_node *delayed_node)
511 {
512         struct rb_node *p;
513         struct btrfs_delayed_item *item = NULL;
514
515         p = rb_first_cached(&delayed_node->ins_root);
516         if (p)
517                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
518
519         return item;
520 }
521
522 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
523                                         struct btrfs_delayed_node *delayed_node)
524 {
525         struct rb_node *p;
526         struct btrfs_delayed_item *item = NULL;
527
528         p = rb_first_cached(&delayed_node->del_root);
529         if (p)
530                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
531
532         return item;
533 }
534
535 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
536                                                 struct btrfs_delayed_item *item)
537 {
538         struct rb_node *p;
539         struct btrfs_delayed_item *next = NULL;
540
541         p = rb_next(&item->rb_node);
542         if (p)
543                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
544
545         return next;
546 }
547
548 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
549                                                struct btrfs_root *root,
550                                                struct btrfs_delayed_item *item)
551 {
552         struct btrfs_block_rsv *src_rsv;
553         struct btrfs_block_rsv *dst_rsv;
554         struct btrfs_fs_info *fs_info = root->fs_info;
555         u64 num_bytes;
556         int ret;
557
558         if (!trans->bytes_reserved)
559                 return 0;
560
561         src_rsv = trans->block_rsv;
562         dst_rsv = &fs_info->delayed_block_rsv;
563
564         num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
565
566         /*
567          * Here we migrate space rsv from transaction rsv, since have already
568          * reserved space when starting a transaction.  So no need to reserve
569          * qgroup space here.
570          */
571         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
572         if (!ret) {
573                 trace_btrfs_space_reservation(fs_info, "delayed_item",
574                                               item->key.objectid,
575                                               num_bytes, 1);
576                 item->bytes_reserved = num_bytes;
577         }
578
579         return ret;
580 }
581
582 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
583                                                 struct btrfs_delayed_item *item)
584 {
585         struct btrfs_block_rsv *rsv;
586         struct btrfs_fs_info *fs_info = root->fs_info;
587
588         if (!item->bytes_reserved)
589                 return;
590
591         rsv = &fs_info->delayed_block_rsv;
592         /*
593          * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
594          * to release/reserve qgroup space.
595          */
596         trace_btrfs_space_reservation(fs_info, "delayed_item",
597                                       item->key.objectid, item->bytes_reserved,
598                                       0);
599         btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
600 }
601
602 static int btrfs_delayed_inode_reserve_metadata(
603                                         struct btrfs_trans_handle *trans,
604                                         struct btrfs_root *root,
605                                         struct btrfs_delayed_node *node)
606 {
607         struct btrfs_fs_info *fs_info = root->fs_info;
608         struct btrfs_block_rsv *src_rsv;
609         struct btrfs_block_rsv *dst_rsv;
610         u64 num_bytes;
611         int ret;
612
613         src_rsv = trans->block_rsv;
614         dst_rsv = &fs_info->delayed_block_rsv;
615
616         num_bytes = btrfs_calc_metadata_size(fs_info, 1);
617
618         /*
619          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
620          * which doesn't reserve space for speed.  This is a problem since we
621          * still need to reserve space for this update, so try to reserve the
622          * space.
623          *
624          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
625          * we always reserve enough to update the inode item.
626          */
627         if (!src_rsv || (!trans->bytes_reserved &&
628                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
629                 ret = btrfs_qgroup_reserve_meta(root, num_bytes,
630                                           BTRFS_QGROUP_RSV_META_PREALLOC, true);
631                 if (ret < 0)
632                         return ret;
633                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
634                                           BTRFS_RESERVE_NO_FLUSH);
635                 /* NO_FLUSH could only fail with -ENOSPC */
636                 ASSERT(ret == 0 || ret == -ENOSPC);
637                 if (ret)
638                         btrfs_qgroup_free_meta_prealloc(root, num_bytes);
639         } else {
640                 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
641         }
642
643         if (!ret) {
644                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
645                                               node->inode_id, num_bytes, 1);
646                 node->bytes_reserved = num_bytes;
647         }
648
649         return ret;
650 }
651
652 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
653                                                 struct btrfs_delayed_node *node,
654                                                 bool qgroup_free)
655 {
656         struct btrfs_block_rsv *rsv;
657
658         if (!node->bytes_reserved)
659                 return;
660
661         rsv = &fs_info->delayed_block_rsv;
662         trace_btrfs_space_reservation(fs_info, "delayed_inode",
663                                       node->inode_id, node->bytes_reserved, 0);
664         btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
665         if (qgroup_free)
666                 btrfs_qgroup_free_meta_prealloc(node->root,
667                                 node->bytes_reserved);
668         else
669                 btrfs_qgroup_convert_reserved_meta(node->root,
670                                 node->bytes_reserved);
671         node->bytes_reserved = 0;
672 }
673
674 /*
675  * This helper will insert some continuous items into the same leaf according
676  * to the free space of the leaf.
677  */
678 static int btrfs_batch_insert_items(struct btrfs_root *root,
679                                     struct btrfs_path *path,
680                                     struct btrfs_delayed_item *item)
681 {
682         struct btrfs_delayed_item *curr, *next;
683         int free_space;
684         int total_data_size = 0, total_size = 0;
685         struct extent_buffer *leaf;
686         char *data_ptr;
687         struct btrfs_key *keys;
688         u32 *data_size;
689         struct list_head head;
690         int slot;
691         int nitems;
692         int i;
693         int ret = 0;
694
695         BUG_ON(!path->nodes[0]);
696
697         leaf = path->nodes[0];
698         free_space = btrfs_leaf_free_space(leaf);
699         INIT_LIST_HEAD(&head);
700
701         next = item;
702         nitems = 0;
703
704         /*
705          * count the number of the continuous items that we can insert in batch
706          */
707         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
708                free_space) {
709                 total_data_size += next->data_len;
710                 total_size += next->data_len + sizeof(struct btrfs_item);
711                 list_add_tail(&next->tree_list, &head);
712                 nitems++;
713
714                 curr = next;
715                 next = __btrfs_next_delayed_item(curr);
716                 if (!next)
717                         break;
718
719                 if (!btrfs_is_continuous_delayed_item(curr, next))
720                         break;
721         }
722
723         if (!nitems) {
724                 ret = 0;
725                 goto out;
726         }
727
728         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
729         if (!keys) {
730                 ret = -ENOMEM;
731                 goto out;
732         }
733
734         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
735         if (!data_size) {
736                 ret = -ENOMEM;
737                 goto error;
738         }
739
740         /* get keys of all the delayed items */
741         i = 0;
742         list_for_each_entry(next, &head, tree_list) {
743                 keys[i] = next->key;
744                 data_size[i] = next->data_len;
745                 i++;
746         }
747
748         /* insert the keys of the items */
749         setup_items_for_insert(root, path, keys, data_size, nitems);
750
751         /* insert the dir index items */
752         slot = path->slots[0];
753         list_for_each_entry_safe(curr, next, &head, tree_list) {
754                 data_ptr = btrfs_item_ptr(leaf, slot, char);
755                 write_extent_buffer(leaf, &curr->data,
756                                     (unsigned long)data_ptr,
757                                     curr->data_len);
758                 slot++;
759
760                 btrfs_delayed_item_release_metadata(root, curr);
761
762                 list_del(&curr->tree_list);
763                 btrfs_release_delayed_item(curr);
764         }
765
766 error:
767         kfree(data_size);
768         kfree(keys);
769 out:
770         return ret;
771 }
772
773 /*
774  * This helper can just do simple insertion that needn't extend item for new
775  * data, such as directory name index insertion, inode insertion.
776  */
777 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
778                                      struct btrfs_root *root,
779                                      struct btrfs_path *path,
780                                      struct btrfs_delayed_item *delayed_item)
781 {
782         struct extent_buffer *leaf;
783         unsigned int nofs_flag;
784         char *ptr;
785         int ret;
786
787         nofs_flag = memalloc_nofs_save();
788         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
789                                       delayed_item->data_len);
790         memalloc_nofs_restore(nofs_flag);
791         if (ret < 0 && ret != -EEXIST)
792                 return ret;
793
794         leaf = path->nodes[0];
795
796         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
797
798         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
799                             delayed_item->data_len);
800         btrfs_mark_buffer_dirty(leaf);
801
802         btrfs_delayed_item_release_metadata(root, delayed_item);
803         return 0;
804 }
805
806 /*
807  * we insert an item first, then if there are some continuous items, we try
808  * to insert those items into the same leaf.
809  */
810 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
811                                       struct btrfs_path *path,
812                                       struct btrfs_root *root,
813                                       struct btrfs_delayed_node *node)
814 {
815         struct btrfs_delayed_item *curr, *prev;
816         int ret = 0;
817
818 do_again:
819         mutex_lock(&node->mutex);
820         curr = __btrfs_first_delayed_insertion_item(node);
821         if (!curr)
822                 goto insert_end;
823
824         ret = btrfs_insert_delayed_item(trans, root, path, curr);
825         if (ret < 0) {
826                 btrfs_release_path(path);
827                 goto insert_end;
828         }
829
830         prev = curr;
831         curr = __btrfs_next_delayed_item(prev);
832         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
833                 /* insert the continuous items into the same leaf */
834                 path->slots[0]++;
835                 btrfs_batch_insert_items(root, path, curr);
836         }
837         btrfs_release_delayed_item(prev);
838         btrfs_mark_buffer_dirty(path->nodes[0]);
839
840         btrfs_release_path(path);
841         mutex_unlock(&node->mutex);
842         goto do_again;
843
844 insert_end:
845         mutex_unlock(&node->mutex);
846         return ret;
847 }
848
849 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
850                                     struct btrfs_root *root,
851                                     struct btrfs_path *path,
852                                     struct btrfs_delayed_item *item)
853 {
854         struct btrfs_delayed_item *curr, *next;
855         struct extent_buffer *leaf;
856         struct btrfs_key key;
857         struct list_head head;
858         int nitems, i, last_item;
859         int ret = 0;
860
861         BUG_ON(!path->nodes[0]);
862
863         leaf = path->nodes[0];
864
865         i = path->slots[0];
866         last_item = btrfs_header_nritems(leaf) - 1;
867         if (i > last_item)
868                 return -ENOENT; /* FIXME: Is errno suitable? */
869
870         next = item;
871         INIT_LIST_HEAD(&head);
872         btrfs_item_key_to_cpu(leaf, &key, i);
873         nitems = 0;
874         /*
875          * count the number of the dir index items that we can delete in batch
876          */
877         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
878                 list_add_tail(&next->tree_list, &head);
879                 nitems++;
880
881                 curr = next;
882                 next = __btrfs_next_delayed_item(curr);
883                 if (!next)
884                         break;
885
886                 if (!btrfs_is_continuous_delayed_item(curr, next))
887                         break;
888
889                 i++;
890                 if (i > last_item)
891                         break;
892                 btrfs_item_key_to_cpu(leaf, &key, i);
893         }
894
895         if (!nitems)
896                 return 0;
897
898         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
899         if (ret)
900                 goto out;
901
902         list_for_each_entry_safe(curr, next, &head, tree_list) {
903                 btrfs_delayed_item_release_metadata(root, curr);
904                 list_del(&curr->tree_list);
905                 btrfs_release_delayed_item(curr);
906         }
907
908 out:
909         return ret;
910 }
911
912 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
913                                       struct btrfs_path *path,
914                                       struct btrfs_root *root,
915                                       struct btrfs_delayed_node *node)
916 {
917         struct btrfs_delayed_item *curr, *prev;
918         unsigned int nofs_flag;
919         int ret = 0;
920
921 do_again:
922         mutex_lock(&node->mutex);
923         curr = __btrfs_first_delayed_deletion_item(node);
924         if (!curr)
925                 goto delete_fail;
926
927         nofs_flag = memalloc_nofs_save();
928         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
929         memalloc_nofs_restore(nofs_flag);
930         if (ret < 0)
931                 goto delete_fail;
932         else if (ret > 0) {
933                 /*
934                  * can't find the item which the node points to, so this node
935                  * is invalid, just drop it.
936                  */
937                 prev = curr;
938                 curr = __btrfs_next_delayed_item(prev);
939                 btrfs_release_delayed_item(prev);
940                 ret = 0;
941                 btrfs_release_path(path);
942                 if (curr) {
943                         mutex_unlock(&node->mutex);
944                         goto do_again;
945                 } else
946                         goto delete_fail;
947         }
948
949         btrfs_batch_delete_items(trans, root, path, curr);
950         btrfs_release_path(path);
951         mutex_unlock(&node->mutex);
952         goto do_again;
953
954 delete_fail:
955         btrfs_release_path(path);
956         mutex_unlock(&node->mutex);
957         return ret;
958 }
959
960 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
961 {
962         struct btrfs_delayed_root *delayed_root;
963
964         if (delayed_node &&
965             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
966                 BUG_ON(!delayed_node->root);
967                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
968                 delayed_node->count--;
969
970                 delayed_root = delayed_node->root->fs_info->delayed_root;
971                 finish_one_item(delayed_root);
972         }
973 }
974
975 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
976 {
977         struct btrfs_delayed_root *delayed_root;
978
979         ASSERT(delayed_node->root);
980         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
981         delayed_node->count--;
982
983         delayed_root = delayed_node->root->fs_info->delayed_root;
984         finish_one_item(delayed_root);
985 }
986
987 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
988                                         struct btrfs_root *root,
989                                         struct btrfs_path *path,
990                                         struct btrfs_delayed_node *node)
991 {
992         struct btrfs_fs_info *fs_info = root->fs_info;
993         struct btrfs_key key;
994         struct btrfs_inode_item *inode_item;
995         struct extent_buffer *leaf;
996         unsigned int nofs_flag;
997         int mod;
998         int ret;
999
1000         key.objectid = node->inode_id;
1001         key.type = BTRFS_INODE_ITEM_KEY;
1002         key.offset = 0;
1003
1004         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1005                 mod = -1;
1006         else
1007                 mod = 1;
1008
1009         nofs_flag = memalloc_nofs_save();
1010         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1011         memalloc_nofs_restore(nofs_flag);
1012         if (ret > 0) {
1013                 btrfs_release_path(path);
1014                 return -ENOENT;
1015         } else if (ret < 0) {
1016                 return ret;
1017         }
1018
1019         leaf = path->nodes[0];
1020         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1021                                     struct btrfs_inode_item);
1022         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1023                             sizeof(struct btrfs_inode_item));
1024         btrfs_mark_buffer_dirty(leaf);
1025
1026         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1027                 goto no_iref;
1028
1029         path->slots[0]++;
1030         if (path->slots[0] >= btrfs_header_nritems(leaf))
1031                 goto search;
1032 again:
1033         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1034         if (key.objectid != node->inode_id)
1035                 goto out;
1036
1037         if (key.type != BTRFS_INODE_REF_KEY &&
1038             key.type != BTRFS_INODE_EXTREF_KEY)
1039                 goto out;
1040
1041         /*
1042          * Delayed iref deletion is for the inode who has only one link,
1043          * so there is only one iref. The case that several irefs are
1044          * in the same item doesn't exist.
1045          */
1046         btrfs_del_item(trans, root, path);
1047 out:
1048         btrfs_release_delayed_iref(node);
1049 no_iref:
1050         btrfs_release_path(path);
1051 err_out:
1052         btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
1053         btrfs_release_delayed_inode(node);
1054
1055         return ret;
1056
1057 search:
1058         btrfs_release_path(path);
1059
1060         key.type = BTRFS_INODE_EXTREF_KEY;
1061         key.offset = -1;
1062
1063         nofs_flag = memalloc_nofs_save();
1064         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1065         memalloc_nofs_restore(nofs_flag);
1066         if (ret < 0)
1067                 goto err_out;
1068         ASSERT(ret);
1069
1070         ret = 0;
1071         leaf = path->nodes[0];
1072         path->slots[0]--;
1073         goto again;
1074 }
1075
1076 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1077                                              struct btrfs_root *root,
1078                                              struct btrfs_path *path,
1079                                              struct btrfs_delayed_node *node)
1080 {
1081         int ret;
1082
1083         mutex_lock(&node->mutex);
1084         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1085                 mutex_unlock(&node->mutex);
1086                 return 0;
1087         }
1088
1089         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1090         mutex_unlock(&node->mutex);
1091         return ret;
1092 }
1093
1094 static inline int
1095 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1096                                    struct btrfs_path *path,
1097                                    struct btrfs_delayed_node *node)
1098 {
1099         int ret;
1100
1101         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1102         if (ret)
1103                 return ret;
1104
1105         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1106         if (ret)
1107                 return ret;
1108
1109         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1110         return ret;
1111 }
1112
1113 /*
1114  * Called when committing the transaction.
1115  * Returns 0 on success.
1116  * Returns < 0 on error and returns with an aborted transaction with any
1117  * outstanding delayed items cleaned up.
1118  */
1119 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
1120 {
1121         struct btrfs_fs_info *fs_info = trans->fs_info;
1122         struct btrfs_delayed_root *delayed_root;
1123         struct btrfs_delayed_node *curr_node, *prev_node;
1124         struct btrfs_path *path;
1125         struct btrfs_block_rsv *block_rsv;
1126         int ret = 0;
1127         bool count = (nr > 0);
1128
1129         if (TRANS_ABORTED(trans))
1130                 return -EIO;
1131
1132         path = btrfs_alloc_path();
1133         if (!path)
1134                 return -ENOMEM;
1135
1136         block_rsv = trans->block_rsv;
1137         trans->block_rsv = &fs_info->delayed_block_rsv;
1138
1139         delayed_root = fs_info->delayed_root;
1140
1141         curr_node = btrfs_first_delayed_node(delayed_root);
1142         while (curr_node && (!count || nr--)) {
1143                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1144                                                          curr_node);
1145                 if (ret) {
1146                         btrfs_release_delayed_node(curr_node);
1147                         curr_node = NULL;
1148                         btrfs_abort_transaction(trans, ret);
1149                         break;
1150                 }
1151
1152                 prev_node = curr_node;
1153                 curr_node = btrfs_next_delayed_node(curr_node);
1154                 btrfs_release_delayed_node(prev_node);
1155         }
1156
1157         if (curr_node)
1158                 btrfs_release_delayed_node(curr_node);
1159         btrfs_free_path(path);
1160         trans->block_rsv = block_rsv;
1161
1162         return ret;
1163 }
1164
/*
 * Run the delayed items of every delayed node. Returns the result of
 * __btrfs_run_delayed_items().
 */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
{
        /* A non-positive count means "no limit" to the helper. */
        int ret = __btrfs_run_delayed_items(trans, -1);

        return ret;
}
1169
/*
 * Run the delayed items of at most @nr delayed nodes when @nr is positive;
 * @nr is passed unchanged to __btrfs_run_delayed_items(), whose result is
 * returned.
 */
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
{
        int ret = __btrfs_run_delayed_items(trans, nr);

        return ret;
}
1174
1175 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1176                                      struct btrfs_inode *inode)
1177 {
1178         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1179         struct btrfs_path *path;
1180         struct btrfs_block_rsv *block_rsv;
1181         int ret;
1182
1183         if (!delayed_node)
1184                 return 0;
1185
1186         mutex_lock(&delayed_node->mutex);
1187         if (!delayed_node->count) {
1188                 mutex_unlock(&delayed_node->mutex);
1189                 btrfs_release_delayed_node(delayed_node);
1190                 return 0;
1191         }
1192         mutex_unlock(&delayed_node->mutex);
1193
1194         path = btrfs_alloc_path();
1195         if (!path) {
1196                 btrfs_release_delayed_node(delayed_node);
1197                 return -ENOMEM;
1198         }
1199
1200         block_rsv = trans->block_rsv;
1201         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1202
1203         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1204
1205         btrfs_release_delayed_node(delayed_node);
1206         btrfs_free_path(path);
1207         trans->block_rsv = block_rsv;
1208
1209         return ret;
1210 }
1211
1212 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1213 {
1214         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1215         struct btrfs_trans_handle *trans;
1216         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1217         struct btrfs_path *path;
1218         struct btrfs_block_rsv *block_rsv;
1219         int ret;
1220
1221         if (!delayed_node)
1222                 return 0;
1223
1224         mutex_lock(&delayed_node->mutex);
1225         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1226                 mutex_unlock(&delayed_node->mutex);
1227                 btrfs_release_delayed_node(delayed_node);
1228                 return 0;
1229         }
1230         mutex_unlock(&delayed_node->mutex);
1231
1232         trans = btrfs_join_transaction(delayed_node->root);
1233         if (IS_ERR(trans)) {
1234                 ret = PTR_ERR(trans);
1235                 goto out;
1236         }
1237
1238         path = btrfs_alloc_path();
1239         if (!path) {
1240                 ret = -ENOMEM;
1241                 goto trans_out;
1242         }
1243
1244         block_rsv = trans->block_rsv;
1245         trans->block_rsv = &fs_info->delayed_block_rsv;
1246
1247         mutex_lock(&delayed_node->mutex);
1248         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1249                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1250                                                    path, delayed_node);
1251         else
1252                 ret = 0;
1253         mutex_unlock(&delayed_node->mutex);
1254
1255         btrfs_free_path(path);
1256         trans->block_rsv = block_rsv;
1257 trans_out:
1258         btrfs_end_transaction(trans);
1259         btrfs_btree_balance_dirty(fs_info);
1260 out:
1261         btrfs_release_delayed_node(delayed_node);
1262
1263         return ret;
1264 }
1265
1266 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1267 {
1268         struct btrfs_delayed_node *delayed_node;
1269
1270         delayed_node = READ_ONCE(inode->delayed_node);
1271         if (!delayed_node)
1272                 return;
1273
1274         inode->delayed_node = NULL;
1275         btrfs_release_delayed_node(delayed_node);
1276 }
1277
1278 struct btrfs_async_delayed_work {
1279         struct btrfs_delayed_root *delayed_root;
1280         int nr;
1281         struct btrfs_work work;
1282 };
1283
1284 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1285 {
1286         struct btrfs_async_delayed_work *async_work;
1287         struct btrfs_delayed_root *delayed_root;
1288         struct btrfs_trans_handle *trans;
1289         struct btrfs_path *path;
1290         struct btrfs_delayed_node *delayed_node = NULL;
1291         struct btrfs_root *root;
1292         struct btrfs_block_rsv *block_rsv;
1293         int total_done = 0;
1294
1295         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1296         delayed_root = async_work->delayed_root;
1297
1298         path = btrfs_alloc_path();
1299         if (!path)
1300                 goto out;
1301
1302         do {
1303                 if (atomic_read(&delayed_root->items) <
1304                     BTRFS_DELAYED_BACKGROUND / 2)
1305                         break;
1306
1307                 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1308                 if (!delayed_node)
1309                         break;
1310
1311                 root = delayed_node->root;
1312
1313                 trans = btrfs_join_transaction(root);
1314                 if (IS_ERR(trans)) {
1315                         btrfs_release_path(path);
1316                         btrfs_release_prepared_delayed_node(delayed_node);
1317                         total_done++;
1318                         continue;
1319                 }
1320
1321                 block_rsv = trans->block_rsv;
1322                 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1323
1324                 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1325
1326                 trans->block_rsv = block_rsv;
1327                 btrfs_end_transaction(trans);
1328                 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1329
1330                 btrfs_release_path(path);
1331                 btrfs_release_prepared_delayed_node(delayed_node);
1332                 total_done++;
1333
1334         } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1335                  || total_done < async_work->nr);
1336
1337         btrfs_free_path(path);
1338 out:
1339         wake_up(&delayed_root->wait);
1340         kfree(async_work);
1341 }
1342
1343
1344 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1345                                      struct btrfs_fs_info *fs_info, int nr)
1346 {
1347         struct btrfs_async_delayed_work *async_work;
1348
1349         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1350         if (!async_work)
1351                 return -ENOMEM;
1352
1353         async_work->delayed_root = delayed_root;
1354         btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
1355                         NULL);
1356         async_work->nr = nr;
1357
1358         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1359         return 0;
1360 }
1361
1362 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1363 {
1364         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1365 }
1366
1367 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1368 {
1369         int val = atomic_read(&delayed_root->items_seq);
1370
1371         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1372                 return 1;
1373
1374         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1375                 return 1;
1376
1377         return 0;
1378 }
1379
1380 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1381 {
1382         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1383
1384         if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1385                 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1386                 return;
1387
1388         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1389                 int seq;
1390                 int ret;
1391
1392                 seq = atomic_read(&delayed_root->items_seq);
1393
1394                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1395                 if (ret)
1396                         return;
1397
1398                 wait_event_interruptible(delayed_root->wait,
1399                                          could_end_wait(delayed_root, seq));
1400                 return;
1401         }
1402
1403         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1404 }
1405
1406 /* Will return 0 or -ENOMEM */
1407 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1408                                    const char *name, int name_len,
1409                                    struct btrfs_inode *dir,
1410                                    struct btrfs_disk_key *disk_key, u8 type,
1411                                    u64 index)
1412 {
1413         struct btrfs_delayed_node *delayed_node;
1414         struct btrfs_delayed_item *delayed_item;
1415         struct btrfs_dir_item *dir_item;
1416         int ret;
1417
1418         delayed_node = btrfs_get_or_create_delayed_node(dir);
1419         if (IS_ERR(delayed_node))
1420                 return PTR_ERR(delayed_node);
1421
1422         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1423         if (!delayed_item) {
1424                 ret = -ENOMEM;
1425                 goto release_node;
1426         }
1427
1428         delayed_item->key.objectid = btrfs_ino(dir);
1429         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1430         delayed_item->key.offset = index;
1431
1432         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1433         dir_item->location = *disk_key;
1434         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1435         btrfs_set_stack_dir_data_len(dir_item, 0);
1436         btrfs_set_stack_dir_name_len(dir_item, name_len);
1437         btrfs_set_stack_dir_type(dir_item, type);
1438         memcpy((char *)(dir_item + 1), name, name_len);
1439
1440         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
1441         /*
1442          * we have reserved enough space when we start a new transaction,
1443          * so reserving metadata failure is impossible
1444          */
1445         BUG_ON(ret);
1446
1447         mutex_lock(&delayed_node->mutex);
1448         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1449         if (unlikely(ret)) {
1450                 btrfs_err(trans->fs_info,
1451                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1452                           name_len, name, delayed_node->root->root_key.objectid,
1453                           delayed_node->inode_id, ret);
1454                 BUG();
1455         }
1456         mutex_unlock(&delayed_node->mutex);
1457
1458 release_node:
1459         btrfs_release_delayed_node(delayed_node);
1460         return ret;
1461 }
1462
1463 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1464                                                struct btrfs_delayed_node *node,
1465                                                struct btrfs_key *key)
1466 {
1467         struct btrfs_delayed_item *item;
1468
1469         mutex_lock(&node->mutex);
1470         item = __btrfs_lookup_delayed_insertion_item(node, key);
1471         if (!item) {
1472                 mutex_unlock(&node->mutex);
1473                 return 1;
1474         }
1475
1476         btrfs_delayed_item_release_metadata(node->root, item);
1477         btrfs_release_delayed_item(item);
1478         mutex_unlock(&node->mutex);
1479         return 0;
1480 }
1481
1482 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1483                                    struct btrfs_inode *dir, u64 index)
1484 {
1485         struct btrfs_delayed_node *node;
1486         struct btrfs_delayed_item *item;
1487         struct btrfs_key item_key;
1488         int ret;
1489
1490         node = btrfs_get_or_create_delayed_node(dir);
1491         if (IS_ERR(node))
1492                 return PTR_ERR(node);
1493
1494         item_key.objectid = btrfs_ino(dir);
1495         item_key.type = BTRFS_DIR_INDEX_KEY;
1496         item_key.offset = index;
1497
1498         ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1499                                                   &item_key);
1500         if (!ret)
1501                 goto end;
1502
1503         item = btrfs_alloc_delayed_item(0);
1504         if (!item) {
1505                 ret = -ENOMEM;
1506                 goto end;
1507         }
1508
1509         item->key = item_key;
1510
1511         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1512         /*
1513          * we have reserved enough space when we start a new transaction,
1514          * so reserving metadata failure is impossible.
1515          */
1516         if (ret < 0) {
1517                 btrfs_err(trans->fs_info,
1518 "metadata reservation failed for delayed dir item deltiona, should have been reserved");
1519                 btrfs_release_delayed_item(item);
1520                 goto end;
1521         }
1522
1523         mutex_lock(&node->mutex);
1524         ret = __btrfs_add_delayed_deletion_item(node, item);
1525         if (unlikely(ret)) {
1526                 btrfs_err(trans->fs_info,
1527                           "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1528                           index, node->root->root_key.objectid,
1529                           node->inode_id, ret);
1530                 btrfs_delayed_item_release_metadata(dir->root, item);
1531                 btrfs_release_delayed_item(item);
1532         }
1533         mutex_unlock(&node->mutex);
1534 end:
1535         btrfs_release_delayed_node(node);
1536         return ret;
1537 }
1538
1539 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1540 {
1541         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1542
1543         if (!delayed_node)
1544                 return -ENOENT;
1545
1546         /*
1547          * Since we have held i_mutex of this directory, it is impossible that
1548          * a new directory index is added into the delayed node and index_cnt
1549          * is updated now. So we needn't lock the delayed node.
1550          */
1551         if (!delayed_node->index_cnt) {
1552                 btrfs_release_delayed_node(delayed_node);
1553                 return -EINVAL;
1554         }
1555
1556         inode->index_cnt = delayed_node->index_cnt;
1557         btrfs_release_delayed_node(delayed_node);
1558         return 0;
1559 }
1560
1561 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1562                                      struct list_head *ins_list,
1563                                      struct list_head *del_list)
1564 {
1565         struct btrfs_delayed_node *delayed_node;
1566         struct btrfs_delayed_item *item;
1567
1568         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1569         if (!delayed_node)
1570                 return false;
1571
1572         /*
1573          * We can only do one readdir with delayed items at a time because of
1574          * item->readdir_list.
1575          */
1576         btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
1577         btrfs_inode_lock(inode, 0);
1578
1579         mutex_lock(&delayed_node->mutex);
1580         item = __btrfs_first_delayed_insertion_item(delayed_node);
1581         while (item) {
1582                 refcount_inc(&item->refs);
1583                 list_add_tail(&item->readdir_list, ins_list);
1584                 item = __btrfs_next_delayed_item(item);
1585         }
1586
1587         item = __btrfs_first_delayed_deletion_item(delayed_node);
1588         while (item) {
1589                 refcount_inc(&item->refs);
1590                 list_add_tail(&item->readdir_list, del_list);
1591                 item = __btrfs_next_delayed_item(item);
1592         }
1593         mutex_unlock(&delayed_node->mutex);
1594         /*
1595          * This delayed node is still cached in the btrfs inode, so refs
1596          * must be > 1 now, and we needn't check it is going to be freed
1597          * or not.
1598          *
1599          * Besides that, this function is used to read dir, we do not
1600          * insert/delete delayed items in this period. So we also needn't
1601          * requeue or dequeue this delayed node.
1602          */
1603         refcount_dec(&delayed_node->refs);
1604
1605         return true;
1606 }
1607
1608 void btrfs_readdir_put_delayed_items(struct inode *inode,
1609                                      struct list_head *ins_list,
1610                                      struct list_head *del_list)
1611 {
1612         struct btrfs_delayed_item *curr, *next;
1613
1614         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1615                 list_del(&curr->readdir_list);
1616                 if (refcount_dec_and_test(&curr->refs))
1617                         kfree(curr);
1618         }
1619
1620         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1621                 list_del(&curr->readdir_list);
1622                 if (refcount_dec_and_test(&curr->refs))
1623                         kfree(curr);
1624         }
1625
1626         /*
1627          * The VFS is going to do up_read(), so we need to downgrade back to a
1628          * read lock.
1629          */
1630         downgrade_write(&inode->i_rwsem);
1631 }
1632
1633 int btrfs_should_delete_dir_index(struct list_head *del_list,
1634                                   u64 index)
1635 {
1636         struct btrfs_delayed_item *curr;
1637         int ret = 0;
1638
1639         list_for_each_entry(curr, del_list, readdir_list) {
1640                 if (curr->key.offset > index)
1641                         break;
1642                 if (curr->key.offset == index) {
1643                         ret = 1;
1644                         break;
1645                 }
1646         }
1647         return ret;
1648 }
1649
1650 /*
1651  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1652  *
1653  */
1654 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1655                                     struct list_head *ins_list)
1656 {
1657         struct btrfs_dir_item *di;
1658         struct btrfs_delayed_item *curr, *next;
1659         struct btrfs_key location;
1660         char *name;
1661         int name_len;
1662         int over = 0;
1663         unsigned char d_type;
1664
1665         if (list_empty(ins_list))
1666                 return 0;
1667
1668         /*
1669          * Changing the data of the delayed item is impossible. So
1670          * we needn't lock them. And we have held i_mutex of the
1671          * directory, nobody can delete any directory indexes now.
1672          */
1673         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1674                 list_del(&curr->readdir_list);
1675
1676                 if (curr->key.offset < ctx->pos) {
1677                         if (refcount_dec_and_test(&curr->refs))
1678                                 kfree(curr);
1679                         continue;
1680                 }
1681
1682                 ctx->pos = curr->key.offset;
1683
1684                 di = (struct btrfs_dir_item *)curr->data;
1685                 name = (char *)(di + 1);
1686                 name_len = btrfs_stack_dir_name_len(di);
1687
1688                 d_type = fs_ftype_to_dtype(di->type);
1689                 btrfs_disk_key_to_cpu(&location, &di->location);
1690
1691                 over = !dir_emit(ctx, name, name_len,
1692                                location.objectid, d_type);
1693
1694                 if (refcount_dec_and_test(&curr->refs))
1695                         kfree(curr);
1696
1697                 if (over)
1698                         return 1;
1699                 ctx->pos++;
1700         }
1701         return 0;
1702 }
1703
1704 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1705                                   struct btrfs_inode_item *inode_item,
1706                                   struct inode *inode)
1707 {
1708         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1709         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1710         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1711         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1712         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1713         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1714         btrfs_set_stack_inode_generation(inode_item,
1715                                          BTRFS_I(inode)->generation);
1716         btrfs_set_stack_inode_sequence(inode_item,
1717                                        inode_peek_iversion(inode));
1718         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1719         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1720         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1721         btrfs_set_stack_inode_block_group(inode_item, 0);
1722
1723         btrfs_set_stack_timespec_sec(&inode_item->atime,
1724                                      inode->i_atime.tv_sec);
1725         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1726                                       inode->i_atime.tv_nsec);
1727
1728         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1729                                      inode->i_mtime.tv_sec);
1730         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1731                                       inode->i_mtime.tv_nsec);
1732
1733         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1734                                      inode->i_ctime.tv_sec);
1735         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1736                                       inode->i_ctime.tv_nsec);
1737
1738         btrfs_set_stack_timespec_sec(&inode_item->otime,
1739                                      BTRFS_I(inode)->i_otime.tv_sec);
1740         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1741                                      BTRFS_I(inode)->i_otime.tv_nsec);
1742 }
1743
1744 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1745 {
1746         struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1747         struct btrfs_delayed_node *delayed_node;
1748         struct btrfs_inode_item *inode_item;
1749
1750         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1751         if (!delayed_node)
1752                 return -ENOENT;
1753
1754         mutex_lock(&delayed_node->mutex);
1755         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1756                 mutex_unlock(&delayed_node->mutex);
1757                 btrfs_release_delayed_node(delayed_node);
1758                 return -ENOENT;
1759         }
1760
1761         inode_item = &delayed_node->inode_item;
1762
1763         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1764         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1765         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1766         btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
1767                         round_up(i_size_read(inode), fs_info->sectorsize));
1768         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1769         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1770         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1771         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1772         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1773
1774         inode_set_iversion_queried(inode,
1775                                    btrfs_stack_inode_sequence(inode_item));
1776         inode->i_rdev = 0;
1777         *rdev = btrfs_stack_inode_rdev(inode_item);
1778         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1779
1780         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1781         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1782
1783         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1784         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1785
1786         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1787         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1788
1789         BTRFS_I(inode)->i_otime.tv_sec =
1790                 btrfs_stack_timespec_sec(&inode_item->otime);
1791         BTRFS_I(inode)->i_otime.tv_nsec =
1792                 btrfs_stack_timespec_nsec(&inode_item->otime);
1793
1794         inode->i_generation = BTRFS_I(inode)->generation;
1795         BTRFS_I(inode)->index_cnt = (u64)-1;
1796
1797         mutex_unlock(&delayed_node->mutex);
1798         btrfs_release_delayed_node(delayed_node);
1799         return 0;
1800 }
1801
1802 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1803                                struct btrfs_root *root,
1804                                struct btrfs_inode *inode)
1805 {
1806         struct btrfs_delayed_node *delayed_node;
1807         int ret = 0;
1808
1809         delayed_node = btrfs_get_or_create_delayed_node(inode);
1810         if (IS_ERR(delayed_node))
1811                 return PTR_ERR(delayed_node);
1812
1813         mutex_lock(&delayed_node->mutex);
1814         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1815                 fill_stack_inode_item(trans, &delayed_node->inode_item,
1816                                       &inode->vfs_inode);
1817                 goto release_node;
1818         }
1819
1820         ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
1821         if (ret)
1822                 goto release_node;
1823
1824         fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode);
1825         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1826         delayed_node->count++;
1827         atomic_inc(&root->fs_info->delayed_root->items);
1828 release_node:
1829         mutex_unlock(&delayed_node->mutex);
1830         btrfs_release_delayed_node(delayed_node);
1831         return ret;
1832 }
1833
1834 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1835 {
1836         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1837         struct btrfs_delayed_node *delayed_node;
1838
1839         /*
1840          * we don't do delayed inode updates during log recovery because it
1841          * leads to enospc problems.  This means we also can't do
1842          * delayed inode refs
1843          */
1844         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1845                 return -EAGAIN;
1846
1847         delayed_node = btrfs_get_or_create_delayed_node(inode);
1848         if (IS_ERR(delayed_node))
1849                 return PTR_ERR(delayed_node);
1850
1851         /*
1852          * We don't reserve space for inode ref deletion is because:
1853          * - We ONLY do async inode ref deletion for the inode who has only
1854          *   one link(i_nlink == 1), it means there is only one inode ref.
1855          *   And in most case, the inode ref and the inode item are in the
1856          *   same leaf, and we will deal with them at the same time.
1857          *   Since we are sure we will reserve the space for the inode item,
1858          *   it is unnecessary to reserve space for inode ref deletion.
1859          * - If the inode ref and the inode item are not in the same leaf,
1860          *   We also needn't worry about enospc problem, because we reserve
1861          *   much more space for the inode update than it needs.
1862          * - At the worst, we can steal some space from the global reservation.
1863          *   It is very rare.
1864          */
1865         mutex_lock(&delayed_node->mutex);
1866         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1867                 goto release_node;
1868
1869         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1870         delayed_node->count++;
1871         atomic_inc(&fs_info->delayed_root->items);
1872 release_node:
1873         mutex_unlock(&delayed_node->mutex);
1874         btrfs_release_delayed_node(delayed_node);
1875         return 0;
1876 }
1877
1878 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1879 {
1880         struct btrfs_root *root = delayed_node->root;
1881         struct btrfs_fs_info *fs_info = root->fs_info;
1882         struct btrfs_delayed_item *curr_item, *prev_item;
1883
1884         mutex_lock(&delayed_node->mutex);
1885         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1886         while (curr_item) {
1887                 btrfs_delayed_item_release_metadata(root, curr_item);
1888                 prev_item = curr_item;
1889                 curr_item = __btrfs_next_delayed_item(prev_item);
1890                 btrfs_release_delayed_item(prev_item);
1891         }
1892
1893         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1894         while (curr_item) {
1895                 btrfs_delayed_item_release_metadata(root, curr_item);
1896                 prev_item = curr_item;
1897                 curr_item = __btrfs_next_delayed_item(prev_item);
1898                 btrfs_release_delayed_item(prev_item);
1899         }
1900
1901         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1902                 btrfs_release_delayed_iref(delayed_node);
1903
1904         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1905                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
1906                 btrfs_release_delayed_inode(delayed_node);
1907         }
1908         mutex_unlock(&delayed_node->mutex);
1909 }
1910
/*
 * Throw away all delayed work queued for @inode.  Does nothing when the
 * inode has no delayed node.
 */
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);

	if (node) {
		__btrfs_kill_delayed_node(node);
		btrfs_release_delayed_node(node);
	}
}
1922
1923 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1924 {
1925         u64 inode_id = 0;
1926         struct btrfs_delayed_node *delayed_nodes[8];
1927         int i, n;
1928
1929         while (1) {
1930                 spin_lock(&root->inode_lock);
1931                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1932                                            (void **)delayed_nodes, inode_id,
1933                                            ARRAY_SIZE(delayed_nodes));
1934                 if (!n) {
1935                         spin_unlock(&root->inode_lock);
1936                         break;
1937                 }
1938
1939                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1940                 for (i = 0; i < n; i++) {
1941                         /*
1942                          * Don't increase refs in case the node is dead and
1943                          * about to be removed from the tree in the loop below
1944                          */
1945                         if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
1946                                 delayed_nodes[i] = NULL;
1947                 }
1948                 spin_unlock(&root->inode_lock);
1949
1950                 for (i = 0; i < n; i++) {
1951                         if (!delayed_nodes[i])
1952                                 continue;
1953                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1954                         btrfs_release_delayed_node(delayed_nodes[i]);
1955                 }
1956         }
1957 }
1958
1959 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1960 {
1961         struct btrfs_delayed_node *curr_node, *prev_node;
1962
1963         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1964         while (curr_node) {
1965                 __btrfs_kill_delayed_node(curr_node);
1966
1967                 prev_node = curr_node;
1968                 curr_node = btrfs_next_delayed_node(curr_node);
1969                 btrfs_release_delayed_node(prev_node);
1970         }
1971 }
1972