Merge tag 'nfsd-5.14' of git://linux-nfs.org/~bfields/linux
[linux-2.6-microblaze.git] / drivers / md / persistent-data / dm-space-map-common.c
1 /*
2  * Copyright (C) 2011 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6
7 #include "dm-space-map-common.h"
8 #include "dm-transaction-manager.h"
9 #include "dm-btree-internal.h"
10 #include "dm-persistent-data-internal.h"
11
12 #include <linux/bitops.h>
13 #include <linux/device-mapper.h>
14
15 #define DM_MSG_PREFIX "space map common"
16
17 /*----------------------------------------------------------------*/
18
19 /*
20  * Index validator.
21  */
22 #define INDEX_CSUM_XOR 160478
23
24 static void index_prepare_for_write(struct dm_block_validator *v,
25                                     struct dm_block *b,
26                                     size_t block_size)
27 {
28         struct disk_metadata_index *mi_le = dm_block_data(b);
29
30         mi_le->blocknr = cpu_to_le64(dm_block_location(b));
31         mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
32                                                  block_size - sizeof(__le32),
33                                                  INDEX_CSUM_XOR));
34 }
35
36 static int index_check(struct dm_block_validator *v,
37                        struct dm_block *b,
38                        size_t block_size)
39 {
40         struct disk_metadata_index *mi_le = dm_block_data(b);
41         __le32 csum_disk;
42
43         if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
44                 DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
45                             le64_to_cpu(mi_le->blocknr), dm_block_location(b));
46                 return -ENOTBLK;
47         }
48
49         csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
50                                                block_size - sizeof(__le32),
51                                                INDEX_CSUM_XOR));
52         if (csum_disk != mi_le->csum) {
53                 DMERR_LIMIT("index_check failed: csum %u != wanted %u",
54                             le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
55                 return -EILSEQ;
56         }
57
58         return 0;
59 }
60
/* Validator handed to the transaction manager for metadata index blocks. */
static struct dm_block_validator index_validator = {
	.name = "index",
	.prepare_for_write = index_prepare_for_write,
	.check = index_check
};
66
67 /*----------------------------------------------------------------*/
68
69 /*
70  * Bitmap validator
71  */
72 #define BITMAP_CSUM_XOR 240779
73
74 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
75                                         struct dm_block *b,
76                                         size_t block_size)
77 {
78         struct disk_bitmap_header *disk_header = dm_block_data(b);
79
80         disk_header->blocknr = cpu_to_le64(dm_block_location(b));
81         disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
82                                                        block_size - sizeof(__le32),
83                                                        BITMAP_CSUM_XOR));
84 }
85
86 static int dm_bitmap_check(struct dm_block_validator *v,
87                            struct dm_block *b,
88                            size_t block_size)
89 {
90         struct disk_bitmap_header *disk_header = dm_block_data(b);
91         __le32 csum_disk;
92
93         if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
94                 DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
95                             le64_to_cpu(disk_header->blocknr), dm_block_location(b));
96                 return -ENOTBLK;
97         }
98
99         csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
100                                                block_size - sizeof(__le32),
101                                                BITMAP_CSUM_XOR));
102         if (csum_disk != disk_header->csum) {
103                 DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
104                             le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
105                 return -EILSEQ;
106         }
107
108         return 0;
109 }
110
/* Validator handed to the transaction manager for allocation bitmap blocks. */
static struct dm_block_validator dm_sm_bitmap_validator = {
	.name = "sm_bitmap",
	.prepare_for_write = dm_bitmap_prepare_for_write,
	.check = dm_bitmap_check,
};
116
117 /*----------------------------------------------------------------*/
118
/*
 * Each reference-count entry occupies 2 bits (see sm_lookup_bitmap()),
 * so a 64-bit word holds 32 entries.
 */
#define ENTRIES_PER_WORD 32
#define ENTRIES_SHIFT	5

/* The bitmap payload starts immediately after the on-disk header. */
static void *dm_bitmap_data(struct dm_block *b)
{
	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
}
126
/* The high bit of every 2-bit entry in a 64-bit bitmap word. */
#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL

/*
 * Predicate used by sm_find_free() to skip a whole word of entries at
 * once: non-zero means the word containing entry @b has no zero-valued
 * (free) 2-bit entries.
 *
 * NOTE(review): the carry-propagation trick below is assumed to flag
 * zero-valued entries via (bits + MASK + 1) & MASK — confirm before
 * modifying; it is deliberately branch-free.
 */
static unsigned dm_bitmap_word_used(void *addr, unsigned b)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

	uint64_t bits = le64_to_cpu(*w_le);
	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;

	return !(~bits & mask);
}
139
140 static unsigned sm_lookup_bitmap(void *addr, unsigned b)
141 {
142         __le64 *words_le = addr;
143         __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
144         unsigned hi, lo;
145
146         b = (b & (ENTRIES_PER_WORD - 1)) << 1;
147         hi = !!test_bit_le(b, (void *) w_le);
148         lo = !!test_bit_le(b + 1, (void *) w_le);
149         return (hi << 1) | lo;
150 }
151
152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
153 {
154         __le64 *words_le = addr;
155         __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
156
157         b = (b & (ENTRIES_PER_WORD - 1)) << 1;
158
159         if (val & 2)
160                 __set_bit_le(b, (void *) w_le);
161         else
162                 __clear_bit_le(b, (void *) w_le);
163
164         if (val & 1)
165                 __set_bit_le(b + 1, (void *) w_le);
166         else
167                 __clear_bit_le(b + 1, (void *) w_le);
168 }
169
170 static int sm_find_free(void *addr, unsigned begin, unsigned end,
171                         unsigned *result)
172 {
173         while (begin < end) {
174                 if (!(begin & (ENTRIES_PER_WORD - 1)) &&
175                     dm_bitmap_word_used(addr, begin)) {
176                         begin += ENTRIES_PER_WORD;
177                         continue;
178                 }
179
180                 if (!sm_lookup_bitmap(addr, begin)) {
181                         *result = begin;
182                         return 0;
183                 }
184
185                 begin++;
186         }
187
188         return -ENOSPC;
189 }
190
191 /*----------------------------------------------------------------*/
192
193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
194 {
195         memset(ll, 0, sizeof(struct ll_disk));
196
197         ll->tm = tm;
198
199         ll->bitmap_info.tm = tm;
200         ll->bitmap_info.levels = 1;
201
202         /*
203          * Because the new bitmap blocks are created via a shadow
204          * operation, the old entry has already had its reference count
205          * decremented and we don't need the btree to do any bookkeeping.
206          */
207         ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
208         ll->bitmap_info.value_type.inc = NULL;
209         ll->bitmap_info.value_type.dec = NULL;
210         ll->bitmap_info.value_type.equal = NULL;
211
212         ll->ref_count_info.tm = tm;
213         ll->ref_count_info.levels = 1;
214         ll->ref_count_info.value_type.size = sizeof(uint32_t);
215         ll->ref_count_info.value_type.inc = NULL;
216         ll->ref_count_info.value_type.dec = NULL;
217         ll->ref_count_info.value_type.equal = NULL;
218
219         ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
220
221         if (ll->block_size > (1 << 30)) {
222                 DMERR("block size too big to hold bitmaps");
223                 return -EINVAL;
224         }
225
226         ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
227                 ENTRIES_PER_BYTE;
228         ll->nr_blocks = 0;
229         ll->bitmap_root = 0;
230         ll->ref_count_root = 0;
231         ll->bitmap_index_changed = false;
232
233         return 0;
234 }
235
/*
 * Grows the space map by @extra_blocks data blocks, allocating any new
 * bitmap blocks that become necessary and recording an index entry for
 * each via ll->save_ie().
 */
int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
{
	int r;
	dm_block_t i, nr_blocks, nr_indexes;
	unsigned old_blocks, blocks;

	nr_blocks = ll->nr_blocks + extra_blocks;
	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);

	/*
	 * NOTE(review): nr_indexes duplicates the 'blocks' computation, but
	 * at full dm_block_t width, so the bounds check below is not fooled
	 * by truncation of the 32-bit 'blocks' — presumably deliberate;
	 * confirm before folding the two together.
	 */
	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
	if (nr_indexes > ll->max_entries(ll)) {
		DMERR("space map too large");
		return -EINVAL;
	}

	/*
	 * We need to set this before the dm_tm_new_block() call below.
	 */
	ll->nr_blocks = nr_blocks;
	for (i = old_blocks; i < blocks; i++) {
		struct dm_block *b;
		struct disk_index_entry idx;

		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
		if (r < 0)
			return r;

		idx.blocknr = cpu_to_le64(dm_block_location(b));

		dm_tm_unlock(ll->tm, b);

		/* A fresh bitmap: every entry free, first free entry at 0. */
		idx.nr_free = cpu_to_le32(ll->entries_per_block);
		idx.none_free_before = 0;

		r = ll->save_ie(ll, i, &idx);
		if (r < 0)
			return r;
	}

	return 0;
}
278
279 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
280 {
281         int r;
282         dm_block_t index = b;
283         struct disk_index_entry ie_disk;
284         struct dm_block *blk;
285
286         b = do_div(index, ll->entries_per_block);
287         r = ll->load_ie(ll, index, &ie_disk);
288         if (r < 0)
289                 return r;
290
291         r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
292                             &dm_sm_bitmap_validator, &blk);
293         if (r < 0)
294                 return r;
295
296         *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
297
298         dm_tm_unlock(ll->tm, blk);
299
300         return 0;
301 }
302
303 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
304                                       uint32_t *result)
305 {
306         __le32 le_rc;
307         int r;
308
309         r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
310         if (r < 0)
311                 return r;
312
313         *result = le32_to_cpu(le_rc);
314
315         return r;
316 }
317
318 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
319 {
320         int r = sm_ll_lookup_bitmap(ll, b, result);
321
322         if (r)
323                 return r;
324
325         if (*result != 3)
326                 return r;
327
328         return sm_ll_lookup_big_ref_count(ll, b, result);
329 }
330
/*
 * Finds the first free block in the half-open range [begin, end) by
 * walking the allocation bitmaps.  Returns 0 and sets *result, or
 * -ENOSPC when nothing is free in the range.
 */
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
			  dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	/*
	 * FIXME: Use shifts
	 */
	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);
	/*
	 * An offset of 0 after the division means 'end' landed exactly on a
	 * bitmap boundary, i.e. the whole of the final bitmap is in range.
	 */
	if (end == 0)
		end = ll->entries_per_block;

	/* After the first bitmap, always start scanning from entry 0. */
	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *blk;
		unsigned position;
		uint32_t bit_end;

		r = ll->load_ie(ll, i, &ie_disk);
		if (r < 0)
			return r;

		/* Skip bitmaps with nothing free. */
		if (le32_to_cpu(ie_disk.nr_free) == 0)
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
				    &dm_sm_bitmap_validator, &blk);
		if (r < 0)
			return r;

		/* Only the final bitmap is clipped by 'end'. */
		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;

		r = sm_find_free(dm_bitmap_data(blk),
				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
				 bit_end, &position);
		if (r == -ENOSPC) {
			/*
			 * This might happen because we started searching
			 * part way through the bitmap.
			 */
			dm_tm_unlock(ll->tm, blk);
			continue;
		}

		dm_tm_unlock(ll->tm, blk);

		*result = i * ll->entries_per_block + (dm_block_t) position;
		return 0;
	}

	return -ENOSPC;
}
386
387 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
388                                  dm_block_t begin, dm_block_t end, dm_block_t *b)
389 {
390         int r;
391         uint32_t count;
392
393         do {
394                 r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
395                 if (r)
396                         break;
397
398                 /* double check this block wasn't used in the old transaction */
399                 if (*b >= old_ll->nr_blocks)
400                         count = 0;
401                 else {
402                         r = sm_ll_lookup(old_ll, *b, &count);
403                         if (r)
404                                 break;
405
406                         if (count)
407                                 begin = *b + 1;
408                 }
409         } while (count);
410
411         return r;
412 }
413
414 /*----------------------------------------------------------------*/
415
/*
 * Sets the reference count of block @b to exactly @ref_count, keeping the
 * 2-bit bitmap and the overflow btree (for counts > 2) consistent, and
 * updating the free-space bookkeeping in the index entry.
 *
 * *nr_allocations is set to +1, -1 or 0 depending on whether the block
 * went from free to used, used to free, or neither.
 */
int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
		 uint32_t ref_count, int32_t *nr_allocations)
{
	int r;
	uint32_t bit, old;
	struct dm_block *nb;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	void *bm_le;
	int inc;

	/* Split b into (bitmap index, bit offset within that bitmap). */
	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &nb, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	/* The shadow may live at a new location; record it in the index entry. */
	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
	bm_le = dm_bitmap_data(nb);

	/* A bitmap value of 3 means the real count lives in the overflow btree. */
	old = sm_lookup_bitmap(bm_le, bit);
	if (old > 2) {
		r = sm_ll_lookup_big_ref_count(ll, b, &old);
		if (r < 0) {
			dm_tm_unlock(ll->tm, nb);
			return r;
		}
	}

	/*
	 * NOTE(review): r is 0 on every path reaching here, so this check
	 * looks redundant; left as-is pending confirmation.
	 */
	if (r) {
		dm_tm_unlock(ll->tm, nb);
		return r;
	}

	if (ref_count <= 2) {
		sm_set_bitmap(bm_le, bit, ref_count);
		dm_tm_unlock(ll->tm, nb);

		/* The count no longer overflows; drop the stale btree entry. */
		if (old > 2) {
			r = dm_btree_remove(&ll->ref_count_info,
					    ll->ref_count_root,
					    &b, &ll->ref_count_root);
			if (r)
				return r;
		}

	} else {
		__le32 le_rc = cpu_to_le32(ref_count);

		/* Mark the bitmap with the overflow sentinel, store the real count. */
		sm_set_bitmap(bm_le, bit, 3);
		dm_tm_unlock(ll->tm, nb);

		__dm_bless_for_disk(&le_rc);
		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
				    &b, &le_rc, &ll->ref_count_root);
		if (r < 0) {
			DMERR("ref count insert failed");
			return r;
		}
	}

	/* Maintain the per-bitmap free count and first-free hint. */
	if (ref_count && !old) {
		*nr_allocations = 1;
		ll->nr_allocated++;
		le32_add_cpu(&ie_disk.nr_free, -1);
		if (le32_to_cpu(ie_disk.none_free_before) == bit)
			ie_disk.none_free_before = cpu_to_le32(bit + 1);

	} else if (old && !ref_count) {
		*nr_allocations = -1;
		ll->nr_allocated--;
		le32_add_cpu(&ie_disk.nr_free, 1);
		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
	} else
		*nr_allocations = 0;

	return ll->save_ie(ll, index, &ie_disk);
}
499
500 /*----------------------------------------------------------------*/
501
/*
 * Holds useful intermediate results for the range based inc and dec
 * operations.
 */
struct inc_context {
	struct disk_index_entry ie_disk;	/* index entry of the bitmap being modified */
	struct dm_block *bitmap_block;		/* locked bitmap block, or NULL when dropped */
	void *bitmap;				/* payload of bitmap_block (dm_bitmap_data()) */

	struct dm_block *overflow_leaf;		/* cached btree leaf for counts > 2, or NULL */
};
513
514 static inline void init_inc_context(struct inc_context *ic)
515 {
516         ic->bitmap_block = NULL;
517         ic->bitmap = NULL;
518         ic->overflow_leaf = NULL;
519 }
520
/* Releases whichever block locks the inc/dec machinery still holds. */
static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	if (ic->bitmap_block)
		dm_tm_unlock(ll->tm, ic->bitmap_block);
	if (ic->overflow_leaf)
		dm_tm_unlock(ll->tm, ic->overflow_leaf);
}
528
/*
 * Drops all locks and clears the cached pointers.  ie_disk is preserved
 * (init_inc_context() does not touch it) so the caller can still save it.
 */
static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}
534
535 /*
536  * Confirms a btree node contains a particular key at an index.
537  */
538 static bool contains_key(struct btree_node *n, uint64_t key, int index)
539 {
540         return index >= 0 &&
541                 index < le32_to_cpu(n->header.nr_entries) &&
542                 le64_to_cpu(n->keys[index]) == key;
543 }
544
/*
 * Slow path for sm_ll_inc_overflow(): (re)acquires the btree leaf holding
 * the overflow reference count for block @b and increments it in place.
 * The leaf stays cached in ic->overflow_leaf for subsequent entries.
 */
static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int r;
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * bitmap_block needs to be unlocked because getting the
	 * overflow_leaf may need to allocate, and thus use the space map.
	 */
	reset_inc_context(ll, ic);

	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	/* Bump the count directly in the (overwriteable) leaf. */
	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr) + 1;
	*v_ptr = cpu_to_le32(rc);

	return 0;
}
577
578 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
579 {
580         int index;
581         struct btree_node *n;
582         __le32 *v_ptr;
583         uint32_t rc;
584
585         /*
586          * Do we already have the correct overflow leaf?
587          */
588         if (ic->overflow_leaf) {
589                 n = dm_block_data(ic->overflow_leaf);
590                 index = lower_bound(n, b);
591                 if (contains_key(n, b, index)) {
592                         v_ptr = value_ptr(n, index);
593                         rc = le32_to_cpu(*v_ptr) + 1;
594                         *v_ptr = cpu_to_le32(rc);
595
596                         return 0;
597                 }
598         }
599
600         return __sm_ll_inc_overflow(ll, b, ic);
601 }
602
603 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
604 {
605         int r, inc;
606         r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
607                                &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
608         if (r < 0) {
609                 DMERR("dm_tm_shadow_block() failed");
610                 return r;
611         }
612         ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
613         ic->bitmap = dm_bitmap_data(ic->bitmap_block);
614         return 0;
615 }
616
617 /*
618  * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
619  * we can reopen the bitmap with a simple write lock, rather than re calling
620  * dm_tm_shadow_block().
621  */
622 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
623 {
624         if (!ic->bitmap_block) {
625                 int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
626                                          &dm_sm_bitmap_validator, &ic->bitmap_block);
627                 if (r) {
628                         DMERR("unable to re-get write lock for bitmap");
629                         return r;
630                 }
631                 ic->bitmap = dm_bitmap_data(ic->bitmap_block);
632         }
633
634         return 0;
635 }
636
/*
 * Loops round incrementing entries in a single bitmap.
 *
 * @bit/@bit_end bound the entries to touch within this bitmap; @b is the
 * absolute block number corresponding to @bit.  On success *new_b holds
 * the first block NOT processed, so the caller can carry on in the next
 * bitmap.
 */
static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   int32_t *nr_allocations, dm_block_t *new_b,
				   struct inc_context *ic)
{
	int r;
	__le32 le_rc;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* inc bitmap, adjust nr_allocated */
			sm_set_bitmap(ic->bitmap, bit, 1);
			(*nr_allocations)++;
			ll->nr_allocated++;
			le32_add_cpu(&ic->ie_disk.nr_free, -1);
			/* Push the first-free hint past this newly used entry. */
			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
			break;

		case 1:
			/* inc bitmap */
			sm_set_bitmap(ic->bitmap, bit, 2);
			break;

		case 2:
			/* inc bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 3);
			/* btree insert may allocate, so drop our locks first. */
			reset_inc_context(ll, ic);

			le_rc = cpu_to_le32(3);
			__dm_bless_for_disk(&le_rc);
			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
					    &b, &le_rc, &ll->ref_count_root);
			if (r < 0) {
				DMERR("ref count insert failed");
				return r;
			}
			break;

		default:
			/*
			 * inc within the overflow tree only.
			 */
			r = sm_ll_inc_overflow(ll, b, ic);
			if (r < 0)
				return r;
		}
	}

	*new_b = b;
	return 0;
}
704
/*
 * Finds a bitmap that contains entries in the block range, and increments
 * them.
 */
static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	struct inc_context ic;
	uint32_t bit, bit_end;
	dm_block_t index = b;

	init_inc_context(&ic);

	/* Split b into (bitmap index, bit offset within that bitmap). */
	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* Clamp the range to what this single bitmap covers. */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);

	exit_inc_context(ll, &ic);

	if (r)
		return r;

	/* Persist the updated free counts / hints for this bitmap. */
	return ll->save_ie(ll, index, &ic.ie_disk);
}
738
739 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
740               int32_t *nr_allocations)
741 {
742         *nr_allocations = 0;
743         while (b != e) {
744                 int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
745                 if (r)
746                         return r;
747         }
748
749         return 0;
750 }
751
752 /*----------------------------------------------------------------*/
753
754 static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
755                                 struct inc_context *ic)
756 {
757         reset_inc_context(ll, ic);
758         return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
759                                &b, &ll->ref_count_root);
760 }
761
/*
 * Slow path for sm_ll_dec_overflow(): (re)acquires the btree leaf holding
 * the overflow count for block @b and decrements it, deleting the entry
 * entirely when it falls back to 3 (representable in the bitmap again).
 * *old_rc reports the count prior to the decrement.
 */
static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic, uint32_t *old_rc)
{
	int r;
	int index = -1;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/* Drop locks: fetching the leaf may allocate via the space map. */
	reset_inc_context(ll, ic);
	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr);
	*old_rc = rc;

	if (rc == 3) {
		return __sm_ll_del_overflow(ll, b, ic);
	} else {
		rc--;
		*v_ptr = cpu_to_le32(rc);
		return 0;
	}
}
796
797 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
798                               struct inc_context *ic, uint32_t *old_rc)
799 {
800         /*
801          * Do we already have the correct overflow leaf?
802          */
803         if (ic->overflow_leaf) {
804                 int index;
805                 struct btree_node *n;
806                 __le32 *v_ptr;
807                 uint32_t rc;
808
809                 n = dm_block_data(ic->overflow_leaf);
810                 index = lower_bound(n, b);
811                 if (contains_key(n, b, index)) {
812                         v_ptr = value_ptr(n, index);
813                         rc = le32_to_cpu(*v_ptr);
814                         *old_rc = rc;
815
816                         if (rc > 3) {
817                                 rc--;
818                                 *v_ptr = cpu_to_le32(rc);
819                                 return 0;
820                         } else {
821                                 return __sm_ll_del_overflow(ll, b, ic);
822                         }
823
824                 }
825         }
826
827         return __sm_ll_dec_overflow(ll, b, ic, old_rc);
828 }
829
/*
 * Loops round decrementing entries in a single bitmap (counterpart of
 * sm_ll_inc_bitmap(); the old comment said "incrementing" by mistake).
 *
 * On success *new_b holds the first block NOT processed, so the caller
 * can carry on in the next bitmap.
 */
static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   struct inc_context *ic,
				   int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* Cannot decrement a count that is already zero. */
			DMERR("unable to decrement block");
			return -EINVAL;

		case 1:
			/* dec bitmap */
			sm_set_bitmap(ic->bitmap, bit, 0);
			(*nr_allocations)--;
			ll->nr_allocated--;
			le32_add_cpu(&ic->ie_disk.nr_free, 1);
			/* This entry is now free; pull the first-free hint back. */
			ic->ie_disk.none_free_before =
				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
			break;

		case 2:
			/* dec bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 1);
			break;

		case 3:
			r = sm_ll_dec_overflow(ll, b, ic, &old);
			if (r < 0)
				return r;

			/* Count fell from 3 to 2: representable in the bitmap again. */
			if (old == 3) {
				r = ensure_bitmap(ll, ic);
				if (r)
					return r;

				sm_set_bitmap(ic->bitmap, bit, 2);
			}
			break;
		}
	}

	*new_b = b;
	return 0;
}
891
/*
 * Finds a bitmap that contains entries in the block range, and decrements
 * them (mirror of __sm_ll_inc()).
 */
static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t bit, bit_end;
	struct inc_context ic;
	dm_block_t index = b;

	init_inc_context(&ic);

	/* Split b into (bitmap index, bit offset within that bitmap). */
	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* Clamp the range to what this single bitmap covers. */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
	exit_inc_context(ll, &ic);

	if (r)
		return r;

	/* Persist the updated free counts / hints for this bitmap. */
	return ll->save_ie(ll, index, &ic.ie_disk);
}
920
921 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
922               int32_t *nr_allocations)
923 {
924         *nr_allocations = 0;
925         while (b != e) {
926                 int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
927                 if (r)
928                         return r;
929         }
930
931         return 0;
932 }
933
934 /*----------------------------------------------------------------*/
935
936 int sm_ll_commit(struct ll_disk *ll)
937 {
938         int r = 0;
939
940         if (ll->bitmap_index_changed) {
941                 r = ll->commit(ll);
942                 if (!r)
943                         ll->bitmap_index_changed = false;
944         }
945
946         return r;
947 }
948
949 /*----------------------------------------------------------------*/
950
951 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
952                                struct disk_index_entry *ie)
953 {
954         memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
955         return 0;
956 }
957
958 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
959                                struct disk_index_entry *ie)
960 {
961         ll->bitmap_index_changed = true;
962         memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
963         return 0;
964 }
965
966 static int metadata_ll_init_index(struct ll_disk *ll)
967 {
968         int r;
969         struct dm_block *b;
970
971         r = dm_tm_new_block(ll->tm, &index_validator, &b);
972         if (r < 0)
973                 return r;
974
975         ll->bitmap_root = dm_block_location(b);
976
977         dm_tm_unlock(ll->tm, b);
978
979         return 0;
980 }
981
982 static int metadata_ll_open(struct ll_disk *ll)
983 {
984         int r;
985         struct dm_block *block;
986
987         r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
988                             &index_validator, &block);
989         if (r)
990                 return r;
991
992         memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
993         dm_tm_unlock(ll->tm, block);
994
995         return 0;
996 }
997
/*
 * The in-core index (ll->mi_le) has room for a fixed number of bitmap
 * blocks, which caps how far the metadata space map can grow.
 */
static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
{
	return MAX_METADATA_BITMAPS;
}
1002
1003 static int metadata_ll_commit(struct ll_disk *ll)
1004 {
1005         int r, inc;
1006         struct dm_block *b;
1007
1008         r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1009         if (r)
1010                 return r;
1011
1012         memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1013         ll->bitmap_root = dm_block_location(b);
1014
1015         dm_tm_unlock(ll->tm, b);
1016
1017         return 0;
1018 }
1019
1020 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1021 {
1022         int r;
1023
1024         r = sm_ll_init(ll, tm);
1025         if (r < 0)
1026                 return r;
1027
1028         ll->load_ie = metadata_ll_load_ie;
1029         ll->save_ie = metadata_ll_save_ie;
1030         ll->init_index = metadata_ll_init_index;
1031         ll->open_index = metadata_ll_open;
1032         ll->max_entries = metadata_ll_max_entries;
1033         ll->commit = metadata_ll_commit;
1034
1035         ll->nr_blocks = 0;
1036         ll->nr_allocated = 0;
1037
1038         r = ll->init_index(ll);
1039         if (r < 0)
1040                 return r;
1041
1042         r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1043         if (r < 0)
1044                 return r;
1045
1046         return 0;
1047 }
1048
1049 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1050                         void *root_le, size_t len)
1051 {
1052         int r;
1053         struct disk_sm_root smr;
1054
1055         if (len < sizeof(struct disk_sm_root)) {
1056                 DMERR("sm_metadata root too small");
1057                 return -ENOMEM;
1058         }
1059
1060         /*
1061          * We don't know the alignment of the root_le buffer, so need to
1062          * copy into a new structure.
1063          */
1064         memcpy(&smr, root_le, sizeof(smr));
1065
1066         r = sm_ll_init(ll, tm);
1067         if (r < 0)
1068                 return r;
1069
1070         ll->load_ie = metadata_ll_load_ie;
1071         ll->save_ie = metadata_ll_save_ie;
1072         ll->init_index = metadata_ll_init_index;
1073         ll->open_index = metadata_ll_open;
1074         ll->max_entries = metadata_ll_max_entries;
1075         ll->commit = metadata_ll_commit;
1076
1077         ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1078         ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1079         ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1080         ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1081
1082         return ll->open_index(ll);
1083 }
1084
1085 /*----------------------------------------------------------------*/
1086
/*
 * Flush a dirty cached index entry into the bitmap_info btree.  The
 * insert may relocate the root, hence ll->bitmap_root is passed as both
 * input and output.
 *
 * NOTE(review): 'dirty' is cleared before the insert, so on insert
 * failure the entry is left flagged clean even though the btree was not
 * updated — callers appear to treat a non-zero return as fatal; confirm.
 */
static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
{
	iec->dirty = false;
	__dm_bless_for_disk(iec->ie);
	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
			       &iec->index, &iec->ie, &ll->bitmap_root);
}
1094
/* Map an index-entry number onto a slot of the ie_cache array. */
static inline unsigned hash_index(dm_block_t index)
{
	return dm_hash_block(index, IE_CACHE_MASK);
}
1099
1100 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1101                            struct disk_index_entry *ie)
1102 {
1103         int r;
1104         unsigned h = hash_index(index);
1105         struct ie_cache *iec = ll->ie_cache + h;
1106
1107         if (iec->valid) {
1108                 if (iec->index == index) {
1109                         memcpy(ie, &iec->ie, sizeof(*ie));
1110                         return 0;
1111                 }
1112
1113                 if (iec->dirty) {
1114                         r = ie_cache_writeback(ll, iec);
1115                         if (r)
1116                                 return r;
1117                 }
1118         }
1119
1120         r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1121         if (!r) {
1122                 iec->valid = true;
1123                 iec->dirty = false;
1124                 iec->index = index;
1125                 memcpy(&iec->ie, ie, sizeof(*ie));
1126         }
1127
1128         return r;
1129 }
1130
1131 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1132                            struct disk_index_entry *ie)
1133 {
1134         int r;
1135         unsigned h = hash_index(index);
1136         struct ie_cache *iec = ll->ie_cache + h;
1137
1138         ll->bitmap_index_changed = true;
1139         if (iec->valid) {
1140                 if (iec->index == index) {
1141                         memcpy(&iec->ie, ie, sizeof(*ie));
1142                         iec->dirty = true;
1143                         return 0;
1144                 }
1145
1146                 if (iec->dirty) {
1147                         r = ie_cache_writeback(ll, iec);
1148                         if (r)
1149                                 return r;
1150                 }
1151         }
1152
1153         iec->valid = true;
1154         iec->dirty = true;
1155         iec->index = index;
1156         memcpy(&iec->ie, ie, sizeof(*ie));
1157         return 0;
1158 }
1159
1160 static int disk_ll_init_index(struct ll_disk *ll)
1161 {
1162         unsigned i;
1163         for (i = 0; i < IE_CACHE_SIZE; i++) {
1164                 struct ie_cache *iec = ll->ie_cache + i;
1165                 iec->valid = false;
1166                 iec->dirty = false;
1167         }
1168         return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1169 }
1170
/*
 * Nothing to do on open for the disk space map: the index lives in a
 * btree that is looked up on demand (see disk_ll_load_ie).
 */
static int disk_ll_open(struct ll_disk *ll)
{
	return 0;
}
1175
/*
 * The btree-backed index has no fixed capacity, so report the maximum
 * representable block count.
 */
static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
{
	return -1ULL;
}
1180
1181 static int disk_ll_commit(struct ll_disk *ll)
1182 {
1183         int r = 0;
1184         unsigned i;
1185
1186         for (i = 0; i < IE_CACHE_SIZE; i++) {
1187                 struct ie_cache *iec = ll->ie_cache + i;
1188                 if (iec->valid && iec->dirty)
1189                         r = ie_cache_writeback(ll, iec);
1190         }
1191
1192         return r;
1193 }
1194
1195 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1196 {
1197         int r;
1198
1199         r = sm_ll_init(ll, tm);
1200         if (r < 0)
1201                 return r;
1202
1203         ll->load_ie = disk_ll_load_ie;
1204         ll->save_ie = disk_ll_save_ie;
1205         ll->init_index = disk_ll_init_index;
1206         ll->open_index = disk_ll_open;
1207         ll->max_entries = disk_ll_max_entries;
1208         ll->commit = disk_ll_commit;
1209
1210         ll->nr_blocks = 0;
1211         ll->nr_allocated = 0;
1212
1213         r = ll->init_index(ll);
1214         if (r < 0)
1215                 return r;
1216
1217         r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1218         if (r < 0)
1219                 return r;
1220
1221         return 0;
1222 }
1223
1224 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1225                     void *root_le, size_t len)
1226 {
1227         int r;
1228         struct disk_sm_root *smr = root_le;
1229
1230         if (len < sizeof(struct disk_sm_root)) {
1231                 DMERR("sm_metadata root too small");
1232                 return -ENOMEM;
1233         }
1234
1235         r = sm_ll_init(ll, tm);
1236         if (r < 0)
1237                 return r;
1238
1239         ll->load_ie = disk_ll_load_ie;
1240         ll->save_ie = disk_ll_save_ie;
1241         ll->init_index = disk_ll_init_index;
1242         ll->open_index = disk_ll_open;
1243         ll->max_entries = disk_ll_max_entries;
1244         ll->commit = disk_ll_commit;
1245
1246         ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1247         ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1248         ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1249         ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1250
1251         return ll->open_index(ll);
1252 }
1253
1254 /*----------------------------------------------------------------*/