Merge tag 'clk-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux
[linux-2.6-microblaze.git] / fs / ntfs3 / bitmap.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *
4  * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5  *
6  * This code builds two trees of free clusters extents.
7  * Trees are sorted by start of extent and by length of extent.
8  * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
9  * In extreme case code reads on-disk bitmap to find free clusters.
10  *
11  */
12
13 #include <linux/buffer_head.h>
14 #include <linux/fs.h>
15 #include <linux/kernel.h>
16
17 #include "ntfs.h"
18 #include "ntfs_fs.h"
19
20 /*
21  * Maximum number of extents in tree.
22  */
23 #define NTFS_MAX_WND_EXTENTS (32u * 1024u)
24
/* An rb-tree node carrying a single size_t key. */
struct rb_node_key {
        struct rb_node node;
        size_t key;
};

/*
 * One cached extent of free clusters. Each extent is linked into two
 * trees at once: wnd->start_tree (by start bit) and wnd->count_tree
 * (by length).
 */
struct e_node {
        struct rb_node_key start; /* Tree sorted by start. */
        struct rb_node_key count; /* Tree sorted by len. */
};
34
35 static int wnd_rescan(struct wnd_bitmap *wnd);
36 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
37 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);
38
39 static struct kmem_cache *ntfs_enode_cachep;
40
41 int __init ntfs3_init_bitmap(void)
42 {
43         ntfs_enode_cachep =
44                 kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
45                                   SLAB_RECLAIM_ACCOUNT, NULL);
46         return ntfs_enode_cachep ? 0 : -ENOMEM;
47 }
48
/* Destroy the e_node slab cache created by ntfs3_init_bitmap(). */
void ntfs3_exit_bitmap(void)
{
        kmem_cache_destroy(ntfs_enode_cachep);
}
53
54 static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
55 {
56         return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
57 }
58
/*
 * wnd_scan - Look for @to_alloc free bits inside one bitmap window.
 *
 * Scan range [wpos, wend) of window @buf; @wbit is the absolute bitmap
 * position of bit 0 of the window.
 * @prev_tail carries the length of a free run that ended the previous
 * window, so a run may straddle window boundaries.
 * *b_pos + *b_len - biggest free fragment seen so far.
 *
 * Return: Absolute start of a run of @to_alloc free bits, or -1 if not found.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
                       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
                       size_t *b_len)
{
        while (wpos < wend) {
                size_t free_len;
                u32 free_bits, end;
                u32 used = find_next_zero_bit(buf, wend, wpos);

                if (used >= wend) {
                        /* No free bit in the rest of this window. */
                        if (*b_len < *prev_tail) {
                                /* Carried-over run is the biggest so far. */
                                *b_pos = wbit - *prev_tail;
                                *b_len = *prev_tail;
                        }

                        *prev_tail = 0;
                        return -1;
                }

                if (used > wpos) {
                        /* Used bits at 'wpos' break the carried-over run. */
                        wpos = used;
                        if (*b_len < *prev_tail) {
                                *b_pos = wbit - *prev_tail;
                                *b_len = *prev_tail;
                        }

                        *prev_tail = 0;
                }

                /*
                 * Now we have a fragment [wpos, wend) starting with 0.
                 * Only 'to_alloc - *prev_tail' more bits are needed.
                 */
                end = wpos + to_alloc - *prev_tail;
                free_bits = find_next_bit(buf, min(end, wend), wpos);

                free_len = *prev_tail + free_bits - wpos;

                if (*b_len < free_len) {
                        *b_pos = wbit + wpos - *prev_tail;
                        *b_len = free_len;
                }

                if (free_len >= to_alloc)
                        return wbit + wpos - *prev_tail;

                if (free_bits >= wend) {
                        /* Run reaches the window end: carry it over. */
                        *prev_tail += free_bits - wpos;
                        return -1;
                }

                /* Skip this too-short run and the '1' bit that ended it. */
                wpos = free_bits + 1;

                *prev_tail = 0;
        }

        return -1;
}
124
/*
 * wnd_close - Frees all resources.
 */
void wnd_close(struct wnd_bitmap *wnd)
{
        struct rb_node *node, *next;

        /* Per-window free-bit counters allocated in wnd_init(). */
        kfree(wnd->free_bits);
        run_close(&wnd->run);

        node = rb_first(&wnd->start_tree);

        while (node) {
                next = rb_next(node);
                rb_erase(node, &wnd->start_tree);
                /*
                 * Each e_node is linked into both trees, but both links
                 * live in the same allocation: one free per node suffices.
                 */
                kmem_cache_free(ntfs_enode_cachep,
                                rb_entry(node, struct e_node, start.node));
                node = next;
        }
}
145
146 static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
147 {
148         struct rb_node **p = &root->rb_node;
149         struct rb_node *r = NULL;
150
151         while (*p) {
152                 struct rb_node_key *k;
153
154                 k = rb_entry(*p, struct rb_node_key, node);
155                 if (v < k->key) {
156                         p = &(*p)->rb_left;
157                 } else if (v > k->key) {
158                         r = &k->node;
159                         p = &(*p)->rb_right;
160                 } else {
161                         return &k->node;
162                 }
163         }
164
165         return r;
166 }
167
168 /*
169  * rb_insert_count - Helper function to insert special kind of 'count' tree.
170  */
171 static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
172 {
173         struct rb_node **p = &root->rb_node;
174         struct rb_node *parent = NULL;
175         size_t e_ckey = e->count.key;
176         size_t e_skey = e->start.key;
177
178         while (*p) {
179                 struct e_node *k =
180                         rb_entry(parent = *p, struct e_node, count.node);
181
182                 if (e_ckey > k->count.key) {
183                         p = &(*p)->rb_left;
184                 } else if (e_ckey < k->count.key) {
185                         p = &(*p)->rb_right;
186                 } else if (e_skey < k->start.key) {
187                         p = &(*p)->rb_left;
188                 } else if (e_skey > k->start.key) {
189                         p = &(*p)->rb_right;
190                 } else {
191                         WARN_ON(1);
192                         return false;
193                 }
194         }
195
196         rb_link_node(&e->count.node, parent, p);
197         rb_insert_color(&e->count.node, root);
198         return true;
199 }
200
201 /*
202  * rb_insert_start - Helper function to insert special kind of 'count' tree.
203  */
204 static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
205 {
206         struct rb_node **p = &root->rb_node;
207         struct rb_node *parent = NULL;
208         size_t e_skey = e->start.key;
209
210         while (*p) {
211                 struct e_node *k;
212
213                 parent = *p;
214
215                 k = rb_entry(parent, struct e_node, start.node);
216                 if (e_skey < k->start.key) {
217                         p = &(*p)->rb_left;
218                 } else if (e_skey > k->start.key) {
219                         p = &(*p)->rb_right;
220                 } else {
221                         WARN_ON(1);
222                         return false;
223                 }
224         }
225
226         rb_link_node(&e->start.node, parent, p);
227         rb_insert_color(&e->start.node, root);
228         return true;
229 }
230
/*
 * wnd_add_free_ext - Adds a new extent of free space.
 * @bit:        First free bit of the extent.
 * @len:        Length of the extent in bits.
 * @build:      1 when building tree (initial wnd_rescan), 0 at run time.
 *
 * Merges the new extent with cached neighbours and keeps both rb-trees
 * within NTFS_MAX_WND_EXTENTS nodes, evicting the smallest extent when
 * full (which makes the cached view incomplete: uptodated = -1).
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
                             bool build)
{
        struct e_node *e, *e0 = NULL;
        size_t ib, end_in = bit + len;
        struct rb_node *n;

        if (build) {
                /* Use extent_min to filter too short extents. */
                if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
                    len <= wnd->extent_min) {
                        /* Tree is full and this extent is too small to keep. */
                        wnd->uptodated = -1;
                        return;
                }
        } else {
                /* Try to find extent before 'bit'. */
                n = rb_lookup(&wnd->start_tree, bit);

                if (!n) {
                        n = rb_first(&wnd->start_tree);
                } else {
                        e = rb_entry(n, struct e_node, start.node);
                        n = rb_next(n);
                        if (e->start.key + e->count.key == bit) {
                                /* Remove left: the previous extent ends
                                 * exactly at 'bit', merge it in.
                                 */
                                bit = e->start.key;
                                len += e->count.key;
                                rb_erase(&e->start.node, &wnd->start_tree);
                                rb_erase(&e->count.node, &wnd->count_tree);
                                wnd->count -= 1;
                                /* Keep the detached node for reuse below. */
                                e0 = e;
                        }
                }

                /* Absorb every cached extent touching/overlapping the range. */
                while (n) {
                        size_t next_end;

                        e = rb_entry(n, struct e_node, start.node);
                        next_end = e->start.key + e->count.key;
                        if (e->start.key > end_in)
                                break;

                        /* Remove right. */
                        n = rb_next(n);
                        len += next_end - end_in;
                        end_in = next_end;
                        rb_erase(&e->start.node, &wnd->start_tree);
                        rb_erase(&e->count.node, &wnd->count_tree);
                        wnd->count -= 1;

                        if (!e0)
                                e0 = e;
                        else
                                kmem_cache_free(ntfs_enode_cachep, e);
                }

                if (wnd->uptodated != 1) {
                        /*
                         * Cache is not authoritative: neighbouring free bits
                         * may be missing from the tree, so widen the extent
                         * by probing the bitmap directly, without crossing
                         * the reserved zone [zone_bit, zone_end).
                         */
                        /* Check bits before 'bit'. */
                        ib = wnd->zone_bit == wnd->zone_end ||
                                             bit < wnd->zone_end
                                     ? 0
                                     : wnd->zone_end;

                        while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
                                bit -= 1;
                                len += 1;
                        }

                        /* Check bits after 'end_in'. */
                        ib = wnd->zone_bit == wnd->zone_end ||
                                             end_in > wnd->zone_bit
                                     ? wnd->nbits
                                     : wnd->zone_bit;

                        while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
                                end_in += 1;
                                len += 1;
                        }
                }
        }
        /* Insert new fragment. */
        if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
                /* Tree is full: the new extent must displace the smallest. */
                if (e0)
                        kmem_cache_free(ntfs_enode_cachep, e0);

                wnd->uptodated = -1;

                /* Compare with smallest fragment (count tree is descending). */
                n = rb_last(&wnd->count_tree);
                e = rb_entry(n, struct e_node, count.node);
                if (len <= e->count.key)
                        goto out; /* Do not insert small fragments. */

                if (build) {
                        struct e_node *e2;

                        n = rb_prev(n);
                        e2 = rb_entry(n, struct e_node, count.node);
                        /* Smallest fragment will be 'e2->count.key'. */
                        wnd->extent_min = e2->count.key;
                }

                /* Replace smallest fragment by new one. */
                rb_erase(&e->start.node, &wnd->start_tree);
                rb_erase(&e->count.node, &wnd->count_tree);
                wnd->count -= 1;
        } else {
                /* Reuse a node detached during the merge above, if any. */
                e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
                if (!e) {
                        wnd->uptodated = -1;
                        goto out;
                }

                if (build && len <= wnd->extent_min)
                        wnd->extent_min = len;
        }
        e->start.key = bit;
        e->count.key = len;
        if (len > wnd->extent_max)
                wnd->extent_max = len;

        rb_insert_start(&wnd->start_tree, e);
        rb_insert_count(&wnd->count_tree, e);
        wnd->count += 1;

out:;
}
362
/*
 * wnd_remove_free_ext - Remove a run from the cached free space.
 * @bit:        First bit of the range that became used.
 * @len:        Length of the range in bits.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
        struct rb_node *n, *n3;
        struct e_node *e, *e3;
        size_t end_in = bit + len;
        size_t end3, end, new_key, new_len, max_new_len;

        /* Try to find extent before 'bit'. */
        n = rb_lookup(&wnd->start_tree, bit);

        if (!n)
                return;

        e = rb_entry(n, struct e_node, start.node);
        end = e->start.key + e->count.key;

        new_key = new_len = 0;
        len = e->count.key;

        /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
        if (e->start.key > bit)
                ; /* Defensive: rb_lookup() returns a key <= bit. */
        else if (end_in <= end) {
                /* Range [bit,end_in) inside 'e': split off tail [end_in,end). */
                new_key = end_in;
                new_len = end - end_in;
                len = bit - e->start.key;
        } else if (bit > end) {
                /* Range lies after 'e': trim/remove the following extents. */
                bool bmax = false;

                n3 = rb_next(n);

                while (n3) {
                        e3 = rb_entry(n3, struct e_node, start.node);
                        if (e3->start.key >= end_in)
                                break;

                        /* Note whether the biggest cached extent is hit. */
                        if (e3->count.key == wnd->extent_max)
                                bmax = true;

                        end3 = e3->start.key + e3->count.key;
                        if (end3 > end_in) {
                                /* Partial overlap: keep tail [end_in,end3). */
                                e3->start.key = end_in;
                                rb_erase(&e3->count.node, &wnd->count_tree);
                                e3->count.key = end3 - end_in;
                                rb_insert_count(&wnd->count_tree, e3);
                                break;
                        }

                        /* Fully covered: drop the extent. */
                        n3 = rb_next(n3);
                        rb_erase(&e3->start.node, &wnd->start_tree);
                        rb_erase(&e3->count.node, &wnd->count_tree);
                        wnd->count -= 1;
                        kmem_cache_free(ntfs_enode_cachep, e3);
                }
                if (!bmax)
                        return;
                /* Recompute extent_max: count tree is sorted largest first. */
                n3 = rb_first(&wnd->count_tree);
                wnd->extent_max =
                        n3 ? rb_entry(n3, struct e_node, count.node)->count.key
                           : 0;
                return;
        }

        /* Maintain extent_max if 'e' was the single biggest extent. */
        if (e->count.key != wnd->extent_max) {
                ;
        } else if (rb_prev(&e->count.node)) {
                /* Another extent of the same (max) length remains. */
                ;
        } else {
                n3 = rb_next(&e->count.node);
                max_new_len = max(len, new_len);
                if (!n3) {
                        wnd->extent_max = max_new_len;
                } else {
                        e3 = rb_entry(n3, struct e_node, count.node);
                        wnd->extent_max = max(e3->count.key, max_new_len);
                }
        }

        if (!len) {
                /* Nothing left before 'bit'. */
                if (new_len) {
                        /* Reuse 'e' for the tail [new_key, new_key+new_len). */
                        e->start.key = new_key;
                        rb_erase(&e->count.node, &wnd->count_tree);
                        e->count.key = new_len;
                        rb_insert_count(&wnd->count_tree, e);
                } else {
                        rb_erase(&e->start.node, &wnd->start_tree);
                        rb_erase(&e->count.node, &wnd->count_tree);
                        wnd->count -= 1;
                        kmem_cache_free(ntfs_enode_cachep, e);
                }
                goto out;
        }
        /* Shrink 'e' to the head [e->start.key, bit). */
        rb_erase(&e->count.node, &wnd->count_tree);
        e->count.key = len;
        rb_insert_count(&wnd->count_tree, e);

        if (!new_len)
                goto out;

        if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
                /* Tree is full; the cached view becomes incomplete. */
                wnd->uptodated = -1;

                /* Get minimal extent. */
                e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
                             count.node);
                if (e->count.key > new_len)
                        goto out;

                /* Replace minimum. */
                rb_erase(&e->start.node, &wnd->start_tree);
                rb_erase(&e->count.node, &wnd->count_tree);
                wnd->count -= 1;
        } else {
                e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
                if (!e)
                        wnd->uptodated = -1;
        }

        if (e) {
                /* Insert the tail as a new extent. */
                e->start.key = new_key;
                e->count.key = new_len;
                rb_insert_start(&wnd->start_tree, e);
                rb_insert_count(&wnd->count_tree, e);
                wnd->count += 1;
        }

out:
        /* If the cache ran empty but is not authoritative, rebuild it. */
        if (!wnd->count && 1 != wnd->uptodated)
                wnd_rescan(wnd);
}
497
/*
 * wnd_rescan - Scan all bitmap. Used while initialization.
 *
 * Reads every bitmap window from disk, rebuilds the per-window free-bit
 * counters, the total free count, and the free-extent trees.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
        int err = 0;
        size_t prev_tail = 0;
        struct super_block *sb = wnd->sb;
        struct ntfs_sb_info *sbi = sb->s_fs_info;
        u64 lbo, len = 0;
        u32 blocksize = sb->s_blocksize;
        u8 cluster_bits = sbi->cluster_bits;
        u32 wbits = 8 * sb->s_blocksize;
        u32 used, frb;
        const ulong *buf;
        size_t wpos, wbit, iw, vbo;
        struct buffer_head *bh = NULL;
        CLST lcn, clen;

        /* Reset cached statistics; the scan below rebuilds them. */
        wnd->uptodated = 0;
        wnd->extent_max = 0;
        wnd->extent_min = MINUS_ONE_T;
        wnd->total_zeroes = 0;

        vbo = 0;

        for (iw = 0; iw < wnd->nwnd; iw++) {
                /* The last window may cover fewer bits. */
                if (iw + 1 == wnd->nwnd)
                        wbits = wnd->bits_last;

                if (wnd->inited) {
                        if (!wnd->free_bits[iw]) {
                                /* All ones. */
                                if (prev_tail) {
                                        /* Flush run that ended at the window
                                         * boundary.
                                         */
                                        wnd_add_free_ext(wnd,
                                                         vbo * 8 - prev_tail,
                                                         prev_tail, true);
                                        prev_tail = 0;
                                }
                                goto next_wnd;
                        }
                        if (wbits == wnd->free_bits[iw]) {
                                /* All zeroes. */
                                prev_tail += wbits;
                                wnd->total_zeroes += wbits;
                                goto next_wnd;
                        }
                }

                if (!len) {
                        /* Map the next run of the bitmap file to disk blocks. */
                        u32 off = vbo & sbi->cluster_mask;

                        if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
                                              &lcn, &clen, NULL)) {
                                err = -ENOENT;
                                goto out;
                        }

                        lbo = ((u64)lcn << cluster_bits) + off;
                        len = ((u64)clen << cluster_bits) - off;
                }

                bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
                if (!bh) {
                        err = -EIO;
                        goto out;
                }

                buf = (ulong *)bh->b_data;

                /* Count free bits in this window. */
                used = __bitmap_weight(buf, wbits);
                if (used < wbits) {
                        frb = wbits - used;
                        wnd->free_bits[iw] = frb;
                        wnd->total_zeroes += frb;
                }

                wpos = 0;
                wbit = vbo * 8;

                /* Clamp to the bitmap size for the final window. */
                if (wbit + wbits > wnd->nbits)
                        wbits = wnd->nbits - wbit;

                /* Walk the window and record each run of zero bits. */
                do {
                        used = find_next_zero_bit(buf, wbits, wpos);

                        if (used > wpos && prev_tail) {
                                /* Run carried over from the previous window
                                 * ends here.
                                 */
                                wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
                                                 prev_tail, true);
                                prev_tail = 0;
                        }

                        wpos = used;

                        if (wpos >= wbits) {
                                /* No free blocks. */
                                prev_tail = 0;
                                break;
                        }

                        frb = find_next_bit(buf, wbits, wpos);
                        if (frb >= wbits) {
                                /* Keep last free block. */
                                prev_tail += frb - wpos;
                                break;
                        }

                        wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
                                         frb + prev_tail - wpos, true);

                        /* Skip free block and first '1'. */
                        wpos = frb + 1;
                        /* Reset previous tail. */
                        prev_tail = 0;
                } while (wpos < wbits);

next_wnd:

                if (bh)
                        put_bh(bh);
                bh = NULL;

                vbo += blocksize;
                if (len) {
                        len -= blocksize;
                        lbo += blocksize;
                }
        }

        /* Add last block. */
        if (prev_tail)
                wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

        /*
         * Before init cycle wnd->uptodated was 0.
         * If any errors or limits occurs while initialization then
         * wnd->uptodated will be -1.
         * If 'uptodated' is still 0 then Tree is really updated.
         */
        if (!wnd->uptodated)
                wnd->uptodated = 1;

        /* Re-apply the reserved zone on top of the fresh tree. */
        if (wnd->zone_bit != wnd->zone_end) {
                size_t zlen = wnd->zone_end - wnd->zone_bit;

                wnd->zone_end = wnd->zone_bit;
                wnd_zone_set(wnd, wnd->zone_bit, zlen);
        }

out:
        return err;
}
650
651 int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
652 {
653         int err;
654         u32 blocksize = sb->s_blocksize;
655         u32 wbits = blocksize * 8;
656
657         init_rwsem(&wnd->rw_lock);
658
659         wnd->sb = sb;
660         wnd->nbits = nbits;
661         wnd->total_zeroes = nbits;
662         wnd->extent_max = MINUS_ONE_T;
663         wnd->zone_bit = wnd->zone_end = 0;
664         wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
665         wnd->bits_last = nbits & (wbits - 1);
666         if (!wnd->bits_last)
667                 wnd->bits_last = wbits;
668
669         wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
670         if (!wnd->free_bits)
671                 return -ENOMEM;
672
673         err = wnd_rescan(wnd);
674         if (err)
675                 return err;
676
677         wnd->inited = true;
678
679         return 0;
680 }
681
682 /*
683  * wnd_map - Call sb_bread for requested window.
684  */
685 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
686 {
687         size_t vbo;
688         CLST lcn, clen;
689         struct super_block *sb = wnd->sb;
690         struct ntfs_sb_info *sbi;
691         struct buffer_head *bh;
692         u64 lbo;
693
694         sbi = sb->s_fs_info;
695         vbo = (u64)iw << sb->s_blocksize_bits;
696
697         if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
698                               NULL)) {
699                 return ERR_PTR(-ENOENT);
700         }
701
702         lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);
703
704         bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
705         if (!bh)
706                 return ERR_PTR(-EIO);
707
708         return bh;
709 }
710
/*
 * wnd_set_free - Mark the bits range from bit to bit + bits as free.
 *
 * Clears the bits window by window in the on-disk bitmap, then merges
 * the freed range into the cached free-extent trees.
 *
 * Return: 0 on success or the first I/O error encountered.
 */
int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
        int err = 0;
        struct super_block *sb = wnd->sb;
        /* Remember the original length for the extent-tree update below. */
        size_t bits0 = bits;
        u32 wbits = 8 * sb->s_blocksize;
        /* Window index and bit offset of 'bit' within that window. */
        size_t iw = bit >> (sb->s_blocksize_bits + 3);
        u32 wbit = bit & (wbits - 1);
        struct buffer_head *bh;

        while (iw < wnd->nwnd && bits) {
                u32 tail, op;
                ulong *buf;

                if (iw + 1 == wnd->nwnd)
                        wbits = wnd->bits_last;

                /* Clear at most up to the end of this window. */
                tail = wbits - wbit;
                op = min_t(u32, tail, bits);

                bh = wnd_map(wnd, iw);
                if (IS_ERR(bh)) {
                        err = PTR_ERR(bh);
                        break;
                }

                buf = (ulong *)bh->b_data;

                /* Modify the window under the buffer lock. */
                lock_buffer(bh);

                __bitmap_clear(buf, wbit, op);

                wnd->free_bits[iw] += op;

                set_buffer_uptodate(bh);
                mark_buffer_dirty(bh);
                unlock_buffer(bh);
                put_bh(bh);

                wnd->total_zeroes += op;
                bits -= op;
                wbit = 0;
                iw += 1;
        }

        /* Merge the freed range into the cached free-extent trees. */
        wnd_add_free_ext(wnd, bit, bits0, false);

        return err;
}
763
/*
 * wnd_set_used - Mark the bits range from bit to bit + bits as used.
 *
 * Sets the bits window by window in the on-disk bitmap, then removes
 * the range from the cached free-extent trees.
 *
 * Return: 0 on success or the first I/O error encountered.
 */
int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
        int err = 0;
        struct super_block *sb = wnd->sb;
        /* Remember the original length for the extent-tree update below. */
        size_t bits0 = bits;
        /* Window index and bit offset of 'bit' within that window. */
        size_t iw = bit >> (sb->s_blocksize_bits + 3);
        u32 wbits = 8 * sb->s_blocksize;
        u32 wbit = bit & (wbits - 1);
        struct buffer_head *bh;

        while (iw < wnd->nwnd && bits) {
                u32 tail, op;
                ulong *buf;

                if (unlikely(iw + 1 == wnd->nwnd))
                        wbits = wnd->bits_last;

                /* Set at most up to the end of this window. */
                tail = wbits - wbit;
                op = min_t(u32, tail, bits);

                bh = wnd_map(wnd, iw);
                if (IS_ERR(bh)) {
                        err = PTR_ERR(bh);
                        break;
                }
                buf = (ulong *)bh->b_data;

                /* Modify the window under the buffer lock. */
                lock_buffer(bh);

                __bitmap_set(buf, wbit, op);
                wnd->free_bits[iw] -= op;

                set_buffer_uptodate(bh);
                mark_buffer_dirty(bh);
                unlock_buffer(bh);
                put_bh(bh);

                wnd->total_zeroes -= op;
                bits -= op;
                wbit = 0;
                iw += 1;
        }

        /* Drop the range from the cached free-extent trees. */
        if (!RB_EMPTY_ROOT(&wnd->start_tree))
                wnd_remove_free_ext(wnd, bit, bits0);

        return err;
}
815
/*
 * wnd_is_free_hlp
 *
 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
 */
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
        struct super_block *sb = wnd->sb;
        /* Window index and bit offset of 'bit' within that window. */
        size_t iw = bit >> (sb->s_blocksize_bits + 3);
        u32 wbits = 8 * sb->s_blocksize;
        u32 wbit = bit & (wbits - 1);

        while (iw < wnd->nwnd && bits) {
                u32 tail, op;

                /* The last window may cover fewer bits. */
                if (unlikely(iw + 1 == wnd->nwnd))
                        wbits = wnd->bits_last;

                /* Check at most up to the end of this window. */
                tail = wbits - wbit;
                op = min_t(u32, tail, bits);

                /* A fully-free window (free_bits == wbits) needs no I/O. */
                if (wbits != wnd->free_bits[iw]) {
                        bool ret;
                        struct buffer_head *bh = wnd_map(wnd, iw);

                        /* Treat read failures as "not free". */
                        if (IS_ERR(bh))
                                return false;

                        ret = are_bits_clear((ulong *)bh->b_data, wbit, op);

                        put_bh(bh);
                        if (!ret)
                                return false;
                }

                /* Advance to the next window. */
                bits -= op;
                wbit = 0;
                iw += 1;
        }

        return true;
}
858
859 /*
860  * wnd_is_free
861  *
862  * Return: True if all clusters [bit, bit+bits) are free.
863  */
864 bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
865 {
866         bool ret;
867         struct rb_node *n;
868         size_t end;
869         struct e_node *e;
870
871         if (RB_EMPTY_ROOT(&wnd->start_tree))
872                 goto use_wnd;
873
874         n = rb_lookup(&wnd->start_tree, bit);
875         if (!n)
876                 goto use_wnd;
877
878         e = rb_entry(n, struct e_node, start.node);
879
880         end = e->start.key + e->count.key;
881
882         if (bit < end && bit + bits <= end)
883                 return true;
884
885 use_wnd:
886         ret = wnd_is_free_hlp(wnd, bit, bits);
887
888         return ret;
889 }
890
891 /*
892  * wnd_is_used
893  *
894  * Return: True if all clusters [bit, bit+bits) are used.
895  */
896 bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
897 {
898         bool ret = false;
899         struct super_block *sb = wnd->sb;
900         size_t iw = bit >> (sb->s_blocksize_bits + 3);
901         u32 wbits = 8 * sb->s_blocksize;
902         u32 wbit = bit & (wbits - 1);
903         size_t end;
904         struct rb_node *n;
905         struct e_node *e;
906
907         if (RB_EMPTY_ROOT(&wnd->start_tree))
908                 goto use_wnd;
909
910         end = bit + bits;
911         n = rb_lookup(&wnd->start_tree, end - 1);
912         if (!n)
913                 goto use_wnd;
914
915         e = rb_entry(n, struct e_node, start.node);
916         if (e->start.key + e->count.key > bit)
917                 return false;
918
919 use_wnd:
920         while (iw < wnd->nwnd && bits) {
921                 u32 tail, op;
922
923                 if (unlikely(iw + 1 == wnd->nwnd))
924                         wbits = wnd->bits_last;
925
926                 tail = wbits - wbit;
927                 op = min_t(u32, tail, bits);
928
929                 if (wnd->free_bits[iw]) {
930                         bool ret;
931                         struct buffer_head *bh = wnd_map(wnd, iw);
932
933                         if (IS_ERR(bh))
934                                 goto out;
935
936                         ret = are_bits_set((ulong *)bh->b_data, wbit, op);
937                         put_bh(bh);
938                         if (!ret)
939                                 goto out;
940                 }
941
942                 bits -= op;
943                 wbit = 0;
944                 iw += 1;
945         }
946         ret = true;
947
948 out:
949         return ret;
950 }
951
952 /*
953  * wnd_find - Look for free space.
954  *
955  * - flags - BITMAP_FIND_XXX flags
956  *
957  * Return: 0 if not found.
958  */
959 size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
960                 size_t flags, size_t *allocated)
961 {
962         struct super_block *sb;
963         u32 wbits, wpos, wzbit, wzend;
964         size_t fnd, max_alloc, b_len, b_pos;
965         size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
966         size_t to_alloc0 = to_alloc;
967         const ulong *buf;
968         const struct e_node *e;
969         const struct rb_node *pr, *cr;
970         u8 log2_bits;
971         bool fbits_valid;
972         struct buffer_head *bh;
973
974         /* Fast checking for available free space. */
975         if (flags & BITMAP_FIND_FULL) {
976                 size_t zeroes = wnd_zeroes(wnd);
977
978                 zeroes -= wnd->zone_end - wnd->zone_bit;
979                 if (zeroes < to_alloc0)
980                         goto no_space;
981
982                 if (to_alloc0 > wnd->extent_max)
983                         goto no_space;
984         } else {
985                 if (to_alloc > wnd->extent_max)
986                         to_alloc = wnd->extent_max;
987         }
988
989         if (wnd->zone_bit <= hint && hint < wnd->zone_end)
990                 hint = wnd->zone_end;
991
992         max_alloc = wnd->nbits;
993         b_len = b_pos = 0;
994
995         if (hint >= max_alloc)
996                 hint = 0;
997
998         if (RB_EMPTY_ROOT(&wnd->start_tree)) {
999                 if (wnd->uptodated == 1) {
1000                         /* Extents tree is updated -> No free space. */
1001                         goto no_space;
1002                 }
1003                 goto scan_bitmap;
1004         }
1005
1006         e = NULL;
1007         if (!hint)
1008                 goto allocate_biggest;
1009
1010         /* Use hint: Enumerate extents by start >= hint. */
1011         pr = NULL;
1012         cr = wnd->start_tree.rb_node;
1013
1014         for (;;) {
1015                 e = rb_entry(cr, struct e_node, start.node);
1016
1017                 if (e->start.key == hint)
1018                         break;
1019
1020                 if (e->start.key < hint) {
1021                         pr = cr;
1022                         cr = cr->rb_right;
1023                         if (!cr)
1024                                 break;
1025                         continue;
1026                 }
1027
1028                 cr = cr->rb_left;
1029                 if (!cr) {
1030                         e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
1031                         break;
1032                 }
1033         }
1034
1035         if (!e)
1036                 goto allocate_biggest;
1037
1038         if (e->start.key + e->count.key > hint) {
1039                 /* We have found extension with 'hint' inside. */
1040                 size_t len = e->start.key + e->count.key - hint;
1041
1042                 if (len >= to_alloc && hint + to_alloc <= max_alloc) {
1043                         fnd = hint;
1044                         goto found;
1045                 }
1046
1047                 if (!(flags & BITMAP_FIND_FULL)) {
1048                         if (len > to_alloc)
1049                                 len = to_alloc;
1050
1051                         if (hint + len <= max_alloc) {
1052                                 fnd = hint;
1053                                 to_alloc = len;
1054                                 goto found;
1055                         }
1056                 }
1057         }
1058
1059 allocate_biggest:
1060         /* Allocate from biggest free extent. */
1061         e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
1062         if (e->count.key != wnd->extent_max)
1063                 wnd->extent_max = e->count.key;
1064
1065         if (e->count.key < max_alloc) {
1066                 if (e->count.key >= to_alloc) {
1067                         ;
1068                 } else if (flags & BITMAP_FIND_FULL) {
1069                         if (e->count.key < to_alloc0) {
1070                                 /* Biggest free block is less then requested. */
1071                                 goto no_space;
1072                         }
1073                         to_alloc = e->count.key;
1074                 } else if (-1 != wnd->uptodated) {
1075                         to_alloc = e->count.key;
1076                 } else {
1077                         /* Check if we can use more bits. */
1078                         size_t op, max_check;
1079                         struct rb_root start_tree;
1080
1081                         memcpy(&start_tree, &wnd->start_tree,
1082                                sizeof(struct rb_root));
1083                         memset(&wnd->start_tree, 0, sizeof(struct rb_root));
1084
1085                         max_check = e->start.key + to_alloc;
1086                         if (max_check > max_alloc)
1087                                 max_check = max_alloc;
1088                         for (op = e->start.key + e->count.key; op < max_check;
1089                              op++) {
1090                                 if (!wnd_is_free(wnd, op, 1))
1091                                         break;
1092                         }
1093                         memcpy(&wnd->start_tree, &start_tree,
1094                                sizeof(struct rb_root));
1095                         to_alloc = op - e->start.key;
1096                 }
1097
1098                 /* Prepare to return. */
1099                 fnd = e->start.key;
1100                 if (e->start.key + to_alloc > max_alloc)
1101                         to_alloc = max_alloc - e->start.key;
1102                 goto found;
1103         }
1104
1105         if (wnd->uptodated == 1) {
1106                 /* Extents tree is updated -> no free space. */
1107                 goto no_space;
1108         }
1109
1110         b_len = e->count.key;
1111         b_pos = e->start.key;
1112
1113 scan_bitmap:
1114         sb = wnd->sb;
1115         log2_bits = sb->s_blocksize_bits + 3;
1116
1117         /* At most two ranges [hint, max_alloc) + [0, hint). */
1118 Again:
1119
1120         /* TODO: Optimize request for case nbits > wbits. */
1121         iw = hint >> log2_bits;
1122         wbits = sb->s_blocksize * 8;
1123         wpos = hint & (wbits - 1);
1124         prev_tail = 0;
1125         fbits_valid = true;
1126
1127         if (max_alloc == wnd->nbits) {
1128                 nwnd = wnd->nwnd;
1129         } else {
1130                 size_t t = max_alloc + wbits - 1;
1131
1132                 nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
1133         }
1134
1135         /* Enumerate all windows. */
1136         for (; iw < nwnd; iw++) {
1137                 wbit = iw << log2_bits;
1138
1139                 if (!wnd->free_bits[iw]) {
1140                         if (prev_tail > b_len) {
1141                                 b_pos = wbit - prev_tail;
1142                                 b_len = prev_tail;
1143                         }
1144
1145                         /* Skip full used window. */
1146                         prev_tail = 0;
1147                         wpos = 0;
1148                         continue;
1149                 }
1150
1151                 if (unlikely(iw + 1 == nwnd)) {
1152                         if (max_alloc == wnd->nbits) {
1153                                 wbits = wnd->bits_last;
1154                         } else {
1155                                 size_t t = max_alloc & (wbits - 1);
1156
1157                                 if (t) {
1158                                         wbits = t;
1159                                         fbits_valid = false;
1160                                 }
1161                         }
1162                 }
1163
1164                 if (wnd->zone_end > wnd->zone_bit) {
1165                         ebit = wbit + wbits;
1166                         zbit = max(wnd->zone_bit, wbit);
1167                         zend = min(wnd->zone_end, ebit);
1168
1169                         /* Here we have a window [wbit, ebit) and zone [zbit, zend). */
1170                         if (zend <= zbit) {
1171                                 /* Zone does not overlap window. */
1172                         } else {
1173                                 wzbit = zbit - wbit;
1174                                 wzend = zend - wbit;
1175
1176                                 /* Zone overlaps window. */
1177                                 if (wnd->free_bits[iw] == wzend - wzbit) {
1178                                         prev_tail = 0;
1179                                         wpos = 0;
1180                                         continue;
1181                                 }
1182
1183                                 /* Scan two ranges window: [wbit, zbit) and [zend, ebit). */
1184                                 bh = wnd_map(wnd, iw);
1185
1186                                 if (IS_ERR(bh)) {
1187                                         /* TODO: Error */
1188                                         prev_tail = 0;
1189                                         wpos = 0;
1190                                         continue;
1191                                 }
1192
1193                                 buf = (ulong *)bh->b_data;
1194
1195                                 /* Scan range [wbit, zbit). */
1196                                 if (wpos < wzbit) {
1197                                         /* Scan range [wpos, zbit). */
1198                                         fnd = wnd_scan(buf, wbit, wpos, wzbit,
1199                                                        to_alloc, &prev_tail,
1200                                                        &b_pos, &b_len);
1201                                         if (fnd != MINUS_ONE_T) {
1202                                                 put_bh(bh);
1203                                                 goto found;
1204                                         }
1205                                 }
1206
1207                                 prev_tail = 0;
1208
1209                                 /* Scan range [zend, ebit). */
1210                                 if (wzend < wbits) {
1211                                         fnd = wnd_scan(buf, wbit,
1212                                                        max(wzend, wpos), wbits,
1213                                                        to_alloc, &prev_tail,
1214                                                        &b_pos, &b_len);
1215                                         if (fnd != MINUS_ONE_T) {
1216                                                 put_bh(bh);
1217                                                 goto found;
1218                                         }
1219                                 }
1220
1221                                 wpos = 0;
1222                                 put_bh(bh);
1223                                 continue;
1224                         }
1225                 }
1226
1227                 /* Current window does not overlap zone. */
1228                 if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
1229                         /* Window is empty. */
1230                         if (prev_tail + wbits >= to_alloc) {
1231                                 fnd = wbit + wpos - prev_tail;
1232                                 goto found;
1233                         }
1234
1235                         /* Increase 'prev_tail' and process next window. */
1236                         prev_tail += wbits;
1237                         wpos = 0;
1238                         continue;
1239                 }
1240
1241                 /* Read window. */
1242                 bh = wnd_map(wnd, iw);
1243                 if (IS_ERR(bh)) {
1244                         // TODO: Error.
1245                         prev_tail = 0;
1246                         wpos = 0;
1247                         continue;
1248                 }
1249
1250                 buf = (ulong *)bh->b_data;
1251
1252                 /* Scan range [wpos, eBits). */
1253                 fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
1254                                &b_pos, &b_len);
1255                 put_bh(bh);
1256                 if (fnd != MINUS_ONE_T)
1257                         goto found;
1258         }
1259
1260         if (b_len < prev_tail) {
1261                 /* The last fragment. */
1262                 b_len = prev_tail;
1263                 b_pos = max_alloc - prev_tail;
1264         }
1265
1266         if (hint) {
1267                 /*
1268                  * We have scanned range [hint max_alloc).
1269                  * Prepare to scan range [0 hint + to_alloc).
1270                  */
1271                 size_t nextmax = hint + to_alloc;
1272
1273                 if (likely(nextmax >= hint) && nextmax < max_alloc)
1274                         max_alloc = nextmax;
1275                 hint = 0;
1276                 goto Again;
1277         }
1278
1279         if (!b_len)
1280                 goto no_space;
1281
1282         wnd->extent_max = b_len;
1283
1284         if (flags & BITMAP_FIND_FULL)
1285                 goto no_space;
1286
1287         fnd = b_pos;
1288         to_alloc = b_len;
1289
1290 found:
1291         if (flags & BITMAP_FIND_MARK_AS_USED) {
1292                 /* TODO: Optimize remove extent (pass 'e'?). */
1293                 if (wnd_set_used(wnd, fnd, to_alloc))
1294                         goto no_space;
1295         } else if (wnd->extent_max != MINUS_ONE_T &&
1296                    to_alloc > wnd->extent_max) {
1297                 wnd->extent_max = to_alloc;
1298         }
1299
1300         *allocated = fnd;
1301         return to_alloc;
1302
1303 no_space:
1304         return 0;
1305 }
1306
1307 /*
1308  * wnd_extend - Extend bitmap ($MFT bitmap).
1309  */
1310 int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
1311 {
1312         int err;
1313         struct super_block *sb = wnd->sb;
1314         struct ntfs_sb_info *sbi = sb->s_fs_info;
1315         u32 blocksize = sb->s_blocksize;
1316         u32 wbits = blocksize * 8;
1317         u32 b0, new_last;
1318         size_t bits, iw, new_wnd;
1319         size_t old_bits = wnd->nbits;
1320         u16 *new_free;
1321
1322         if (new_bits <= old_bits)
1323                 return -EINVAL;
1324
1325         /* Align to 8 byte boundary. */
1326         new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
1327         new_last = new_bits & (wbits - 1);
1328         if (!new_last)
1329                 new_last = wbits;
1330
1331         if (new_wnd != wnd->nwnd) {
1332                 new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
1333                 if (!new_free)
1334                         return -ENOMEM;
1335
1336                 if (new_free != wnd->free_bits)
1337                         memcpy(new_free, wnd->free_bits,
1338                                wnd->nwnd * sizeof(short));
1339                 memset(new_free + wnd->nwnd, 0,
1340                        (new_wnd - wnd->nwnd) * sizeof(short));
1341                 kfree(wnd->free_bits);
1342                 wnd->free_bits = new_free;
1343         }
1344
1345         /* Zero bits [old_bits,new_bits). */
1346         bits = new_bits - old_bits;
1347         b0 = old_bits & (wbits - 1);
1348
1349         for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
1350                 u32 op;
1351                 size_t frb;
1352                 u64 vbo, lbo, bytes;
1353                 struct buffer_head *bh;
1354                 ulong *buf;
1355
1356                 if (iw + 1 == new_wnd)
1357                         wbits = new_last;
1358
1359                 op = b0 + bits > wbits ? wbits - b0 : bits;
1360                 vbo = (u64)iw * blocksize;
1361
1362                 err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
1363                 if (err)
1364                         break;
1365
1366                 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
1367                 if (!bh)
1368                         return -EIO;
1369
1370                 lock_buffer(bh);
1371                 buf = (ulong *)bh->b_data;
1372
1373                 __bitmap_clear(buf, b0, blocksize * 8 - b0);
1374                 frb = wbits - __bitmap_weight(buf, wbits);
1375                 wnd->total_zeroes += frb - wnd->free_bits[iw];
1376                 wnd->free_bits[iw] = frb;
1377
1378                 set_buffer_uptodate(bh);
1379                 mark_buffer_dirty(bh);
1380                 unlock_buffer(bh);
1381                 /* err = sync_dirty_buffer(bh); */
1382
1383                 b0 = 0;
1384                 bits -= op;
1385         }
1386
1387         wnd->nbits = new_bits;
1388         wnd->nwnd = new_wnd;
1389         wnd->bits_last = new_last;
1390
1391         wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);
1392
1393         return 0;
1394 }
1395
1396 void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
1397 {
1398         size_t zlen;
1399
1400         zlen = wnd->zone_end - wnd->zone_bit;
1401         if (zlen)
1402                 wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
1403
1404         if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
1405                 wnd_remove_free_ext(wnd, lcn, len);
1406
1407         wnd->zone_bit = lcn;
1408         wnd->zone_end = lcn + len;
1409 }
1410
1411 int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
1412 {
1413         int err = 0;
1414         struct super_block *sb = sbi->sb;
1415         struct wnd_bitmap *wnd = &sbi->used.bitmap;
1416         u32 wbits = 8 * sb->s_blocksize;
1417         CLST len = 0, lcn = 0, done = 0;
1418         CLST minlen = bytes_to_cluster(sbi, range->minlen);
1419         CLST lcn_from = bytes_to_cluster(sbi, range->start);
1420         size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
1421         u32 wbit = lcn_from & (wbits - 1);
1422         const ulong *buf;
1423         CLST lcn_to;
1424
1425         if (!minlen)
1426                 minlen = 1;
1427
1428         if (range->len == (u64)-1)
1429                 lcn_to = wnd->nbits;
1430         else
1431                 lcn_to = bytes_to_cluster(sbi, range->start + range->len);
1432
1433         down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
1434
1435         for (; iw < wnd->nbits; iw++, wbit = 0) {
1436                 CLST lcn_wnd = iw * wbits;
1437                 struct buffer_head *bh;
1438
1439                 if (lcn_wnd > lcn_to)
1440                         break;
1441
1442                 if (!wnd->free_bits[iw])
1443                         continue;
1444
1445                 if (iw + 1 == wnd->nwnd)
1446                         wbits = wnd->bits_last;
1447
1448                 if (lcn_wnd + wbits > lcn_to)
1449                         wbits = lcn_to - lcn_wnd;
1450
1451                 bh = wnd_map(wnd, iw);
1452                 if (IS_ERR(bh)) {
1453                         err = PTR_ERR(bh);
1454                         break;
1455                 }
1456
1457                 buf = (ulong *)bh->b_data;
1458
1459                 for (; wbit < wbits; wbit++) {
1460                         if (!test_bit(wbit, buf)) {
1461                                 if (!len)
1462                                         lcn = lcn_wnd + wbit;
1463                                 len += 1;
1464                                 continue;
1465                         }
1466                         if (len >= minlen) {
1467                                 err = ntfs_discard(sbi, lcn, len);
1468                                 if (err)
1469                                         goto out;
1470                                 done += len;
1471                         }
1472                         len = 0;
1473                 }
1474                 put_bh(bh);
1475         }
1476
1477         /* Process the last fragment. */
1478         if (len >= minlen) {
1479                 err = ntfs_discard(sbi, lcn, len);
1480                 if (err)
1481                         goto out;
1482                 done += len;
1483         }
1484
1485 out:
1486         range->len = (u64)done << sbi->cluster_bits;
1487
1488         up_read(&wnd->rw_lock);
1489
1490         return err;
1491 }