drivers/md/dm-bufio.c
1 /*
2  * Copyright (C) 2009-2011 Red Hat, Inc.
3  *
4  * Author: Mikulas Patocka <mpatocka@redhat.com>
5  *
6  * This file is released under the GPL.
7  */
8
9 #include <linux/dm-bufio.h>
10
11 #include <linux/device-mapper.h>
12 #include <linux/dm-io.h>
13 #include <linux/slab.h>
14 #include <linux/sched/mm.h>
15 #include <linux/jiffies.h>
16 #include <linux/vmalloc.h>
17 #include <linux/shrinker.h>
18 #include <linux/module.h>
19 #include <linux/rbtree.h>
20 #include <linux/stacktrace.h>
21
22 #define DM_MSG_PREFIX "bufio"
23
24 /*
25  * Memory management policy:
26  *      Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
27  *      or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
28  *      Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
29  *      Start background writeback when the number of dirty buffers
30  *      exceeds DM_BUFIO_WRITEBACK_RATIO times the number of clean buffers.
31  */
32 #define DM_BUFIO_MIN_BUFFERS            8
33
34 #define DM_BUFIO_MEMORY_PERCENT         2
35 #define DM_BUFIO_VMALLOC_PERCENT        25
36 #define DM_BUFIO_WRITEBACK_RATIO        3
37 #define DM_BUFIO_LOW_WATERMARK_RATIO    16
38
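/*
 * Editor's note (not part of the original file): a minimal sketch of how a
 * default cache size could be derived from the two percentages above,
 * assuming totalram_pages(), VMALLOC_TOTAL and mult_frac() are available.
 * The real derivation is done in dm_bufio_init(), which lies outside this
 * excerpt, so treat the helper below as an illustration of the policy
 * comment rather than the actual implementation.
 */
static unsigned long example_default_cache_size(void)
{
        /* DM_BUFIO_MEMORY_PERCENT of main memory, in bytes ... */
        unsigned long mem = mult_frac(totalram_pages(),
                                      DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;

#ifdef CONFIG_MMU
        /* ... capped at DM_BUFIO_VMALLOC_PERCENT of the vmalloc arena */
        unsigned long vm_cap = mult_frac(VMALLOC_TOTAL,
                                         DM_BUFIO_VMALLOC_PERCENT, 100);

        if (mem > vm_cap)
                mem = vm_cap;
#endif
        return mem;     /* "whichever is lower", per the policy above */
}
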
39 /*
40  * Check buffer ages in this interval (seconds)
41  */
42 #define DM_BUFIO_WORK_TIMER_SECS        30
43
44 /*
45  * Free buffers when they are older than this (seconds)
46  */
47 #define DM_BUFIO_DEFAULT_AGE_SECS       300
48
49 /*
50  * The number of bytes of cached data to keep around.
51  */
52 #define DM_BUFIO_DEFAULT_RETAIN_BYTES   (256 * 1024)
53
54 /*
55  * Align buffer writes to this boundary.
56  * Tests show that SSDs have the highest IOPS when using 4k writes.
57  */
58 #define DM_BUFIO_WRITE_ALIGN            4096
59
60 /*
61  * dm_buffer->list_mode
62  */
63 #define LIST_CLEAN      0
64 #define LIST_DIRTY      1
65 #define LIST_SIZE       2
66
67 /*
68  * Linking of buffers:
69  *      All buffers are linked to buffer_tree with their node field.
70  *
71  *      Clean buffers that are not being written (B_WRITING not set)
72  *      are linked to lru[LIST_CLEAN] with their lru_list field.
73  *
74  *      Dirty and clean buffers that are being written are linked to
75  *      lru[LIST_DIRTY] with their lru_list field. When the write
76  *      finishes, the buffer cannot be relinked immediately (because we
77  *      are in an interrupt context and relinking requires process
78  *      context), so some clean-not-writing buffers can be held on
79  *      dirty_lru too.  They are later added to lru in the process
80  *      context.
81  */
82 struct dm_bufio_client {
83         struct mutex lock;
84
85         struct list_head lru[LIST_SIZE];
86         unsigned long n_buffers[LIST_SIZE];
87
88         struct block_device *bdev;
89         unsigned block_size;
90         s8 sectors_per_block_bits;
91         void (*alloc_callback)(struct dm_buffer *);
92         void (*write_callback)(struct dm_buffer *);
93
94         struct kmem_cache *slab_buffer;
95         struct kmem_cache *slab_cache;
96         struct dm_io_client *dm_io;
97
98         struct list_head reserved_buffers;
99         unsigned need_reserved_buffers;
100
101         unsigned minimum_buffers;
102
103         struct rb_root buffer_tree;
104         wait_queue_head_t free_buffer_wait;
105
106         sector_t start;
107
108         int async_write_error;
109
110         struct list_head client_list;
111         struct shrinker shrinker;
112 };
113
114 /*
115  * Buffer state bits.
116  */
117 #define B_READING       0
118 #define B_WRITING       1
119 #define B_DIRTY         2
120
121 /*
122  * Describes how the block was allocated:
123  * kmem_cache_alloc(), __get_free_pages() or vmalloc().
124  * See the comment at alloc_buffer_data.
125  */
126 enum data_mode {
127         DATA_MODE_SLAB = 0,
128         DATA_MODE_GET_FREE_PAGES = 1,
129         DATA_MODE_VMALLOC = 2,
130         DATA_MODE_LIMIT = 3
131 };
132
133 struct dm_buffer {
134         struct rb_node node;
135         struct list_head lru_list;
136         struct list_head global_list;
137         sector_t block;
138         void *data;
139         unsigned char data_mode;                /* DATA_MODE_* */
140         unsigned char list_mode;                /* LIST_* */
141         blk_status_t read_error;
142         blk_status_t write_error;
143         unsigned accessed;
144         unsigned hold_count;
145         unsigned long state;
146         unsigned long last_accessed;
147         unsigned dirty_start;
148         unsigned dirty_end;
149         unsigned write_start;
150         unsigned write_end;
151         struct dm_bufio_client *c;
152         struct list_head write_list;
153         void (*end_io)(struct dm_buffer *, blk_status_t);
154 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
155 #define MAX_STACK 10
156         unsigned int stack_len;
157         unsigned long stack_entries[MAX_STACK];
158 #endif
159 };
160
161 /*----------------------------------------------------------------*/
162
163 #define dm_bufio_in_request()   (!!current->bio_list)
164
165 static void dm_bufio_lock(struct dm_bufio_client *c)
166 {
167         mutex_lock_nested(&c->lock, dm_bufio_in_request());
168 }
169
170 static int dm_bufio_trylock(struct dm_bufio_client *c)
171 {
172         return mutex_trylock(&c->lock);
173 }
174
175 static void dm_bufio_unlock(struct dm_bufio_client *c)
176 {
177         mutex_unlock(&c->lock);
178 }
179
180 /*----------------------------------------------------------------*/
181
182 /*
183  * Default cache size: available memory divided by the ratio.
184  */
185 static unsigned long dm_bufio_default_cache_size;
186
187 /*
188  * Total cache size set by the user.
189  */
190 static unsigned long dm_bufio_cache_size;
191
192 /*
193  * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
194  * at any time.  If it disagrees, the user has changed cache size.
195  */
196 static unsigned long dm_bufio_cache_size_latch;
197
198 static DEFINE_SPINLOCK(global_spinlock);
199
200 static LIST_HEAD(global_queue);
201
202 static unsigned long global_num = 0;
203
204 /*
205  * Buffers are freed after this timeout
206  */
207 static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
208 static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
209
210 static unsigned long dm_bufio_peak_allocated;
211 static unsigned long dm_bufio_allocated_kmem_cache;
212 static unsigned long dm_bufio_allocated_get_free_pages;
213 static unsigned long dm_bufio_allocated_vmalloc;
214 static unsigned long dm_bufio_current_allocated;
215
216 /*----------------------------------------------------------------*/
217
218 /*
219  * The current number of clients.
220  */
221 static int dm_bufio_client_count;
222
223 /*
224  * The list of all clients.
225  */
226 static LIST_HEAD(dm_bufio_all_clients);
227
228 /*
229  * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count
230  */
231 static DEFINE_MUTEX(dm_bufio_clients_lock);
232
233 static struct workqueue_struct *dm_bufio_wq;
234 static struct delayed_work dm_bufio_cleanup_old_work;
235 static struct work_struct dm_bufio_replacement_work;
236
237
238 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
239 static void buffer_record_stack(struct dm_buffer *b)
240 {
241         b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2);
242 }
243 #endif
244
245 /*----------------------------------------------------------------
246  * A red/black tree acts as an index for all the buffers.
247  *--------------------------------------------------------------*/
248 static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
249 {
250         struct rb_node *n = c->buffer_tree.rb_node;
251         struct dm_buffer *b;
252
253         while (n) {
254                 b = container_of(n, struct dm_buffer, node);
255
256                 if (b->block == block)
257                         return b;
258
259                 n = block < b->block ? n->rb_left : n->rb_right;
260         }
261
262         return NULL;
263 }
264
265 static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block)
266 {
267         struct rb_node *n = c->buffer_tree.rb_node;
268         struct dm_buffer *b;
269         struct dm_buffer *best = NULL;
270
271         while (n) {
272                 b = container_of(n, struct dm_buffer, node);
273
274                 if (b->block == block)
275                         return b;
276
277                 if (block <= b->block) {
278                         n = n->rb_left;
279                         best = b;
280                 } else {
281                         n = n->rb_right;
282                 }
283         }
284
285         return best;
286 }
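
/*
 * Editor's worked example (not part of the original file): if the tree
 * currently holds buffers for blocks {2, 5, 9}, then __find_next(c, 5)
 * returns the buffer for block 5, __find_next(c, 6) returns the buffer for
 * block 9 (the smallest cached block >= 6), and __find_next(c, 10) returns
 * NULL.  This "equal or next" behaviour is what lets
 * dm_bufio_forget_buffers() further down walk a sparsely cached range.
 */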
287
288 static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
289 {
290         struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
291         struct dm_buffer *found;
292
293         while (*new) {
294                 found = container_of(*new, struct dm_buffer, node);
295
296                 if (found->block == b->block) {
297                         BUG_ON(found != b);
298                         return;
299                 }
300
301                 parent = *new;
302                 new = b->block < found->block ?
303                         &found->node.rb_left : &found->node.rb_right;
304         }
305
306         rb_link_node(&b->node, parent, new);
307         rb_insert_color(&b->node, &c->buffer_tree);
308 }
309
310 static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
311 {
312         rb_erase(&b->node, &c->buffer_tree);
313 }
314
315 /*----------------------------------------------------------------*/
316
317 static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
318 {
319         unsigned char data_mode;
320         long diff;
321
322         static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
323                 &dm_bufio_allocated_kmem_cache,
324                 &dm_bufio_allocated_get_free_pages,
325                 &dm_bufio_allocated_vmalloc,
326         };
327
328         data_mode = b->data_mode;
329         diff = (long)b->c->block_size;
330         if (unlink)
331                 diff = -diff;
332
333         spin_lock(&global_spinlock);
334
335         *class_ptr[data_mode] += diff;
336
337         dm_bufio_current_allocated += diff;
338
339         if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
340                 dm_bufio_peak_allocated = dm_bufio_current_allocated;
341
342         b->accessed = 1;
343
344         if (!unlink) {
345                 list_add(&b->global_list, &global_queue);
346                 global_num++;
347                 if (dm_bufio_current_allocated > dm_bufio_cache_size)
348                         queue_work(dm_bufio_wq, &dm_bufio_replacement_work);
349         } else {
350                 list_del(&b->global_list);
351                 global_num--;
352         }
353
354         spin_unlock(&global_spinlock);
355 }
356
357 /*
358  * Change the number of clients and recalculate per-client limit.
359  */
360 static void __cache_size_refresh(void)
361 {
362         BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
363         BUG_ON(dm_bufio_client_count < 0);
364
365         dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size);
366
367         /*
368          * Use default if set to 0 and report the actual cache size used.
369          */
370         if (!dm_bufio_cache_size_latch) {
371                 (void)cmpxchg(&dm_bufio_cache_size, 0,
372                               dm_bufio_default_cache_size);
373                 dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
374         }
375 }
376
377 /*
378  * Allocating buffer data.
379  *
380  * Small buffers are allocated with kmem_cache, to use space optimally.
381  *
382  * For large buffers, we choose between get_free_pages and vmalloc.
383  * Each has advantages and disadvantages.
384  *
385  * __get_free_pages can randomly fail if the memory is fragmented.
386  * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
387  * as low as 128M) so using it for caching is not appropriate.
388  *
389  * If the allocation may fail we use __get_free_pages. Memory fragmentation
390  * won't have a fatal effect here, but it just causes flushes of some other
391  * buffers and more I/O will be performed. Don't use __get_free_pages if it
392  * always fails (i.e. order >= MAX_ORDER).
393  *
394  * If the allocation shouldn't fail we use __vmalloc. This is only for the
395  * initial reserve allocation, so there's no risk of wasting all vmalloc
396  * space.
397  */
398 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
399                                unsigned char *data_mode)
400 {
401         if (unlikely(c->slab_cache != NULL)) {
402                 *data_mode = DATA_MODE_SLAB;
403                 return kmem_cache_alloc(c->slab_cache, gfp_mask);
404         }
405
406         if (c->block_size <= KMALLOC_MAX_SIZE &&
407             gfp_mask & __GFP_NORETRY) {
408                 *data_mode = DATA_MODE_GET_FREE_PAGES;
409                 return (void *)__get_free_pages(gfp_mask,
410                                                 c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
411         }
412
413         *data_mode = DATA_MODE_VMALLOC;
414
415         /*
416          * __vmalloc allocates the data pages and auxiliary structures with
417          * gfp_flags that were specified, but pagetables are always allocated
418          * with GFP_KERNEL, no matter what was specified as gfp_mask.
419          *
420          * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that
421          * all allocations done by this process (including pagetables) are done
422          * as if GFP_NOIO was specified.
423          */
424         if (gfp_mask & __GFP_NORETRY) {
425                 unsigned noio_flag = memalloc_noio_save();
426                 void *ptr = __vmalloc(c->block_size, gfp_mask);
427
428                 memalloc_noio_restore(noio_flag);
429                 return ptr;
430         }
431
432         return __vmalloc(c->block_size, gfp_mask);
433 }
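
/*
 * Editor's worked example (not part of the original file): for a 16 KiB
 * block on a machine with 4 KiB pages, sectors_per_block_bits is
 * __ffs(16384) - SECTOR_SHIFT = 14 - 9 = 5, so the order passed to
 * __get_free_pages() above is 5 - (PAGE_SHIFT - SECTOR_SHIFT) =
 * 5 - (12 - 9) = 2, i.e. 2^2 = 4 contiguous pages = 16 KiB.  The helper
 * below (a hypothetical name, not used by this file) just restates that
 * arithmetic for power-of-two block sizes of at least PAGE_SIZE.
 */
static inline unsigned example_gfp_order(unsigned block_size)
{
        return (__ffs(block_size) - SECTOR_SHIFT) - (PAGE_SHIFT - SECTOR_SHIFT);
}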
434
435 /*
436  * Free buffer's data.
437  */
438 static void free_buffer_data(struct dm_bufio_client *c,
439                              void *data, unsigned char data_mode)
440 {
441         switch (data_mode) {
442         case DATA_MODE_SLAB:
443                 kmem_cache_free(c->slab_cache, data);
444                 break;
445
446         case DATA_MODE_GET_FREE_PAGES:
447                 free_pages((unsigned long)data,
448                            c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
449                 break;
450
451         case DATA_MODE_VMALLOC:
452                 vfree(data);
453                 break;
454
455         default:
456                 DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
457                        data_mode);
458                 BUG();
459         }
460 }
461
462 /*
463  * Allocate buffer and its data.
464  */
465 static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
466 {
467         struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);
468
469         if (!b)
470                 return NULL;
471
472         b->c = c;
473
474         b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
475         if (!b->data) {
476                 kmem_cache_free(c->slab_buffer, b);
477                 return NULL;
478         }
479
480 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
481         b->stack_len = 0;
482 #endif
483         return b;
484 }
485
486 /*
487  * Free buffer and its data.
488  */
489 static void free_buffer(struct dm_buffer *b)
490 {
491         struct dm_bufio_client *c = b->c;
492
493         free_buffer_data(c, b->data, b->data_mode);
494         kmem_cache_free(c->slab_buffer, b);
495 }
496
497 /*
498  * Link buffer to the buffer tree and clean or dirty queue.
499  */
500 static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
501 {
502         struct dm_bufio_client *c = b->c;
503
504         c->n_buffers[dirty]++;
505         b->block = block;
506         b->list_mode = dirty;
507         list_add(&b->lru_list, &c->lru[dirty]);
508         __insert(b->c, b);
509         b->last_accessed = jiffies;
510
511         adjust_total_allocated(b, false);
512 }
513
514 /*
515  * Unlink buffer from the buffer tree and dirty or clean queue.
516  */
517 static void __unlink_buffer(struct dm_buffer *b)
518 {
519         struct dm_bufio_client *c = b->c;
520
521         BUG_ON(!c->n_buffers[b->list_mode]);
522
523         c->n_buffers[b->list_mode]--;
524         __remove(b->c, b);
525         list_del(&b->lru_list);
526
527         adjust_total_allocated(b, true);
528 }
529
530 /*
531  * Place the buffer at the head of the dirty or clean LRU queue.
532  */
533 static void __relink_lru(struct dm_buffer *b, int dirty)
534 {
535         struct dm_bufio_client *c = b->c;
536
537         b->accessed = 1;
538
539         BUG_ON(!c->n_buffers[b->list_mode]);
540
541         c->n_buffers[b->list_mode]--;
542         c->n_buffers[dirty]++;
543         b->list_mode = dirty;
544         list_move(&b->lru_list, &c->lru[dirty]);
545         b->last_accessed = jiffies;
546 }
547
548 /*----------------------------------------------------------------
549  * Submit I/O on the buffer.
550  *
551  * Bio interface is faster but it has some problems:
552  *      the vector list is limited (increasing this limit increases
553  *      memory consumption per buffer, so it is not viable);
554  *
555  *      the memory must be direct-mapped, not vmalloced;
556  *
557  * If the buffer is not vmalloced and a bio with enough vector entries
558  * can be allocated, use the bio interface.
559  *
560  * If the buffer is vmalloced, or if allocating the bio or adding pages
561  * to it fails, use the dm-io layer to do the I/O.  The dm-io layer
562  * splits the I/O into multiple requests, avoiding the above
563  * shortcomings.
564  *--------------------------------------------------------------*/
565
566 /*
567  * dm-io completion routine. It just calls b->end_io, pretending that the
568  * request was handled directly with the bio interface.
569  */
570 static void dmio_complete(unsigned long error, void *context)
571 {
572         struct dm_buffer *b = context;
573
574         b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
575 }
576
577 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
578                      unsigned n_sectors, unsigned offset)
579 {
580         int r;
581         struct dm_io_request io_req = {
582                 .bi_op = rw,
583                 .bi_op_flags = 0,
584                 .notify.fn = dmio_complete,
585                 .notify.context = b,
586                 .client = b->c->dm_io,
587         };
588         struct dm_io_region region = {
589                 .bdev = b->c->bdev,
590                 .sector = sector,
591                 .count = n_sectors,
592         };
593
594         if (b->data_mode != DATA_MODE_VMALLOC) {
595                 io_req.mem.type = DM_IO_KMEM;
596                 io_req.mem.ptr.addr = (char *)b->data + offset;
597         } else {
598                 io_req.mem.type = DM_IO_VMA;
599                 io_req.mem.ptr.vma = (char *)b->data + offset;
600         }
601
602         r = dm_io(&io_req, 1, &region, NULL);
603         if (unlikely(r))
604                 b->end_io(b, errno_to_blk_status(r));
605 }
606
607 static void bio_complete(struct bio *bio)
608 {
609         struct dm_buffer *b = bio->bi_private;
610         blk_status_t status = bio->bi_status;
611         bio_put(bio);
612         b->end_io(b, status);
613 }
614
615 static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
616                     unsigned n_sectors, unsigned offset)
617 {
618         struct bio *bio;
619         char *ptr;
620         unsigned vec_size, len;
621
622         vec_size = b->c->block_size >> PAGE_SHIFT;
623         if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
624                 vec_size += 2;
625
626         bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
627         if (!bio) {
628 dmio:
629                 use_dmio(b, rw, sector, n_sectors, offset);
630                 return;
631         }
632
633         bio->bi_iter.bi_sector = sector;
634         bio_set_dev(bio, b->c->bdev);
635         bio_set_op_attrs(bio, rw, 0);
636         bio->bi_end_io = bio_complete;
637         bio->bi_private = b;
638
639         ptr = (char *)b->data + offset;
640         len = n_sectors << SECTOR_SHIFT;
641
642         do {
643                 unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
644                 if (!bio_add_page(bio, virt_to_page(ptr), this_step,
645                                   offset_in_page(ptr))) {
646                         bio_put(bio);
647                         goto dmio;
648                 }
649
650                 len -= this_step;
651                 ptr += this_step;
652         } while (len > 0);
653
654         submit_bio(bio);
655 }
656
657 static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block)
658 {
659         sector_t sector;
660
661         if (likely(c->sectors_per_block_bits >= 0))
662                 sector = block << c->sectors_per_block_bits;
663         else
664                 sector = block * (c->block_size >> SECTOR_SHIFT);
665         sector += c->start;
666
667         return sector;
668 }
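
/*
 * Editor's worked example (not part of the original file): with 4 KiB
 * blocks, sectors_per_block_bits is 3 (eight 512-byte sectors per block),
 * so block 100 maps to sector (100 << 3) + c->start = 800 + c->start.  For
 * a non-power-of-two block size such as 3 KiB the shift is impossible, so
 * the multiply path is taken: block 100 -> 100 * (3072 >> 9) = 600 sectors,
 * plus c->start.
 */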
669
670 static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
671 {
672         unsigned n_sectors;
673         sector_t sector;
674         unsigned offset, end;
675
676         b->end_io = end_io;
677
678         sector = block_to_sector(b->c, b->block);
679
680         if (rw != REQ_OP_WRITE) {
681                 n_sectors = b->c->block_size >> SECTOR_SHIFT;
682                 offset = 0;
683         } else {
684                 if (b->c->write_callback)
685                         b->c->write_callback(b);
686                 offset = b->write_start;
687                 end = b->write_end;
688                 offset &= -DM_BUFIO_WRITE_ALIGN;
689                 end += DM_BUFIO_WRITE_ALIGN - 1;
690                 end &= -DM_BUFIO_WRITE_ALIGN;
691                 if (unlikely(end > b->c->block_size))
692                         end = b->c->block_size;
693
694                 sector += offset >> SECTOR_SHIFT;
695                 n_sectors = (end - offset) >> SECTOR_SHIFT;
696         }
697
698         if (b->data_mode != DATA_MODE_VMALLOC)
699                 use_bio(b, rw, sector, n_sectors, offset);
700         else
701                 use_dmio(b, rw, sector, n_sectors, offset);
702 }
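
/*
 * Editor's worked example (not part of the original file): with an 8 KiB
 * block whose dirty byte range is [4200, 4300), the write is trimmed to
 * DM_BUFIO_WRITE_ALIGN boundaries as follows: offset = 4200 & -4096 = 4096
 * and end = (4300 + 4095) & -4096 = 8192, so the submitted write starts at
 * sector + (4096 >> SECTOR_SHIFT) = sector + 8 and covers
 * (8192 - 4096) >> SECTOR_SHIFT = 8 sectors, i.e. only the second 4 KiB
 * half of the block is written.
 */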
703
704 /*----------------------------------------------------------------
705  * Writing dirty buffers
706  *--------------------------------------------------------------*/
707
708 /*
709  * The endio routine for write.
710  *
711  * Set the error, clear B_WRITING bit and wake anyone who was waiting on
712  * it.
713  */
714 static void write_endio(struct dm_buffer *b, blk_status_t status)
715 {
716         b->write_error = status;
717         if (unlikely(status)) {
718                 struct dm_bufio_client *c = b->c;
719
720                 (void)cmpxchg(&c->async_write_error, 0,
721                                 blk_status_to_errno(status));
722         }
723
724         BUG_ON(!test_bit(B_WRITING, &b->state));
725
726         smp_mb__before_atomic();
727         clear_bit(B_WRITING, &b->state);
728         smp_mb__after_atomic();
729
730         wake_up_bit(&b->state, B_WRITING);
731 }
732
733 /*
734  * Initiate a write on a dirty buffer, but don't wait for it.
735  *
736  * - If the buffer is not dirty, exit.
737  * - If there is some previous write going on, wait for it to finish (we can't
738  *   have two writes on the same buffer simultaneously).
739  * - Submit our write and don't wait on it. We set B_WRITING indicating
740  *   that there is a write in progress.
741  */
742 static void __write_dirty_buffer(struct dm_buffer *b,
743                                  struct list_head *write_list)
744 {
745         if (!test_bit(B_DIRTY, &b->state))
746                 return;
747
748         clear_bit(B_DIRTY, &b->state);
749         wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
750
751         b->write_start = b->dirty_start;
752         b->write_end = b->dirty_end;
753
754         if (!write_list)
755                 submit_io(b, REQ_OP_WRITE, write_endio);
756         else
757                 list_add_tail(&b->write_list, write_list);
758 }
759
760 static void __flush_write_list(struct list_head *write_list)
761 {
762         struct blk_plug plug;
763         blk_start_plug(&plug);
764         while (!list_empty(write_list)) {
765                 struct dm_buffer *b =
766                         list_entry(write_list->next, struct dm_buffer, write_list);
767                 list_del(&b->write_list);
768                 submit_io(b, REQ_OP_WRITE, write_endio);
769                 cond_resched();
770         }
771         blk_finish_plug(&plug);
772 }
773
774 /*
775  * Wait until any activity on the buffer finishes.  Possibly write the
776  * buffer if it is dirty.  When this function finishes, there is no I/O
777  * running on the buffer and the buffer is not dirty.
778  */
779 static void __make_buffer_clean(struct dm_buffer *b)
780 {
781         BUG_ON(b->hold_count);
782
783         if (!b->state)  /* fast case */
784                 return;
785
786         wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
787         __write_dirty_buffer(b, NULL);
788         wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
789 }
790
791 /*
792  * Find some buffer that is not held by anybody, clean it, unlink it and
793  * return it.
794  */
795 static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
796 {
797         struct dm_buffer *b;
798
799         list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
800                 BUG_ON(test_bit(B_WRITING, &b->state));
801                 BUG_ON(test_bit(B_DIRTY, &b->state));
802
803                 if (!b->hold_count) {
804                         __make_buffer_clean(b);
805                         __unlink_buffer(b);
806                         return b;
807                 }
808                 cond_resched();
809         }
810
811         list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
812                 BUG_ON(test_bit(B_READING, &b->state));
813
814                 if (!b->hold_count) {
815                         __make_buffer_clean(b);
816                         __unlink_buffer(b);
817                         return b;
818                 }
819                 cond_resched();
820         }
821
822         return NULL;
823 }
824
825 /*
826  * Wait until some other thread frees a buffer or releases the hold count
827  * on some buffer.
828  *
829  * This function is entered with c->lock held, drops it and regains it
830  * before exiting.
831  */
832 static void __wait_for_free_buffer(struct dm_bufio_client *c)
833 {
834         DECLARE_WAITQUEUE(wait, current);
835
836         add_wait_queue(&c->free_buffer_wait, &wait);
837         set_current_state(TASK_UNINTERRUPTIBLE);
838         dm_bufio_unlock(c);
839
840         io_schedule();
841
842         remove_wait_queue(&c->free_buffer_wait, &wait);
843
844         dm_bufio_lock(c);
845 }
846
847 enum new_flag {
848         NF_FRESH = 0,
849         NF_READ = 1,
850         NF_GET = 2,
851         NF_PREFETCH = 3
852 };
853
854 /*
855  * Allocate a new buffer. If the allocation is not possible, wait until
856  * some other thread frees a buffer.
857  *
858  * May drop the lock and regain it.
859  */
860 static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
861 {
862         struct dm_buffer *b;
863         bool tried_noio_alloc = false;
864
865         /*
866          * dm-bufio is resistant to allocation failures (it just keeps
867  * one buffer reserved in case all the allocations fail).
868          * So set flags to not try too hard:
869          *      GFP_NOWAIT: don't wait; if we need to sleep we'll release our
870          *                  mutex and wait ourselves.
871          *      __GFP_NORETRY: don't retry and rather return failure
872          *      __GFP_NOMEMALLOC: don't use emergency reserves
873          *      __GFP_NOWARN: don't print a warning in case of failure
874          *
875          * For debugging, if we set the cache size to 1, no new buffers will
876          * be allocated.
877          */
878         while (1) {
879                 if (dm_bufio_cache_size_latch != 1) {
880                         b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
881                         if (b)
882                                 return b;
883                 }
884
885                 if (nf == NF_PREFETCH)
886                         return NULL;
887
888                 if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
889                         dm_bufio_unlock(c);
890                         b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
891                         dm_bufio_lock(c);
892                         if (b)
893                                 return b;
894                         tried_noio_alloc = true;
895                 }
896
897                 if (!list_empty(&c->reserved_buffers)) {
898                         b = list_entry(c->reserved_buffers.next,
899                                        struct dm_buffer, lru_list);
900                         list_del(&b->lru_list);
901                         c->need_reserved_buffers++;
902
903                         return b;
904                 }
905
906                 b = __get_unclaimed_buffer(c);
907                 if (b)
908                         return b;
909
910                 __wait_for_free_buffer(c);
911         }
912 }
913
914 static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
915 {
916         struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);
917
918         if (!b)
919                 return NULL;
920
921         if (c->alloc_callback)
922                 c->alloc_callback(b);
923
924         return b;
925 }
926
927 /*
928  * Free a buffer and wake other threads waiting for free buffers.
929  */
930 static void __free_buffer_wake(struct dm_buffer *b)
931 {
932         struct dm_bufio_client *c = b->c;
933
934         if (!c->need_reserved_buffers)
935                 free_buffer(b);
936         else {
937                 list_add(&b->lru_list, &c->reserved_buffers);
938                 c->need_reserved_buffers--;
939         }
940
941         wake_up(&c->free_buffer_wait);
942 }
943
944 static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
945                                         struct list_head *write_list)
946 {
947         struct dm_buffer *b, *tmp;
948
949         list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
950                 BUG_ON(test_bit(B_READING, &b->state));
951
952                 if (!test_bit(B_DIRTY, &b->state) &&
953                     !test_bit(B_WRITING, &b->state)) {
954                         __relink_lru(b, LIST_CLEAN);
955                         continue;
956                 }
957
958                 if (no_wait && test_bit(B_WRITING, &b->state))
959                         return;
960
961                 __write_dirty_buffer(b, write_list);
962                 cond_resched();
963         }
964 }
965
966 /*
967  * Check if we're over the watermark.
968  * If the number of dirty buffers exceeds DM_BUFIO_WRITEBACK_RATIO times
969  * the number of clean buffers, start writing them back asynchronously.
970  */
971 static void __check_watermark(struct dm_bufio_client *c,
972                               struct list_head *write_list)
973 {
974         if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO)
975                 __write_dirty_buffers_async(c, 1, write_list);
976 }
977
978 /*----------------------------------------------------------------
979  * Getting a buffer
980  *--------------------------------------------------------------*/
981
982 static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
983                                      enum new_flag nf, int *need_submit,
984                                      struct list_head *write_list)
985 {
986         struct dm_buffer *b, *new_b = NULL;
987
988         *need_submit = 0;
989
990         b = __find(c, block);
991         if (b)
992                 goto found_buffer;
993
994         if (nf == NF_GET)
995                 return NULL;
996
997         new_b = __alloc_buffer_wait(c, nf);
998         if (!new_b)
999                 return NULL;
1000
1001         /*
1002          * We've had a period where the mutex was unlocked, so need to
1003          * recheck the buffer tree.
1004          */
1005         b = __find(c, block);
1006         if (b) {
1007                 __free_buffer_wake(new_b);
1008                 goto found_buffer;
1009         }
1010
1011         __check_watermark(c, write_list);
1012
1013         b = new_b;
1014         b->hold_count = 1;
1015         b->read_error = 0;
1016         b->write_error = 0;
1017         __link_buffer(b, block, LIST_CLEAN);
1018
1019         if (nf == NF_FRESH) {
1020                 b->state = 0;
1021                 return b;
1022         }
1023
1024         b->state = 1 << B_READING;
1025         *need_submit = 1;
1026
1027         return b;
1028
1029 found_buffer:
1030         if (nf == NF_PREFETCH)
1031                 return NULL;
1032         /*
1033          * Note: it is essential that we don't wait for the buffer to be
1034          * read if dm_bufio_get function is used. Both dm_bufio_get and
1035          * dm_bufio_prefetch can be used in the driver request routine.
1036          * If the user called both dm_bufio_prefetch and dm_bufio_get on
1037          * the same buffer, it would deadlock if we waited.
1038          */
1039         if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
1040                 return NULL;
1041
1042         b->hold_count++;
1043         __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
1044                      test_bit(B_WRITING, &b->state));
1045         return b;
1046 }
1047
1048 /*
1049  * The endio routine for reading: set the error, clear the bit and wake up
1050  * anyone waiting on the buffer.
1051  */
1052 static void read_endio(struct dm_buffer *b, blk_status_t status)
1053 {
1054         b->read_error = status;
1055
1056         BUG_ON(!test_bit(B_READING, &b->state));
1057
1058         smp_mb__before_atomic();
1059         clear_bit(B_READING, &b->state);
1060         smp_mb__after_atomic();
1061
1062         wake_up_bit(&b->state, B_READING);
1063 }
1064
1065 /*
1066  * A common routine for dm_bufio_new and dm_bufio_read.  Operation of these
1067  * functions is similar except that dm_bufio_new doesn't read the
1068  * buffer from the disk (assuming that the caller overwrites all the data
1069  * and uses dm_bufio_mark_buffer_dirty to write new data back).
1070  */
1071 static void *new_read(struct dm_bufio_client *c, sector_t block,
1072                       enum new_flag nf, struct dm_buffer **bp)
1073 {
1074         int need_submit;
1075         struct dm_buffer *b;
1076
1077         LIST_HEAD(write_list);
1078
1079         dm_bufio_lock(c);
1080         b = __bufio_new(c, block, nf, &need_submit, &write_list);
1081 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1082         if (b && b->hold_count == 1)
1083                 buffer_record_stack(b);
1084 #endif
1085         dm_bufio_unlock(c);
1086
1087         __flush_write_list(&write_list);
1088
1089         if (!b)
1090                 return NULL;
1091
1092         if (need_submit)
1093                 submit_io(b, REQ_OP_READ, read_endio);
1094
1095         wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
1096
1097         if (b->read_error) {
1098                 int error = blk_status_to_errno(b->read_error);
1099
1100                 dm_bufio_release(b);
1101
1102                 return ERR_PTR(error);
1103         }
1104
1105         *bp = b;
1106
1107         return b->data;
1108 }
1109
1110 void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
1111                    struct dm_buffer **bp)
1112 {
1113         return new_read(c, block, NF_GET, bp);
1114 }
1115 EXPORT_SYMBOL_GPL(dm_bufio_get);
1116
1117 void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
1118                     struct dm_buffer **bp)
1119 {
1120         BUG_ON(dm_bufio_in_request());
1121
1122         return new_read(c, block, NF_READ, bp);
1123 }
1124 EXPORT_SYMBOL_GPL(dm_bufio_read);
1125
1126 void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
1127                    struct dm_buffer **bp)
1128 {
1129         BUG_ON(dm_bufio_in_request());
1130
1131         return new_read(c, block, NF_FRESH, bp);
1132 }
1133 EXPORT_SYMBOL_GPL(dm_bufio_new);
1134
1135 void dm_bufio_prefetch(struct dm_bufio_client *c,
1136                        sector_t block, unsigned n_blocks)
1137 {
1138         struct blk_plug plug;
1139
1140         LIST_HEAD(write_list);
1141
1142         BUG_ON(dm_bufio_in_request());
1143
1144         blk_start_plug(&plug);
1145         dm_bufio_lock(c);
1146
1147         for (; n_blocks--; block++) {
1148                 int need_submit;
1149                 struct dm_buffer *b;
1150                 b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
1151                                 &write_list);
1152                 if (unlikely(!list_empty(&write_list))) {
1153                         dm_bufio_unlock(c);
1154                         blk_finish_plug(&plug);
1155                         __flush_write_list(&write_list);
1156                         blk_start_plug(&plug);
1157                         dm_bufio_lock(c);
1158                 }
1159                 if (unlikely(b != NULL)) {
1160                         dm_bufio_unlock(c);
1161
1162                         if (need_submit)
1163                                 submit_io(b, REQ_OP_READ, read_endio);
1164                         dm_bufio_release(b);
1165
1166                         cond_resched();
1167
1168                         if (!n_blocks)
1169                                 goto flush_plug;
1170                         dm_bufio_lock(c);
1171                 }
1172         }
1173
1174         dm_bufio_unlock(c);
1175
1176 flush_plug:
1177         blk_finish_plug(&plug);
1178 }
1179 EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
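
/*
 * Editor's sketch (not part of the original file): a typical prefetch-then-
 * read pattern built only from the functions exported above.  The caller,
 * block range and error handling are hypothetical.
 */
static int example_read_run(struct dm_bufio_client *c, sector_t first,
                            unsigned count)
{
        unsigned i;

        /* start reads for the whole run without waiting for them */
        dm_bufio_prefetch(c, first, count);

        for (i = 0; i < count; i++) {
                struct dm_buffer *b;
                void *data = dm_bufio_read(c, first + i, &b);

                if (IS_ERR(data))
                        return PTR_ERR(data);   /* I/O error on this block */

                /* ... inspect data here ... */

                dm_bufio_release(b);            /* drop our hold count */
        }

        return 0;
}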
1180
1181 void dm_bufio_release(struct dm_buffer *b)
1182 {
1183         struct dm_bufio_client *c = b->c;
1184
1185         dm_bufio_lock(c);
1186
1187         BUG_ON(!b->hold_count);
1188
1189         b->hold_count--;
1190         if (!b->hold_count) {
1191                 wake_up(&c->free_buffer_wait);
1192
1193                 /*
1194                  * If there were errors on the buffer, and the buffer is not
1195                  * to be written, free the buffer. There is no point in caching
1196                  * invalid buffer.
1197                  */
1198                 if ((b->read_error || b->write_error) &&
1199                     !test_bit(B_READING, &b->state) &&
1200                     !test_bit(B_WRITING, &b->state) &&
1201                     !test_bit(B_DIRTY, &b->state)) {
1202                         __unlink_buffer(b);
1203                         __free_buffer_wake(b);
1204                 }
1205         }
1206
1207         dm_bufio_unlock(c);
1208 }
1209 EXPORT_SYMBOL_GPL(dm_bufio_release);
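
/*
 * Editor's sketch (not part of the original file): the basic client life
 * cycle, assuming a caller that already owns "bdev" and uses 4 KiB blocks.
 * dm_bufio_mark_buffer_dirty(), dm_bufio_write_dirty_buffers() and
 * dm_bufio_client_destroy() belong to the same interface; the last one is
 * defined beyond this excerpt.
 */
static int example_update_block(struct block_device *bdev, sector_t block)
{
        struct dm_bufio_client *c;
        struct dm_buffer *b;
        void *data;
        int r = 0;

        c = dm_bufio_client_create(bdev, 4096, 1, 0, NULL, NULL);
        if (IS_ERR(c))
                return PTR_ERR(c);

        data = dm_bufio_read(c, block, &b);     /* read and wait for the block */
        if (IS_ERR(data)) {
                r = PTR_ERR(data);
                goto out;
        }

        memset(data, 0, 64);                    /* modify the cached copy */
        dm_bufio_mark_buffer_dirty(b);          /* schedule it for writeback */
        dm_bufio_release(b);                    /* we no longer hold it */

        r = dm_bufio_write_dirty_buffers(c);    /* write it out, wait, flush */
out:
        dm_bufio_client_destroy(c);
        return r;
}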
1210
1211 void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
1212                                         unsigned start, unsigned end)
1213 {
1214         struct dm_bufio_client *c = b->c;
1215
1216         BUG_ON(start >= end);
1217         BUG_ON(end > b->c->block_size);
1218
1219         dm_bufio_lock(c);
1220
1221         BUG_ON(test_bit(B_READING, &b->state));
1222
1223         if (!test_and_set_bit(B_DIRTY, &b->state)) {
1224                 b->dirty_start = start;
1225                 b->dirty_end = end;
1226                 __relink_lru(b, LIST_DIRTY);
1227         } else {
1228                 if (start < b->dirty_start)
1229                         b->dirty_start = start;
1230                 if (end > b->dirty_end)
1231                         b->dirty_end = end;
1232         }
1233
1234         dm_bufio_unlock(c);
1235 }
1236 EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
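
/*
 * Editor's sketch (not part of the original file): patching 512 bytes in
 * the middle of a cached block.  Because only the touched byte range is
 * recorded, submit_io() above can trim the eventual write to
 * DM_BUFIO_WRITE_ALIGN granularity.  Assumes the client's block size is at
 * least 2 KiB; the function name and offsets are hypothetical.
 */
static int example_patch_block(struct dm_bufio_client *c, sector_t block,
                               const void *src)
{
        struct dm_buffer *b;
        void *data = dm_bufio_read(c, block, &b);

        if (IS_ERR(data))
                return PTR_ERR(data);

        memcpy((char *)data + 1024, src, 512);             /* bytes 1024..1535 */
        dm_bufio_mark_partial_buffer_dirty(b, 1024, 1536); /* [start, end)     */
        dm_bufio_release(b);

        return 0;
}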
1237
1238 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
1239 {
1240         dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
1241 }
1242 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
1243
1244 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
1245 {
1246         LIST_HEAD(write_list);
1247
1248         BUG_ON(dm_bufio_in_request());
1249
1250         dm_bufio_lock(c);
1251         __write_dirty_buffers_async(c, 0, &write_list);
1252         dm_bufio_unlock(c);
1253         __flush_write_list(&write_list);
1254 }
1255 EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
1256
1257 /*
1258  * For performance, it is essential that the buffers are written asynchronously
1259  * and simultaneously (so that the block layer can merge the writes) and then
1260  * waited upon.
1261  *
1262  * Finally, we flush the hardware disk cache.
1263  */
1264 int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
1265 {
1266         int a, f;
1267         unsigned long buffers_processed = 0;
1268         struct dm_buffer *b, *tmp;
1269
1270         LIST_HEAD(write_list);
1271
1272         dm_bufio_lock(c);
1273         __write_dirty_buffers_async(c, 0, &write_list);
1274         dm_bufio_unlock(c);
1275         __flush_write_list(&write_list);
1276         dm_bufio_lock(c);
1277
1278 again:
1279         list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
1280                 int dropped_lock = 0;
1281
1282                 if (buffers_processed < c->n_buffers[LIST_DIRTY])
1283                         buffers_processed++;
1284
1285                 BUG_ON(test_bit(B_READING, &b->state));
1286
1287                 if (test_bit(B_WRITING, &b->state)) {
1288                         if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
1289                                 dropped_lock = 1;
1290                                 b->hold_count++;
1291                                 dm_bufio_unlock(c);
1292                                 wait_on_bit_io(&b->state, B_WRITING,
1293                                                TASK_UNINTERRUPTIBLE);
1294                                 dm_bufio_lock(c);
1295                                 b->hold_count--;
1296                         } else
1297                                 wait_on_bit_io(&b->state, B_WRITING,
1298                                                TASK_UNINTERRUPTIBLE);
1299                 }
1300
1301                 if (!test_bit(B_DIRTY, &b->state) &&
1302                     !test_bit(B_WRITING, &b->state))
1303                         __relink_lru(b, LIST_CLEAN);
1304
1305                 cond_resched();
1306
1307                 /*
1308                  * If we dropped the lock, the list is no longer consistent,
1309                  * so we must restart the search.
1310                  *
1311                  * In the most common case, the buffer just processed is
1312                  * relinked to the clean list, so we won't loop scanning the
1313                  * same buffer again and again.
1314                  *
1315                  * This may livelock if there is another thread simultaneously
1316                  * dirtying buffers, so we count the number of buffers walked
1317                  * and if it exceeds the total number of buffers, it means that
1318                  * someone is doing some writes simultaneously with us.  In
1319                  * this case, stop, dropping the lock.
1320                  */
1321                 if (dropped_lock)
1322                         goto again;
1323         }
1324         wake_up(&c->free_buffer_wait);
1325         dm_bufio_unlock(c);
1326
1327         a = xchg(&c->async_write_error, 0);
1328         f = dm_bufio_issue_flush(c);
1329         if (a)
1330                 return a;
1331
1332         return f;
1333 }
1334 EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
1335
1336 /*
1337  * Use dm-io to send an empty barrier to flush the device.
1338  */
1339 int dm_bufio_issue_flush(struct dm_bufio_client *c)
1340 {
1341         struct dm_io_request io_req = {
1342                 .bi_op = REQ_OP_WRITE,
1343                 .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
1344                 .mem.type = DM_IO_KMEM,
1345                 .mem.ptr.addr = NULL,
1346                 .client = c->dm_io,
1347         };
1348         struct dm_io_region io_reg = {
1349                 .bdev = c->bdev,
1350                 .sector = 0,
1351                 .count = 0,
1352         };
1353
1354         BUG_ON(dm_bufio_in_request());
1355
1356         return dm_io(&io_req, 1, &io_reg, NULL);
1357 }
1358 EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
1359
1360 /*
1361  * Use dm-io to send a discard request to the device.
1362  */
1363 int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count)
1364 {
1365         struct dm_io_request io_req = {
1366                 .bi_op = REQ_OP_DISCARD,
1367                 .bi_op_flags = REQ_SYNC,
1368                 .mem.type = DM_IO_KMEM,
1369                 .mem.ptr.addr = NULL,
1370                 .client = c->dm_io,
1371         };
1372         struct dm_io_region io_reg = {
1373                 .bdev = c->bdev,
1374                 .sector = block_to_sector(c, block),
1375                 .count = block_to_sector(c, count),
1376         };
1377
1378         BUG_ON(dm_bufio_in_request());
1379
1380         return dm_io(&io_req, 1, &io_reg, NULL);
1381 }
1382 EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
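
/*
 * Editor's sketch (not part of the original file): pairing a discard with
 * cache invalidation so stale data for the discarded blocks is not served
 * from the cache afterwards.  dm_bufio_forget_buffers() is defined further
 * down in this file; the range and error handling are hypothetical.
 */
static int example_discard_range(struct dm_bufio_client *c,
                                 sector_t block, sector_t n_blocks)
{
        int r = dm_bufio_issue_discard(c, block, n_blocks);

        if (r)
                return r;

        /* drop any clean, unheld buffers that cached the discarded blocks */
        dm_bufio_forget_buffers(c, block, n_blocks);

        return 0;
}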
1383
1384 /*
1385  * We first delete any other buffer that may be at that new location.
1386  *
1387  * Then, we write the buffer to the original location if it was dirty.
1388  *
1389  * Then, if we are the only one who is holding the buffer, relink the buffer
1390  * in the buffer tree for the new location.
1391  *
1392  * If there was someone else holding the buffer, we write it to the new
1393  * location but not relink it, because that other user needs to have the buffer
1394  * at the same place.
1395  */
1396 void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
1397 {
1398         struct dm_bufio_client *c = b->c;
1399         struct dm_buffer *new;
1400
1401         BUG_ON(dm_bufio_in_request());
1402
1403         dm_bufio_lock(c);
1404
1405 retry:
1406         new = __find(c, new_block);
1407         if (new) {
1408                 if (new->hold_count) {
1409                         __wait_for_free_buffer(c);
1410                         goto retry;
1411                 }
1412
1413                 /*
1414                  * FIXME: Is there any point waiting for a write that's going
1415                  * to be overwritten in a bit?
1416                  */
1417                 __make_buffer_clean(new);
1418                 __unlink_buffer(new);
1419                 __free_buffer_wake(new);
1420         }
1421
1422         BUG_ON(!b->hold_count);
1423         BUG_ON(test_bit(B_READING, &b->state));
1424
1425         __write_dirty_buffer(b, NULL);
1426         if (b->hold_count == 1) {
1427                 wait_on_bit_io(&b->state, B_WRITING,
1428                                TASK_UNINTERRUPTIBLE);
1429                 set_bit(B_DIRTY, &b->state);
1430                 b->dirty_start = 0;
1431                 b->dirty_end = c->block_size;
1432                 __unlink_buffer(b);
1433                 __link_buffer(b, new_block, LIST_DIRTY);
1434         } else {
1435                 sector_t old_block;
1436                 wait_on_bit_lock_io(&b->state, B_WRITING,
1437                                     TASK_UNINTERRUPTIBLE);
1438                 /*
1439                  * Relink buffer to "new_block" so that write_callback
1440                  * sees "new_block" as a block number.
1441                  * After the write, link the buffer back to old_block.
1442                  * All this must be done in bufio lock, so that block number
1443                  * change isn't visible to other threads.
1444                  */
1445                 old_block = b->block;
1446                 __unlink_buffer(b);
1447                 __link_buffer(b, new_block, b->list_mode);
1448                 submit_io(b, REQ_OP_WRITE, write_endio);
1449                 wait_on_bit_io(&b->state, B_WRITING,
1450                                TASK_UNINTERRUPTIBLE);
1451                 __unlink_buffer(b);
1452                 __link_buffer(b, old_block, b->list_mode);
1453         }
1454
1455         dm_bufio_unlock(c);
1456         dm_bufio_release(b);
1457 }
1458 EXPORT_SYMBOL_GPL(dm_bufio_release_move);
1459
1460 static void forget_buffer_locked(struct dm_buffer *b)
1461 {
1462         if (likely(!b->hold_count) && likely(!b->state)) {
1463                 __unlink_buffer(b);
1464                 __free_buffer_wake(b);
1465         }
1466 }
1467
1468 /*
1469  * Free the given buffer.
1470  *
1471  * This is just a hint, if the buffer is in use or dirty, this function
1472  * does nothing.
1473  */
1474 void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
1475 {
1476         struct dm_buffer *b;
1477
1478         dm_bufio_lock(c);
1479
1480         b = __find(c, block);
1481         if (b)
1482                 forget_buffer_locked(b);
1483
1484         dm_bufio_unlock(c);
1485 }
1486 EXPORT_SYMBOL_GPL(dm_bufio_forget);
1487
1488 void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks)
1489 {
1490         struct dm_buffer *b;
1491         sector_t end_block = block + n_blocks;
1492
1493         while (block < end_block) {
1494                 dm_bufio_lock(c);
1495
1496                 b = __find_next(c, block);
1497                 if (b) {
1498                         block = b->block + 1;
1499                         forget_buffer_locked(b);
1500                 }
1501
1502                 dm_bufio_unlock(c);
1503
1504                 if (!b)
1505                         break;
1506         }
1507
1508 }
1509 EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers);
1510
1511 void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
1512 {
1513         c->minimum_buffers = n;
1514 }
1515 EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);
1516
1517 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
1518 {
1519         return c->block_size;
1520 }
1521 EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
1522
1523 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
1524 {
1525         sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
1526         if (likely(c->sectors_per_block_bits >= 0))
1527                 s >>= c->sectors_per_block_bits;
1528         else
1529                 sector_div(s, c->block_size >> SECTOR_SHIFT);
1530         return s;
1531 }
1532 EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
1533
1534 sector_t dm_bufio_get_block_number(struct dm_buffer *b)
1535 {
1536         return b->block;
1537 }
1538 EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);
1539
1540 void *dm_bufio_get_block_data(struct dm_buffer *b)
1541 {
1542         return b->data;
1543 }
1544 EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
1545
1546 void *dm_bufio_get_aux_data(struct dm_buffer *b)
1547 {
1548         return b + 1;
1549 }
1550 EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);
1551
1552 struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
1553 {
1554         return b->c;
1555 }
1556 EXPORT_SYMBOL_GPL(dm_bufio_get_client);
1557
1558 static void drop_buffers(struct dm_bufio_client *c)
1559 {
1560         struct dm_buffer *b;
1561         int i;
1562         bool warned = false;
1563
1564         BUG_ON(dm_bufio_in_request());
1565
1566         /*
1567          * An optimization so that the buffers are not written one-by-one.
1568          */
1569         dm_bufio_write_dirty_buffers_async(c);
1570
1571         dm_bufio_lock(c);
1572
1573         while ((b = __get_unclaimed_buffer(c)))
1574                 __free_buffer_wake(b);
1575
1576         for (i = 0; i < LIST_SIZE; i++)
1577                 list_for_each_entry(b, &c->lru[i], lru_list) {
1578                         WARN_ON(!warned);
1579                         warned = true;
1580                         DMERR("leaked buffer %llx, hold count %u, list %d",
1581                               (unsigned long long)b->block, b->hold_count, i);
1582 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1583                         stack_trace_print(b->stack_entries, b->stack_len, 1);
1584                         /* mark unclaimed to avoid BUG_ON below */
1585                         b->hold_count = 0;
1586 #endif
1587                 }
1588
1589 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1590         while ((b = __get_unclaimed_buffer(c)))
1591                 __free_buffer_wake(b);
1592 #endif
1593
1594         for (i = 0; i < LIST_SIZE; i++)
1595                 BUG_ON(!list_empty(&c->lru[i]));
1596
1597         dm_bufio_unlock(c);
1598 }
1599
1600 /*
1601  * We may not be able to evict this buffer if I/O is pending or the client
1602  * is still using it.  Caller is expected to know buffer is too old.
1603  *
1604  * And if GFP_NOFS is used, we must not do any I/O because we hold
1605  * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
1606  * rerouted to a different bufio client.
1607  */
1608 static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
1609 {
1610         if (!(gfp & __GFP_FS)) {
1611                 if (test_bit(B_READING, &b->state) ||
1612                     test_bit(B_WRITING, &b->state) ||
1613                     test_bit(B_DIRTY, &b->state))
1614                         return false;
1615         }
1616
1617         if (b->hold_count)
1618                 return false;
1619
1620         __make_buffer_clean(b);
1621         __unlink_buffer(b);
1622         __free_buffer_wake(b);
1623
1624         return true;
1625 }
1626
1627 static unsigned long get_retain_buffers(struct dm_bufio_client *c)
1628 {
1629         unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
1630         if (likely(c->sectors_per_block_bits >= 0))
1631                 retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
1632         else
1633                 retain_bytes /= c->block_size;
1634         return retain_bytes;
1635 }
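
/*
 * Editor's worked example (not part of the original file): with the default
 * retain limit of 256 KiB and 4 KiB blocks, sectors_per_block_bits is 3, so
 * retain_bytes >> (3 + SECTOR_SHIFT) = 262144 >> 12 = 64 buffers are kept
 * even when the shrinker or the ageing timer would otherwise evict them.
 */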
1636
1637 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
1638                             gfp_t gfp_mask)
1639 {
1640         int l;
1641         struct dm_buffer *b, *tmp;
1642         unsigned long freed = 0;
1643         unsigned long count = c->n_buffers[LIST_CLEAN] +
1644                               c->n_buffers[LIST_DIRTY];
1645         unsigned long retain_target = get_retain_buffers(c);
1646
1647         for (l = 0; l < LIST_SIZE; l++) {
1648                 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
1649                         if (__try_evict_buffer(b, gfp_mask))
1650                                 freed++;
1651                         if (!--nr_to_scan || ((count - freed) <= retain_target))
1652                                 return freed;
1653                         cond_resched();
1654                 }
1655         }
1656         return freed;
1657 }
1658
1659 static unsigned long
1660 dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1661 {
1662         struct dm_bufio_client *c;
1663         unsigned long freed;
1664
1665         c = container_of(shrink, struct dm_bufio_client, shrinker);
1666         if (sc->gfp_mask & __GFP_FS)
1667                 dm_bufio_lock(c);
1668         else if (!dm_bufio_trylock(c))
1669                 return SHRINK_STOP;
1670
1671         freed  = __scan(c, sc->nr_to_scan, sc->gfp_mask);
1672         dm_bufio_unlock(c);
1673         return freed;
1674 }
1675
1676 static unsigned long
1677 dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1678 {
1679         struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
1680         unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
1681                               READ_ONCE(c->n_buffers[LIST_DIRTY]);
1682         unsigned long retain_target = get_retain_buffers(c);
1683
1684         return (count < retain_target) ? 0 : (count - retain_target);
1685 }
1686
1687 /*
1688  * Create the buffering interface
1689  */
1690 struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
1691                                                unsigned reserved_buffers, unsigned aux_size,
1692                                                void (*alloc_callback)(struct dm_buffer *),
1693                                                void (*write_callback)(struct dm_buffer *))
1694 {
1695         int r;
1696         struct dm_bufio_client *c;
1697         unsigned i;
1698         char slab_name[27];
1699
1700         if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
1701                 DMERR("%s: block size not specified or is not a multiple of 512 bytes", __func__);
1702                 r = -EINVAL;
1703                 goto bad_client;
1704         }
1705
1706         c = kzalloc(sizeof(*c), GFP_KERNEL);
1707         if (!c) {
1708                 r = -ENOMEM;
1709                 goto bad_client;
1710         }
1711         c->buffer_tree = RB_ROOT;
1712
1713         c->bdev = bdev;
1714         c->block_size = block_size;
1715         if (is_power_of_2(block_size))
1716                 c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
1717         else
1718                 c->sectors_per_block_bits = -1;
1719
1720         c->alloc_callback = alloc_callback;
1721         c->write_callback = write_callback;
1722
1723         for (i = 0; i < LIST_SIZE; i++) {
1724                 INIT_LIST_HEAD(&c->lru[i]);
1725                 c->n_buffers[i] = 0;
1726         }
1727
1728         mutex_init(&c->lock);
1729         INIT_LIST_HEAD(&c->reserved_buffers);
1730         c->need_reserved_buffers = reserved_buffers;
1731
1732         dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);
1733
1734         init_waitqueue_head(&c->free_buffer_wait);
1735         c->async_write_error = 0;
1736
1737         c->dm_io = dm_io_client_create();
1738         if (IS_ERR(c->dm_io)) {
1739                 r = PTR_ERR(c->dm_io);
1740                 goto bad_dm_io;
1741         }
1742
1743         if (block_size <= KMALLOC_MAX_SIZE &&
1744             (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
1745                 unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE);
1746                 snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size);
1747                 c->slab_cache = kmem_cache_create(slab_name, block_size, align,
1748                                                   SLAB_RECLAIM_ACCOUNT, NULL);
1749                 if (!c->slab_cache) {
1750                         r = -ENOMEM;
1751                         goto bad;
1752                 }
1753         }
1754         if (aux_size)
1755                 snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
1756         else
1757                 snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
1758         c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
1759                                            0, SLAB_RECLAIM_ACCOUNT, NULL);
1760         if (!c->slab_buffer) {
1761                 r = -ENOMEM;
1762                 goto bad;
1763         }
1764
1765         while (c->need_reserved_buffers) {
1766                 struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
1767
1768                 if (!b) {
1769                         r = -ENOMEM;
1770                         goto bad;
1771                 }
1772                 __free_buffer_wake(b);
1773         }
1774
1775         c->shrinker.count_objects = dm_bufio_shrink_count;
1776         c->shrinker.scan_objects = dm_bufio_shrink_scan;
1777         c->shrinker.seeks = 1;
1778         c->shrinker.batch = 0;
1779         r = register_shrinker(&c->shrinker);
1780         if (r)
1781                 goto bad;
1782
1783         mutex_lock(&dm_bufio_clients_lock);
1784         dm_bufio_client_count++;
1785         list_add(&c->client_list, &dm_bufio_all_clients);
1786         __cache_size_refresh();
1787         mutex_unlock(&dm_bufio_clients_lock);
1788
1789         return c;
1790
1791 bad:
1792         while (!list_empty(&c->reserved_buffers)) {
1793                 struct dm_buffer *b = list_entry(c->reserved_buffers.next,
1794                                                  struct dm_buffer, lru_list);
1795                 list_del(&b->lru_list);
1796                 free_buffer(b);
1797         }
1798         kmem_cache_destroy(c->slab_cache);
1799         kmem_cache_destroy(c->slab_buffer);
1800         dm_io_client_destroy(c->dm_io);
1801 bad_dm_io:
1802         mutex_destroy(&c->lock);
1803         kfree(c);
1804 bad_client:
1805         return ERR_PTR(r);
1806 }
1807 EXPORT_SYMBOL_GPL(dm_bufio_client_create);
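
/*
 * Illustrative usage sketch (hedged; not part of this driver and
 * compiled out with #if 0): how a hypothetical caller might read and
 * dirty one block through the interface created above.  The 4 KiB
 * block size, the single reserved buffer and the "example_bdev"
 * parameter are assumptions made only for this example.
 */
#if 0
static int example_bufio_user(struct block_device *example_bdev)
{
	struct dm_bufio_client *c;
	struct dm_buffer *b;
	void *data;
	int r;

	/* 4 KiB blocks, 1 reserved buffer, no aux data, no callbacks. */
	c = dm_bufio_client_create(example_bdev, 4096, 1, 0, NULL, NULL);
	if (IS_ERR(c))
		return PTR_ERR(c);

	/* Read block 0; data points to block_size bytes of cached data. */
	data = dm_bufio_read(c, 0, &b);
	if (IS_ERR(data)) {
		r = PTR_ERR(data);
		goto out;
	}

	memset(data, 0, 16);			/* modify the cached block */
	dm_bufio_mark_buffer_dirty(b);		/* schedule it for writeback */
	dm_bufio_release(b);			/* drop the hold count */

	/* Flush dirty buffers before tearing the client down. */
	r = dm_bufio_write_dirty_buffers(c);
out:
	dm_bufio_client_destroy(c);		/* requires no held buffers */
	return r;
}
#endif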
1808
1809 /*
1810  * Free the buffering interface.
1811  * It is required that there are no references to any buffers.
1812  */
1813 void dm_bufio_client_destroy(struct dm_bufio_client *c)
1814 {
1815         unsigned i;
1816
1817         drop_buffers(c);
1818
1819         unregister_shrinker(&c->shrinker);
1820
1821         mutex_lock(&dm_bufio_clients_lock);
1822
1823         list_del(&c->client_list);
1824         dm_bufio_client_count--;
1825         __cache_size_refresh();
1826
1827         mutex_unlock(&dm_bufio_clients_lock);
1828
1829         BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree));
1830         BUG_ON(c->need_reserved_buffers);
1831
1832         while (!list_empty(&c->reserved_buffers)) {
1833                 struct dm_buffer *b = list_entry(c->reserved_buffers.next,
1834                                                  struct dm_buffer, lru_list);
1835                 list_del(&b->lru_list);
1836                 free_buffer(b);
1837         }
1838
1839         for (i = 0; i < LIST_SIZE; i++)
1840                 if (c->n_buffers[i])
1841                         DMERR("leaked buffer count %d: %lu", i, c->n_buffers[i]);
1842
1843         for (i = 0; i < LIST_SIZE; i++)
1844                 BUG_ON(c->n_buffers[i]);
1845
1846         kmem_cache_destroy(c->slab_cache);
1847         kmem_cache_destroy(c->slab_buffer);
1848         dm_io_client_destroy(c->dm_io);
1849         mutex_destroy(&c->lock);
1850         kfree(c);
1851 }
1852 EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
1853
1854 void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
1855 {
1856         c->start = start;
1857 }
1858 EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
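
/*
 * Illustrative example (not taken from a specific caller): a target
 * whose metadata device starts with an 8-sector superblock could call
 * dm_bufio_set_sector_offset(c, 8) so that buffer block 0 maps to the
 * first sector after that superblock.
 */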
1859
1860 static unsigned get_max_age_hz(void)
1861 {
1862         unsigned max_age = READ_ONCE(dm_bufio_max_age);
1863
1864         if (max_age > UINT_MAX / HZ)
1865                 max_age = UINT_MAX / HZ;
1866
1867         return max_age * HZ;
1868 }
1869
1870 static bool older_than(struct dm_buffer *b, unsigned long age_hz)
1871 {
1872         return time_after_eq(jiffies, b->last_accessed + age_hz);
1873 }
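
/*
 * Example for older_than() above: with the default dm_bufio_max_age of
 * 300 seconds, a buffer is considered old once its last_accessed stamp
 * is at least 300 * HZ jiffies in the past.
 */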
1874
1875 static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
1876 {
1877         struct dm_buffer *b, *tmp;
1878         unsigned long retain_target = get_retain_buffers(c);
1879         unsigned long count;
1880         LIST_HEAD(write_list);
1881
1882         dm_bufio_lock(c);
1883
1884         __check_watermark(c, &write_list);
1885         if (unlikely(!list_empty(&write_list))) {
1886                 dm_bufio_unlock(c);
1887                 __flush_write_list(&write_list);
1888                 dm_bufio_lock(c);
1889         }
1890
1891         count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
1892         list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
1893                 if (count <= retain_target)
1894                         break;
1895
1896                 if (!older_than(b, age_hz))
1897                         break;
1898
1899                 if (__try_evict_buffer(b, 0))
1900                         count--;
1901
1902                 cond_resched();
1903         }
1904
1905         dm_bufio_unlock(c);
1906 }
1907
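/*
 * Evict unused buffers from the global LRU until the total amount of
 * cached data drops below the low watermark (dm_bufio_cache_size minus
 * dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO).  Recently
 * accessed buffers get a second chance, and the per-client mutex is
 * taken before evicting, so this work item may sleep.
 */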
1908 static void do_global_cleanup(struct work_struct *w)
1909 {
1910         struct dm_bufio_client *locked_client = NULL;
1911         struct dm_bufio_client *current_client;
1912         struct dm_buffer *b;
1913         unsigned spinlock_hold_count;
1914         unsigned long threshold = dm_bufio_cache_size -
1915                 dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;
1916         unsigned long loops = global_num * 2;
1917
1918         mutex_lock(&dm_bufio_clients_lock);
1919
1920         while (1) {
1921                 cond_resched();
1922
1923                 spin_lock(&global_spinlock);
1924                 if (unlikely(dm_bufio_current_allocated <= threshold))
1925                         break;
1926
1927                 spinlock_hold_count = 0;
1928 get_next:
1929                 if (!loops--)
1930                         break;
1931                 if (unlikely(list_empty(&global_queue)))
1932                         break;
1933                 b = list_entry(global_queue.prev, struct dm_buffer, global_list);
1934
1935                 if (b->accessed) {
1936                         b->accessed = 0;
1937                         list_move(&b->global_list, &global_queue);
1938                         if (likely(++spinlock_hold_count < 16))
1939                                 goto get_next;
1940                         spin_unlock(&global_spinlock);
1941                         continue;
1942                 }
1943
1944                 current_client = b->c;
1945                 if (unlikely(current_client != locked_client)) {
1946                         if (locked_client)
1947                                 dm_bufio_unlock(locked_client);
1948
1949                         if (!dm_bufio_trylock(current_client)) {
1950                                 spin_unlock(&global_spinlock);
1951                                 dm_bufio_lock(current_client);
1952                                 locked_client = current_client;
1953                                 continue;
1954                         }
1955
1956                         locked_client = current_client;
1957                 }
1958
1959                 spin_unlock(&global_spinlock);
1960
1961                 if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) {
1962                         spin_lock(&global_spinlock);
1963                         list_move(&b->global_list, &global_queue);
1964                         spin_unlock(&global_spinlock);
1965                 }
1966         }
1967
1968         spin_unlock(&global_spinlock);
1969
1970         if (locked_client)
1971                 dm_bufio_unlock(locked_client);
1972
1973         mutex_unlock(&dm_bufio_clients_lock);
1974 }
1975
1976 static void cleanup_old_buffers(void)
1977 {
1978         unsigned long max_age_hz = get_max_age_hz();
1979         struct dm_bufio_client *c;
1980
1981         mutex_lock(&dm_bufio_clients_lock);
1982
1983         __cache_size_refresh();
1984
1985         list_for_each_entry(c, &dm_bufio_all_clients, client_list)
1986                 __evict_old_buffers(c, max_age_hz);
1987
1988         mutex_unlock(&dm_bufio_clients_lock);
1989 }
1990
1991 static void work_fn(struct work_struct *w)
1992 {
1993         cleanup_old_buffers();
1994
1995         queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
1996                            DM_BUFIO_WORK_TIMER_SECS * HZ);
1997 }
1998
1999 /*----------------------------------------------------------------
2000  * Module setup
2001  *--------------------------------------------------------------*/
2002
2003 /*
2004  * This is called only once for the whole dm_bufio module.
2005  * It initializes the memory limit.
2006  */
2007 static int __init dm_bufio_init(void)
2008 {
2009         __u64 mem;
2010
2011         dm_bufio_allocated_kmem_cache = 0;
2012         dm_bufio_allocated_get_free_pages = 0;
2013         dm_bufio_allocated_vmalloc = 0;
2014         dm_bufio_current_allocated = 0;
2015
2016         mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
2017                                DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
2018
2019         if (mem > ULONG_MAX)
2020                 mem = ULONG_MAX;
2021
2022 #ifdef CONFIG_MMU
2023         if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100))
2024                 mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100);
2025 #endif
2026
2027         dm_bufio_default_cache_size = mem;
2028
2029         mutex_lock(&dm_bufio_clients_lock);
2030         __cache_size_refresh();
2031         mutex_unlock(&dm_bufio_clients_lock);
2032
2033         dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
2034         if (!dm_bufio_wq)
2035                 return -ENOMEM;
2036
2037         INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
2038         INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
2039         queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
2040                            DM_BUFIO_WORK_TIMER_SECS * HZ);
2041
2042         return 0;
2043 }
2044
2045 /*
2046  * This is called once when unloading the dm_bufio module.
2047  */
2048 static void __exit dm_bufio_exit(void)
2049 {
2050         int bug = 0;
2051
2052         cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
2053         flush_workqueue(dm_bufio_wq);
2054         destroy_workqueue(dm_bufio_wq);
2055
2056         if (dm_bufio_client_count) {
2057                 DMCRIT("%s: dm_bufio_client_count leaked: %d",
2058                         __func__, dm_bufio_client_count);
2059                 bug = 1;
2060         }
2061
2062         if (dm_bufio_current_allocated) {
2063                 DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
2064                         __func__, dm_bufio_current_allocated);
2065                 bug = 1;
2066         }
2067
2068         if (dm_bufio_allocated_get_free_pages) {
2069                 DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
2070                        __func__, dm_bufio_allocated_get_free_pages);
2071                 bug = 1;
2072         }
2073
2074         if (dm_bufio_allocated_vmalloc) {
2075                 DMCRIT("%s: dm_bufio_allocated_vmalloc leaked: %lu",
2076                        __func__, dm_bufio_allocated_vmalloc);
2077                 bug = 1;
2078         }
2079
2080         BUG_ON(bug);
2081 }
2082
2083 module_init(dm_bufio_init)
2084 module_exit(dm_bufio_exit)
2085
2086 module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
2087 MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
2088
2089 module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
2090 MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
2091
2092 module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
2093 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
2094
2095 module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
2096 MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
2097
2098 module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
2099 MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
2100
2101 module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
2102 MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
2103
2104 module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
2105 MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
2106
2107 module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
2108 MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
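
/*
 * These parameters are exported under /sys/module/dm_bufio/parameters/;
 * the ones registered with S_IWUSR (e.g. max_cache_size_bytes and
 * retain_bytes) can be tuned at runtime.
 */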
2109
2110 MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
2111 MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
2112 MODULE_LICENSE("GPL");