drivers/block/zram/zram_drv.c
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the license that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/backing-dev.h>
29 #include <linux/string.h>
30 #include <linux/vmalloc.h>
31 #include <linux/err.h>
32 #include <linux/idr.h>
33 #include <linux/sysfs.h>
34 #include <linux/debugfs.h>
35 #include <linux/cpuhotplug.h>
36 #include <linux/part_stat.h>
37
38 #include "zram_drv.h"
39
40 static DEFINE_IDR(zram_index_idr);
41 /* allocation/removal of indexes in zram_index_idr must hold this mutex */
42 static DEFINE_MUTEX(zram_index_mutex);
43
44 static int zram_major;
45 static const char *default_compressor = "lzo-rle";
46
47 /* Module params (documentation at end) */
48 static unsigned int num_devices = 1;
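/*
 * Usage example (per Documentation/admin-guide/blockdev/zram.rst): the number
 * of pre-created devices can be chosen at module load time, e.g.
 *   modprobe zram num_devices=4
 * Additional devices can be added later via /sys/class/zram-control/hot_add.
 */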
49 /*
50  * Pages that compress to a size equal to or greater than this are stored
51  * uncompressed in memory.
52  */
53 static size_t huge_class_size;
54
55 static void zram_free_page(struct zram *zram, size_t index);
56 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
57                                 u32 index, int offset, struct bio *bio);
58
59
60 static int zram_slot_trylock(struct zram *zram, u32 index)
61 {
62         return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
63 }
64
65 static void zram_slot_lock(struct zram *zram, u32 index)
66 {
67         bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
68 }
69
70 static void zram_slot_unlock(struct zram *zram, u32 index)
71 {
72         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
73 }
74
75 static inline bool init_done(struct zram *zram)
76 {
77         return zram->disksize;
78 }
79
80 static inline struct zram *dev_to_zram(struct device *dev)
81 {
82         return (struct zram *)dev_to_disk(dev)->private_data;
83 }
84
85 static unsigned long zram_get_handle(struct zram *zram, u32 index)
86 {
87         return zram->table[index].handle;
88 }
89
90 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
91 {
92         zram->table[index].handle = handle;
93 }
94
95 /* flag operations require the table entry's bit_spin_lock() to be held */
96 static bool zram_test_flag(struct zram *zram, u32 index,
97                         enum zram_pageflags flag)
98 {
99         return zram->table[index].flags & BIT(flag);
100 }
101
102 static void zram_set_flag(struct zram *zram, u32 index,
103                         enum zram_pageflags flag)
104 {
105         zram->table[index].flags |= BIT(flag);
106 }
107
108 static void zram_clear_flag(struct zram *zram, u32 index,
109                         enum zram_pageflags flag)
110 {
111         zram->table[index].flags &= ~BIT(flag);
112 }
113
114 static inline void zram_set_element(struct zram *zram, u32 index,
115                         unsigned long element)
116 {
117         zram->table[index].element = element;
118 }
119
120 static unsigned long zram_get_element(struct zram *zram, u32 index)
121 {
122         return zram->table[index].element;
123 }
124
125 static size_t zram_get_obj_size(struct zram *zram, u32 index)
126 {
127         return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
128 }
129
130 static void zram_set_obj_size(struct zram *zram,
131                                         u32 index, size_t size)
132 {
133         unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
134
135         zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
136 }
137
138 static inline bool zram_allocated(struct zram *zram, u32 index)
139 {
140         return zram_get_obj_size(zram, index) ||
141                         zram_test_flag(zram, index, ZRAM_SAME) ||
142                         zram_test_flag(zram, index, ZRAM_WB);
143 }
144
145 #if PAGE_SIZE != 4096
146 static inline bool is_partial_io(struct bio_vec *bvec)
147 {
148         return bvec->bv_len != PAGE_SIZE;
149 }
150 #else
151 static inline bool is_partial_io(struct bio_vec *bvec)
152 {
153         return false;
154 }
155 #endif
156
157 /*
158  * Check if request is within bounds and aligned on zram logical blocks.
159  */
160 static inline bool valid_io_request(struct zram *zram,
161                 sector_t start, unsigned int size)
162 {
163         u64 end, bound;
164
165         /* unaligned request */
166         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
167                 return false;
168         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
169                 return false;
170
171         end = start + (size >> SECTOR_SHIFT);
172         bound = zram->disksize >> SECTOR_SHIFT;
173         /* out of range */
174         if (unlikely(start >= bound || end > bound || start > end))
175                 return false;
176
177         /* I/O request is valid */
178         return true;
179 }
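/*
 * For example, assuming the 4096-byte ZRAM_LOGICAL_BLOCK_SIZE from zram_drv.h
 * (so ZRAM_SECTOR_PER_LOGICAL_BLOCK == 8), a 512-byte request at sector 3 is
 * rejected as unaligned, while a 4096-byte request at sector 8 passes both
 * alignment checks and is then only bounds-checked against disksize.
 */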
180
181 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
182 {
183         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
184         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
185 }
186
187 static inline void update_used_max(struct zram *zram,
188                                         const unsigned long pages)
189 {
190         unsigned long old_max, cur_max;
191
192         old_max = atomic_long_read(&zram->stats.max_used_pages);
193
194         do {
195                 cur_max = old_max;
196                 if (pages > cur_max)
197                         old_max = atomic_long_cmpxchg(
198                                 &zram->stats.max_used_pages, cur_max, pages);
199         } while (old_max != cur_max);
200 }
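/*
 * update_used_max() above is a lock-free monotonic maximum: the cmpxchg loop
 * retries until either the stored maximum is already >= pages or the new
 * value is successfully swapped in, so concurrent updaters can never move
 * max_used_pages backwards.
 */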
201
202 static inline void zram_fill_page(void *ptr, unsigned long len,
203                                         unsigned long value)
204 {
205         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
206         memset_l(ptr, value, len / sizeof(unsigned long));
207 }
208
209 static bool page_same_filled(void *ptr, unsigned long *element)
210 {
211         unsigned long *page;
212         unsigned long val;
213         unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
214
215         page = (unsigned long *)ptr;
216         val = page[0];
217
218         if (val != page[last_pos])
219                 return false;
220
221         for (pos = 1; pos < last_pos; pos++) {
222                 if (val != page[pos])
223                         return false;
224         }
225
226         *element = val;
227
228         return true;
229 }
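/*
 * page_same_filled() detects pages whose every machine word is identical;
 * e.g. a zero-filled page yields *element == 0. Such pages are stored as just
 * the repeated value (ZRAM_SAME) instead of a compressed object. Comparing
 * the last word first cheaply rejects most non-uniform pages.
 */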
230
231 static ssize_t initstate_show(struct device *dev,
232                 struct device_attribute *attr, char *buf)
233 {
234         u32 val;
235         struct zram *zram = dev_to_zram(dev);
236
237         down_read(&zram->init_lock);
238         val = init_done(zram);
239         up_read(&zram->init_lock);
240
241         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
242 }
243
244 static ssize_t disksize_show(struct device *dev,
245                 struct device_attribute *attr, char *buf)
246 {
247         struct zram *zram = dev_to_zram(dev);
248
249         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
250 }
251
252 static ssize_t mem_limit_store(struct device *dev,
253                 struct device_attribute *attr, const char *buf, size_t len)
254 {
255         u64 limit;
256         char *tmp;
257         struct zram *zram = dev_to_zram(dev);
258
259         limit = memparse(buf, &tmp);
260         if (buf == tmp) /* no chars parsed, invalid input */
261                 return -EINVAL;
262
263         down_write(&zram->init_lock);
264         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
265         up_write(&zram->init_lock);
266
267         return len;
268 }
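/*
 * Usage example (per Documentation/admin-guide/blockdev/zram.rst): the limit
 * accepts memparse() suffixes, and writing 0 disables it:
 *   echo 512M > /sys/block/zram0/mem_limit
 *   echo 0 > /sys/block/zram0/mem_limit
 */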
269
270 static ssize_t mem_used_max_store(struct device *dev,
271                 struct device_attribute *attr, const char *buf, size_t len)
272 {
273         int err;
274         unsigned long val;
275         struct zram *zram = dev_to_zram(dev);
276
277         err = kstrtoul(buf, 10, &val);
278         if (err || val != 0)
279                 return -EINVAL;
280
281         down_read(&zram->init_lock);
282         if (init_done(zram)) {
283                 atomic_long_set(&zram->stats.max_used_pages,
284                                 zs_get_total_pages(zram->mem_pool));
285         }
286         up_read(&zram->init_lock);
287
288         return len;
289 }
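/*
 * Usage example: only "0" may be written here; it resets the high watermark
 * to the current zsmalloc pool size:
 *   echo 0 > /sys/block/zram0/mem_used_max
 */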
290
291 static ssize_t idle_store(struct device *dev,
292                 struct device_attribute *attr, const char *buf, size_t len)
293 {
294         struct zram *zram = dev_to_zram(dev);
295         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
296         int index;
297
298         if (!sysfs_streq(buf, "all"))
299                 return -EINVAL;
300
301         down_read(&zram->init_lock);
302         if (!init_done(zram)) {
303                 up_read(&zram->init_lock);
304                 return -EINVAL;
305         }
306
307         for (index = 0; index < nr_pages; index++) {
308                 /*
309                  * Do not mark a ZRAM_UNDER_WB slot as ZRAM_IDLE, to close
310                  * a race; see the comment in writeback_store().
311                  */
312                 zram_slot_lock(zram, index);
313                 if (zram_allocated(zram, index) &&
314                                 !zram_test_flag(zram, index, ZRAM_UNDER_WB))
315                         zram_set_flag(zram, index, ZRAM_IDLE);
316                 zram_slot_unlock(zram, index);
317         }
318
319         up_read(&zram->init_lock);
320
321         return len;
322 }
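/*
 * Usage example: this version accepts only "all", which marks every
 * allocated, not-under-writeback slot idle so that a later
 * "echo idle > writeback" can evict those pages:
 *   echo all > /sys/block/zram0/idle
 */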
323
324 #ifdef CONFIG_ZRAM_WRITEBACK
325 static ssize_t writeback_limit_enable_store(struct device *dev,
326                 struct device_attribute *attr, const char *buf, size_t len)
327 {
328         struct zram *zram = dev_to_zram(dev);
329         u64 val;
330         ssize_t ret = -EINVAL;
331
332         if (kstrtoull(buf, 10, &val))
333                 return ret;
334
335         down_read(&zram->init_lock);
336         spin_lock(&zram->wb_limit_lock);
337         zram->wb_limit_enable = val;
338         spin_unlock(&zram->wb_limit_lock);
339         up_read(&zram->init_lock);
340         ret = len;
341
342         return ret;
343 }
344
345 static ssize_t writeback_limit_enable_show(struct device *dev,
346                 struct device_attribute *attr, char *buf)
347 {
348         bool val;
349         struct zram *zram = dev_to_zram(dev);
350
351         down_read(&zram->init_lock);
352         spin_lock(&zram->wb_limit_lock);
353         val = zram->wb_limit_enable;
354         spin_unlock(&zram->wb_limit_lock);
355         up_read(&zram->init_lock);
356
357         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
358 }
359
360 static ssize_t writeback_limit_store(struct device *dev,
361                 struct device_attribute *attr, const char *buf, size_t len)
362 {
363         struct zram *zram = dev_to_zram(dev);
364         u64 val;
365         ssize_t ret = -EINVAL;
366
367         if (kstrtoull(buf, 10, &val))
368                 return ret;
369
370         down_read(&zram->init_lock);
371         spin_lock(&zram->wb_limit_lock);
372         zram->bd_wb_limit = val;
373         spin_unlock(&zram->wb_limit_lock);
374         up_read(&zram->init_lock);
375         ret = len;
376
377         return ret;
378 }
379
380 static ssize_t writeback_limit_show(struct device *dev,
381                 struct device_attribute *attr, char *buf)
382 {
383         u64 val;
384         struct zram *zram = dev_to_zram(dev);
385
386         down_read(&zram->init_lock);
387         spin_lock(&zram->wb_limit_lock);
388         val = zram->bd_wb_limit;
389         spin_unlock(&zram->wb_limit_lock);
390         up_read(&zram->init_lock);
391
392         return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
393 }
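/*
 * Usage example (per Documentation/admin-guide/blockdev/zram.rst): the budget
 * is kept in 4KiB units (see the 1UL << (PAGE_SHIFT - 12) accounting in
 * writeback_store), so limiting writeback to roughly 400MiB looks like:
 *   echo 1 > /sys/block/zram0/writeback_limit_enable
 *   echo $((400 * 1024 / 4)) > /sys/block/zram0/writeback_limit
 */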
394
395 static void reset_bdev(struct zram *zram)
396 {
397         struct block_device *bdev;
398
399         if (!zram->backing_dev)
400                 return;
401
402         bdev = zram->bdev;
403         if (zram->old_block_size)
404                 set_blocksize(bdev, zram->old_block_size);
405         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
406         /* hope filp_close flushes all outstanding IO */
407         filp_close(zram->backing_dev, NULL);
408         zram->backing_dev = NULL;
409         zram->old_block_size = 0;
410         zram->bdev = NULL;
411         zram->disk->queue->backing_dev_info->capabilities |=
412                                 BDI_CAP_SYNCHRONOUS_IO;
413         kvfree(zram->bitmap);
414         zram->bitmap = NULL;
415 }
416
417 static ssize_t backing_dev_show(struct device *dev,
418                 struct device_attribute *attr, char *buf)
419 {
420         struct file *file;
421         struct zram *zram = dev_to_zram(dev);
422         char *p;
423         ssize_t ret;
424
425         down_read(&zram->init_lock);
426         file = zram->backing_dev;
427         if (!file) {
428                 memcpy(buf, "none\n", 5);
429                 up_read(&zram->init_lock);
430                 return 5;
431         }
432
433         p = file_path(file, buf, PAGE_SIZE - 1);
434         if (IS_ERR(p)) {
435                 ret = PTR_ERR(p);
436                 goto out;
437         }
438
439         ret = strlen(p);
440         memmove(buf, p, ret);
441         buf[ret++] = '\n';
442 out:
443         up_read(&zram->init_lock);
444         return ret;
445 }
446
447 static ssize_t backing_dev_store(struct device *dev,
448                 struct device_attribute *attr, const char *buf, size_t len)
449 {
450         char *file_name;
451         size_t sz;
452         struct file *backing_dev = NULL;
453         struct inode *inode;
454         struct address_space *mapping;
455         unsigned int bitmap_sz, old_block_size = 0;
456         unsigned long nr_pages, *bitmap = NULL;
457         struct block_device *bdev = NULL;
458         int err;
459         struct zram *zram = dev_to_zram(dev);
460
461         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
462         if (!file_name)
463                 return -ENOMEM;
464
465         down_write(&zram->init_lock);
466         if (init_done(zram)) {
467                 pr_info("Can't setup backing device for initialized device\n");
468                 err = -EBUSY;
469                 goto out;
470         }
471
472         strlcpy(file_name, buf, PATH_MAX);
473         /* ignore trailing newline */
474         sz = strlen(file_name);
475         if (sz > 0 && file_name[sz - 1] == '\n')
476                 file_name[sz - 1] = 0x00;
477
478         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
479         if (IS_ERR(backing_dev)) {
480                 err = PTR_ERR(backing_dev);
481                 backing_dev = NULL;
482                 goto out;
483         }
484
485         mapping = backing_dev->f_mapping;
486         inode = mapping->host;
487
488         /* Only block devices are supported at the moment */
489         if (!S_ISBLK(inode->i_mode)) {
490                 err = -ENOTBLK;
491                 goto out;
492         }
493
494         bdev = bdgrab(I_BDEV(inode));
495         err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
496         if (err < 0) {
497                 bdev = NULL;
498                 goto out;
499         }
500
501         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
502         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
503         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
504         if (!bitmap) {
505                 err = -ENOMEM;
506                 goto out;
507         }
508
509         old_block_size = block_size(bdev);
510         err = set_blocksize(bdev, PAGE_SIZE);
511         if (err)
512                 goto out;
513
514         reset_bdev(zram);
515
516         zram->old_block_size = old_block_size;
517         zram->bdev = bdev;
518         zram->backing_dev = backing_dev;
519         zram->bitmap = bitmap;
520         zram->nr_pages = nr_pages;
521         /*
522          * With the writeback feature, zram does asynchronous IO, so it is no
523          * longer a synchronous device; clear the synchronous io flag. Otherwise,
524          * an upper layer (e.g., swap) could wait for IO completion rather than
525          * submitting and returning, which would make the system sluggish.
526          * Furthermore, when an IO function (e.g., swap_readpage) returns, the
527          * upper layer assumes the IO is done and may free the page while the
528          * IO is actually still in flight, which would cause a use-after-free
529          * once the IO really completes.
530          */
531         zram->disk->queue->backing_dev_info->capabilities &=
532                         ~BDI_CAP_SYNCHRONOUS_IO;
533         up_write(&zram->init_lock);
534
535         pr_info("setup backing device %s\n", file_name);
536         kfree(file_name);
537
538         return len;
539 out:
540         if (bitmap)
541                 kvfree(bitmap);
542
543         if (bdev)
544                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
545
546         if (backing_dev)
547                 filp_close(backing_dev, NULL);
548
549         up_write(&zram->init_lock);
550
551         kfree(file_name);
552
553         return err;
554 }
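/*
 * Usage example: the backing device must be a block device and must be
 * configured before disksize is set, e.g.
 *   echo /dev/sda5 > /sys/block/zram0/backing_dev
 *   echo 1G > /sys/block/zram0/disksize
 * (/dev/sda5 is just an illustrative partition name.)
 */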
555
556 static unsigned long alloc_block_bdev(struct zram *zram)
557 {
558         unsigned long blk_idx = 1;
559 retry:
560         /* skip bit 0 so a valid block index is never confused with handle == 0 */
561         blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
562         if (blk_idx == zram->nr_pages)
563                 return 0;
564
565         if (test_and_set_bit(blk_idx, zram->bitmap))
566                 goto retry;
567
568         atomic64_inc(&zram->stats.bd_count);
569         return blk_idx;
570 }
571
572 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
573 {
574         int was_set;
575
576         was_set = test_and_clear_bit(blk_idx, zram->bitmap);
577         WARN_ON_ONCE(!was_set);
578         atomic64_dec(&zram->stats.bd_count);
579 }
580
581 static void zram_page_end_io(struct bio *bio)
582 {
583         struct page *page = bio_first_page_all(bio);
584
585         page_endio(page, op_is_write(bio_op(bio)),
586                         blk_status_to_errno(bio->bi_status));
587         bio_put(bio);
588 }
589
590 /*
591  * Returns 1 if the submission is successful.
592  */
593 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
594                         unsigned long entry, struct bio *parent)
595 {
596         struct bio *bio;
597
598         bio = bio_alloc(GFP_ATOMIC, 1);
599         if (!bio)
600                 return -ENOMEM;
601
602         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
603         bio_set_dev(bio, zram->bdev);
604         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
605                 bio_put(bio);
606                 return -EIO;
607         }
608
609         if (!parent) {
610                 bio->bi_opf = REQ_OP_READ;
611                 bio->bi_end_io = zram_page_end_io;
612         } else {
613                 bio->bi_opf = parent->bi_opf;
614                 bio_chain(bio, parent);
615         }
616
617         submit_bio(bio);
618         return 1;
619 }
620
621 #define HUGE_WRITEBACK 1
622 #define IDLE_WRITEBACK 2
623
624 static ssize_t writeback_store(struct device *dev,
625                 struct device_attribute *attr, const char *buf, size_t len)
626 {
627         struct zram *zram = dev_to_zram(dev);
628         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
629         unsigned long index;
630         struct bio bio;
631         struct bio_vec bio_vec;
632         struct page *page;
633         ssize_t ret = len;
634         int mode;
635         unsigned long blk_idx = 0;
636
637         if (sysfs_streq(buf, "idle"))
638                 mode = IDLE_WRITEBACK;
639         else if (sysfs_streq(buf, "huge"))
640                 mode = HUGE_WRITEBACK;
641         else
642                 return -EINVAL;
643
644         down_read(&zram->init_lock);
645         if (!init_done(zram)) {
646                 ret = -EINVAL;
647                 goto release_init_lock;
648         }
649
650         if (!zram->backing_dev) {
651                 ret = -ENODEV;
652                 goto release_init_lock;
653         }
654
655         page = alloc_page(GFP_KERNEL);
656         if (!page) {
657                 ret = -ENOMEM;
658                 goto release_init_lock;
659         }
660
661         for (index = 0; index < nr_pages; index++) {
662                 struct bio_vec bvec;
663
664                 bvec.bv_page = page;
665                 bvec.bv_len = PAGE_SIZE;
666                 bvec.bv_offset = 0;
667
668                 spin_lock(&zram->wb_limit_lock);
669                 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
670                         spin_unlock(&zram->wb_limit_lock);
671                         ret = -EIO;
672                         break;
673                 }
674                 spin_unlock(&zram->wb_limit_lock);
675
676                 if (!blk_idx) {
677                         blk_idx = alloc_block_bdev(zram);
678                         if (!blk_idx) {
679                                 ret = -ENOSPC;
680                                 break;
681                         }
682                 }
683
684                 zram_slot_lock(zram, index);
685                 if (!zram_allocated(zram, index))
686                         goto next;
687
688                 if (zram_test_flag(zram, index, ZRAM_WB) ||
689                                 zram_test_flag(zram, index, ZRAM_SAME) ||
690                                 zram_test_flag(zram, index, ZRAM_UNDER_WB))
691                         goto next;
692
693                 if (mode == IDLE_WRITEBACK &&
694                           !zram_test_flag(zram, index, ZRAM_IDLE))
695                         goto next;
696                 if (mode == HUGE_WRITEBACK &&
697                           !zram_test_flag(zram, index, ZRAM_HUGE))
698                         goto next;
699                 /*
700                  * Clearing ZRAM_UNDER_WB is the caller's duty;
701                  * IOW, zram_free_page() never clears it.
702                  */
703                 zram_set_flag(zram, index, ZRAM_UNDER_WB);
704                 /* Need for hugepage writeback racing */
705                 zram_set_flag(zram, index, ZRAM_IDLE);
706                 zram_slot_unlock(zram, index);
707                 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
708                         zram_slot_lock(zram, index);
709                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
710                         zram_clear_flag(zram, index, ZRAM_IDLE);
711                         zram_slot_unlock(zram, index);
712                         continue;
713                 }
714
715                 bio_init(&bio, &bio_vec, 1);
716                 bio_set_dev(&bio, zram->bdev);
717                 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
718                 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
719
720                 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
721                                 bvec.bv_offset);
722                 /*
723                  * XXX: Single-page IO is inefficient for writes, but it
724                  * is an acceptable starting point.
725                  */
726                 ret = submit_bio_wait(&bio);
727                 if (ret) {
728                         zram_slot_lock(zram, index);
729                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
730                         zram_clear_flag(zram, index, ZRAM_IDLE);
731                         zram_slot_unlock(zram, index);
732                         continue;
733                 }
734
735                 atomic64_inc(&zram->stats.bd_writes);
736                 /*
737                  * We released zram_slot_lock, so we must check whether the
738                  * slot has changed. If the slot was freed, zram_allocated
739                  * catches that easily.
740                  * A subtler case is the slot being freed, reallocated and
741                  * marked ZRAM_IDLE again. To close that race, idle_store does
742                  * not mark a slot ZRAM_IDLE once it finds it ZRAM_UNDER_WB;
743                  * thus checking the ZRAM_IDLE bit here is sufficient.
744                  */
745                 zram_slot_lock(zram, index);
746                 if (!zram_allocated(zram, index) ||
747                           !zram_test_flag(zram, index, ZRAM_IDLE)) {
748                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
749                         zram_clear_flag(zram, index, ZRAM_IDLE);
750                         goto next;
751                 }
752
753                 zram_free_page(zram, index);
754                 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
755                 zram_set_flag(zram, index, ZRAM_WB);
756                 zram_set_element(zram, index, blk_idx);
757                 blk_idx = 0;
758                 atomic64_inc(&zram->stats.pages_stored);
759                 spin_lock(&zram->wb_limit_lock);
760                 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
761                         zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
762                 spin_unlock(&zram->wb_limit_lock);
763 next:
764                 zram_slot_unlock(zram, index);
765         }
766
767         if (blk_idx)
768                 free_block_bdev(zram, blk_idx);
769         __free_page(page);
770 release_init_lock:
771         up_read(&zram->init_lock);
772
773         return ret;
774 }
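/*
 * Usage example: with a backing device configured, idle or huge
 * (incompressible) pages can be evicted to it:
 *   echo all > /sys/block/zram0/idle
 *   echo idle > /sys/block/zram0/writeback
 *   echo huge > /sys/block/zram0/writeback
 */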
775
776 struct zram_work {
777         struct work_struct work;
778         struct zram *zram;
779         unsigned long entry;
780         struct bio *bio;
781         struct bio_vec bvec;
782 };
783
784 #if PAGE_SIZE != 4096
785 static void zram_sync_read(struct work_struct *work)
786 {
787         struct zram_work *zw = container_of(work, struct zram_work, work);
788         struct zram *zram = zw->zram;
789         unsigned long entry = zw->entry;
790         struct bio *bio = zw->bio;
791
792         read_from_bdev_async(zram, &zw->bvec, entry, bio);
793 }
794
795 /*
796  * The block layer wants one ->submit_bio to be active at a time, so chaining
797  * IO to the parent IO in the same context would deadlock. To avoid that,
798  * use a worker thread context.
799  */
800 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
801                                 unsigned long entry, struct bio *bio)
802 {
803         struct zram_work work;
804
805         work.bvec = *bvec;
806         work.zram = zram;
807         work.entry = entry;
808         work.bio = bio;
809
810         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
811         queue_work(system_unbound_wq, &work.work);
812         flush_work(&work.work);
813         destroy_work_on_stack(&work.work);
814
815         return 1;
816 }
817 #else
818 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
819                                 unsigned long entry, struct bio *bio)
820 {
821         WARN_ON(1);
822         return -EIO;
823 }
824 #endif
825
826 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
827                         unsigned long entry, struct bio *parent, bool sync)
828 {
829         atomic64_inc(&zram->stats.bd_reads);
830         if (sync)
831                 return read_from_bdev_sync(zram, bvec, entry, parent);
832         else
833                 return read_from_bdev_async(zram, bvec, entry, parent);
834 }
835 #else
836 static inline void reset_bdev(struct zram *zram) {}
837 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
838                         unsigned long entry, struct bio *parent, bool sync)
839 {
840         return -EIO;
841 }
842
843 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}
844 #endif
845
846 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
847
848 static struct dentry *zram_debugfs_root;
849
850 static void zram_debugfs_create(void)
851 {
852         zram_debugfs_root = debugfs_create_dir("zram", NULL);
853 }
854
855 static void zram_debugfs_destroy(void)
856 {
857         debugfs_remove_recursive(zram_debugfs_root);
858 }
859
860 static void zram_accessed(struct zram *zram, u32 index)
861 {
862         zram_clear_flag(zram, index, ZRAM_IDLE);
863         zram->table[index].ac_time = ktime_get_boottime();
864 }
865
866 static ssize_t read_block_state(struct file *file, char __user *buf,
867                                 size_t count, loff_t *ppos)
868 {
869         char *kbuf;
870         ssize_t index, written = 0;
871         struct zram *zram = file->private_data;
872         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
873         struct timespec64 ts;
874
875         kbuf = kvmalloc(count, GFP_KERNEL);
876         if (!kbuf)
877                 return -ENOMEM;
878
879         down_read(&zram->init_lock);
880         if (!init_done(zram)) {
881                 up_read(&zram->init_lock);
882                 kvfree(kbuf);
883                 return -EINVAL;
884         }
885
886         for (index = *ppos; index < nr_pages; index++) {
887                 int copied;
888
889                 zram_slot_lock(zram, index);
890                 if (!zram_allocated(zram, index))
891                         goto next;
892
893                 ts = ktime_to_timespec64(zram->table[index].ac_time);
894                 copied = snprintf(kbuf + written, count,
895                         "%12zd %12lld.%06lu %c%c%c%c\n",
896                         index, (s64)ts.tv_sec,
897                         ts.tv_nsec / NSEC_PER_USEC,
898                         zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
899                         zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
900                         zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
901                         zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
902
903                 if (count < copied) {
904                         zram_slot_unlock(zram, index);
905                         break;
906                 }
907                 written += copied;
908                 count -= copied;
909 next:
910                 zram_slot_unlock(zram, index);
911                 *ppos += 1;
912         }
913
914         up_read(&zram->init_lock);
915         if (copy_to_user(buf, kbuf, written))
916                 written = -EFAULT;
917         kvfree(kbuf);
918
919         return written;
920 }
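/*
 * Each line of the block_state output is "<index> <access time> <flags>",
 * where the flag characters are s(ame), w(ritten back), h(uge) and i(dle);
 * e.g. (sample from Documentation/admin-guide/blockdev/zram.rst):
 *   300    75.033841 .wh.
 */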
921
922 static const struct file_operations proc_zram_block_state_op = {
923         .open = simple_open,
924         .read = read_block_state,
925         .llseek = default_llseek,
926 };
927
928 static void zram_debugfs_register(struct zram *zram)
929 {
930         if (!zram_debugfs_root)
931                 return;
932
933         zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
934                                                 zram_debugfs_root);
935         debugfs_create_file("block_state", 0400, zram->debugfs_dir,
936                                 zram, &proc_zram_block_state_op);
937 }
938
939 static void zram_debugfs_unregister(struct zram *zram)
940 {
941         debugfs_remove_recursive(zram->debugfs_dir);
942 }
943 #else
944 static void zram_debugfs_create(void) {}
945 static void zram_debugfs_destroy(void) {}
946 static void zram_accessed(struct zram *zram, u32 index)
947 {
948         zram_clear_flag(zram, index, ZRAM_IDLE);
949 }
950 static void zram_debugfs_register(struct zram *zram) {}
951 static void zram_debugfs_unregister(struct zram *zram) {}
952 #endif
953
954 /*
955  * We switched to per-cpu streams and this attr is not needed anymore.
956  * However, we will keep it around for some time, because:
957  * a) we may revert per-cpu streams in the future
958  * b) it's visible to user space and we need to follow our 2 years
959  *    retirement rule; but we already have a number of 'soon to be
960  *    altered' attrs, so max_comp_streams needs to wait for the next
961  *    layoff cycle.
962  */
963 static ssize_t max_comp_streams_show(struct device *dev,
964                 struct device_attribute *attr, char *buf)
965 {
966         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
967 }
968
969 static ssize_t max_comp_streams_store(struct device *dev,
970                 struct device_attribute *attr, const char *buf, size_t len)
971 {
972         return len;
973 }
974
975 static ssize_t comp_algorithm_show(struct device *dev,
976                 struct device_attribute *attr, char *buf)
977 {
978         size_t sz;
979         struct zram *zram = dev_to_zram(dev);
980
981         down_read(&zram->init_lock);
982         sz = zcomp_available_show(zram->compressor, buf);
983         up_read(&zram->init_lock);
984
985         return sz;
986 }
987
988 static ssize_t comp_algorithm_store(struct device *dev,
989                 struct device_attribute *attr, const char *buf, size_t len)
990 {
991         struct zram *zram = dev_to_zram(dev);
992         char compressor[ARRAY_SIZE(zram->compressor)];
993         size_t sz;
994
995         strlcpy(compressor, buf, sizeof(compressor));
996         /* ignore trailing newline */
997         sz = strlen(compressor);
998         if (sz > 0 && compressor[sz - 1] == '\n')
999                 compressor[sz - 1] = 0x00;
1000
1001         if (!zcomp_available_algorithm(compressor))
1002                 return -EINVAL;
1003
1004         down_write(&zram->init_lock);
1005         if (init_done(zram)) {
1006                 up_write(&zram->init_lock);
1007                 pr_info("Can't change algorithm for initialized device\n");
1008                 return -EBUSY;
1009         }
1010
1011         strcpy(zram->compressor, compressor);
1012         up_write(&zram->init_lock);
1013         return len;
1014 }
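/*
 * Usage example: reading comp_algorithm lists the compiled-in algorithms
 * with the active one in brackets; writing selects one, and must happen
 * before disksize is set:
 *   cat /sys/block/zram0/comp_algorithm
 *   echo lz4 > /sys/block/zram0/comp_algorithm
 */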
1015
1016 static ssize_t compact_store(struct device *dev,
1017                 struct device_attribute *attr, const char *buf, size_t len)
1018 {
1019         struct zram *zram = dev_to_zram(dev);
1020
1021         down_read(&zram->init_lock);
1022         if (!init_done(zram)) {
1023                 up_read(&zram->init_lock);
1024                 return -EINVAL;
1025         }
1026
1027         zs_compact(zram->mem_pool);
1028         up_read(&zram->init_lock);
1029
1030         return len;
1031 }
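/*
 * Usage example: compact_store ignores the written value, so any write
 * triggers zsmalloc pool compaction:
 *   echo 1 > /sys/block/zram0/compact
 */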
1032
1033 static ssize_t io_stat_show(struct device *dev,
1034                 struct device_attribute *attr, char *buf)
1035 {
1036         struct zram *zram = dev_to_zram(dev);
1037         ssize_t ret;
1038
1039         down_read(&zram->init_lock);
1040         ret = scnprintf(buf, PAGE_SIZE,
1041                         "%8llu %8llu %8llu %8llu\n",
1042                         (u64)atomic64_read(&zram->stats.failed_reads),
1043                         (u64)atomic64_read(&zram->stats.failed_writes),
1044                         (u64)atomic64_read(&zram->stats.invalid_io),
1045                         (u64)atomic64_read(&zram->stats.notify_free));
1046         up_read(&zram->init_lock);
1047
1048         return ret;
1049 }
1050
1051 static ssize_t mm_stat_show(struct device *dev,
1052                 struct device_attribute *attr, char *buf)
1053 {
1054         struct zram *zram = dev_to_zram(dev);
1055         struct zs_pool_stats pool_stats;
1056         u64 orig_size, mem_used = 0;
1057         long max_used;
1058         ssize_t ret;
1059
1060         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1061
1062         down_read(&zram->init_lock);
1063         if (init_done(zram)) {
1064                 mem_used = zs_get_total_pages(zram->mem_pool);
1065                 zs_pool_stats(zram->mem_pool, &pool_stats);
1066         }
1067
1068         orig_size = atomic64_read(&zram->stats.pages_stored);
1069         max_used = atomic_long_read(&zram->stats.max_used_pages);
1070
1071         ret = scnprintf(buf, PAGE_SIZE,
1072                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
1073                         orig_size << PAGE_SHIFT,
1074                         (u64)atomic64_read(&zram->stats.compr_data_size),
1075                         mem_used << PAGE_SHIFT,
1076                         zram->limit_pages << PAGE_SHIFT,
1077                         max_used << PAGE_SHIFT,
1078                         (u64)atomic64_read(&zram->stats.same_pages),
1079                         pool_stats.pages_compacted,
1080                         (u64)atomic64_read(&zram->stats.huge_pages));
1081         up_read(&zram->init_lock);
1082
1083         return ret;
1084 }
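/*
 * The mm_stat columns printed above are, in order: orig_data_size,
 * compr_data_size, mem_used_total, mem_limit and mem_used_max (all in
 * bytes), followed by same_pages, pages_compacted and huge_pages (all in
 * numbers of pages).
 */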
1085
1086 #ifdef CONFIG_ZRAM_WRITEBACK
1087 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1088 static ssize_t bd_stat_show(struct device *dev,
1089                 struct device_attribute *attr, char *buf)
1090 {
1091         struct zram *zram = dev_to_zram(dev);
1092         ssize_t ret;
1093
1094         down_read(&zram->init_lock);
1095         ret = scnprintf(buf, PAGE_SIZE,
1096                 "%8llu %8llu %8llu\n",
1097                         FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1098                         FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1099                         FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1100         up_read(&zram->init_lock);
1101
1102         return ret;
1103 }
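/*
 * The bd_stat columns printed above are bd_count, bd_reads and bd_writes,
 * each scaled by FOUR_K() so they are reported in 4KiB units regardless
 * of PAGE_SIZE.
 */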
1104 #endif
1105
1106 static ssize_t debug_stat_show(struct device *dev,
1107                 struct device_attribute *attr, char *buf)
1108 {
1109         int version = 1;
1110         struct zram *zram = dev_to_zram(dev);
1111         ssize_t ret;
1112
1113         down_read(&zram->init_lock);
1114         ret = scnprintf(buf, PAGE_SIZE,
1115                         "version: %d\n%8llu %8llu\n",
1116                         version,
1117                         (u64)atomic64_read(&zram->stats.writestall),
1118                         (u64)atomic64_read(&zram->stats.miss_free));
1119         up_read(&zram->init_lock);
1120
1121         return ret;
1122 }
1123
1124 static DEVICE_ATTR_RO(io_stat);
1125 static DEVICE_ATTR_RO(mm_stat);
1126 #ifdef CONFIG_ZRAM_WRITEBACK
1127 static DEVICE_ATTR_RO(bd_stat);
1128 #endif
1129 static DEVICE_ATTR_RO(debug_stat);
1130
1131 static void zram_meta_free(struct zram *zram, u64 disksize)
1132 {
1133         size_t num_pages = disksize >> PAGE_SHIFT;
1134         size_t index;
1135
1136         /* Free all pages that are still in this zram device */
1137         for (index = 0; index < num_pages; index++)
1138                 zram_free_page(zram, index);
1139
1140         zs_destroy_pool(zram->mem_pool);
1141         vfree(zram->table);
1142 }
1143
1144 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1145 {
1146         size_t num_pages;
1147
1148         num_pages = disksize >> PAGE_SHIFT;
1149         zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1150         if (!zram->table)
1151                 return false;
1152
1153         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1154         if (!zram->mem_pool) {
1155                 vfree(zram->table);
1156                 return false;
1157         }
1158
1159         if (!huge_class_size)
1160                 huge_class_size = zs_huge_class_size(zram->mem_pool);
1161         return true;
1162 }
1163
1164 /*
1165  * To protect against concurrent access to the same index entry, the
1166  * caller should hold this table entry's bit_spinlock to indicate
1167  * that the entry is being accessed.
1168  */
1169 static void zram_free_page(struct zram *zram, size_t index)
1170 {
1171         unsigned long handle;
1172
1173 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1174         zram->table[index].ac_time = 0;
1175 #endif
1176         if (zram_test_flag(zram, index, ZRAM_IDLE))
1177                 zram_clear_flag(zram, index, ZRAM_IDLE);
1178
1179         if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1180                 zram_clear_flag(zram, index, ZRAM_HUGE);
1181                 atomic64_dec(&zram->stats.huge_pages);
1182         }
1183
1184         if (zram_test_flag(zram, index, ZRAM_WB)) {
1185                 zram_clear_flag(zram, index, ZRAM_WB);
1186                 free_block_bdev(zram, zram_get_element(zram, index));
1187                 goto out;
1188         }
1189
1190         /*
1191          * No memory is allocated for same-element-filled pages;
1192          * simply clear the same-page flag.
1193          */
1194         if (zram_test_flag(zram, index, ZRAM_SAME)) {
1195                 zram_clear_flag(zram, index, ZRAM_SAME);
1196                 atomic64_dec(&zram->stats.same_pages);
1197                 goto out;
1198         }
1199
1200         handle = zram_get_handle(zram, index);
1201         if (!handle)
1202                 return;
1203
1204         zs_free(zram->mem_pool, handle);
1205
1206         atomic64_sub(zram_get_obj_size(zram, index),
1207                         &zram->stats.compr_data_size);
1208 out:
1209         atomic64_dec(&zram->stats.pages_stored);
1210         zram_set_handle(zram, index, 0);
1211         zram_set_obj_size(zram, index, 0);
1212         WARN_ON_ONCE(zram->table[index].flags &
1213                 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1214 }
1215
1216 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1217                                 struct bio *bio, bool partial_io)
1218 {
1219         int ret;
1220         unsigned long handle;
1221         unsigned int size;
1222         void *src, *dst;
1223
1224         zram_slot_lock(zram, index);
1225         if (zram_test_flag(zram, index, ZRAM_WB)) {
1226                 struct bio_vec bvec;
1227
1228                 zram_slot_unlock(zram, index);
1229
1230                 bvec.bv_page = page;
1231                 bvec.bv_len = PAGE_SIZE;
1232                 bvec.bv_offset = 0;
1233                 return read_from_bdev(zram, &bvec,
1234                                 zram_get_element(zram, index),
1235                                 bio, partial_io);
1236         }
1237
1238         handle = zram_get_handle(zram, index);
1239         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1240                 unsigned long value;
1241                 void *mem;
1242
1243                 value = handle ? zram_get_element(zram, index) : 0;
1244                 mem = kmap_atomic(page);
1245                 zram_fill_page(mem, PAGE_SIZE, value);
1246                 kunmap_atomic(mem);
1247                 zram_slot_unlock(zram, index);
1248                 return 0;
1249         }
1250
1251         size = zram_get_obj_size(zram, index);
1252
1253         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1254         if (size == PAGE_SIZE) {
1255                 dst = kmap_atomic(page);
1256                 memcpy(dst, src, PAGE_SIZE);
1257                 kunmap_atomic(dst);
1258                 ret = 0;
1259         } else {
1260                 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1261
1262                 dst = kmap_atomic(page);
1263                 ret = zcomp_decompress(zstrm, src, size, dst);
1264                 kunmap_atomic(dst);
1265                 zcomp_stream_put(zram->comp);
1266         }
1267         zs_unmap_object(zram->mem_pool, handle);
1268         zram_slot_unlock(zram, index);
1269
1270         /* Should NEVER happen. Return bio error if it does. */
1271         if (unlikely(ret))
1272                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1273
1274         return ret;
1275 }
1276
1277 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1278                                 u32 index, int offset, struct bio *bio)
1279 {
1280         int ret;
1281         struct page *page;
1282
1283         page = bvec->bv_page;
1284         if (is_partial_io(bvec)) {
1285                 /* Use a temporary buffer to decompress the page */
1286                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1287                 if (!page)
1288                         return -ENOMEM;
1289         }
1290
1291         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1292         if (unlikely(ret))
1293                 goto out;
1294
1295         if (is_partial_io(bvec)) {
1296                 void *dst = kmap_atomic(bvec->bv_page);
1297                 void *src = kmap_atomic(page);
1298
1299                 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1300                 kunmap_atomic(src);
1301                 kunmap_atomic(dst);
1302         }
1303 out:
1304         if (is_partial_io(bvec))
1305                 __free_page(page);
1306
1307         return ret;
1308 }
1309
1310 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1311                                 u32 index, struct bio *bio)
1312 {
1313         int ret = 0;
1314         unsigned long alloced_pages;
1315         unsigned long handle = 0;
1316         unsigned int comp_len = 0;
1317         void *src, *dst, *mem;
1318         struct zcomp_strm *zstrm;
1319         struct page *page = bvec->bv_page;
1320         unsigned long element = 0;
1321         enum zram_pageflags flags = 0;
1322
1323         mem = kmap_atomic(page);
1324         if (page_same_filled(mem, &element)) {
1325                 kunmap_atomic(mem);
1326                 /* Free memory associated with this sector now. */
1327                 flags = ZRAM_SAME;
1328                 atomic64_inc(&zram->stats.same_pages);
1329                 goto out;
1330         }
1331         kunmap_atomic(mem);
1332
1333 compress_again:
1334         zstrm = zcomp_stream_get(zram->comp);
1335         src = kmap_atomic(page);
1336         ret = zcomp_compress(zstrm, src, &comp_len);
1337         kunmap_atomic(src);
1338
1339         if (unlikely(ret)) {
1340                 zcomp_stream_put(zram->comp);
1341                 pr_err("Compression failed! err=%d\n", ret);
1342                 zs_free(zram->mem_pool, handle);
1343                 return ret;
1344         }
1345
1346         if (comp_len >= huge_class_size)
1347                 comp_len = PAGE_SIZE;
1348         /*
1349          * handle allocation has 2 paths:
1350          * a) fast path is executed with preemption disabled (for
1351          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1352          *  since we can't sleep;
1353          * b) slow path enables preemption and attempts to allocate
1354          *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
1355          *  put per-cpu compression stream and, thus, to re-do
1356          *  the compression once handle is allocated.
1357          *
1358          * if we have a 'non-null' handle here then we are coming
1359          * from the slow path and handle has already been allocated.
1360          */
1361         if (!handle)
1362                 handle = zs_malloc(zram->mem_pool, comp_len,
1363                                 __GFP_KSWAPD_RECLAIM |
1364                                 __GFP_NOWARN |
1365                                 __GFP_HIGHMEM |
1366                                 __GFP_MOVABLE);
1367         if (!handle) {
1368                 zcomp_stream_put(zram->comp);
1369                 atomic64_inc(&zram->stats.writestall);
1370                 handle = zs_malloc(zram->mem_pool, comp_len,
1371                                 GFP_NOIO | __GFP_HIGHMEM |
1372                                 __GFP_MOVABLE);
1373                 if (handle)
1374                         goto compress_again;
1375                 return -ENOMEM;
1376         }
1377
1378         alloced_pages = zs_get_total_pages(zram->mem_pool);
1379         update_used_max(zram, alloced_pages);
1380
1381         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1382                 zcomp_stream_put(zram->comp);
1383                 zs_free(zram->mem_pool, handle);
1384                 return -ENOMEM;
1385         }
1386
1387         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1388
1389         src = zstrm->buffer;
1390         if (comp_len == PAGE_SIZE)
1391                 src = kmap_atomic(page);
1392         memcpy(dst, src, comp_len);
1393         if (comp_len == PAGE_SIZE)
1394                 kunmap_atomic(src);
1395
1396         zcomp_stream_put(zram->comp);
1397         zs_unmap_object(zram->mem_pool, handle);
1398         atomic64_add(comp_len, &zram->stats.compr_data_size);
1399 out:
1400         /*
1401          * Free memory associated with this sector
1402          * before overwriting unused sectors.
1403          */
1404         zram_slot_lock(zram, index);
1405         zram_free_page(zram, index);
1406
1407         if (comp_len == PAGE_SIZE) {
1408                 zram_set_flag(zram, index, ZRAM_HUGE);
1409                 atomic64_inc(&zram->stats.huge_pages);
1410         }
1411
1412         if (flags) {
1413                 zram_set_flag(zram, index, flags);
1414                 zram_set_element(zram, index, element);
1415         }  else {
1416                 zram_set_handle(zram, index, handle);
1417                 zram_set_obj_size(zram, index, comp_len);
1418         }
1419         zram_slot_unlock(zram, index);
1420
1421         /* Update stats */
1422         atomic64_inc(&zram->stats.pages_stored);
1423         return ret;
1424 }
1425
1426 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1427                                 u32 index, int offset, struct bio *bio)
1428 {
1429         int ret;
1430         struct page *page = NULL;
1431         void *src;
1432         struct bio_vec vec;
1433
1434         vec = *bvec;
1435         if (is_partial_io(bvec)) {
1436                 void *dst;
1437                 /*
1438                  * This is a partial IO: read the full page first,
1439                  * then apply the changes on top of it.
1440                  */
1441                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1442                 if (!page)
1443                         return -ENOMEM;
1444
1445                 ret = __zram_bvec_read(zram, page, index, bio, true);
1446                 if (ret)
1447                         goto out;
1448
1449                 src = kmap_atomic(bvec->bv_page);
1450                 dst = kmap_atomic(page);
1451                 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1452                 kunmap_atomic(dst);
1453                 kunmap_atomic(src);
1454
1455                 vec.bv_page = page;
1456                 vec.bv_len = PAGE_SIZE;
1457                 vec.bv_offset = 0;
1458         }
1459
1460         ret = __zram_bvec_write(zram, &vec, index, bio);
1461 out:
1462         if (is_partial_io(bvec))
1463                 __free_page(page);
1464         return ret;
1465 }
1466
1467 /*
1468  * zram_bio_discard - handler for discard requests
1469  * @index: physical block index in PAGE_SIZE units
1470  * @offset: byte offset within physical block
1471  */
1472 static void zram_bio_discard(struct zram *zram, u32 index,
1473                              int offset, struct bio *bio)
1474 {
1475         size_t n = bio->bi_iter.bi_size;
1476
1477         /*
1478          * zram manages data in physical-block-size units. Because the logical
1479          * block size is not identical to the physical block size on some
1480          * architectures, we could get a discard request pointing at an offset
1481          * within a physical block.  Although we could handle such a request by
1482          * reading that physical block, decompressing, partially zeroing,
1483          * re-compressing and re-storing it, that is unreasonable because the
1484          * whole intent of a discard request is to save memory.  So skipping
1485          * this logical block is the appropriate behavior here.
1486          */
1487         if (offset) {
1488                 if (n <= (PAGE_SIZE - offset))
1489                         return;
1490
1491                 n -= (PAGE_SIZE - offset);
1492                 index++;
1493         }
1494
1495         while (n >= PAGE_SIZE) {
1496                 zram_slot_lock(zram, index);
1497                 zram_free_page(zram, index);
1498                 zram_slot_unlock(zram, index);
1499                 atomic64_inc(&zram->stats.notify_free);
1500                 index++;
1501                 n -= PAGE_SIZE;
1502         }
1503 }
1504
1505 /*
1506  * Returns a negative errno on error; otherwise returns 0 or 1.
1507  * Returns 0 if the IO request was completed synchronously.
1508  * Returns 1 if the IO request was successfully submitted.
1509  */
1510 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1511                         int offset, unsigned int op, struct bio *bio)
1512 {
1513         int ret;
1514
1515         if (!op_is_write(op)) {
1516                 atomic64_inc(&zram->stats.num_reads);
1517                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1518                 flush_dcache_page(bvec->bv_page);
1519         } else {
1520                 atomic64_inc(&zram->stats.num_writes);
1521                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1522         }
1523
1524         zram_slot_lock(zram, index);
1525         zram_accessed(zram, index);
1526         zram_slot_unlock(zram, index);
1527
1528         if (unlikely(ret < 0)) {
1529                 if (!op_is_write(op))
1530                         atomic64_inc(&zram->stats.failed_reads);
1531                 else
1532                         atomic64_inc(&zram->stats.failed_writes);
1533         }
1534
1535         return ret;
1536 }
1537
1538 static void __zram_make_request(struct zram *zram, struct bio *bio)
1539 {
1540         int offset;
1541         u32 index;
1542         struct bio_vec bvec;
1543         struct bvec_iter iter;
1544         unsigned long start_time;
1545
1546         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1547         offset = (bio->bi_iter.bi_sector &
1548                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1549
1550         switch (bio_op(bio)) {
1551         case REQ_OP_DISCARD:
1552         case REQ_OP_WRITE_ZEROES:
1553                 zram_bio_discard(zram, index, offset, bio);
1554                 bio_endio(bio);
1555                 return;
1556         default:
1557                 break;
1558         }
1559
1560         start_time = bio_start_io_acct(bio);
1561         bio_for_each_segment(bvec, bio, iter) {
1562                 struct bio_vec bv = bvec;
1563                 unsigned int unwritten = bvec.bv_len;
1564
1565                 do {
1566                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1567                                                         unwritten);
1568                         if (zram_bvec_rw(zram, &bv, index, offset,
1569                                          bio_op(bio), bio) < 0) {
1570                                 bio->bi_status = BLK_STS_IOERR;
1571                                 break;
1572                         }
1573
1574                         bv.bv_offset += bv.bv_len;
1575                         unwritten -= bv.bv_len;
1576
1577                         update_position(&index, &offset, &bv);
1578                 } while (unwritten);
1579         }
1580         bio_end_io_acct(bio, start_time);
1581         bio_endio(bio);
1582 }
1583
1584 /*
1585  * Handler function for all zram I/O requests.
1586  */
1587 static blk_qc_t zram_submit_bio(struct bio *bio)
1588 {
1589         struct zram *zram = bio->bi_disk->private_data;
1590
1591         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1592                                         bio->bi_iter.bi_size)) {
1593                 atomic64_inc(&zram->stats.invalid_io);
1594                 goto error;
1595         }
1596
1597         __zram_make_request(zram, bio);
1598         return BLK_QC_T_NONE;
1599
1600 error:
1601         bio_io_error(bio);
1602         return BLK_QC_T_NONE;
1603 }
1604
1605 static void zram_slot_free_notify(struct block_device *bdev,
1606                                 unsigned long index)
1607 {
1608         struct zram *zram;
1609
1610         zram = bdev->bd_disk->private_data;
1611
1612         atomic64_inc(&zram->stats.notify_free);
1613         if (!zram_slot_trylock(zram, index)) {
1614                 atomic64_inc(&zram->stats.miss_free);
1615                 return;
1616         }
1617
1618         zram_free_page(zram, index);
1619         zram_slot_unlock(zram, index);
1620 }
1621
1622 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1623                        struct page *page, unsigned int op)
1624 {
1625         int offset, ret;
1626         u32 index;
1627         struct zram *zram;
1628         struct bio_vec bv;
1629         unsigned long start_time;
1630
1631         if (PageTransHuge(page))
1632                 return -ENOTSUPP;
1633         zram = bdev->bd_disk->private_data;
1634
1635         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1636                 atomic64_inc(&zram->stats.invalid_io);
1637                 ret = -EINVAL;
1638                 goto out;
1639         }
1640
1641         index = sector >> SECTORS_PER_PAGE_SHIFT;
1642         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1643
1644         bv.bv_page = page;
1645         bv.bv_len = PAGE_SIZE;
1646         bv.bv_offset = 0;
1647
1648         start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
1649         ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1650         disk_end_io_acct(bdev->bd_disk, op, start_time);
1651 out:
1652         /*
         * If I/O fails, just return the error (i.e., non-zero) without
         * calling page_endio.
         * This makes the upper layers of rw_page (e.g., swap_readpage,
         * __swap_writepage) resubmit the I/O as a bio request, and
         * bio->bi_end_io then handles the error (e.g., SetPageError,
         * set_page_dirty and extra work).
1659          */
1660         if (unlikely(ret < 0))
1661                 return ret;
1662
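        /*
         * ret == 0 means the I/O completed synchronously, so completion
         * is signalled here. ret == 1 means the I/O was submitted
         * asynchronously (e.g. a read from the backing device), and the
         * bio completion path calls page_endio() instead.
         */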
1663         switch (ret) {
1664         case 0:
1665                 page_endio(page, op_is_write(op), 0);
1666                 break;
1667         case 1:
1668                 ret = 0;
1669                 break;
1670         default:
1671                 WARN_ON(1);
1672         }
1673         return ret;
1674 }
1675
1676 static void zram_reset_device(struct zram *zram)
1677 {
1678         struct zcomp *comp;
1679         u64 disksize;
1680
1681         down_write(&zram->init_lock);
1682
1683         zram->limit_pages = 0;
1684
1685         if (!init_done(zram)) {
1686                 up_write(&zram->init_lock);
1687                 return;
1688         }
1689
1690         comp = zram->comp;
1691         disksize = zram->disksize;
1692         zram->disksize = 0;
1693
1694         set_capacity(zram->disk, 0);
1695         part_stat_set_all(&zram->disk->part0, 0);
1696
1697         up_write(&zram->init_lock);
        /* All in-flight I/O on all CPUs has completed, so it is safe to free */
1699         zram_meta_free(zram, disksize);
1700         memset(&zram->stats, 0, sizeof(zram->stats));
1701         zcomp_destroy(comp);
1702         reset_bdev(zram);
1703 }
1704
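/*
 * Set the device's disk size. A minimal usage sketch from userspace
 * (zram0 is just an example device; memparse() accepts suffixes such as
 * K, M and G):
 *
 *      echo 1G > /sys/block/zram0/disksize
 *
 * This only succeeds while the device is not yet initialized.
 */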
1705 static ssize_t disksize_store(struct device *dev,
1706                 struct device_attribute *attr, const char *buf, size_t len)
1707 {
1708         u64 disksize;
1709         struct zcomp *comp;
1710         struct zram *zram = dev_to_zram(dev);
1711         int err;
1712
1713         disksize = memparse(buf, NULL);
1714         if (!disksize)
1715                 return -EINVAL;
1716
1717         down_write(&zram->init_lock);
1718         if (init_done(zram)) {
1719                 pr_info("Cannot change disksize for initialized device\n");
1720                 err = -EBUSY;
1721                 goto out_unlock;
1722         }
1723
1724         disksize = PAGE_ALIGN(disksize);
1725         if (!zram_meta_alloc(zram, disksize)) {
1726                 err = -ENOMEM;
1727                 goto out_unlock;
1728         }
1729
1730         comp = zcomp_create(zram->compressor);
1731         if (IS_ERR(comp)) {
1732                 pr_err("Cannot initialise %s compressing backend\n",
1733                                 zram->compressor);
1734                 err = PTR_ERR(comp);
1735                 goto out_free_meta;
1736         }
1737
1738         zram->comp = comp;
1739         zram->disksize = disksize;
1740         set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1741
1742         revalidate_disk_size(zram->disk, true);
1743         up_write(&zram->init_lock);
1744
1745         return len;
1746
1747 out_free_meta:
1748         zram_meta_free(zram, disksize);
1749 out_unlock:
1750         up_write(&zram->init_lock);
1751         return err;
1752 }
1753
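/*
 * Reset the device, freeing its metadata and compressed data. A minimal
 * usage sketch (any non-zero value triggers the reset; zram0 is an
 * example device):
 *
 *      echo 1 > /sys/block/zram0/reset
 */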
1754 static ssize_t reset_store(struct device *dev,
1755                 struct device_attribute *attr, const char *buf, size_t len)
1756 {
1757         int ret;
1758         unsigned short do_reset;
1759         struct zram *zram;
1760         struct block_device *bdev;
1761
1762         ret = kstrtou16(buf, 10, &do_reset);
1763         if (ret)
1764                 return ret;
1765
1766         if (!do_reset)
1767                 return -EINVAL;
1768
1769         zram = dev_to_zram(dev);
1770         bdev = bdget_disk(zram->disk, 0);
1771         if (!bdev)
1772                 return -ENOMEM;
1773
1774         mutex_lock(&bdev->bd_mutex);
        /* Do not reset an active or claimed device */
1776         if (bdev->bd_openers || zram->claim) {
1777                 mutex_unlock(&bdev->bd_mutex);
1778                 bdput(bdev);
1779                 return -EBUSY;
1780         }
1781
        /* From now on, no one can open /dev/zram[0-9] */
1783         zram->claim = true;
1784         mutex_unlock(&bdev->bd_mutex);
1785
        /* Make sure all pending I/O is finished */
1787         fsync_bdev(bdev);
1788         zram_reset_device(zram);
1789         revalidate_disk_size(zram->disk, true);
1790         bdput(bdev);
1791
1792         mutex_lock(&bdev->bd_mutex);
1793         zram->claim = false;
1794         mutex_unlock(&bdev->bd_mutex);
1795
1796         return len;
1797 }
1798
1799 static int zram_open(struct block_device *bdev, fmode_t mode)
1800 {
1801         int ret = 0;
1802         struct zram *zram;
1803
1804         WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1805
1806         zram = bdev->bd_disk->private_data;
        /* zram was claimed for reset, so fail the open request */
1808         if (zram->claim)
1809                 ret = -EBUSY;
1810
1811         return ret;
1812 }
1813
1814 static const struct block_device_operations zram_devops = {
1815         .open = zram_open,
1816         .submit_bio = zram_submit_bio,
1817         .swap_slot_free_notify = zram_slot_free_notify,
1818         .rw_page = zram_rw_page,
1819         .owner = THIS_MODULE
1820 };
1821
1822 static DEVICE_ATTR_WO(compact);
1823 static DEVICE_ATTR_RW(disksize);
1824 static DEVICE_ATTR_RO(initstate);
1825 static DEVICE_ATTR_WO(reset);
1826 static DEVICE_ATTR_WO(mem_limit);
1827 static DEVICE_ATTR_WO(mem_used_max);
1828 static DEVICE_ATTR_WO(idle);
1829 static DEVICE_ATTR_RW(max_comp_streams);
1830 static DEVICE_ATTR_RW(comp_algorithm);
1831 #ifdef CONFIG_ZRAM_WRITEBACK
1832 static DEVICE_ATTR_RW(backing_dev);
1833 static DEVICE_ATTR_WO(writeback);
1834 static DEVICE_ATTR_RW(writeback_limit);
1835 static DEVICE_ATTR_RW(writeback_limit_enable);
1836 #endif
1837
1838 static struct attribute *zram_disk_attrs[] = {
1839         &dev_attr_disksize.attr,
1840         &dev_attr_initstate.attr,
1841         &dev_attr_reset.attr,
1842         &dev_attr_compact.attr,
1843         &dev_attr_mem_limit.attr,
1844         &dev_attr_mem_used_max.attr,
1845         &dev_attr_idle.attr,
1846         &dev_attr_max_comp_streams.attr,
1847         &dev_attr_comp_algorithm.attr,
1848 #ifdef CONFIG_ZRAM_WRITEBACK
1849         &dev_attr_backing_dev.attr,
1850         &dev_attr_writeback.attr,
1851         &dev_attr_writeback_limit.attr,
1852         &dev_attr_writeback_limit_enable.attr,
1853 #endif
1854         &dev_attr_io_stat.attr,
1855         &dev_attr_mm_stat.attr,
1856 #ifdef CONFIG_ZRAM_WRITEBACK
1857         &dev_attr_bd_stat.attr,
1858 #endif
1859         &dev_attr_debug_stat.attr,
1860         NULL,
1861 };
1862
1863 static const struct attribute_group zram_disk_attr_group = {
1864         .attrs = zram_disk_attrs,
1865 };
1866
1867 static const struct attribute_group *zram_disk_attr_groups[] = {
1868         &zram_disk_attr_group,
1869         NULL,
1870 };
1871
1872 /*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
1875  */
1876 static int zram_add(void)
1877 {
1878         struct zram *zram;
1879         struct request_queue *queue;
1880         int ret, device_id;
1881
1882         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1883         if (!zram)
1884                 return -ENOMEM;
1885
1886         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1887         if (ret < 0)
1888                 goto out_free_dev;
1889         device_id = ret;
1890
1891         init_rwsem(&zram->init_lock);
1892 #ifdef CONFIG_ZRAM_WRITEBACK
1893         spin_lock_init(&zram->wb_limit_lock);
1894 #endif
1895         queue = blk_alloc_queue(NUMA_NO_NODE);
1896         if (!queue) {
1897                 pr_err("Error allocating disk queue for device %d\n",
1898                         device_id);
1899                 ret = -ENOMEM;
1900                 goto out_free_idr;
1901         }
1902
1903         /* gendisk structure */
1904         zram->disk = alloc_disk(1);
1905         if (!zram->disk) {
1906                 pr_err("Error allocating disk structure for device %d\n",
1907                         device_id);
1908                 ret = -ENOMEM;
1909                 goto out_free_queue;
1910         }
1911
1912         zram->disk->major = zram_major;
1913         zram->disk->first_minor = device_id;
1914         zram->disk->fops = &zram_devops;
1915         zram->disk->queue = queue;
1916         zram->disk->private_data = zram;
1917         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1918
        /* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
1920         set_capacity(zram->disk, 0);
        /* zram devices sort of resemble non-rotational disks */
1922         blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1923         blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1924
1925         /*
         * To ensure that we always get PAGE_SIZE-aligned and
         * n*PAGE_SIZE-sized I/O requests.
1928          */
1929         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1930         blk_queue_logical_block_size(zram->disk->queue,
1931                                         ZRAM_LOGICAL_BLOCK_SIZE);
1932         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1933         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1934         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1935         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1936         blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1937
1938         /*
         * zram_bio_discard() will clear all logical blocks if the logical
         * block size is identical to the physical block size (PAGE_SIZE).
         * But if it is different, we will skip discarding the parts of
         * logical blocks in the request range which aren't aligned to the
         * physical block size, so we can't ensure that all discarded
         * logical blocks are zeroed.
1945          */
1946         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1947                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1948
1949         zram->disk->queue->backing_dev_info->capabilities |=
1950                         (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1951         device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
1952
1953         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1954
1955         zram_debugfs_register(zram);
1956         pr_info("Added device: %s\n", zram->disk->disk_name);
1957         return device_id;
1958
1959 out_free_queue:
1960         blk_cleanup_queue(queue);
1961 out_free_idr:
1962         idr_remove(&zram_index_idr, device_id);
1963 out_free_dev:
1964         kfree(zram);
1965         return ret;
1966 }
1967
1968 static int zram_remove(struct zram *zram)
1969 {
1970         struct block_device *bdev;
1971
1972         bdev = bdget_disk(zram->disk, 0);
1973         if (!bdev)
1974                 return -ENOMEM;
1975
1976         mutex_lock(&bdev->bd_mutex);
1977         if (bdev->bd_openers || zram->claim) {
1978                 mutex_unlock(&bdev->bd_mutex);
1979                 bdput(bdev);
1980                 return -EBUSY;
1981         }
1982
1983         zram->claim = true;
1984         mutex_unlock(&bdev->bd_mutex);
1985
1986         zram_debugfs_unregister(zram);
1987
        /* Make sure all pending I/O is finished */
1989         fsync_bdev(bdev);
1990         zram_reset_device(zram);
1991         bdput(bdev);
1992
1993         pr_info("Removed device: %s\n", zram->disk->disk_name);
1994
1995         del_gendisk(zram->disk);
1996         blk_cleanup_queue(zram->disk->queue);
1997         put_disk(zram->disk);
1998         kfree(zram);
1999         return 0;
2000 }
2001
2002 /* zram-control sysfs attributes */
2003
2004 /*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns this
 * device's device_id (or an error code if it fails to create a new
 * device).
2009  */
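/*
 * A minimal usage sketch (the value read back is the id of the newly
 * created device; "1" below is only an example):
 *
 *      cat /sys/class/zram-control/hot_add
 *      1
 */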
2010 static ssize_t hot_add_show(struct class *class,
2011                         struct class_attribute *attr,
2012                         char *buf)
2013 {
2014         int ret;
2015
2016         mutex_lock(&zram_index_mutex);
2017         ret = zram_add();
2018         mutex_unlock(&zram_index_mutex);
2019
2020         if (ret < 0)
2021                 return ret;
2022         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2023 }
2024 static struct class_attribute class_attr_hot_add =
2025         __ATTR(hot_add, 0400, hot_add_show, NULL);
2026
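/*
 * Remove an unused (not opened, not claimed) device by id. A minimal
 * usage sketch, removing the example device zram1:
 *
 *      echo 1 > /sys/class/zram-control/hot_remove
 */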
2027 static ssize_t hot_remove_store(struct class *class,
2028                         struct class_attribute *attr,
2029                         const char *buf,
2030                         size_t count)
2031 {
2032         struct zram *zram;
2033         int ret, dev_id;
2034
2035         /* dev_id is gendisk->first_minor, which is `int' */
2036         ret = kstrtoint(buf, 10, &dev_id);
2037         if (ret)
2038                 return ret;
2039         if (dev_id < 0)
2040                 return -EINVAL;
2041
2042         mutex_lock(&zram_index_mutex);
2043
2044         zram = idr_find(&zram_index_idr, dev_id);
2045         if (zram) {
2046                 ret = zram_remove(zram);
2047                 if (!ret)
2048                         idr_remove(&zram_index_idr, dev_id);
2049         } else {
2050                 ret = -ENODEV;
2051         }
2052
2053         mutex_unlock(&zram_index_mutex);
2054         return ret ? ret : count;
2055 }
2056 static CLASS_ATTR_WO(hot_remove);
2057
2058 static struct attribute *zram_control_class_attrs[] = {
2059         &class_attr_hot_add.attr,
2060         &class_attr_hot_remove.attr,
2061         NULL,
2062 };
2063 ATTRIBUTE_GROUPS(zram_control_class);
2064
2065 static struct class zram_control_class = {
2066         .name           = "zram-control",
2067         .owner          = THIS_MODULE,
2068         .class_groups   = zram_control_class_groups,
2069 };
2070
2071 static int zram_remove_cb(int id, void *ptr, void *data)
2072 {
2073         zram_remove(ptr);
2074         return 0;
2075 }
2076
2077 static void destroy_devices(void)
2078 {
2079         class_unregister(&zram_control_class);
2080         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2081         zram_debugfs_destroy();
2082         idr_destroy(&zram_index_idr);
2083         unregister_blkdev(zram_major, "zram");
2084         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2085 }
2086
2087 static int __init zram_init(void)
2088 {
2089         int ret;
2090
2091         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2092                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
2093         if (ret < 0)
2094                 return ret;
2095
2096         ret = class_register(&zram_control_class);
2097         if (ret) {
2098                 pr_err("Unable to register zram-control class\n");
2099                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2100                 return ret;
2101         }
2102
2103         zram_debugfs_create();
2104         zram_major = register_blkdev(0, "zram");
2105         if (zram_major <= 0) {
2106                 pr_err("Unable to get major number\n");
2107                 class_unregister(&zram_control_class);
2108                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2109                 return -EBUSY;
2110         }
2111
2112         while (num_devices != 0) {
2113                 mutex_lock(&zram_index_mutex);
2114                 ret = zram_add();
2115                 mutex_unlock(&zram_index_mutex);
2116                 if (ret < 0)
2117                         goto out_error;
2118                 num_devices--;
2119         }
2120
2121         return 0;
2122
2123 out_error:
2124         destroy_devices();
2125         return ret;
2126 }
2127
2128 static void __exit zram_exit(void)
2129 {
2130         destroy_devices();
2131 }
2132
2133 module_init(zram_init);
2134 module_exit(zram_exit);
2135
2136 module_param(num_devices, uint, 0);
2137 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
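/*
 * A minimal usage sketch for the parameter above, pre-creating four
 * devices at module load time:
 *
 *      modprobe zram num_devices=4
 */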
2138
2139 MODULE_LICENSE("Dual BSD/GPL");
2140 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2141 MODULE_DESCRIPTION("Compressed RAM Block Device");