1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/backing-dev.h>
29 #include <linux/string.h>
30 #include <linux/vmalloc.h>
31 #include <linux/err.h>
32 #include <linux/idr.h>
33 #include <linux/sysfs.h>
34 #include <linux/debugfs.h>
35 #include <linux/cpuhotplug.h>
36 #include <linux/part_stat.h>
37
38 #include "zram_drv.h"
39
40 static DEFINE_IDR(zram_index_idr);
41 /* idr index must be protected */
42 static DEFINE_MUTEX(zram_index_mutex);
43
44 static int zram_major;
45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
46
47 /* Module params (documentation at end) */
48 static unsigned int num_devices = 1;
49 /*
50  * Pages that compress to a size equal to or greater than this are
51  * stored uncompressed in memory.
52  */
53 static size_t huge_class_size;
54
55 static const struct block_device_operations zram_devops;
56 static const struct block_device_operations zram_wb_devops;
57
58 static void zram_free_page(struct zram *zram, size_t index);
59 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
60                                 u32 index, int offset, struct bio *bio);
61
62
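/*
 * Each zram table entry is protected by a per-slot bit spinlock: the
 * ZRAM_LOCK bit inside the entry's flags word. The helpers below wrap
 * bit_spin_lock()/bit_spin_unlock() on that bit.
 */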
63 static int zram_slot_trylock(struct zram *zram, u32 index)
64 {
65         return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
66 }
67
68 static void zram_slot_lock(struct zram *zram, u32 index)
69 {
70         bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
71 }
72
73 static void zram_slot_unlock(struct zram *zram, u32 index)
74 {
75         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
76 }
77
78 static inline bool init_done(struct zram *zram)
79 {
80         return zram->disksize;
81 }
82
83 static inline struct zram *dev_to_zram(struct device *dev)
84 {
85         return (struct zram *)dev_to_disk(dev)->private_data;
86 }
87
88 static unsigned long zram_get_handle(struct zram *zram, u32 index)
89 {
90         return zram->table[index].handle;
91 }
92
93 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
94 {
95         zram->table[index].handle = handle;
96 }
97
98 /* flag operations require the table entry's bit_spin_lock() to be held */
99 static bool zram_test_flag(struct zram *zram, u32 index,
100                         enum zram_pageflags flag)
101 {
102         return zram->table[index].flags & BIT(flag);
103 }
104
105 static void zram_set_flag(struct zram *zram, u32 index,
106                         enum zram_pageflags flag)
107 {
108         zram->table[index].flags |= BIT(flag);
109 }
110
111 static void zram_clear_flag(struct zram *zram, u32 index,
112                         enum zram_pageflags flag)
113 {
114         zram->table[index].flags &= ~BIT(flag);
115 }
116
117 static inline void zram_set_element(struct zram *zram, u32 index,
118                         unsigned long element)
119 {
120         zram->table[index].element = element;
121 }
122
123 static unsigned long zram_get_element(struct zram *zram, u32 index)
124 {
125         return zram->table[index].element;
126 }
127
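/*
 * The low ZRAM_FLAG_SHIFT bits of table[index].flags hold the compressed
 * object size; the zram_pageflags bits live above them. The helpers below
 * pack and unpack the size without disturbing the flag bits.
 */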
128 static size_t zram_get_obj_size(struct zram *zram, u32 index)
129 {
130         return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
131 }
132
133 static void zram_set_obj_size(struct zram *zram,
134                                         u32 index, size_t size)
135 {
136         unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
137
138         zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
139 }
140
141 static inline bool zram_allocated(struct zram *zram, u32 index)
142 {
143         return zram_get_obj_size(zram, index) ||
144                         zram_test_flag(zram, index, ZRAM_SAME) ||
145                         zram_test_flag(zram, index, ZRAM_WB);
146 }
147
148 #if PAGE_SIZE != 4096
149 static inline bool is_partial_io(struct bio_vec *bvec)
150 {
151         return bvec->bv_len != PAGE_SIZE;
152 }
153 #else
154 static inline bool is_partial_io(struct bio_vec *bvec)
155 {
156         return false;
157 }
158 #endif
159
160 /*
161  * Check if request is within bounds and aligned on zram logical blocks.
162  */
163 static inline bool valid_io_request(struct zram *zram,
164                 sector_t start, unsigned int size)
165 {
166         u64 end, bound;
167
168         /* unaligned request */
169         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
170                 return false;
171         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
172                 return false;
173
174         end = start + (size >> SECTOR_SHIFT);
175         bound = zram->disksize >> SECTOR_SHIFT;
176         /* out of range */
177         if (unlikely(start >= bound || end > bound || start > end))
178                 return false;
179
180         /* I/O request is valid */
181         return true;
182 }
183
184 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
185 {
186         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
187         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
188 }
189
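/*
 * Lock-free update of stats.max_used_pages: raise the recorded maximum to
 * 'pages' using a cmpxchg loop, retrying if another CPU changes it first.
 */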
190 static inline void update_used_max(struct zram *zram,
191                                         const unsigned long pages)
192 {
193         unsigned long old_max, cur_max;
194
195         old_max = atomic_long_read(&zram->stats.max_used_pages);
196
197         do {
198                 cur_max = old_max;
199                 if (pages > cur_max)
200                         old_max = atomic_long_cmpxchg(
201                                 &zram->stats.max_used_pages, cur_max, pages);
202         } while (old_max != cur_max);
203 }
204
205 static inline void zram_fill_page(void *ptr, unsigned long len,
206                                         unsigned long value)
207 {
208         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
209         memset_l(ptr, value, len / sizeof(unsigned long));
210 }
211
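/*
 * Returns true if the page consists of a single repeating unsigned long
 * value (e.g., an all-zero page). That value is stored in *element so the
 * page can be recorded without allocating any compressed space.
 */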
212 static bool page_same_filled(void *ptr, unsigned long *element)
213 {
214         unsigned long *page;
215         unsigned long val;
216         unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
217
218         page = (unsigned long *)ptr;
219         val = page[0];
220
221         if (val != page[last_pos])
222                 return false;
223
224         for (pos = 1; pos < last_pos; pos++) {
225                 if (val != page[pos])
226                         return false;
227         }
228
229         *element = val;
230
231         return true;
232 }
233
234 static ssize_t initstate_show(struct device *dev,
235                 struct device_attribute *attr, char *buf)
236 {
237         u32 val;
238         struct zram *zram = dev_to_zram(dev);
239
240         down_read(&zram->init_lock);
241         val = init_done(zram);
242         up_read(&zram->init_lock);
243
244         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
245 }
246
247 static ssize_t disksize_show(struct device *dev,
248                 struct device_attribute *attr, char *buf)
249 {
250         struct zram *zram = dev_to_zram(dev);
251
252         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
253 }
254
255 static ssize_t mem_limit_store(struct device *dev,
256                 struct device_attribute *attr, const char *buf, size_t len)
257 {
258         u64 limit;
259         char *tmp;
260         struct zram *zram = dev_to_zram(dev);
261
262         limit = memparse(buf, &tmp);
263         if (buf == tmp) /* no chars parsed, invalid input */
264                 return -EINVAL;
265
266         down_write(&zram->init_lock);
267         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
268         up_write(&zram->init_lock);
269
270         return len;
271 }
272
273 static ssize_t mem_used_max_store(struct device *dev,
274                 struct device_attribute *attr, const char *buf, size_t len)
275 {
276         int err;
277         unsigned long val;
278         struct zram *zram = dev_to_zram(dev);
279
280         err = kstrtoul(buf, 10, &val);
281         if (err || val != 0)
282                 return -EINVAL;
283
284         down_read(&zram->init_lock);
285         if (init_done(zram)) {
286                 atomic_long_set(&zram->stats.max_used_pages,
287                                 zs_get_total_pages(zram->mem_pool));
288         }
289         up_read(&zram->init_lock);
290
291         return len;
292 }
293
294 /*
295  * Mark all pages which are older than or equal to cutoff as IDLE.
296  * Callers should hold the zram init lock in read mode
297  */
298 static void mark_idle(struct zram *zram, ktime_t cutoff)
299 {
300         int is_idle = 1;
301         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
302         int index;
303
304         for (index = 0; index < nr_pages; index++) {
305                 /*
306                  * Do not mark ZRAM_UNDER_WB slots as ZRAM_IDLE, to close a race.
307                  * See the comment in writeback_store.
308                  */
309                 zram_slot_lock(zram, index);
310                 if (zram_allocated(zram, index) &&
311                                 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
312 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
313                         is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
314 #endif
315                         if (is_idle)
316                                 zram_set_flag(zram, index, ZRAM_IDLE);
317                 }
318                 zram_slot_unlock(zram, index);
319         }
320 }
321
322 static ssize_t idle_store(struct device *dev,
323                 struct device_attribute *attr, const char *buf, size_t len)
324 {
325         struct zram *zram = dev_to_zram(dev);
326         ktime_t cutoff_time = 0;
327         ssize_t rv = -EINVAL;
328
329         if (!sysfs_streq(buf, "all")) {
330                 /*
331                  * If it did not parse as 'all', try to treat it as an integer
332                  * when memory tracking is enabled.
333                  */
334                 u64 age_sec;
335
336                 if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
337                         cutoff_time = ktime_sub(ktime_get_boottime(),
338                                         ns_to_ktime(age_sec * NSEC_PER_SEC));
339                 else
340                         goto out;
341         }
342
343         down_read(&zram->init_lock);
344         if (!init_done(zram))
345                 goto out_unlock;
346
347         /* A cutoff_time of 0 marks everything as idle; this is the "all" behavior */
348         mark_idle(zram, cutoff_time);
349         rv = len;
350
351 out_unlock:
352         up_read(&zram->init_lock);
353 out:
354         return rv;
355 }
356
357 #ifdef CONFIG_ZRAM_WRITEBACK
358 static ssize_t writeback_limit_enable_store(struct device *dev,
359                 struct device_attribute *attr, const char *buf, size_t len)
360 {
361         struct zram *zram = dev_to_zram(dev);
362         u64 val;
363         ssize_t ret = -EINVAL;
364
365         if (kstrtoull(buf, 10, &val))
366                 return ret;
367
368         down_read(&zram->init_lock);
369         spin_lock(&zram->wb_limit_lock);
370         zram->wb_limit_enable = val;
371         spin_unlock(&zram->wb_limit_lock);
372         up_read(&zram->init_lock);
373         ret = len;
374
375         return ret;
376 }
377
378 static ssize_t writeback_limit_enable_show(struct device *dev,
379                 struct device_attribute *attr, char *buf)
380 {
381         bool val;
382         struct zram *zram = dev_to_zram(dev);
383
384         down_read(&zram->init_lock);
385         spin_lock(&zram->wb_limit_lock);
386         val = zram->wb_limit_enable;
387         spin_unlock(&zram->wb_limit_lock);
388         up_read(&zram->init_lock);
389
390         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
391 }
392
393 static ssize_t writeback_limit_store(struct device *dev,
394                 struct device_attribute *attr, const char *buf, size_t len)
395 {
396         struct zram *zram = dev_to_zram(dev);
397         u64 val;
398         ssize_t ret = -EINVAL;
399
400         if (kstrtoull(buf, 10, &val))
401                 return ret;
402
403         down_read(&zram->init_lock);
404         spin_lock(&zram->wb_limit_lock);
405         zram->bd_wb_limit = val;
406         spin_unlock(&zram->wb_limit_lock);
407         up_read(&zram->init_lock);
408         ret = len;
409
410         return ret;
411 }
412
413 static ssize_t writeback_limit_show(struct device *dev,
414                 struct device_attribute *attr, char *buf)
415 {
416         u64 val;
417         struct zram *zram = dev_to_zram(dev);
418
419         down_read(&zram->init_lock);
420         spin_lock(&zram->wb_limit_lock);
421         val = zram->bd_wb_limit;
422         spin_unlock(&zram->wb_limit_lock);
423         up_read(&zram->init_lock);
424
425         return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
426 }
427
428 static void reset_bdev(struct zram *zram)
429 {
430         struct block_device *bdev;
431
432         if (!zram->backing_dev)
433                 return;
434
435         bdev = zram->bdev;
436         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
437         /* hope filp_close flushes all of the IO */
438         filp_close(zram->backing_dev, NULL);
439         zram->backing_dev = NULL;
440         zram->bdev = NULL;
441         zram->disk->fops = &zram_devops;
442         kvfree(zram->bitmap);
443         zram->bitmap = NULL;
444 }
445
446 static ssize_t backing_dev_show(struct device *dev,
447                 struct device_attribute *attr, char *buf)
448 {
449         struct file *file;
450         struct zram *zram = dev_to_zram(dev);
451         char *p;
452         ssize_t ret;
453
454         down_read(&zram->init_lock);
455         file = zram->backing_dev;
456         if (!file) {
457                 memcpy(buf, "none\n", 5);
458                 up_read(&zram->init_lock);
459                 return 5;
460         }
461
462         p = file_path(file, buf, PAGE_SIZE - 1);
463         if (IS_ERR(p)) {
464                 ret = PTR_ERR(p);
465                 goto out;
466         }
467
468         ret = strlen(p);
469         memmove(buf, p, ret);
470         buf[ret++] = '\n';
471 out:
472         up_read(&zram->init_lock);
473         return ret;
474 }
475
476 static ssize_t backing_dev_store(struct device *dev,
477                 struct device_attribute *attr, const char *buf, size_t len)
478 {
479         char *file_name;
480         size_t sz;
481         struct file *backing_dev = NULL;
482         struct inode *inode;
483         struct address_space *mapping;
484         unsigned int bitmap_sz;
485         unsigned long nr_pages, *bitmap = NULL;
486         struct block_device *bdev = NULL;
487         int err;
488         struct zram *zram = dev_to_zram(dev);
489
490         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
491         if (!file_name)
492                 return -ENOMEM;
493
494         down_write(&zram->init_lock);
495         if (init_done(zram)) {
496                 pr_info("Can't setup backing device for initialized device\n");
497                 err = -EBUSY;
498                 goto out;
499         }
500
501         strlcpy(file_name, buf, PATH_MAX);
502         /* ignore trailing newline */
503         sz = strlen(file_name);
504         if (sz > 0 && file_name[sz - 1] == '\n')
505                 file_name[sz - 1] = 0x00;
506
507         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
508         if (IS_ERR(backing_dev)) {
509                 err = PTR_ERR(backing_dev);
510                 backing_dev = NULL;
511                 goto out;
512         }
513
514         mapping = backing_dev->f_mapping;
515         inode = mapping->host;
516
517         /* Only block devices are supported at the moment */
518         if (!S_ISBLK(inode->i_mode)) {
519                 err = -ENOTBLK;
520                 goto out;
521         }
522
523         bdev = blkdev_get_by_dev(inode->i_rdev,
524                         FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
525         if (IS_ERR(bdev)) {
526                 err = PTR_ERR(bdev);
527                 bdev = NULL;
528                 goto out;
529         }
530
531         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
532         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
533         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
534         if (!bitmap) {
535                 err = -ENOMEM;
536                 goto out;
537         }
538
539         reset_bdev(zram);
540
541         zram->bdev = bdev;
542         zram->backing_dev = backing_dev;
543         zram->bitmap = bitmap;
544         zram->nr_pages = nr_pages;
545         /*
546          * With the writeback feature, zram does asynchronous IO, so it is no
547          * longer a synchronous device; drop the synchronous io flag. Otherwise,
548          * the upper layer (e.g., swap) could wait for IO completion rather than
549          * submit and return, which would make the system sluggish.
550          * Furthermore, when the IO function returns (e.g., swap_readpage), the
551          * upper layer expects the IO to be done and may free the page, even
552          * though the IO is actually still in flight, which can eventually cause
553          * a use-after-free once the IO really completes.
554          */
555         zram->disk->fops = &zram_wb_devops;
556         up_write(&zram->init_lock);
557
558         pr_info("setup backing device %s\n", file_name);
559         kfree(file_name);
560
561         return len;
562 out:
563         kvfree(bitmap);
564
565         if (bdev)
566                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
567
568         if (backing_dev)
569                 filp_close(backing_dev, NULL);
570
571         up_write(&zram->init_lock);
572
573         kfree(file_name);
574
575         return err;
576 }
577
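/*
 * Allocate one block on the backing device by finding and claiming a clear
 * bit in the allocation bitmap. Returns the block index, or 0 if the
 * backing device is full.
 */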
578 static unsigned long alloc_block_bdev(struct zram *zram)
579 {
580         unsigned long blk_idx = 1;
581 retry:
582         /* skip bit 0, to avoid confusion with zram.handle == 0 */
583         blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
584         if (blk_idx == zram->nr_pages)
585                 return 0;
586
587         if (test_and_set_bit(blk_idx, zram->bitmap))
588                 goto retry;
589
590         atomic64_inc(&zram->stats.bd_count);
591         return blk_idx;
592 }
593
594 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
595 {
596         int was_set;
597
598         was_set = test_and_clear_bit(blk_idx, zram->bitmap);
599         WARN_ON_ONCE(!was_set);
600         atomic64_dec(&zram->stats.bd_count);
601 }
602
603 static void zram_page_end_io(struct bio *bio)
604 {
605         struct page *page = bio_first_page_all(bio);
606
607         page_endio(page, op_is_write(bio_op(bio)),
608                         blk_status_to_errno(bio->bi_status));
609         bio_put(bio);
610 }
611
612 /*
613  * Returns 1 if the submission is successful.
614  */
615 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
616                         unsigned long entry, struct bio *parent)
617 {
618         struct bio *bio;
619
620         bio = bio_alloc(GFP_NOIO, 1);
621         if (!bio)
622                 return -ENOMEM;
623
624         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
625         bio_set_dev(bio, zram->bdev);
626         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
627                 bio_put(bio);
628                 return -EIO;
629         }
630
631         if (!parent) {
632                 bio->bi_opf = REQ_OP_READ;
633                 bio->bi_end_io = zram_page_end_io;
634         } else {
635                 bio->bi_opf = parent->bi_opf;
636                 bio_chain(bio, parent);
637         }
638
639         submit_bio(bio);
640         return 1;
641 }
642
643 #define PAGE_WB_SIG "page_index="
644
645 #define PAGE_WRITEBACK 0
646 #define HUGE_WRITEBACK 1
647 #define IDLE_WRITEBACK 2
648
649
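/*
 * Writeback trigger: "idle" writes back idle slots, "huge" writes back
 * incompressible (huge) slots, and "page_index=<n>" writes back a single
 * slot. Each successfully written slot is marked ZRAM_WB and its data is
 * freed from the compressed pool.
 */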
650 static ssize_t writeback_store(struct device *dev,
651                 struct device_attribute *attr, const char *buf, size_t len)
652 {
653         struct zram *zram = dev_to_zram(dev);
654         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
655         unsigned long index = 0;
656         struct bio bio;
657         struct bio_vec bio_vec;
658         struct page *page;
659         ssize_t ret = len;
660         int mode, err;
661         unsigned long blk_idx = 0;
662
663         if (sysfs_streq(buf, "idle"))
664                 mode = IDLE_WRITEBACK;
665         else if (sysfs_streq(buf, "huge"))
666                 mode = HUGE_WRITEBACK;
667         else {
668                 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
669                         return -EINVAL;
670
671                 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
672                                 index >= nr_pages)
673                         return -EINVAL;
674
675                 nr_pages = 1;
676                 mode = PAGE_WRITEBACK;
677         }
678
679         down_read(&zram->init_lock);
680         if (!init_done(zram)) {
681                 ret = -EINVAL;
682                 goto release_init_lock;
683         }
684
685         if (!zram->backing_dev) {
686                 ret = -ENODEV;
687                 goto release_init_lock;
688         }
689
690         page = alloc_page(GFP_KERNEL);
691         if (!page) {
692                 ret = -ENOMEM;
693                 goto release_init_lock;
694         }
695
696         for (; nr_pages != 0; index++, nr_pages--) {
697                 struct bio_vec bvec;
698
699                 bvec.bv_page = page;
700                 bvec.bv_len = PAGE_SIZE;
701                 bvec.bv_offset = 0;
702
703                 spin_lock(&zram->wb_limit_lock);
704                 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
705                         spin_unlock(&zram->wb_limit_lock);
706                         ret = -EIO;
707                         break;
708                 }
709                 spin_unlock(&zram->wb_limit_lock);
710
711                 if (!blk_idx) {
712                         blk_idx = alloc_block_bdev(zram);
713                         if (!blk_idx) {
714                                 ret = -ENOSPC;
715                                 break;
716                         }
717                 }
718
719                 zram_slot_lock(zram, index);
720                 if (!zram_allocated(zram, index))
721                         goto next;
722
723                 if (zram_test_flag(zram, index, ZRAM_WB) ||
724                                 zram_test_flag(zram, index, ZRAM_SAME) ||
725                                 zram_test_flag(zram, index, ZRAM_UNDER_WB))
726                         goto next;
727
728                 if (mode == IDLE_WRITEBACK &&
729                           !zram_test_flag(zram, index, ZRAM_IDLE))
730                         goto next;
731                 if (mode == HUGE_WRITEBACK &&
732                           !zram_test_flag(zram, index, ZRAM_HUGE))
733                         goto next;
734                 /*
735                  * Clearing ZRAM_UNDER_WB is the caller's duty.
736                  * IOW, zram_free_page() never clears it.
737                  */
738                 zram_set_flag(zram, index, ZRAM_UNDER_WB);
739                 /* Needed to handle hugepage writeback races */
740                 zram_set_flag(zram, index, ZRAM_IDLE);
741                 zram_slot_unlock(zram, index);
742                 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
743                         zram_slot_lock(zram, index);
744                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
745                         zram_clear_flag(zram, index, ZRAM_IDLE);
746                         zram_slot_unlock(zram, index);
747                         continue;
748                 }
749
750                 bio_init(&bio, &bio_vec, 1);
751                 bio_set_dev(&bio, zram->bdev);
752                 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
753                 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
754
755                 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
756                                 bvec.bv_offset);
757                 /*
758                  * XXX: A single-page IO would be inefficient for writes,
759                  * but it is not a bad starting point.
760                  */
761                 err = submit_bio_wait(&bio);
762                 if (err) {
763                         zram_slot_lock(zram, index);
764                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
765                         zram_clear_flag(zram, index, ZRAM_IDLE);
766                         zram_slot_unlock(zram, index);
767                         /*
768                          * Remember the last IO error; it is returned even
769                          * if later IOs succeed.
770                          */
771                         ret = err;
772                         continue;
773                 }
774
775                 atomic64_inc(&zram->stats.bd_writes);
776                 /*
777                  * We released zram_slot_lock, so we need to check whether the
778                  * slot has changed. If the slot was freed, we can catch that
779                  * easily via zram_allocated.
780                  * A subtle case is the slot being freed/reallocated/marked as
781                  * ZRAM_IDLE again. To close that race, idle_store does not
782                  * mark a slot ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
783                  * Thus, we can close the race by checking the ZRAM_IDLE bit.
784                  */
785                 zram_slot_lock(zram, index);
786                 if (!zram_allocated(zram, index) ||
787                           !zram_test_flag(zram, index, ZRAM_IDLE)) {
788                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
789                         zram_clear_flag(zram, index, ZRAM_IDLE);
790                         goto next;
791                 }
792
793                 zram_free_page(zram, index);
794                 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
795                 zram_set_flag(zram, index, ZRAM_WB);
796                 zram_set_element(zram, index, blk_idx);
797                 blk_idx = 0;
798                 atomic64_inc(&zram->stats.pages_stored);
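                /* bd_wb_limit is accounted in units of 4K blocks */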
799                 spin_lock(&zram->wb_limit_lock);
800                 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
801                         zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
802                 spin_unlock(&zram->wb_limit_lock);
803 next:
804                 zram_slot_unlock(zram, index);
805         }
806
807         if (blk_idx)
808                 free_block_bdev(zram, blk_idx);
809         __free_page(page);
810 release_init_lock:
811         up_read(&zram->init_lock);
812
813         return ret;
814 }
815
816 struct zram_work {
817         struct work_struct work;
818         struct zram *zram;
819         unsigned long entry;
820         struct bio *bio;
821         struct bio_vec bvec;
822 };
823
824 #if PAGE_SIZE != 4096
825 static void zram_sync_read(struct work_struct *work)
826 {
827         struct zram_work *zw = container_of(work, struct zram_work, work);
828         struct zram *zram = zw->zram;
829         unsigned long entry = zw->entry;
830         struct bio *bio = zw->bio;
831
832         read_from_bdev_async(zram, &zw->bvec, entry, bio);
833 }
834
835 /*
836  * The block layer wants one ->submit_bio to be active at a time, so if we use
837  * chained IO with the parent IO in the same context, it deadlocks. To avoid
838  * that, use a worker thread context.
839  */
840 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
841                                 unsigned long entry, struct bio *bio)
842 {
843         struct zram_work work;
844
845         work.bvec = *bvec;
846         work.zram = zram;
847         work.entry = entry;
848         work.bio = bio;
849
850         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
851         queue_work(system_unbound_wq, &work.work);
852         flush_work(&work.work);
853         destroy_work_on_stack(&work.work);
854
855         return 1;
856 }
857 #else
858 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
859                                 unsigned long entry, struct bio *bio)
860 {
861         WARN_ON(1);
862         return -EIO;
863 }
864 #endif
865
866 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
867                         unsigned long entry, struct bio *parent, bool sync)
868 {
869         atomic64_inc(&zram->stats.bd_reads);
870         if (sync)
871                 return read_from_bdev_sync(zram, bvec, entry, parent);
872         else
873                 return read_from_bdev_async(zram, bvec, entry, parent);
874 }
875 #else
876 static inline void reset_bdev(struct zram *zram) {};
877 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
878                         unsigned long entry, struct bio *parent, bool sync)
879 {
880         return -EIO;
881 }
882
883 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
884 #endif
885
886 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
887
888 static struct dentry *zram_debugfs_root;
889
890 static void zram_debugfs_create(void)
891 {
892         zram_debugfs_root = debugfs_create_dir("zram", NULL);
893 }
894
895 static void zram_debugfs_destroy(void)
896 {
897         debugfs_remove_recursive(zram_debugfs_root);
898 }
899
900 static void zram_accessed(struct zram *zram, u32 index)
901 {
902         zram_clear_flag(zram, index, ZRAM_IDLE);
903         zram->table[index].ac_time = ktime_get_boottime();
904 }
905
906 static ssize_t read_block_state(struct file *file, char __user *buf,
907                                 size_t count, loff_t *ppos)
908 {
909         char *kbuf;
910         ssize_t index, written = 0;
911         struct zram *zram = file->private_data;
912         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
913         struct timespec64 ts;
914
915         kbuf = kvmalloc(count, GFP_KERNEL);
916         if (!kbuf)
917                 return -ENOMEM;
918
919         down_read(&zram->init_lock);
920         if (!init_done(zram)) {
921                 up_read(&zram->init_lock);
922                 kvfree(kbuf);
923                 return -EINVAL;
924         }
925
926         for (index = *ppos; index < nr_pages; index++) {
927                 int copied;
928
929                 zram_slot_lock(zram, index);
930                 if (!zram_allocated(zram, index))
931                         goto next;
932
933                 ts = ktime_to_timespec64(zram->table[index].ac_time);
934                 copied = snprintf(kbuf + written, count,
935                         "%12zd %12lld.%06lu %c%c%c%c\n",
936                         index, (s64)ts.tv_sec,
937                         ts.tv_nsec / NSEC_PER_USEC,
938                         zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
939                         zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
940                         zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
941                         zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
942
943                 if (count <= copied) {
944                         zram_slot_unlock(zram, index);
945                         break;
946                 }
947                 written += copied;
948                 count -= copied;
949 next:
950                 zram_slot_unlock(zram, index);
951                 *ppos += 1;
952         }
953
954         up_read(&zram->init_lock);
955         if (copy_to_user(buf, kbuf, written))
956                 written = -EFAULT;
957         kvfree(kbuf);
958
959         return written;
960 }
961
962 static const struct file_operations proc_zram_block_state_op = {
963         .open = simple_open,
964         .read = read_block_state,
965         .llseek = default_llseek,
966 };
967
968 static void zram_debugfs_register(struct zram *zram)
969 {
970         if (!zram_debugfs_root)
971                 return;
972
973         zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
974                                                 zram_debugfs_root);
975         debugfs_create_file("block_state", 0400, zram->debugfs_dir,
976                                 zram, &proc_zram_block_state_op);
977 }
978
979 static void zram_debugfs_unregister(struct zram *zram)
980 {
981         debugfs_remove_recursive(zram->debugfs_dir);
982 }
983 #else
984 static void zram_debugfs_create(void) {};
985 static void zram_debugfs_destroy(void) {};
986 static void zram_accessed(struct zram *zram, u32 index)
987 {
988         zram_clear_flag(zram, index, ZRAM_IDLE);
989 };
990 static void zram_debugfs_register(struct zram *zram) {};
991 static void zram_debugfs_unregister(struct zram *zram) {};
992 #endif
993
994 /*
995  * We switched to per-cpu streams and this attr is not needed anymore.
996  * However, we will keep it around for some time, because:
997  * a) we may revert per-cpu streams in the future
998  * b) it's visible to user space and we need to follow our 2 years
999  *    retirement rule; but we already have a number of 'soon to be
1000  *    altered' attrs, so max_comp_streams needs to wait for the next
1001  *    layoff cycle.
1002  */
1003 static ssize_t max_comp_streams_show(struct device *dev,
1004                 struct device_attribute *attr, char *buf)
1005 {
1006         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
1007 }
1008
1009 static ssize_t max_comp_streams_store(struct device *dev,
1010                 struct device_attribute *attr, const char *buf, size_t len)
1011 {
1012         return len;
1013 }
1014
1015 static ssize_t comp_algorithm_show(struct device *dev,
1016                 struct device_attribute *attr, char *buf)
1017 {
1018         size_t sz;
1019         struct zram *zram = dev_to_zram(dev);
1020
1021         down_read(&zram->init_lock);
1022         sz = zcomp_available_show(zram->compressor, buf);
1023         up_read(&zram->init_lock);
1024
1025         return sz;
1026 }
1027
1028 static ssize_t comp_algorithm_store(struct device *dev,
1029                 struct device_attribute *attr, const char *buf, size_t len)
1030 {
1031         struct zram *zram = dev_to_zram(dev);
1032         char compressor[ARRAY_SIZE(zram->compressor)];
1033         size_t sz;
1034
1035         strlcpy(compressor, buf, sizeof(compressor));
1036         /* ignore trailing newline */
1037         sz = strlen(compressor);
1038         if (sz > 0 && compressor[sz - 1] == '\n')
1039                 compressor[sz - 1] = 0x00;
1040
1041         if (!zcomp_available_algorithm(compressor))
1042                 return -EINVAL;
1043
1044         down_write(&zram->init_lock);
1045         if (init_done(zram)) {
1046                 up_write(&zram->init_lock);
1047                 pr_info("Can't change algorithm for initialized device\n");
1048                 return -EBUSY;
1049         }
1050
1051         strcpy(zram->compressor, compressor);
1052         up_write(&zram->init_lock);
1053         return len;
1054 }
1055
1056 static ssize_t compact_store(struct device *dev,
1057                 struct device_attribute *attr, const char *buf, size_t len)
1058 {
1059         struct zram *zram = dev_to_zram(dev);
1060
1061         down_read(&zram->init_lock);
1062         if (!init_done(zram)) {
1063                 up_read(&zram->init_lock);
1064                 return -EINVAL;
1065         }
1066
1067         zs_compact(zram->mem_pool);
1068         up_read(&zram->init_lock);
1069
1070         return len;
1071 }
1072
1073 static ssize_t io_stat_show(struct device *dev,
1074                 struct device_attribute *attr, char *buf)
1075 {
1076         struct zram *zram = dev_to_zram(dev);
1077         ssize_t ret;
1078
1079         down_read(&zram->init_lock);
1080         ret = scnprintf(buf, PAGE_SIZE,
1081                         "%8llu %8llu %8llu %8llu\n",
1082                         (u64)atomic64_read(&zram->stats.failed_reads),
1083                         (u64)atomic64_read(&zram->stats.failed_writes),
1084                         (u64)atomic64_read(&zram->stats.invalid_io),
1085                         (u64)atomic64_read(&zram->stats.notify_free));
1086         up_read(&zram->init_lock);
1087
1088         return ret;
1089 }
1090
1091 static ssize_t mm_stat_show(struct device *dev,
1092                 struct device_attribute *attr, char *buf)
1093 {
1094         struct zram *zram = dev_to_zram(dev);
1095         struct zs_pool_stats pool_stats;
1096         u64 orig_size, mem_used = 0;
1097         long max_used;
1098         ssize_t ret;
1099
1100         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1101
1102         down_read(&zram->init_lock);
1103         if (init_done(zram)) {
1104                 mem_used = zs_get_total_pages(zram->mem_pool);
1105                 zs_pool_stats(zram->mem_pool, &pool_stats);
1106         }
1107
1108         orig_size = atomic64_read(&zram->stats.pages_stored);
1109         max_used = atomic_long_read(&zram->stats.max_used_pages);
1110
1111         ret = scnprintf(buf, PAGE_SIZE,
1112                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1113                         orig_size << PAGE_SHIFT,
1114                         (u64)atomic64_read(&zram->stats.compr_data_size),
1115                         mem_used << PAGE_SHIFT,
1116                         zram->limit_pages << PAGE_SHIFT,
1117                         max_used << PAGE_SHIFT,
1118                         (u64)atomic64_read(&zram->stats.same_pages),
1119                         atomic_long_read(&pool_stats.pages_compacted),
1120                         (u64)atomic64_read(&zram->stats.huge_pages),
1121                         (u64)atomic64_read(&zram->stats.huge_pages_since));
1122         up_read(&zram->init_lock);
1123
1124         return ret;
1125 }
1126
1127 #ifdef CONFIG_ZRAM_WRITEBACK
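/* bd_stat values are reported in units of 4K blocks, independent of PAGE_SIZE */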
1128 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1129 static ssize_t bd_stat_show(struct device *dev,
1130                 struct device_attribute *attr, char *buf)
1131 {
1132         struct zram *zram = dev_to_zram(dev);
1133         ssize_t ret;
1134
1135         down_read(&zram->init_lock);
1136         ret = scnprintf(buf, PAGE_SIZE,
1137                 "%8llu %8llu %8llu\n",
1138                         FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1139                         FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1140                         FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1141         up_read(&zram->init_lock);
1142
1143         return ret;
1144 }
1145 #endif
1146
1147 static ssize_t debug_stat_show(struct device *dev,
1148                 struct device_attribute *attr, char *buf)
1149 {
1150         int version = 1;
1151         struct zram *zram = dev_to_zram(dev);
1152         ssize_t ret;
1153
1154         down_read(&zram->init_lock);
1155         ret = scnprintf(buf, PAGE_SIZE,
1156                         "version: %d\n%8llu %8llu\n",
1157                         version,
1158                         (u64)atomic64_read(&zram->stats.writestall),
1159                         (u64)atomic64_read(&zram->stats.miss_free));
1160         up_read(&zram->init_lock);
1161
1162         return ret;
1163 }
1164
1165 static DEVICE_ATTR_RO(io_stat);
1166 static DEVICE_ATTR_RO(mm_stat);
1167 #ifdef CONFIG_ZRAM_WRITEBACK
1168 static DEVICE_ATTR_RO(bd_stat);
1169 #endif
1170 static DEVICE_ATTR_RO(debug_stat);
1171
1172 static void zram_meta_free(struct zram *zram, u64 disksize)
1173 {
1174         size_t num_pages = disksize >> PAGE_SHIFT;
1175         size_t index;
1176
1177         /* Free all pages that are still in this zram device */
1178         for (index = 0; index < num_pages; index++)
1179                 zram_free_page(zram, index);
1180
1181         zs_destroy_pool(zram->mem_pool);
1182         vfree(zram->table);
1183 }
1184
1185 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1186 {
1187         size_t num_pages;
1188
1189         num_pages = disksize >> PAGE_SHIFT;
1190         zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1191         if (!zram->table)
1192                 return false;
1193
1194         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1195         if (!zram->mem_pool) {
1196                 vfree(zram->table);
1197                 return false;
1198         }
1199
1200         if (!huge_class_size)
1201                 huge_class_size = zs_huge_class_size(zram->mem_pool);
1202         return true;
1203 }
1204
1205 /*
1206  * To protect against concurrent access to the same index entry, the
1207  * caller should hold this table index entry's bit_spinlock to
1208  * indicate that this index entry is being accessed.
1209  */
1210 static void zram_free_page(struct zram *zram, size_t index)
1211 {
1212         unsigned long handle;
1213
1214 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1215         zram->table[index].ac_time = 0;
1216 #endif
1217         if (zram_test_flag(zram, index, ZRAM_IDLE))
1218                 zram_clear_flag(zram, index, ZRAM_IDLE);
1219
1220         if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1221                 zram_clear_flag(zram, index, ZRAM_HUGE);
1222                 atomic64_dec(&zram->stats.huge_pages);
1223         }
1224
1225         if (zram_test_flag(zram, index, ZRAM_WB)) {
1226                 zram_clear_flag(zram, index, ZRAM_WB);
1227                 free_block_bdev(zram, zram_get_element(zram, index));
1228                 goto out;
1229         }
1230
1231         /*
1232          * No memory is allocated for same-element-filled pages.
1233          * Simply clear the same-page flag.
1234          */
1235         if (zram_test_flag(zram, index, ZRAM_SAME)) {
1236                 zram_clear_flag(zram, index, ZRAM_SAME);
1237                 atomic64_dec(&zram->stats.same_pages);
1238                 goto out;
1239         }
1240
1241         handle = zram_get_handle(zram, index);
1242         if (!handle)
1243                 return;
1244
1245         zs_free(zram->mem_pool, handle);
1246
1247         atomic64_sub(zram_get_obj_size(zram, index),
1248                         &zram->stats.compr_data_size);
1249 out:
1250         atomic64_dec(&zram->stats.pages_stored);
1251         zram_set_handle(zram, index, 0);
1252         zram_set_obj_size(zram, index, 0);
1253         WARN_ON_ONCE(zram->table[index].flags &
1254                 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1255 }
1256
1257 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1258                                 struct bio *bio, bool partial_io)
1259 {
1260         struct zcomp_strm *zstrm;
1261         unsigned long handle;
1262         unsigned int size;
1263         void *src, *dst;
1264         int ret;
1265
1266         zram_slot_lock(zram, index);
1267         if (zram_test_flag(zram, index, ZRAM_WB)) {
1268                 struct bio_vec bvec;
1269
1270                 zram_slot_unlock(zram, index);
1271
1272                 bvec.bv_page = page;
1273                 bvec.bv_len = PAGE_SIZE;
1274                 bvec.bv_offset = 0;
1275                 return read_from_bdev(zram, &bvec,
1276                                 zram_get_element(zram, index),
1277                                 bio, partial_io);
1278         }
1279
1280         handle = zram_get_handle(zram, index);
1281         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1282                 unsigned long value;
1283                 void *mem;
1284
1285                 value = handle ? zram_get_element(zram, index) : 0;
1286                 mem = kmap_atomic(page);
1287                 zram_fill_page(mem, PAGE_SIZE, value);
1288                 kunmap_atomic(mem);
1289                 zram_slot_unlock(zram, index);
1290                 return 0;
1291         }
1292
1293         size = zram_get_obj_size(zram, index);
1294
1295         if (size != PAGE_SIZE)
1296                 zstrm = zcomp_stream_get(zram->comp);
1297
1298         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1299         if (size == PAGE_SIZE) {
1300                 dst = kmap_atomic(page);
1301                 memcpy(dst, src, PAGE_SIZE);
1302                 kunmap_atomic(dst);
1303                 ret = 0;
1304         } else {
1305                 dst = kmap_atomic(page);
1306                 ret = zcomp_decompress(zstrm, src, size, dst);
1307                 kunmap_atomic(dst);
1308                 zcomp_stream_put(zram->comp);
1309         }
1310         zs_unmap_object(zram->mem_pool, handle);
1311         zram_slot_unlock(zram, index);
1312
1313         /* Should NEVER happen. Return bio error if it does. */
1314         if (WARN_ON(ret))
1315                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1316
1317         return ret;
1318 }
1319
1320 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1321                                 u32 index, int offset, struct bio *bio)
1322 {
1323         int ret;
1324         struct page *page;
1325
1326         page = bvec->bv_page;
1327         if (is_partial_io(bvec)) {
1328                 /* Use a temporary buffer to decompress the page */
1329                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1330                 if (!page)
1331                         return -ENOMEM;
1332         }
1333
1334         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1335         if (unlikely(ret))
1336                 goto out;
1337
1338         if (is_partial_io(bvec)) {
1339                 void *dst = kmap_atomic(bvec->bv_page);
1340                 void *src = kmap_atomic(page);
1341
1342                 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1343                 kunmap_atomic(src);
1344                 kunmap_atomic(dst);
1345         }
1346 out:
1347         if (is_partial_io(bvec))
1348                 __free_page(page);
1349
1350         return ret;
1351 }
1352
1353 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1354                                 u32 index, struct bio *bio)
1355 {
1356         int ret = 0;
1357         unsigned long alloced_pages;
1358         unsigned long handle = 0;
1359         unsigned int comp_len = 0;
1360         void *src, *dst, *mem;
1361         struct zcomp_strm *zstrm;
1362         struct page *page = bvec->bv_page;
1363         unsigned long element = 0;
1364         enum zram_pageflags flags = 0;
1365
1366         mem = kmap_atomic(page);
1367         if (page_same_filled(mem, &element)) {
1368                 kunmap_atomic(mem);
1369                 /* Free memory associated with this sector now. */
1370                 flags = ZRAM_SAME;
1371                 atomic64_inc(&zram->stats.same_pages);
1372                 goto out;
1373         }
1374         kunmap_atomic(mem);
1375
1376 compress_again:
1377         zstrm = zcomp_stream_get(zram->comp);
1378         src = kmap_atomic(page);
1379         ret = zcomp_compress(zstrm, src, &comp_len);
1380         kunmap_atomic(src);
1381
1382         if (unlikely(ret)) {
1383                 zcomp_stream_put(zram->comp);
1384                 pr_err("Compression failed! err=%d\n", ret);
1385                 zs_free(zram->mem_pool, handle);
1386                 return ret;
1387         }
1388
1389         if (comp_len >= huge_class_size)
1390                 comp_len = PAGE_SIZE;
1391         /*
1392          * handle allocation has 2 paths:
1393          * a) fast path is executed with preemption disabled (for
1394          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1395          *  since we can't sleep;
1396          * b) slow path enables preemption and attempts to allocate
1397          *  the page with __GFP_DIRECT_RECLAIM bit set. We have to
1398          *  put the per-cpu compression stream and, thus, re-do
1399          *  the compression once the handle is allocated.
1400          *
1401          * If we have a 'non-null' handle here then we are coming
1402          * from the slow path and the handle has already been allocated.
1403          */
1404         if (!handle)
1405                 handle = zs_malloc(zram->mem_pool, comp_len,
1406                                 __GFP_KSWAPD_RECLAIM |
1407                                 __GFP_NOWARN |
1408                                 __GFP_HIGHMEM |
1409                                 __GFP_MOVABLE);
1410         if (!handle) {
1411                 zcomp_stream_put(zram->comp);
1412                 atomic64_inc(&zram->stats.writestall);
1413                 handle = zs_malloc(zram->mem_pool, comp_len,
1414                                 GFP_NOIO | __GFP_HIGHMEM |
1415                                 __GFP_MOVABLE);
1416                 if (handle)
1417                         goto compress_again;
1418                 return -ENOMEM;
1419         }
1420
1421         alloced_pages = zs_get_total_pages(zram->mem_pool);
1422         update_used_max(zram, alloced_pages);
1423
1424         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1425                 zcomp_stream_put(zram->comp);
1426                 zs_free(zram->mem_pool, handle);
1427                 return -ENOMEM;
1428         }
1429
1430         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1431
1432         src = zstrm->buffer;
1433         if (comp_len == PAGE_SIZE)
1434                 src = kmap_atomic(page);
1435         memcpy(dst, src, comp_len);
1436         if (comp_len == PAGE_SIZE)
1437                 kunmap_atomic(src);
1438
1439         zcomp_stream_put(zram->comp);
1440         zs_unmap_object(zram->mem_pool, handle);
1441         atomic64_add(comp_len, &zram->stats.compr_data_size);
1442 out:
1443         /*
1444          * Free memory associated with this sector
1445          * before overwriting unused sectors.
1446          */
1447         zram_slot_lock(zram, index);
1448         zram_free_page(zram, index);
1449
1450         if (comp_len == PAGE_SIZE) {
1451                 zram_set_flag(zram, index, ZRAM_HUGE);
1452                 atomic64_inc(&zram->stats.huge_pages);
1453                 atomic64_inc(&zram->stats.huge_pages_since);
1454         }
1455
1456         if (flags) {
1457                 zram_set_flag(zram, index, flags);
1458                 zram_set_element(zram, index, element);
1459         }  else {
1460                 zram_set_handle(zram, index, handle);
1461                 zram_set_obj_size(zram, index, comp_len);
1462         }
1463         zram_slot_unlock(zram, index);
1464
1465         /* Update stats */
1466         atomic64_inc(&zram->stats.pages_stored);
1467         return ret;
1468 }
1469
1470 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1471                                 u32 index, int offset, struct bio *bio)
1472 {
1473         int ret;
1474         struct page *page = NULL;
1475         void *src;
1476         struct bio_vec vec;
1477
1478         vec = *bvec;
1479         if (is_partial_io(bvec)) {
1480                 void *dst;
1481                 /*
1482                  * This is a partial IO. We need to read the full page
1483                  * before writing the changes.
1484                  */
1485                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1486                 if (!page)
1487                         return -ENOMEM;
1488
1489                 ret = __zram_bvec_read(zram, page, index, bio, true);
1490                 if (ret)
1491                         goto out;
1492
1493                 src = kmap_atomic(bvec->bv_page);
1494                 dst = kmap_atomic(page);
1495                 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1496                 kunmap_atomic(dst);
1497                 kunmap_atomic(src);
1498
1499                 vec.bv_page = page;
1500                 vec.bv_len = PAGE_SIZE;
1501                 vec.bv_offset = 0;
1502         }
1503
1504         ret = __zram_bvec_write(zram, &vec, index, bio);
1505 out:
1506         if (is_partial_io(bvec))
1507                 __free_page(page);
1508         return ret;
1509 }
1510
1511 /*
1512  * zram_bio_discard - handler for discard requests
1513  * @index: physical block index in PAGE_SIZE units
1514  * @offset: byte offset within physical block
1515  */
1516 static void zram_bio_discard(struct zram *zram, u32 index,
1517                              int offset, struct bio *bio)
1518 {
1519         size_t n = bio->bi_iter.bi_size;
1520
1521         /*
1522          * zram manages data in physical block size units. Because the logical
1523          * block size isn't identical to the physical block size on some
1524          * architectures, we could get a discard request pointing to a specific
1525          * offset within a certain physical block.  Although we could handle this
1526          * request by reading that physical block, decompressing, partially
1527          * zeroing, re-compressing and then re-storing it, this isn't reasonable
1528          * because our intent with a discard request is to save memory.  So
1529          * skipping this logical block is appropriate here.
1530          */
1531         if (offset) {
1532                 if (n <= (PAGE_SIZE - offset))
1533                         return;
1534
1535                 n -= (PAGE_SIZE - offset);
1536                 index++;
1537         }
1538
1539         while (n >= PAGE_SIZE) {
1540                 zram_slot_lock(zram, index);
1541                 zram_free_page(zram, index);
1542                 zram_slot_unlock(zram, index);
1543                 atomic64_inc(&zram->stats.notify_free);
1544                 index++;
1545                 n -= PAGE_SIZE;
1546         }
1547 }
1548
1549 /*
1550  * Returns a negative errno if there is a problem. Otherwise returns 0 or 1:
1551  * 0 if the IO request was completed synchronously,
1552  * 1 if the IO request was successfully submitted (asynchronously).
1553  */
1554 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1555                         int offset, unsigned int op, struct bio *bio)
1556 {
1557         int ret;
1558
1559         if (!op_is_write(op)) {
1560                 atomic64_inc(&zram->stats.num_reads);
1561                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1562                 flush_dcache_page(bvec->bv_page);
1563         } else {
1564                 atomic64_inc(&zram->stats.num_writes);
1565                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1566         }
1567
1568         zram_slot_lock(zram, index);
1569         zram_accessed(zram, index);
1570         zram_slot_unlock(zram, index);
1571
1572         if (unlikely(ret < 0)) {
1573                 if (!op_is_write(op))
1574                         atomic64_inc(&zram->stats.failed_reads);
1575                 else
1576                         atomic64_inc(&zram->stats.failed_writes);
1577         }
1578
1579         return ret;
1580 }
1581
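/*
 * Split the bio into page-sized (or smaller, for partial pages) chunks and
 * hand each one to zram_bvec_rw(). Discard and write-zeroes requests are
 * handled separately via zram_bio_discard().
 */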
1582 static void __zram_make_request(struct zram *zram, struct bio *bio)
1583 {
1584         int offset;
1585         u32 index;
1586         struct bio_vec bvec;
1587         struct bvec_iter iter;
1588         unsigned long start_time;
1589
1590         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1591         offset = (bio->bi_iter.bi_sector &
1592                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1593
1594         switch (bio_op(bio)) {
1595         case REQ_OP_DISCARD:
1596         case REQ_OP_WRITE_ZEROES:
1597                 zram_bio_discard(zram, index, offset, bio);
1598                 bio_endio(bio);
1599                 return;
1600         default:
1601                 break;
1602         }
1603
1604         start_time = bio_start_io_acct(bio);
1605         bio_for_each_segment(bvec, bio, iter) {
1606                 struct bio_vec bv = bvec;
1607                 unsigned int unwritten = bvec.bv_len;
1608
1609                 do {
1610                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1611                                                         unwritten);
1612                         if (zram_bvec_rw(zram, &bv, index, offset,
1613                                          bio_op(bio), bio) < 0) {
1614                                 bio->bi_status = BLK_STS_IOERR;
1615                                 break;
1616                         }
1617
1618                         bv.bv_offset += bv.bv_len;
1619                         unwritten -= bv.bv_len;
1620
1621                         update_position(&index, &offset, &bv);
1622                 } while (unwritten);
1623         }
1624         bio_end_io_acct(bio, start_time);
1625         bio_endio(bio);
1626 }
1627
1628 /*
1629  * Handler function for all zram I/O requests.
1630  */
1631 static void zram_submit_bio(struct bio *bio)
1632 {
1633         struct zram *zram = bio->bi_bdev->bd_disk->private_data;
1634
1635         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1636                                         bio->bi_iter.bi_size)) {
1637                 atomic64_inc(&zram->stats.invalid_io);
1638                 bio_io_error(bio);
1639                 return;
1640         }
1641
1642         __zram_make_request(zram, bio);
1643 }
1644
1645 static void zram_slot_free_notify(struct block_device *bdev,
1646                                 unsigned long index)
1647 {
1648         struct zram *zram;
1649
1650         zram = bdev->bd_disk->private_data;
1651
1652         atomic64_inc(&zram->stats.notify_free);
1653         if (!zram_slot_trylock(zram, index)) {
1654                 atomic64_inc(&zram->stats.miss_free);
1655                 return;
1656         }
1657
1658         zram_free_page(zram, index);
1659         zram_slot_unlock(zram, index);
1660 }
1661
1662 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1663                        struct page *page, unsigned int op)
1664 {
1665         int offset, ret;
1666         u32 index;
1667         struct zram *zram;
1668         struct bio_vec bv;
1669         unsigned long start_time;
1670
1671         if (PageTransHuge(page))
1672                 return -ENOTSUPP;
1673         zram = bdev->bd_disk->private_data;
1674
1675         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1676                 atomic64_inc(&zram->stats.invalid_io);
1677                 ret = -EINVAL;
1678                 goto out;
1679         }
1680
1681         index = sector >> SECTORS_PER_PAGE_SHIFT;
1682         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1683
1684         bv.bv_page = page;
1685         bv.bv_len = PAGE_SIZE;
1686         bv.bv_offset = 0;
1687
1688         start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
1689         ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1690         disk_end_io_acct(bdev->bd_disk, op, start_time);
1691 out:
1692         /*
1693          * If the I/O fails, just return the error (i.e. non-zero) without
1694          * calling page_endio.
1695          * This makes the upper-layer callers of rw_page (e.g. swap_readpage,
1696          * __swap_writepage) resubmit the I/O as a bio request, and
1697          * bio->bi_end_io then handles the error
1698          * (e.g. SetPageError, set_page_dirty and other cleanup).
1699          */
1700         if (unlikely(ret < 0))
1701                 return ret;
1702
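        /*
         * ret == 0: the request completed synchronously, end the page I/O
         * here. ret == 1: the request was submitted asynchronously and the
         * bio completion path is expected to end the page I/O later.
         */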
1703         switch (ret) {
1704         case 0:
1705                 page_endio(page, op_is_write(op), 0);
1706                 break;
1707         case 1:
1708                 ret = 0;
1709                 break;
1710         default:
1711                 WARN_ON(1);
1712         }
1713         return ret;
1714 }
1715
1716 static void zram_reset_device(struct zram *zram)
1717 {
1718         struct zcomp *comp;
1719         u64 disksize;
1720
1721         down_write(&zram->init_lock);
1722
1723         zram->limit_pages = 0;
1724
1725         if (!init_done(zram)) {
1726                 up_write(&zram->init_lock);
1727                 return;
1728         }
1729
1730         comp = zram->comp;
1731         disksize = zram->disksize;
1732         zram->disksize = 0;
1733
1734         set_capacity_and_notify(zram->disk, 0);
1735         part_stat_set_all(zram->disk->part0, 0);
1736
1737         /* I/O operations on all CPUs are done, so it's safe to free */
1738         zram_meta_free(zram, disksize);
1739         memset(&zram->stats, 0, sizeof(zram->stats));
1740         zcomp_destroy(comp);
1741         reset_bdev(zram);
1742
1743         up_write(&zram->init_lock);
1744 }
1745
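/*
 * The disk size is configured through sysfs before the device can be used,
 * e.g. "echo 1G > /sys/block/zram0/disksize" (memparse() accepts K/M/G
 * suffixes). The value is rounded up to a PAGE_SIZE multiple below.
 */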
1746 static ssize_t disksize_store(struct device *dev,
1747                 struct device_attribute *attr, const char *buf, size_t len)
1748 {
1749         u64 disksize;
1750         struct zcomp *comp;
1751         struct zram *zram = dev_to_zram(dev);
1752         int err;
1753
1754         disksize = memparse(buf, NULL);
1755         if (!disksize)
1756                 return -EINVAL;
1757
1758         down_write(&zram->init_lock);
1759         if (init_done(zram)) {
1760                 pr_info("Cannot change disksize for initialized device\n");
1761                 err = -EBUSY;
1762                 goto out_unlock;
1763         }
1764
1765         disksize = PAGE_ALIGN(disksize);
1766         if (!zram_meta_alloc(zram, disksize)) {
1767                 err = -ENOMEM;
1768                 goto out_unlock;
1769         }
1770
1771         comp = zcomp_create(zram->compressor);
1772         if (IS_ERR(comp)) {
1773                 pr_err("Cannot initialise %s compressing backend\n",
1774                                 zram->compressor);
1775                 err = PTR_ERR(comp);
1776                 goto out_free_meta;
1777         }
1778
1779         zram->comp = comp;
1780         zram->disksize = disksize;
1781         set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
1782         up_write(&zram->init_lock);
1783
1784         return len;
1785
1786 out_free_meta:
1787         zram_meta_free(zram, disksize);
1788 out_unlock:
1789         up_write(&zram->init_lock);
1790         return err;
1791 }
1792
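/*
 * Writing any non-zero value to the "reset" attribute, e.g.
 * "echo 1 > /sys/block/zram0/reset", tears the device back down to its
 * uninitialized state, provided nobody currently holds it open.
 */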
1793 static ssize_t reset_store(struct device *dev,
1794                 struct device_attribute *attr, const char *buf, size_t len)
1795 {
1796         int ret;
1797         unsigned short do_reset;
1798         struct zram *zram;
1799         struct block_device *bdev;
1800
1801         ret = kstrtou16(buf, 10, &do_reset);
1802         if (ret)
1803                 return ret;
1804
1805         if (!do_reset)
1806                 return -EINVAL;
1807
1808         zram = dev_to_zram(dev);
1809         bdev = zram->disk->part0;
1810
1811         mutex_lock(&bdev->bd_disk->open_mutex);
1812         /* Do not reset an active or claimed device */
1813         if (bdev->bd_openers || zram->claim) {
1814                 mutex_unlock(&bdev->bd_disk->open_mutex);
1815                 return -EBUSY;
1816         }
1817
1818         /* From now on, no one can open /dev/zram[0-9] */
1819         zram->claim = true;
1820         mutex_unlock(&bdev->bd_disk->open_mutex);
1821
1822         /* Make sure all pending I/O is finished */
1823         sync_blockdev(bdev);
1824         zram_reset_device(zram);
1825
1826         mutex_lock(&bdev->bd_disk->open_mutex);
1827         zram->claim = false;
1828         mutex_unlock(&bdev->bd_disk->open_mutex);
1829
1830         return len;
1831 }
1832
1833 static int zram_open(struct block_device *bdev, fmode_t mode)
1834 {
1835         int ret = 0;
1836         struct zram *zram;
1837
1838         WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
1839
1840         zram = bdev->bd_disk->private_data;
1841         /* zram was claimed for reset, so the open request fails */
1842         if (zram->claim)
1843                 ret = -EBUSY;
1844
1845         return ret;
1846 }
1847
1848 static const struct block_device_operations zram_devops = {
1849         .open = zram_open,
1850         .submit_bio = zram_submit_bio,
1851         .swap_slot_free_notify = zram_slot_free_notify,
1852         .rw_page = zram_rw_page,
1853         .owner = THIS_MODULE
1854 };
1855
1856 static const struct block_device_operations zram_wb_devops = {
1857         .open = zram_open,
1858         .submit_bio = zram_submit_bio,
1859         .swap_slot_free_notify = zram_slot_free_notify,
1860         .owner = THIS_MODULE
1861 };
1862
1863 static DEVICE_ATTR_WO(compact);
1864 static DEVICE_ATTR_RW(disksize);
1865 static DEVICE_ATTR_RO(initstate);
1866 static DEVICE_ATTR_WO(reset);
1867 static DEVICE_ATTR_WO(mem_limit);
1868 static DEVICE_ATTR_WO(mem_used_max);
1869 static DEVICE_ATTR_WO(idle);
1870 static DEVICE_ATTR_RW(max_comp_streams);
1871 static DEVICE_ATTR_RW(comp_algorithm);
1872 #ifdef CONFIG_ZRAM_WRITEBACK
1873 static DEVICE_ATTR_RW(backing_dev);
1874 static DEVICE_ATTR_WO(writeback);
1875 static DEVICE_ATTR_RW(writeback_limit);
1876 static DEVICE_ATTR_RW(writeback_limit_enable);
1877 #endif
1878
1879 static struct attribute *zram_disk_attrs[] = {
1880         &dev_attr_disksize.attr,
1881         &dev_attr_initstate.attr,
1882         &dev_attr_reset.attr,
1883         &dev_attr_compact.attr,
1884         &dev_attr_mem_limit.attr,
1885         &dev_attr_mem_used_max.attr,
1886         &dev_attr_idle.attr,
1887         &dev_attr_max_comp_streams.attr,
1888         &dev_attr_comp_algorithm.attr,
1889 #ifdef CONFIG_ZRAM_WRITEBACK
1890         &dev_attr_backing_dev.attr,
1891         &dev_attr_writeback.attr,
1892         &dev_attr_writeback_limit.attr,
1893         &dev_attr_writeback_limit_enable.attr,
1894 #endif
1895         &dev_attr_io_stat.attr,
1896         &dev_attr_mm_stat.attr,
1897 #ifdef CONFIG_ZRAM_WRITEBACK
1898         &dev_attr_bd_stat.attr,
1899 #endif
1900         &dev_attr_debug_stat.attr,
1901         NULL,
1902 };
1903
1904 static const struct attribute_group zram_disk_attr_group = {
1905         .attrs = zram_disk_attrs,
1906 };
1907
1908 static const struct attribute_group *zram_disk_attr_groups[] = {
1909         &zram_disk_attr_group,
1910         NULL,
1911 };
1912
1913 /*
1914  * Allocate and initialize a new zram device. The function returns
1915  * a device_id >= 0 upon success, and a negative value otherwise.
1916  */
1917 static int zram_add(void)
1918 {
1919         struct zram *zram;
1920         int ret, device_id;
1921
1922         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1923         if (!zram)
1924                 return -ENOMEM;
1925
1926         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1927         if (ret < 0)
1928                 goto out_free_dev;
1929         device_id = ret;
1930
1931         init_rwsem(&zram->init_lock);
1932 #ifdef CONFIG_ZRAM_WRITEBACK
1933         spin_lock_init(&zram->wb_limit_lock);
1934 #endif
1935
1936         /* gendisk structure */
1937         zram->disk = blk_alloc_disk(NUMA_NO_NODE);
1938         if (!zram->disk) {
1939                 pr_err("Error allocating disk structure for device %d\n",
1940                         device_id);
1941                 ret = -ENOMEM;
1942                 goto out_free_idr;
1943         }
1944
1945         zram->disk->major = zram_major;
1946         zram->disk->first_minor = device_id;
1947         zram->disk->minors = 1;
1948         zram->disk->fops = &zram_devops;
1949         zram->disk->private_data = zram;
1950         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1951
1952         /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
1953         set_capacity(zram->disk, 0);
1954         /* zram devices sort of resemble non-rotational disks */
1955         blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1956         blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1957
1958         /*
1959          * To ensure that we always get PAGE_SIZE-aligned
1960          * and n*PAGE_SIZE-sized I/O requests.
1961          */
1962         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1963         blk_queue_logical_block_size(zram->disk->queue,
1964                                         ZRAM_LOGICAL_BLOCK_SIZE);
1965         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1966         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1967         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1968         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1969         blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1970
1971         /*
1972          * zram_bio_discard() will clear all logical blocks if the logical block
1973          * size is identical to the physical block size (PAGE_SIZE). But if they
1974          * differ, we will skip discarding the parts of logical blocks that fall
1975          * in the part of the request range which isn't aligned to the physical
1976          * block size, so we can't guarantee that all discarded logical blocks
1977          * are zeroed.
1978          */
1979         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1980                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1981
1982         blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
1983         ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
1984         if (ret)
1985                 goto out_cleanup_disk;
1986
1987         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1988
1989         zram_debugfs_register(zram);
1990         pr_info("Added device: %s\n", zram->disk->disk_name);
1991         return device_id;
1992
1993 out_cleanup_disk:
1994         blk_cleanup_disk(zram->disk);
1995 out_free_idr:
1996         idr_remove(&zram_index_idr, device_id);
1997 out_free_dev:
1998         kfree(zram);
1999         return ret;
2000 }
2001
2002 static int zram_remove(struct zram *zram)
2003 {
2004         struct block_device *bdev = zram->disk->part0;
2005         bool claimed;
2006
2007         mutex_lock(&bdev->bd_disk->open_mutex);
2008         if (bdev->bd_openers) {
2009                 mutex_unlock(&bdev->bd_disk->open_mutex);
2010                 return -EBUSY;
2011         }
2012
2013         claimed = zram->claim;
2014         if (!claimed)
2015                 zram->claim = true;
2016         mutex_unlock(&bdev->bd_disk->open_mutex);
2017
2018         zram_debugfs_unregister(zram);
2019
2020         if (claimed) {
2021                 /*
2022                  * If we were claimed by reset_store(), del_gendisk() will
2023                  * wait until reset_store() is done, so there is nothing to do here.
2024                  */
2025                 ;
2026         } else {
2027                 /* Make sure all pending I/O is finished */
2028                 sync_blockdev(bdev);
2029                 zram_reset_device(zram);
2030         }
2031
2032         pr_info("Removed device: %s\n", zram->disk->disk_name);
2033
2034         del_gendisk(zram->disk);
2035
2036         /* del_gendisk drains pending reset_store */
2037         WARN_ON_ONCE(claimed && zram->claim);
2038
2039         /*
2040          * disksize_store() may be called in between zram_reset_device()
2041          * and del_gendisk(), so run the last reset to avoid leaking
2042          * anything allocated with disksize_store()
2043          */
2044         zram_reset_device(zram);
2045
2046         blk_cleanup_disk(zram->disk);
2047         kfree(zram);
2048         return 0;
2049 }
2050
2051 /* zram-control sysfs attributes */
2052
2053 /*
2054  * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
2055  * sense that reading from this file does alter the state of your system -- it
2056  * creates a new uninitialized zram device and returns that device's
2057  * device_id (or an error code if it fails to create a new device).
2058  */
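/*
 * For example, "cat /sys/class/zram-control/hot_add" might print "4",
 * meaning an uninitialized /dev/zram4 has just been created.
 */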
2059 static ssize_t hot_add_show(struct class *class,
2060                         struct class_attribute *attr,
2061                         char *buf)
2062 {
2063         int ret;
2064
2065         mutex_lock(&zram_index_mutex);
2066         ret = zram_add();
2067         mutex_unlock(&zram_index_mutex);
2068
2069         if (ret < 0)
2070                 return ret;
2071         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2072 }
2073 static struct class_attribute class_attr_hot_add =
2074         __ATTR(hot_add, 0400, hot_add_show, NULL);
2075
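/*
 * hot_remove is write-only: writing a device_id, e.g.
 * "echo 4 > /sys/class/zram-control/hot_remove", removes the matching
 * device as long as nobody currently holds it open.
 */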
2076 static ssize_t hot_remove_store(struct class *class,
2077                         struct class_attribute *attr,
2078                         const char *buf,
2079                         size_t count)
2080 {
2081         struct zram *zram;
2082         int ret, dev_id;
2083
2084         /* dev_id is gendisk->first_minor, which is `int' */
2085         ret = kstrtoint(buf, 10, &dev_id);
2086         if (ret)
2087                 return ret;
2088         if (dev_id < 0)
2089                 return -EINVAL;
2090
2091         mutex_lock(&zram_index_mutex);
2092
2093         zram = idr_find(&zram_index_idr, dev_id);
2094         if (zram) {
2095                 ret = zram_remove(zram);
2096                 if (!ret)
2097                         idr_remove(&zram_index_idr, dev_id);
2098         } else {
2099                 ret = -ENODEV;
2100         }
2101
2102         mutex_unlock(&zram_index_mutex);
2103         return ret ? ret : count;
2104 }
2105 static CLASS_ATTR_WO(hot_remove);
2106
2107 static struct attribute *zram_control_class_attrs[] = {
2108         &class_attr_hot_add.attr,
2109         &class_attr_hot_remove.attr,
2110         NULL,
2111 };
2112 ATTRIBUTE_GROUPS(zram_control_class);
2113
2114 static struct class zram_control_class = {
2115         .name           = "zram-control",
2116         .owner          = THIS_MODULE,
2117         .class_groups   = zram_control_class_groups,
2118 };
2119
2120 static int zram_remove_cb(int id, void *ptr, void *data)
2121 {
2122         WARN_ON_ONCE(zram_remove(ptr));
2123         return 0;
2124 }
2125
2126 static void destroy_devices(void)
2127 {
2128         class_unregister(&zram_control_class);
2129         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2130         zram_debugfs_destroy();
2131         idr_destroy(&zram_index_idr);
2132         unregister_blkdev(zram_major, "zram");
2133         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2134 }
2135
2136 static int __init zram_init(void)
2137 {
2138         int ret;
2139
2140         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2141                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
2142         if (ret < 0)
2143                 return ret;
2144
2145         ret = class_register(&zram_control_class);
2146         if (ret) {
2147                 pr_err("Unable to register zram-control class\n");
2148                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2149                 return ret;
2150         }
2151
2152         zram_debugfs_create();
2153         zram_major = register_blkdev(0, "zram");
2154         if (zram_major <= 0) {
2155                 pr_err("Unable to get major number\n");
2156                 class_unregister(&zram_control_class);
2157                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2158                 return -EBUSY;
2159         }
2160
2161         while (num_devices != 0) {
2162                 mutex_lock(&zram_index_mutex);
2163                 ret = zram_add();
2164                 mutex_unlock(&zram_index_mutex);
2165                 if (ret < 0)
2166                         goto out_error;
2167                 num_devices--;
2168         }
2169
2170         return 0;
2171
2172 out_error:
2173         destroy_devices();
2174         return ret;
2175 }
2176
2177 static void __exit zram_exit(void)
2178 {
2179         destroy_devices();
2180 }
2181
2182 module_init(zram_init);
2183 module_exit(zram_exit);
2184
2185 module_param(num_devices, uint, 0);
2186 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
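/*
 * e.g. "modprobe zram num_devices=4" pre-creates /dev/zram0../dev/zram3;
 * further devices can still be added at runtime via hot_add.
 */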
2187
2188 MODULE_LICENSE("Dual BSD/GPL");
2189 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2190 MODULE_DESCRIPTION("Compressed RAM Block Device");