drivers/block/zram/zram_drv.c
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/backing-dev.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/debugfs.h>
34 #include <linux/cpuhotplug.h>
35 #include <linux/part_stat.h>
36
37 #include "zram_drv.h"
38
39 static DEFINE_IDR(zram_index_idr);
40 /* idr index must be protected */
41 static DEFINE_MUTEX(zram_index_mutex);
42
43 static int zram_major;
44 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
45
46 /* Module params (documentation at end) */
47 static unsigned int num_devices = 1;
48 /*
49  * Pages that compress to a size equal to or greater than this are stored
50  * uncompressed in memory.
51  */
52 static size_t huge_class_size;
53
54 static const struct block_device_operations zram_devops;
55 static const struct block_device_operations zram_wb_devops;
56
57 static void zram_free_page(struct zram *zram, size_t index);
58 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
59                                 u32 index, int offset, struct bio *bio);
60
61
62 static int zram_slot_trylock(struct zram *zram, u32 index)
63 {
64         return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
65 }
66
67 static void zram_slot_lock(struct zram *zram, u32 index)
68 {
69         bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
70 }
71
72 static void zram_slot_unlock(struct zram *zram, u32 index)
73 {
74         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
75 }
76
77 static inline bool init_done(struct zram *zram)
78 {
79         return zram->disksize;
80 }
81
82 static inline struct zram *dev_to_zram(struct device *dev)
83 {
84         return (struct zram *)dev_to_disk(dev)->private_data;
85 }
86
87 static unsigned long zram_get_handle(struct zram *zram, u32 index)
88 {
89         return zram->table[index].handle;
90 }
91
92 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
93 {
94         zram->table[index].handle = handle;
95 }
96
97 /* flag operations require the table entry's bit_spin_lock() to be held */
98 static bool zram_test_flag(struct zram *zram, u32 index,
99                         enum zram_pageflags flag)
100 {
101         return zram->table[index].flags & BIT(flag);
102 }
103
104 static void zram_set_flag(struct zram *zram, u32 index,
105                         enum zram_pageflags flag)
106 {
107         zram->table[index].flags |= BIT(flag);
108 }
109
110 static void zram_clear_flag(struct zram *zram, u32 index,
111                         enum zram_pageflags flag)
112 {
113         zram->table[index].flags &= ~BIT(flag);
114 }
115
116 static inline void zram_set_element(struct zram *zram, u32 index,
117                         unsigned long element)
118 {
119         zram->table[index].element = element;
120 }
121
122 static unsigned long zram_get_element(struct zram *zram, u32 index)
123 {
124         return zram->table[index].element;
125 }
126
127 static size_t zram_get_obj_size(struct zram *zram, u32 index)
128 {
129         return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
130 }
131
132 static void zram_set_obj_size(struct zram *zram,
133                                         u32 index, size_t size)
134 {
135         unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
136
137         zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
138 }
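
/*
 * Layout note, inferred only from the helpers above rather than any
 * external spec: each table entry's ->flags word packs the compressed
 * object size into its low ZRAM_FLAG_SHIFT bits, while the zram_pageflags
 * bits (ZRAM_LOCK, ZRAM_SAME, ZRAM_WB, ...) live at and above
 * ZRAM_FLAG_SHIFT.
 */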
139
140 static inline bool zram_allocated(struct zram *zram, u32 index)
141 {
142         return zram_get_obj_size(zram, index) ||
143                         zram_test_flag(zram, index, ZRAM_SAME) ||
144                         zram_test_flag(zram, index, ZRAM_WB);
145 }
146
147 #if PAGE_SIZE != 4096
148 static inline bool is_partial_io(struct bio_vec *bvec)
149 {
150         return bvec->bv_len != PAGE_SIZE;
151 }
152 #else
153 static inline bool is_partial_io(struct bio_vec *bvec)
154 {
155         return false;
156 }
157 #endif
158
159 /*
160  * Check if request is within bounds and aligned on zram logical blocks.
161  */
162 static inline bool valid_io_request(struct zram *zram,
163                 sector_t start, unsigned int size)
164 {
165         u64 end, bound;
166
167         /* unaligned request */
168         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
169                 return false;
170         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
171                 return false;
172
173         end = start + (size >> SECTOR_SHIFT);
174         bound = zram->disksize >> SECTOR_SHIFT;
175         /* out of range */
176         if (unlikely(start >= bound || end > bound || start > end))
177                 return false;
178
179         /* I/O request is valid */
180         return true;
181 }
182
183 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
184 {
185         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
186         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
187 }
188
189 static inline void update_used_max(struct zram *zram,
190                                         const unsigned long pages)
191 {
192         unsigned long old_max, cur_max;
193
194         old_max = atomic_long_read(&zram->stats.max_used_pages);
195
196         do {
197                 cur_max = old_max;
198                 if (pages > cur_max)
199                         old_max = atomic_long_cmpxchg(
200                                 &zram->stats.max_used_pages, cur_max, pages);
201         } while (old_max != cur_max);
202 }
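
/*
 * Note on update_used_max() above: this is a lock-free "racy maximum"
 * update. atomic_long_cmpxchg() installs the new value only if
 * max_used_pages still holds the value that was sampled; if another CPU
 * changed it in the meantime the loop re-reads and retries, and it stops
 * early once the stored maximum is already >= pages.
 */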
203
204 static inline void zram_fill_page(void *ptr, unsigned long len,
205                                         unsigned long value)
206 {
207         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
208         memset_l(ptr, value, len / sizeof(unsigned long));
209 }
210
211 static bool page_same_filled(void *ptr, unsigned long *element)
212 {
213         unsigned long *page;
214         unsigned long val;
215         unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
216
217         page = (unsigned long *)ptr;
218         val = page[0];
219
220         if (val != page[last_pos])
221                 return false;
222
223         for (pos = 1; pos < last_pos; pos++) {
224                 if (val != page[pos])
225                         return false;
226         }
227
228         *element = val;
229
230         return true;
231 }
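
/*
 * A short note, based only on the function above: a page is "same filled"
 * when every machine word in it equals the first word (the last word is
 * checked first as a cheap early-out). Such pages, zero-filled pages being
 * the common case, are later stored as just that one word via ZRAM_SAME
 * instead of being compressed.
 */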
232
233 static ssize_t initstate_show(struct device *dev,
234                 struct device_attribute *attr, char *buf)
235 {
236         u32 val;
237         struct zram *zram = dev_to_zram(dev);
238
239         down_read(&zram->init_lock);
240         val = init_done(zram);
241         up_read(&zram->init_lock);
242
243         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
244 }
245
246 static ssize_t disksize_show(struct device *dev,
247                 struct device_attribute *attr, char *buf)
248 {
249         struct zram *zram = dev_to_zram(dev);
250
251         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
252 }
253
254 static ssize_t mem_limit_store(struct device *dev,
255                 struct device_attribute *attr, const char *buf, size_t len)
256 {
257         u64 limit;
258         char *tmp;
259         struct zram *zram = dev_to_zram(dev);
260
261         limit = memparse(buf, &tmp);
262         if (buf == tmp) /* no chars parsed, invalid input */
263                 return -EINVAL;
264
265         down_write(&zram->init_lock);
266         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
267         up_write(&zram->init_lock);
268
269         return len;
270 }
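
/*
 * Illustrative usage, assuming a device named zram0 (not part of this
 * file): mem_limit goes through memparse(), so suffixed values work, e.g.
 *
 *     echo 1G > /sys/block/zram0/mem_limit
 *
 * and writing 0 clears limit_pages, which removes the cap entirely (see
 * the limit_pages check in __zram_bvec_write()).
 */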
271
272 static ssize_t mem_used_max_store(struct device *dev,
273                 struct device_attribute *attr, const char *buf, size_t len)
274 {
275         int err;
276         unsigned long val;
277         struct zram *zram = dev_to_zram(dev);
278
279         err = kstrtoul(buf, 10, &val);
280         if (err || val != 0)
281                 return -EINVAL;
282
283         down_read(&zram->init_lock);
284         if (init_done(zram)) {
285                 atomic_long_set(&zram->stats.max_used_pages,
286                                 zs_get_total_pages(zram->mem_pool));
287         }
288         up_read(&zram->init_lock);
289
290         return len;
291 }
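
/*
 * Illustrative usage, assuming a device named zram0: only "0" is accepted,
 * and it resets the max_used_pages watermark to the pool's current size:
 *
 *     echo 0 > /sys/block/zram0/mem_used_max
 */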
292
293 /*
294  * Mark all pages that are older than or equal to cutoff as IDLE.
295  * Callers should hold the zram init lock in read mode.
296  */
297 static void mark_idle(struct zram *zram, ktime_t cutoff)
298 {
299         int is_idle = 1;
300         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
301         int index;
302
303         for (index = 0; index < nr_pages; index++) {
304                 /*
305                  * Do not mark a ZRAM_UNDER_WB slot as ZRAM_IDLE, to close a race.
306                  * See the comment in writeback_store.
307                  */
308                 zram_slot_lock(zram, index);
309                 if (zram_allocated(zram, index) &&
310                                 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
311 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
312                         is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
313 #endif
314                         if (is_idle)
315                                 zram_set_flag(zram, index, ZRAM_IDLE);
316                 }
317                 zram_slot_unlock(zram, index);
318         }
319 }
320
321 static ssize_t idle_store(struct device *dev,
322                 struct device_attribute *attr, const char *buf, size_t len)
323 {
324         struct zram *zram = dev_to_zram(dev);
325         ktime_t cutoff_time = 0;
326         ssize_t rv = -EINVAL;
327
328         if (!sysfs_streq(buf, "all")) {
329                 /*
330                  * If it did not parse as 'all', try to treat it as an integer when
331                  * we have memory tracking enabled.
332                  */
333                 u64 age_sec;
334
335                 if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
336                         cutoff_time = ktime_sub(ktime_get_boottime(),
337                                         ns_to_ktime(age_sec * NSEC_PER_SEC));
338                 else
339                         goto out;
340         }
341
342         down_read(&zram->init_lock);
343         if (!init_done(zram))
344                 goto out_unlock;
345
346         /* A cutoff_time of 0 marks everything as idle; this is the "all" behavior */
347         mark_idle(zram, cutoff_time);
348         rv = len;
349
350 out_unlock:
351         up_read(&zram->init_lock);
352 out:
353         return rv;
354 }
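
/*
 * Illustrative usage, assuming a device named zram0: marking slots idle is
 * the first half of idle writeback, e.g.
 *
 *     echo all > /sys/block/zram0/idle
 *
 * or, with CONFIG_ZRAM_MEMORY_TRACKING, an age in seconds such as
 *
 *     echo 3600 > /sys/block/zram0/idle
 *
 * to mark only slots that have not been accessed within the last hour.
 */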
355
356 #ifdef CONFIG_ZRAM_WRITEBACK
357 static ssize_t writeback_limit_enable_store(struct device *dev,
358                 struct device_attribute *attr, const char *buf, size_t len)
359 {
360         struct zram *zram = dev_to_zram(dev);
361         u64 val;
362         ssize_t ret = -EINVAL;
363
364         if (kstrtoull(buf, 10, &val))
365                 return ret;
366
367         down_read(&zram->init_lock);
368         spin_lock(&zram->wb_limit_lock);
369         zram->wb_limit_enable = val;
370         spin_unlock(&zram->wb_limit_lock);
371         up_read(&zram->init_lock);
372         ret = len;
373
374         return ret;
375 }
376
377 static ssize_t writeback_limit_enable_show(struct device *dev,
378                 struct device_attribute *attr, char *buf)
379 {
380         bool val;
381         struct zram *zram = dev_to_zram(dev);
382
383         down_read(&zram->init_lock);
384         spin_lock(&zram->wb_limit_lock);
385         val = zram->wb_limit_enable;
386         spin_unlock(&zram->wb_limit_lock);
387         up_read(&zram->init_lock);
388
389         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
390 }
391
392 static ssize_t writeback_limit_store(struct device *dev,
393                 struct device_attribute *attr, const char *buf, size_t len)
394 {
395         struct zram *zram = dev_to_zram(dev);
396         u64 val;
397         ssize_t ret = -EINVAL;
398
399         if (kstrtoull(buf, 10, &val))
400                 return ret;
401
402         down_read(&zram->init_lock);
403         spin_lock(&zram->wb_limit_lock);
404         zram->bd_wb_limit = val;
405         spin_unlock(&zram->wb_limit_lock);
406         up_read(&zram->init_lock);
407         ret = len;
408
409         return ret;
410 }
411
412 static ssize_t writeback_limit_show(struct device *dev,
413                 struct device_attribute *attr, char *buf)
414 {
415         u64 val;
416         struct zram *zram = dev_to_zram(dev);
417
418         down_read(&zram->init_lock);
419         spin_lock(&zram->wb_limit_lock);
420         val = zram->bd_wb_limit;
421         spin_unlock(&zram->wb_limit_lock);
422         up_read(&zram->init_lock);
423
424         return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
425 }
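
/*
 * A usage sketch for the two knobs above (device name assumed): the budget
 * only takes effect while the enable flag is set, e.g.
 *
 *     echo 1 > /sys/block/zram0/writeback_limit_enable
 *     echo 100000 > /sys/block/zram0/writeback_limit
 *
 * The budget is accounted in 4K-page units: writeback_store() decrements
 * bd_wb_limit by 1UL << (PAGE_SHIFT - 12) per written page and bails out
 * with -EIO once the budget reaches zero.
 */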
426
427 static void reset_bdev(struct zram *zram)
428 {
429         struct block_device *bdev;
430
431         if (!zram->backing_dev)
432                 return;
433
434         bdev = zram->bdev;
435         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
436         /* hope filp_close flushes all outstanding IO */
437         filp_close(zram->backing_dev, NULL);
438         zram->backing_dev = NULL;
439         zram->bdev = NULL;
440         zram->disk->fops = &zram_devops;
441         kvfree(zram->bitmap);
442         zram->bitmap = NULL;
443 }
444
445 static ssize_t backing_dev_show(struct device *dev,
446                 struct device_attribute *attr, char *buf)
447 {
448         struct file *file;
449         struct zram *zram = dev_to_zram(dev);
450         char *p;
451         ssize_t ret;
452
453         down_read(&zram->init_lock);
454         file = zram->backing_dev;
455         if (!file) {
456                 memcpy(buf, "none\n", 5);
457                 up_read(&zram->init_lock);
458                 return 5;
459         }
460
461         p = file_path(file, buf, PAGE_SIZE - 1);
462         if (IS_ERR(p)) {
463                 ret = PTR_ERR(p);
464                 goto out;
465         }
466
467         ret = strlen(p);
468         memmove(buf, p, ret);
469         buf[ret++] = '\n';
470 out:
471         up_read(&zram->init_lock);
472         return ret;
473 }
474
475 static ssize_t backing_dev_store(struct device *dev,
476                 struct device_attribute *attr, const char *buf, size_t len)
477 {
478         char *file_name;
479         size_t sz;
480         struct file *backing_dev = NULL;
481         struct inode *inode;
482         struct address_space *mapping;
483         unsigned int bitmap_sz;
484         unsigned long nr_pages, *bitmap = NULL;
485         struct block_device *bdev = NULL;
486         int err;
487         struct zram *zram = dev_to_zram(dev);
488
489         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
490         if (!file_name)
491                 return -ENOMEM;
492
493         down_write(&zram->init_lock);
494         if (init_done(zram)) {
495                 pr_info("Can't setup backing device for initialized device\n");
496                 err = -EBUSY;
497                 goto out;
498         }
499
500         strlcpy(file_name, buf, PATH_MAX);
501         /* ignore trailing newline */
502         sz = strlen(file_name);
503         if (sz > 0 && file_name[sz - 1] == '\n')
504                 file_name[sz - 1] = 0x00;
505
506         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
507         if (IS_ERR(backing_dev)) {
508                 err = PTR_ERR(backing_dev);
509                 backing_dev = NULL;
510                 goto out;
511         }
512
513         mapping = backing_dev->f_mapping;
514         inode = mapping->host;
515
516         /* Only block devices are supported at the moment */
517         if (!S_ISBLK(inode->i_mode)) {
518                 err = -ENOTBLK;
519                 goto out;
520         }
521
522         bdev = blkdev_get_by_dev(inode->i_rdev,
523                         FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
524         if (IS_ERR(bdev)) {
525                 err = PTR_ERR(bdev);
526                 bdev = NULL;
527                 goto out;
528         }
529
530         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
531         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
532         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
533         if (!bitmap) {
534                 err = -ENOMEM;
535                 goto out;
536         }
537
538         reset_bdev(zram);
539
540         zram->bdev = bdev;
541         zram->backing_dev = backing_dev;
542         zram->bitmap = bitmap;
543         zram->nr_pages = nr_pages;
544          * With the writeback feature, zram does asynchronous IO, so it is no
545          * longer a synchronous device; drop the synchronous io flag. Otherwise,
546          * an upper layer (e.g., swap) could wait for IO completion rather than
547          * submit-and-return, which would make the system sluggish.
548          * Furthermore, when the IO function returns (e.g., swap_readpage), the
549          * upper layer assumes the IO is done and may free the page, while the
550          * IO is in fact still in flight, which can ultimately cause a
551          * use-after-free once the IO really completes.
552          * use-after-free when the IO is really done.
553          */
554         zram->disk->fops = &zram_wb_devops;
555         up_write(&zram->init_lock);
556
557         pr_info("setup backing device %s\n", file_name);
558         kfree(file_name);
559
560         return len;
561 out:
562         kvfree(bitmap);
563
564         if (bdev)
565                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
566
567         if (backing_dev)
568                 filp_close(backing_dev, NULL);
569
570         up_write(&zram->init_lock);
571
572         kfree(file_name);
573
574         return err;
575 }
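
/*
 * Illustrative setup order (a sketch; the partition name is hypothetical):
 * the backing store must be a block device and must be attached before the
 * zram device is initialized, since init_done() makes this -EBUSY, e.g.
 *
 *     echo /dev/sdb1 > /sys/block/zram0/backing_dev
 *     echo 4G > /sys/block/zram0/disksize
 *
 * Reading backing_dev returns "none" until a device has been attached.
 */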
576
577 static unsigned long alloc_block_bdev(struct zram *zram)
578 {
579         unsigned long blk_idx = 1;
580 retry:
581         /* skip bit 0 to avoid confusion with zram.handle == 0 */
582         blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
583         if (blk_idx == zram->nr_pages)
584                 return 0;
585
586         if (test_and_set_bit(blk_idx, zram->bitmap))
587                 goto retry;
588
589         atomic64_inc(&zram->stats.bd_count);
590         return blk_idx;
591 }
592
593 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
594 {
595         int was_set;
596
597         was_set = test_and_clear_bit(blk_idx, zram->bitmap);
598         WARN_ON_ONCE(!was_set);
599         atomic64_dec(&zram->stats.bd_count);
600 }
601
602 static void zram_page_end_io(struct bio *bio)
603 {
604         struct page *page = bio_first_page_all(bio);
605
606         page_endio(page, op_is_write(bio_op(bio)),
607                         blk_status_to_errno(bio->bi_status));
608         bio_put(bio);
609 }
610
611 /*
612  * Returns 1 if the submission is successful.
613  */
614 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
615                         unsigned long entry, struct bio *parent)
616 {
617         struct bio *bio;
618
619         bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
620                         GFP_NOIO);
621         if (!bio)
622                 return -ENOMEM;
623
624         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
625         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
626                 bio_put(bio);
627                 return -EIO;
628         }
629
630         if (!parent)
631                 bio->bi_end_io = zram_page_end_io;
632         else
633                 bio_chain(bio, parent);
634
635         submit_bio(bio);
636         return 1;
637 }
638
639 #define PAGE_WB_SIG "page_index="
640
641 #define PAGE_WRITEBACK 0
642 #define HUGE_WRITEBACK 1
643 #define IDLE_WRITEBACK 2
644
645
646 static ssize_t writeback_store(struct device *dev,
647                 struct device_attribute *attr, const char *buf, size_t len)
648 {
649         struct zram *zram = dev_to_zram(dev);
650         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
651         unsigned long index = 0;
652         struct bio bio;
653         struct bio_vec bio_vec;
654         struct page *page;
655         ssize_t ret = len;
656         int mode, err;
657         unsigned long blk_idx = 0;
658
659         if (sysfs_streq(buf, "idle"))
660                 mode = IDLE_WRITEBACK;
661         else if (sysfs_streq(buf, "huge"))
662                 mode = HUGE_WRITEBACK;
663         else {
664                 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
665                         return -EINVAL;
666
667                 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
668                                 index >= nr_pages)
669                         return -EINVAL;
670
671                 nr_pages = 1;
672                 mode = PAGE_WRITEBACK;
673         }
674
675         down_read(&zram->init_lock);
676         if (!init_done(zram)) {
677                 ret = -EINVAL;
678                 goto release_init_lock;
679         }
680
681         if (!zram->backing_dev) {
682                 ret = -ENODEV;
683                 goto release_init_lock;
684         }
685
686         page = alloc_page(GFP_KERNEL);
687         if (!page) {
688                 ret = -ENOMEM;
689                 goto release_init_lock;
690         }
691
692         for (; nr_pages != 0; index++, nr_pages--) {
693                 struct bio_vec bvec;
694
695                 bvec.bv_page = page;
696                 bvec.bv_len = PAGE_SIZE;
697                 bvec.bv_offset = 0;
698
699                 spin_lock(&zram->wb_limit_lock);
700                 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
701                         spin_unlock(&zram->wb_limit_lock);
702                         ret = -EIO;
703                         break;
704                 }
705                 spin_unlock(&zram->wb_limit_lock);
706
707                 if (!blk_idx) {
708                         blk_idx = alloc_block_bdev(zram);
709                         if (!blk_idx) {
710                                 ret = -ENOSPC;
711                                 break;
712                         }
713                 }
714
715                 zram_slot_lock(zram, index);
716                 if (!zram_allocated(zram, index))
717                         goto next;
718
719                 if (zram_test_flag(zram, index, ZRAM_WB) ||
720                                 zram_test_flag(zram, index, ZRAM_SAME) ||
721                                 zram_test_flag(zram, index, ZRAM_UNDER_WB))
722                         goto next;
723
724                 if (mode == IDLE_WRITEBACK &&
725                           !zram_test_flag(zram, index, ZRAM_IDLE))
726                         goto next;
727                 if (mode == HUGE_WRITEBACK &&
728                           !zram_test_flag(zram, index, ZRAM_HUGE))
729                         goto next;
730                 /*
731                  * Clearing ZRAM_UNDER_WB is the caller's duty.
732                  * IOW, zram_free_page never clears it.
733                  */
734                 zram_set_flag(zram, index, ZRAM_UNDER_WB);
735                 /* Needed to close the race with hugepage writeback */
736                 zram_set_flag(zram, index, ZRAM_IDLE);
737                 zram_slot_unlock(zram, index);
738                 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
739                         zram_slot_lock(zram, index);
740                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
741                         zram_clear_flag(zram, index, ZRAM_IDLE);
742                         zram_slot_unlock(zram, index);
743                         continue;
744                 }
745
746                 bio_init(&bio, zram->bdev, &bio_vec, 1,
747                          REQ_OP_WRITE | REQ_SYNC);
748                 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
749
750                 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
751                                 bvec.bv_offset);
752                 /*
753                  * XXX: A single page IO would be inefficient for write,
754                  * but it is not bad as a starting point.
755                  */
756                 err = submit_bio_wait(&bio);
757                 if (err) {
758                         zram_slot_lock(zram, index);
759                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
760                         zram_clear_flag(zram, index, ZRAM_IDLE);
761                         zram_slot_unlock(zram, index);
762                         /*
763                          * Return the last IO error unless no IO
764                          * succeeded at all.
765                          */
766                         ret = err;
767                         continue;
768                 }
769
770                 atomic64_inc(&zram->stats.bd_writes);
771                 /*
772                  * We released zram_slot_lock so need to check if the slot was
773                  * changed. If there is freeing for the slot, we can catch it
774                  * easily by zram_allocated.
775                  * A subtle case is the slot is freed/reallocated/marked as
776                  * ZRAM_IDLE again. To close the race, idle_store doesn't
777                  * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
778                  * Thus, we could close the race by checking ZRAM_IDLE bit.
779                  */
780                 zram_slot_lock(zram, index);
781                 if (!zram_allocated(zram, index) ||
782                           !zram_test_flag(zram, index, ZRAM_IDLE)) {
783                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
784                         zram_clear_flag(zram, index, ZRAM_IDLE);
785                         goto next;
786                 }
787
788                 zram_free_page(zram, index);
789                 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
790                 zram_set_flag(zram, index, ZRAM_WB);
791                 zram_set_element(zram, index, blk_idx);
792                 blk_idx = 0;
793                 atomic64_inc(&zram->stats.pages_stored);
794                 spin_lock(&zram->wb_limit_lock);
795                 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
796                         zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
797                 spin_unlock(&zram->wb_limit_lock);
798 next:
799                 zram_slot_unlock(zram, index);
800         }
801
802         if (blk_idx)
803                 free_block_bdev(zram, blk_idx);
804         __free_page(page);
805 release_init_lock:
806         up_read(&zram->init_lock);
807
808         return ret;
809 }
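
/*
 * Illustrative usage, assuming a device named zram0, matching the three
 * modes parsed above:
 *
 *     echo idle > /sys/block/zram0/writeback
 *     echo huge > /sys/block/zram0/writeback
 *     echo page_index=1251 > /sys/block/zram0/writeback
 *
 * "idle" writes back only slots previously marked through the idle
 * attribute, "huge" only incompressible (ZRAM_HUGE) slots, and page_index
 * targets a single slot; 1251 is just an arbitrary example index.
 */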
810
811 struct zram_work {
812         struct work_struct work;
813         struct zram *zram;
814         unsigned long entry;
815         struct bio *bio;
816         struct bio_vec bvec;
817 };
818
819 #if PAGE_SIZE != 4096
820 static void zram_sync_read(struct work_struct *work)
821 {
822         struct zram_work *zw = container_of(work, struct zram_work, work);
823         struct zram *zram = zw->zram;
824         unsigned long entry = zw->entry;
825         struct bio *bio = zw->bio;
826
827         read_from_bdev_async(zram, &zw->bvec, entry, bio);
828 }
829
830 /*
831  * The block layer wants one ->submit_bio to be active at a time, so if we use
832  * chained IO with the parent IO in the same context, it deadlocks. To avoid
833  * that, use a worker thread context.
834  */
835 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
836                                 unsigned long entry, struct bio *bio)
837 {
838         struct zram_work work;
839
840         work.bvec = *bvec;
841         work.zram = zram;
842         work.entry = entry;
843         work.bio = bio;
844
845         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
846         queue_work(system_unbound_wq, &work.work);
847         flush_work(&work.work);
848         destroy_work_on_stack(&work.work);
849
850         return 1;
851 }
852 #else
853 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
854                                 unsigned long entry, struct bio *bio)
855 {
856         WARN_ON(1);
857         return -EIO;
858 }
859 #endif
860
861 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
862                         unsigned long entry, struct bio *parent, bool sync)
863 {
864         atomic64_inc(&zram->stats.bd_reads);
865         if (sync)
866                 return read_from_bdev_sync(zram, bvec, entry, parent);
867         else
868                 return read_from_bdev_async(zram, bvec, entry, parent);
869 }
870 #else
871 static inline void reset_bdev(struct zram *zram) {};
872 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
873                         unsigned long entry, struct bio *parent, bool sync)
874 {
875         return -EIO;
876 }
877
878 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
879 #endif
880
881 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
882
883 static struct dentry *zram_debugfs_root;
884
885 static void zram_debugfs_create(void)
886 {
887         zram_debugfs_root = debugfs_create_dir("zram", NULL);
888 }
889
890 static void zram_debugfs_destroy(void)
891 {
892         debugfs_remove_recursive(zram_debugfs_root);
893 }
894
895 static void zram_accessed(struct zram *zram, u32 index)
896 {
897         zram_clear_flag(zram, index, ZRAM_IDLE);
898         zram->table[index].ac_time = ktime_get_boottime();
899 }
900
901 static ssize_t read_block_state(struct file *file, char __user *buf,
902                                 size_t count, loff_t *ppos)
903 {
904         char *kbuf;
905         ssize_t index, written = 0;
906         struct zram *zram = file->private_data;
907         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
908         struct timespec64 ts;
909
910         kbuf = kvmalloc(count, GFP_KERNEL);
911         if (!kbuf)
912                 return -ENOMEM;
913
914         down_read(&zram->init_lock);
915         if (!init_done(zram)) {
916                 up_read(&zram->init_lock);
917                 kvfree(kbuf);
918                 return -EINVAL;
919         }
920
921         for (index = *ppos; index < nr_pages; index++) {
922                 int copied;
923
924                 zram_slot_lock(zram, index);
925                 if (!zram_allocated(zram, index))
926                         goto next;
927
928                 ts = ktime_to_timespec64(zram->table[index].ac_time);
929                 copied = snprintf(kbuf + written, count,
930                         "%12zd %12lld.%06lu %c%c%c%c\n",
931                         index, (s64)ts.tv_sec,
932                         ts.tv_nsec / NSEC_PER_USEC,
933                         zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
934                         zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
935                         zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
936                         zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
937
938                 if (count <= copied) {
939                         zram_slot_unlock(zram, index);
940                         break;
941                 }
942                 written += copied;
943                 count -= copied;
944 next:
945                 zram_slot_unlock(zram, index);
946                 *ppos += 1;
947         }
948
949         up_read(&zram->init_lock);
950         if (copy_to_user(buf, kbuf, written))
951                 written = -EFAULT;
952         kvfree(kbuf);
953
954         return written;
955 }
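
/*
 * For illustration, one line of the block_state output produced by the
 * snprintf() above could look like (values invented):
 *
 *           300       75.033841 .wh.
 *
 * i.e. slot index, last access time as seconds.microseconds, then one
 * character per flag: s(ame), w(ritten back), h(uge), i(dle), with '.'
 * standing for a clear flag.
 */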
956
957 static const struct file_operations proc_zram_block_state_op = {
958         .open = simple_open,
959         .read = read_block_state,
960         .llseek = default_llseek,
961 };
962
963 static void zram_debugfs_register(struct zram *zram)
964 {
965         if (!zram_debugfs_root)
966                 return;
967
968         zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
969                                                 zram_debugfs_root);
970         debugfs_create_file("block_state", 0400, zram->debugfs_dir,
971                                 zram, &proc_zram_block_state_op);
972 }
973
974 static void zram_debugfs_unregister(struct zram *zram)
975 {
976         debugfs_remove_recursive(zram->debugfs_dir);
977 }
978 #else
979 static void zram_debugfs_create(void) {};
980 static void zram_debugfs_destroy(void) {};
981 static void zram_accessed(struct zram *zram, u32 index)
982 {
983         zram_clear_flag(zram, index, ZRAM_IDLE);
984 };
985 static void zram_debugfs_register(struct zram *zram) {};
986 static void zram_debugfs_unregister(struct zram *zram) {};
987 #endif
988
989 /*
990  * We switched to per-cpu streams and this attr is not needed anymore.
991  * However, we will keep it around for some time, because:
992  * a) we may revert per-cpu streams in the future
993  * b) it's visible to user space and we need to follow our 2 years
994  *    retirement rule; but we already have a number of 'soon to be
995  *    altered' attrs, so max_comp_streams needs to wait for the next
996  *    layoff cycle.
997  */
998 static ssize_t max_comp_streams_show(struct device *dev,
999                 struct device_attribute *attr, char *buf)
1000 {
1001         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
1002 }
1003
1004 static ssize_t max_comp_streams_store(struct device *dev,
1005                 struct device_attribute *attr, const char *buf, size_t len)
1006 {
1007         return len;
1008 }
1009
1010 static ssize_t comp_algorithm_show(struct device *dev,
1011                 struct device_attribute *attr, char *buf)
1012 {
1013         size_t sz;
1014         struct zram *zram = dev_to_zram(dev);
1015
1016         down_read(&zram->init_lock);
1017         sz = zcomp_available_show(zram->compressor, buf);
1018         up_read(&zram->init_lock);
1019
1020         return sz;
1021 }
1022
1023 static ssize_t comp_algorithm_store(struct device *dev,
1024                 struct device_attribute *attr, const char *buf, size_t len)
1025 {
1026         struct zram *zram = dev_to_zram(dev);
1027         char compressor[ARRAY_SIZE(zram->compressor)];
1028         size_t sz;
1029
1030         strlcpy(compressor, buf, sizeof(compressor));
1031         /* ignore trailing newline */
1032         sz = strlen(compressor);
1033         if (sz > 0 && compressor[sz - 1] == '\n')
1034                 compressor[sz - 1] = 0x00;
1035
1036         if (!zcomp_available_algorithm(compressor))
1037                 return -EINVAL;
1038
1039         down_write(&zram->init_lock);
1040         if (init_done(zram)) {
1041                 up_write(&zram->init_lock);
1042                 pr_info("Can't change algorithm for initialized device\n");
1043                 return -EBUSY;
1044         }
1045
1046         strcpy(zram->compressor, compressor);
1047         up_write(&zram->init_lock);
1048         return len;
1049 }
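
/*
 * Illustrative usage, assuming a device named zram0 and that "lzo-rle" was
 * built in: the algorithm can only be changed while the device is not yet
 * initialized, e.g.
 *
 *     echo lzo-rle > /sys/block/zram0/comp_algorithm
 *
 * Reading the attribute lists the algorithms known to zcomp, via
 * zcomp_available_show().
 */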
1050
1051 static ssize_t compact_store(struct device *dev,
1052                 struct device_attribute *attr, const char *buf, size_t len)
1053 {
1054         struct zram *zram = dev_to_zram(dev);
1055
1056         down_read(&zram->init_lock);
1057         if (!init_done(zram)) {
1058                 up_read(&zram->init_lock);
1059                 return -EINVAL;
1060         }
1061
1062         zs_compact(zram->mem_pool);
1063         up_read(&zram->init_lock);
1064
1065         return len;
1066 }
1067
1068 static ssize_t io_stat_show(struct device *dev,
1069                 struct device_attribute *attr, char *buf)
1070 {
1071         struct zram *zram = dev_to_zram(dev);
1072         ssize_t ret;
1073
1074         down_read(&zram->init_lock);
1075         ret = scnprintf(buf, PAGE_SIZE,
1076                         "%8llu %8llu %8llu %8llu\n",
1077                         (u64)atomic64_read(&zram->stats.failed_reads),
1078                         (u64)atomic64_read(&zram->stats.failed_writes),
1079                         (u64)atomic64_read(&zram->stats.invalid_io),
1080                         (u64)atomic64_read(&zram->stats.notify_free));
1081         up_read(&zram->init_lock);
1082
1083         return ret;
1084 }
1085
1086 static ssize_t mm_stat_show(struct device *dev,
1087                 struct device_attribute *attr, char *buf)
1088 {
1089         struct zram *zram = dev_to_zram(dev);
1090         struct zs_pool_stats pool_stats;
1091         u64 orig_size, mem_used = 0;
1092         long max_used;
1093         ssize_t ret;
1094
1095         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1096
1097         down_read(&zram->init_lock);
1098         if (init_done(zram)) {
1099                 mem_used = zs_get_total_pages(zram->mem_pool);
1100                 zs_pool_stats(zram->mem_pool, &pool_stats);
1101         }
1102
1103         orig_size = atomic64_read(&zram->stats.pages_stored);
1104         max_used = atomic_long_read(&zram->stats.max_used_pages);
1105
1106         ret = scnprintf(buf, PAGE_SIZE,
1107                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1108                         orig_size << PAGE_SHIFT,
1109                         (u64)atomic64_read(&zram->stats.compr_data_size),
1110                         mem_used << PAGE_SHIFT,
1111                         zram->limit_pages << PAGE_SHIFT,
1112                         max_used << PAGE_SHIFT,
1113                         (u64)atomic64_read(&zram->stats.same_pages),
1114                         atomic_long_read(&pool_stats.pages_compacted),
1115                         (u64)atomic64_read(&zram->stats.huge_pages),
1116                         (u64)atomic64_read(&zram->stats.huge_pages_since));
1117         up_read(&zram->init_lock);
1118
1119         return ret;
1120 }
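
/*
 * For reference, the nine columns printed above are, in order:
 * orig_data_size, compr_data_size, mem_used_total, mem_limit,
 * mem_used_max, same_pages, pages_compacted, huge_pages and
 * huge_pages_since. orig_data_size, mem_used_total, mem_limit and
 * mem_used_max are page counts converted to bytes via << PAGE_SHIFT,
 * compr_data_size is already in bytes, and the remaining four are plain
 * counts.
 */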
1121
1122 #ifdef CONFIG_ZRAM_WRITEBACK
1123 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1124 static ssize_t bd_stat_show(struct device *dev,
1125                 struct device_attribute *attr, char *buf)
1126 {
1127         struct zram *zram = dev_to_zram(dev);
1128         ssize_t ret;
1129
1130         down_read(&zram->init_lock);
1131         ret = scnprintf(buf, PAGE_SIZE,
1132                 "%8llu %8llu %8llu\n",
1133                         FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1134                         FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1135                         FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1136         up_read(&zram->init_lock);
1137
1138         return ret;
1139 }
1140 #endif
1141
1142 static ssize_t debug_stat_show(struct device *dev,
1143                 struct device_attribute *attr, char *buf)
1144 {
1145         int version = 1;
1146         struct zram *zram = dev_to_zram(dev);
1147         ssize_t ret;
1148
1149         down_read(&zram->init_lock);
1150         ret = scnprintf(buf, PAGE_SIZE,
1151                         "version: %d\n%8llu %8llu\n",
1152                         version,
1153                         (u64)atomic64_read(&zram->stats.writestall),
1154                         (u64)atomic64_read(&zram->stats.miss_free));
1155         up_read(&zram->init_lock);
1156
1157         return ret;
1158 }
1159
1160 static DEVICE_ATTR_RO(io_stat);
1161 static DEVICE_ATTR_RO(mm_stat);
1162 #ifdef CONFIG_ZRAM_WRITEBACK
1163 static DEVICE_ATTR_RO(bd_stat);
1164 #endif
1165 static DEVICE_ATTR_RO(debug_stat);
1166
1167 static void zram_meta_free(struct zram *zram, u64 disksize)
1168 {
1169         size_t num_pages = disksize >> PAGE_SHIFT;
1170         size_t index;
1171
1172         /* Free all pages that are still in this zram device */
1173         for (index = 0; index < num_pages; index++)
1174                 zram_free_page(zram, index);
1175
1176         zs_destroy_pool(zram->mem_pool);
1177         vfree(zram->table);
1178 }
1179
1180 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1181 {
1182         size_t num_pages;
1183
1184         num_pages = disksize >> PAGE_SHIFT;
1185         zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1186         if (!zram->table)
1187                 return false;
1188
1189         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1190         if (!zram->mem_pool) {
1191                 vfree(zram->table);
1192                 return false;
1193         }
1194
1195         if (!huge_class_size)
1196                 huge_class_size = zs_huge_class_size(zram->mem_pool);
1197         return true;
1198 }
1199
1200 /*
1201  * To protect concurrent access to the same index entry, the
1202  * caller should hold this table index entry's bit_spinlock to
1203  * indicate that this index entry is being accessed.
1204  */
1205 static void zram_free_page(struct zram *zram, size_t index)
1206 {
1207         unsigned long handle;
1208
1209 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1210         zram->table[index].ac_time = 0;
1211 #endif
1212         if (zram_test_flag(zram, index, ZRAM_IDLE))
1213                 zram_clear_flag(zram, index, ZRAM_IDLE);
1214
1215         if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1216                 zram_clear_flag(zram, index, ZRAM_HUGE);
1217                 atomic64_dec(&zram->stats.huge_pages);
1218         }
1219
1220         if (zram_test_flag(zram, index, ZRAM_WB)) {
1221                 zram_clear_flag(zram, index, ZRAM_WB);
1222                 free_block_bdev(zram, zram_get_element(zram, index));
1223                 goto out;
1224         }
1225
1226         /*
1227          * No memory is allocated for same-element-filled pages.
1228          * Simply clear the ZRAM_SAME flag.
1229          */
1230         if (zram_test_flag(zram, index, ZRAM_SAME)) {
1231                 zram_clear_flag(zram, index, ZRAM_SAME);
1232                 atomic64_dec(&zram->stats.same_pages);
1233                 goto out;
1234         }
1235
1236         handle = zram_get_handle(zram, index);
1237         if (!handle)
1238                 return;
1239
1240         zs_free(zram->mem_pool, handle);
1241
1242         atomic64_sub(zram_get_obj_size(zram, index),
1243                         &zram->stats.compr_data_size);
1244 out:
1245         atomic64_dec(&zram->stats.pages_stored);
1246         zram_set_handle(zram, index, 0);
1247         zram_set_obj_size(zram, index, 0);
1248         WARN_ON_ONCE(zram->table[index].flags &
1249                 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1250 }
1251
1252 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1253                                 struct bio *bio, bool partial_io)
1254 {
1255         struct zcomp_strm *zstrm;
1256         unsigned long handle;
1257         unsigned int size;
1258         void *src, *dst;
1259         int ret;
1260
1261         zram_slot_lock(zram, index);
1262         if (zram_test_flag(zram, index, ZRAM_WB)) {
1263                 struct bio_vec bvec;
1264
1265                 zram_slot_unlock(zram, index);
1266
1267                 bvec.bv_page = page;
1268                 bvec.bv_len = PAGE_SIZE;
1269                 bvec.bv_offset = 0;
1270                 return read_from_bdev(zram, &bvec,
1271                                 zram_get_element(zram, index),
1272                                 bio, partial_io);
1273         }
1274
1275         handle = zram_get_handle(zram, index);
1276         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1277                 unsigned long value;
1278                 void *mem;
1279
1280                 value = handle ? zram_get_element(zram, index) : 0;
1281                 mem = kmap_atomic(page);
1282                 zram_fill_page(mem, PAGE_SIZE, value);
1283                 kunmap_atomic(mem);
1284                 zram_slot_unlock(zram, index);
1285                 return 0;
1286         }
1287
1288         size = zram_get_obj_size(zram, index);
1289
1290         if (size != PAGE_SIZE)
1291                 zstrm = zcomp_stream_get(zram->comp);
1292
1293         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1294         if (size == PAGE_SIZE) {
1295                 dst = kmap_atomic(page);
1296                 memcpy(dst, src, PAGE_SIZE);
1297                 kunmap_atomic(dst);
1298                 ret = 0;
1299         } else {
1300                 dst = kmap_atomic(page);
1301                 ret = zcomp_decompress(zstrm, src, size, dst);
1302                 kunmap_atomic(dst);
1303                 zcomp_stream_put(zram->comp);
1304         }
1305         zs_unmap_object(zram->mem_pool, handle);
1306         zram_slot_unlock(zram, index);
1307
1308         /* Should NEVER happen. Return bio error if it does. */
1309         if (WARN_ON(ret))
1310                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1311
1312         return ret;
1313 }
1314
1315 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1316                                 u32 index, int offset, struct bio *bio)
1317 {
1318         int ret;
1319         struct page *page;
1320
1321         page = bvec->bv_page;
1322         if (is_partial_io(bvec)) {
1323                 /* Use a temporary buffer to decompress the page */
1324                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1325                 if (!page)
1326                         return -ENOMEM;
1327         }
1328
1329         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1330         if (unlikely(ret))
1331                 goto out;
1332
1333         if (is_partial_io(bvec)) {
1334                 void *src = kmap_atomic(page);
1335
1336                 memcpy_to_bvec(bvec, src + offset);
1337                 kunmap_atomic(src);
1338         }
1339 out:
1340         if (is_partial_io(bvec))
1341                 __free_page(page);
1342
1343         return ret;
1344 }
1345
1346 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1347                                 u32 index, struct bio *bio)
1348 {
1349         int ret = 0;
1350         unsigned long alloced_pages;
1351         unsigned long handle = 0;
1352         unsigned int comp_len = 0;
1353         void *src, *dst, *mem;
1354         struct zcomp_strm *zstrm;
1355         struct page *page = bvec->bv_page;
1356         unsigned long element = 0;
1357         enum zram_pageflags flags = 0;
1358
1359         mem = kmap_atomic(page);
1360         if (page_same_filled(mem, &element)) {
1361                 kunmap_atomic(mem);
1362                 /* Free memory associated with this sector now. */
1363                 flags = ZRAM_SAME;
1364                 atomic64_inc(&zram->stats.same_pages);
1365                 goto out;
1366         }
1367         kunmap_atomic(mem);
1368
1369 compress_again:
1370         zstrm = zcomp_stream_get(zram->comp);
1371         src = kmap_atomic(page);
1372         ret = zcomp_compress(zstrm, src, &comp_len);
1373         kunmap_atomic(src);
1374
1375         if (unlikely(ret)) {
1376                 zcomp_stream_put(zram->comp);
1377                 pr_err("Compression failed! err=%d\n", ret);
1378                 zs_free(zram->mem_pool, handle);
1379                 return ret;
1380         }
1381
1382         if (comp_len >= huge_class_size)
1383                 comp_len = PAGE_SIZE;
1384         /*
1385          * handle allocation has 2 paths:
1386          * a) fast path is executed with preemption disabled (for
1387          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1388          *  since we can't sleep;
1389          * b) slow path enables preemption and attempts to allocate
1390          *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
1391          *  put per-cpu compression stream and, thus, to re-do
1392          *  the compression once handle is allocated.
1393          *
1394          * if we have a 'non-null' handle here then we are coming
1395          * from the slow path and handle has already been allocated.
1396          */
1397         if (!handle)
1398                 handle = zs_malloc(zram->mem_pool, comp_len,
1399                                 __GFP_KSWAPD_RECLAIM |
1400                                 __GFP_NOWARN |
1401                                 __GFP_HIGHMEM |
1402                                 __GFP_MOVABLE);
1403         if (!handle) {
1404                 zcomp_stream_put(zram->comp);
1405                 atomic64_inc(&zram->stats.writestall);
1406                 handle = zs_malloc(zram->mem_pool, comp_len,
1407                                 GFP_NOIO | __GFP_HIGHMEM |
1408                                 __GFP_MOVABLE);
1409                 if (handle)
1410                         goto compress_again;
1411                 return -ENOMEM;
1412         }
1413
1414         alloced_pages = zs_get_total_pages(zram->mem_pool);
1415         update_used_max(zram, alloced_pages);
1416
1417         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1418                 zcomp_stream_put(zram->comp);
1419                 zs_free(zram->mem_pool, handle);
1420                 return -ENOMEM;
1421         }
1422
1423         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1424
1425         src = zstrm->buffer;
1426         if (comp_len == PAGE_SIZE)
1427                 src = kmap_atomic(page);
1428         memcpy(dst, src, comp_len);
1429         if (comp_len == PAGE_SIZE)
1430                 kunmap_atomic(src);
1431
1432         zcomp_stream_put(zram->comp);
1433         zs_unmap_object(zram->mem_pool, handle);
1434         atomic64_add(comp_len, &zram->stats.compr_data_size);
1435 out:
1436         /*
1437          * Free memory associated with this sector
1438          * before overwriting unused sectors.
1439          */
1440         zram_slot_lock(zram, index);
1441         zram_free_page(zram, index);
1442
1443         if (comp_len == PAGE_SIZE) {
1444                 zram_set_flag(zram, index, ZRAM_HUGE);
1445                 atomic64_inc(&zram->stats.huge_pages);
1446                 atomic64_inc(&zram->stats.huge_pages_since);
1447         }
1448
1449         if (flags) {
1450                 zram_set_flag(zram, index, flags);
1451                 zram_set_element(zram, index, element);
1452         }  else {
1453                 zram_set_handle(zram, index, handle);
1454                 zram_set_obj_size(zram, index, comp_len);
1455         }
1456         zram_slot_unlock(zram, index);
1457
1458         /* Update stats */
1459         atomic64_inc(&zram->stats.pages_stored);
1460         return ret;
1461 }
1462
1463 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1464                                 u32 index, int offset, struct bio *bio)
1465 {
1466         int ret;
1467         struct page *page = NULL;
1468         struct bio_vec vec;
1469
1470         vec = *bvec;
1471         if (is_partial_io(bvec)) {
1472                 void *dst;
1473                 /*
1474                  * This is a partial IO. We need to read the full page
1475                  * before writing the changes.
1476                  */
1477                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1478                 if (!page)
1479                         return -ENOMEM;
1480
1481                 ret = __zram_bvec_read(zram, page, index, bio, true);
1482                 if (ret)
1483                         goto out;
1484
1485                 dst = kmap_atomic(page);
1486                 memcpy_from_bvec(dst + offset, bvec);
1487                 kunmap_atomic(dst);
1488
1489                 vec.bv_page = page;
1490                 vec.bv_len = PAGE_SIZE;
1491                 vec.bv_offset = 0;
1492         }
1493
1494         ret = __zram_bvec_write(zram, &vec, index, bio);
1495 out:
1496         if (is_partial_io(bvec))
1497                 __free_page(page);
1498         return ret;
1499 }
1500
1501 /*
1502  * zram_bio_discard - handler on discard request
1503  * @index: physical block index in PAGE_SIZE units
1504  * @offset: byte offset within physical block
1505  */
1506 static void zram_bio_discard(struct zram *zram, u32 index,
1507                              int offset, struct bio *bio)
1508 {
1509         size_t n = bio->bi_iter.bi_size;
1510
1511         /*
1512          * zram manages data in physical block size units. Because logical block
1513          * size isn't identical to physical block size on some architectures, we
1514          * could get a discard request pointing to a specific offset within a
1515          * certain physical block.  Although we can handle this request by
1516          * reading that physical block and decompressing and partially zeroing
1517          * and re-compressing and then re-storing it, this isn't reasonable
1518          * because our intent with a discard request is to save memory.  So
1519          * skipping this logical block is appropriate here.
1520          */
1521         if (offset) {
1522                 if (n <= (PAGE_SIZE - offset))
1523                         return;
1524
1525                 n -= (PAGE_SIZE - offset);
1526                 index++;
1527         }
1528
1529         while (n >= PAGE_SIZE) {
1530                 zram_slot_lock(zram, index);
1531                 zram_free_page(zram, index);
1532                 zram_slot_unlock(zram, index);
1533                 atomic64_inc(&zram->stats.notify_free);
1534                 index++;
1535                 n -= PAGE_SIZE;
1536         }
1537 }
1538
1539 /*
1540  * Returns an errno if there is a problem. Otherwise returns 0 or 1.
1541  * Returns 0 if the IO request was completed synchronously.
1542  * Returns 1 if the IO request was successfully submitted.
1543  */
1544 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1545                         int offset, unsigned int op, struct bio *bio)
1546 {
1547         int ret;
1548
1549         if (!op_is_write(op)) {
1550                 atomic64_inc(&zram->stats.num_reads);
1551                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1552                 flush_dcache_page(bvec->bv_page);
1553         } else {
1554                 atomic64_inc(&zram->stats.num_writes);
1555                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1556         }
1557
1558         zram_slot_lock(zram, index);
1559         zram_accessed(zram, index);
1560         zram_slot_unlock(zram, index);
1561
1562         if (unlikely(ret < 0)) {
1563                 if (!op_is_write(op))
1564                         atomic64_inc(&zram->stats.failed_reads);
1565                 else
1566                         atomic64_inc(&zram->stats.failed_writes);
1567         }
1568
1569         return ret;
1570 }
1571
1572 static void __zram_make_request(struct zram *zram, struct bio *bio)
1573 {
1574         int offset;
1575         u32 index;
1576         struct bio_vec bvec;
1577         struct bvec_iter iter;
1578         unsigned long start_time;
1579
1580         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1581         offset = (bio->bi_iter.bi_sector &
1582                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1583
1584         switch (bio_op(bio)) {
1585         case REQ_OP_DISCARD:
1586         case REQ_OP_WRITE_ZEROES:
1587                 zram_bio_discard(zram, index, offset, bio);
1588                 bio_endio(bio);
1589                 return;
1590         default:
1591                 break;
1592         }
1593
1594         start_time = bio_start_io_acct(bio);
1595         bio_for_each_segment(bvec, bio, iter) {
1596                 struct bio_vec bv = bvec;
1597                 unsigned int unwritten = bvec.bv_len;
1598
1599                 do {
1600                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1601                                                         unwritten);
1602                         if (zram_bvec_rw(zram, &bv, index, offset,
1603                                          bio_op(bio), bio) < 0) {
1604                                 bio->bi_status = BLK_STS_IOERR;
1605                                 break;
1606                         }
1607
1608                         bv.bv_offset += bv.bv_len;
1609                         unwritten -= bv.bv_len;
1610
1611                         update_position(&index, &offset, &bv);
1612                 } while (unwritten);
1613         }
1614         bio_end_io_acct(bio, start_time);
1615         bio_endio(bio);
1616 }
1617
1618 /*
1619  * Handler function for all zram I/O requests.
1620  */
1621 static void zram_submit_bio(struct bio *bio)
1622 {
1623         struct zram *zram = bio->bi_bdev->bd_disk->private_data;
1624
1625         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1626                                         bio->bi_iter.bi_size)) {
1627                 atomic64_inc(&zram->stats.invalid_io);
1628                 bio_io_error(bio);
1629                 return;
1630         }
1631
1632         __zram_make_request(zram, bio);
1633 }
1634
1635 static void zram_slot_free_notify(struct block_device *bdev,
1636                                 unsigned long index)
1637 {
1638         struct zram *zram;
1639
1640         zram = bdev->bd_disk->private_data;
1641
1642         atomic64_inc(&zram->stats.notify_free);
1643         if (!zram_slot_trylock(zram, index)) {
1644                 atomic64_inc(&zram->stats.miss_free);
1645                 return;
1646         }
1647
1648         zram_free_page(zram, index);
1649         zram_slot_unlock(zram, index);
1650 }
1651
1652 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1653                        struct page *page, unsigned int op)
1654 {
1655         int offset, ret;
1656         u32 index;
1657         struct zram *zram;
1658         struct bio_vec bv;
1659         unsigned long start_time;
1660
1661         if (PageTransHuge(page))
1662                 return -ENOTSUPP;
1663         zram = bdev->bd_disk->private_data;
1664
1665         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1666                 atomic64_inc(&zram->stats.invalid_io);
1667                 ret = -EINVAL;
1668                 goto out;
1669         }
1670
1671         index = sector >> SECTORS_PER_PAGE_SHIFT;
1672         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1673
1674         bv.bv_page = page;
1675         bv.bv_len = PAGE_SIZE;
1676         bv.bv_offset = 0;
1677
1678         start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
1679         ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1680         disk_end_io_acct(bdev->bd_disk, op, start_time);
1681 out:
1682         /*
1683          * If I/O fails, just return the error (i.e. non-zero) without
1684          * calling page_endio.
1685          * This makes the upper callers of rw_page (e.g. swap_readpage,
1686          * __swap_writepage) resubmit the I/O as a bio request, and
1687          * bio->bi_end_io then handles the error
1688          * (e.g. SetPageError, set_page_dirty and extra work).
1689          */
1690         if (unlikely(ret < 0))
1691                 return ret;
1692
1693         switch (ret) {
1694         case 0:
1695                 page_endio(page, op_is_write(op), 0);
1696                 break;
1697         case 1:
1698                 ret = 0;
1699                 break;
1700         default:
1701                 WARN_ON(1);
1702         }
1703         return ret;
1704 }
1705
1706 static void zram_reset_device(struct zram *zram)
1707 {
1708         struct zcomp *comp;
1709         u64 disksize;
1710
1711         down_write(&zram->init_lock);
1712
1713         zram->limit_pages = 0;
1714
1715         if (!init_done(zram)) {
1716                 up_write(&zram->init_lock);
1717                 return;
1718         }
1719
1720         comp = zram->comp;
1721         disksize = zram->disksize;
1722         zram->disksize = 0;
1723
1724         set_capacity_and_notify(zram->disk, 0);
1725         part_stat_set_all(zram->disk->part0, 0);
1726
1727         /* I/O operations on all CPUs are done, so it is safe to free */
1728         zram_meta_free(zram, disksize);
1729         memset(&zram->stats, 0, sizeof(zram->stats));
1730         zcomp_destroy(comp);
1731         reset_bdev(zram);
1732
1733         up_write(&zram->init_lock);
1734 }
1735
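/*
 * Typical usage, per the zram admin documentation (value is illustrative):
 *
 *     echo 1G > /sys/block/zram0/disksize
 *
 * The size is parsed with memparse(), so plain byte counts or K/M/G suffixes
 * are accepted.  The device must not already be initialized.
 */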
1736 static ssize_t disksize_store(struct device *dev,
1737                 struct device_attribute *attr, const char *buf, size_t len)
1738 {
1739         u64 disksize;
1740         struct zcomp *comp;
1741         struct zram *zram = dev_to_zram(dev);
1742         int err;
1743
1744         disksize = memparse(buf, NULL);
1745         if (!disksize)
1746                 return -EINVAL;
1747
1748         down_write(&zram->init_lock);
1749         if (init_done(zram)) {
1750                 pr_info("Cannot change disksize for initialized device\n");
1751                 err = -EBUSY;
1752                 goto out_unlock;
1753         }
1754
1755         disksize = PAGE_ALIGN(disksize);
1756         if (!zram_meta_alloc(zram, disksize)) {
1757                 err = -ENOMEM;
1758                 goto out_unlock;
1759         }
1760
1761         comp = zcomp_create(zram->compressor);
1762         if (IS_ERR(comp)) {
1763                 pr_err("Cannot initialise %s compressing backend\n",
1764                                 zram->compressor);
1765                 err = PTR_ERR(comp);
1766                 goto out_free_meta;
1767         }
1768
1769         zram->comp = comp;
1770         zram->disksize = disksize;
1771         set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
1772         up_write(&zram->init_lock);
1773
1774         return len;
1775
1776 out_free_meta:
1777         zram_meta_free(zram, disksize);
1778 out_unlock:
1779         up_write(&zram->init_lock);
1780         return err;
1781 }
1782
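/*
 * Typical usage, per the zram admin documentation:
 *
 *     echo 1 > /sys/block/zram0/reset
 *
 * Any non-zero value triggers the reset; the device must not be in use
 * (no openers and not already claimed), otherwise -EBUSY is returned.
 */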
1783 static ssize_t reset_store(struct device *dev,
1784                 struct device_attribute *attr, const char *buf, size_t len)
1785 {
1786         int ret;
1787         unsigned short do_reset;
1788         struct zram *zram;
1789         struct block_device *bdev;
1790
1791         ret = kstrtou16(buf, 10, &do_reset);
1792         if (ret)
1793                 return ret;
1794
1795         if (!do_reset)
1796                 return -EINVAL;
1797
1798         zram = dev_to_zram(dev);
1799         bdev = zram->disk->part0;
1800
1801         mutex_lock(&bdev->bd_disk->open_mutex);
1802         /* Do not reset an active device or claimed device */
1803         if (bdev->bd_openers || zram->claim) {
1804                 mutex_unlock(&bdev->bd_disk->open_mutex);
1805                 return -EBUSY;
1806         }
1807
1808         /* From now on, no one can open /dev/zram[0-9] */
1809         zram->claim = true;
1810         mutex_unlock(&bdev->bd_disk->open_mutex);
1811
1812         /* Make sure all pending I/O is finished */
1813         sync_blockdev(bdev);
1814         zram_reset_device(zram);
1815
1816         mutex_lock(&bdev->bd_disk->open_mutex);
1817         zram->claim = false;
1818         mutex_unlock(&bdev->bd_disk->open_mutex);
1819
1820         return len;
1821 }
1822
1823 static int zram_open(struct block_device *bdev, fmode_t mode)
1824 {
1825         int ret = 0;
1826         struct zram *zram;
1827
1828         WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
1829
1830         zram = bdev->bd_disk->private_data;
1831         /* zram was claimed for reset, so the open request fails */
1832         if (zram->claim)
1833                 ret = -EBUSY;
1834
1835         return ret;
1836 }
1837
1838 static const struct block_device_operations zram_devops = {
1839         .open = zram_open,
1840         .submit_bio = zram_submit_bio,
1841         .swap_slot_free_notify = zram_slot_free_notify,
1842         .rw_page = zram_rw_page,
1843         .owner = THIS_MODULE
1844 };
1845
1846 #ifdef CONFIG_ZRAM_WRITEBACK
1847 static const struct block_device_operations zram_wb_devops = {
1848         .open = zram_open,
1849         .submit_bio = zram_submit_bio,
1850         .swap_slot_free_notify = zram_slot_free_notify,
1851         .owner = THIS_MODULE
1852 };
1853 #endif
1854
1855 static DEVICE_ATTR_WO(compact);
1856 static DEVICE_ATTR_RW(disksize);
1857 static DEVICE_ATTR_RO(initstate);
1858 static DEVICE_ATTR_WO(reset);
1859 static DEVICE_ATTR_WO(mem_limit);
1860 static DEVICE_ATTR_WO(mem_used_max);
1861 static DEVICE_ATTR_WO(idle);
1862 static DEVICE_ATTR_RW(max_comp_streams);
1863 static DEVICE_ATTR_RW(comp_algorithm);
1864 #ifdef CONFIG_ZRAM_WRITEBACK
1865 static DEVICE_ATTR_RW(backing_dev);
1866 static DEVICE_ATTR_WO(writeback);
1867 static DEVICE_ATTR_RW(writeback_limit);
1868 static DEVICE_ATTR_RW(writeback_limit_enable);
1869 #endif
1870
1871 static struct attribute *zram_disk_attrs[] = {
1872         &dev_attr_disksize.attr,
1873         &dev_attr_initstate.attr,
1874         &dev_attr_reset.attr,
1875         &dev_attr_compact.attr,
1876         &dev_attr_mem_limit.attr,
1877         &dev_attr_mem_used_max.attr,
1878         &dev_attr_idle.attr,
1879         &dev_attr_max_comp_streams.attr,
1880         &dev_attr_comp_algorithm.attr,
1881 #ifdef CONFIG_ZRAM_WRITEBACK
1882         &dev_attr_backing_dev.attr,
1883         &dev_attr_writeback.attr,
1884         &dev_attr_writeback_limit.attr,
1885         &dev_attr_writeback_limit_enable.attr,
1886 #endif
1887         &dev_attr_io_stat.attr,
1888         &dev_attr_mm_stat.attr,
1889 #ifdef CONFIG_ZRAM_WRITEBACK
1890         &dev_attr_bd_stat.attr,
1891 #endif
1892         &dev_attr_debug_stat.attr,
1893         NULL,
1894 };
1895
1896 ATTRIBUTE_GROUPS(zram_disk);
1897
1898 /*
1899  * Allocate and initialize a new zram device. The function returns
1900  * a device_id >= 0 upon success, and a negative value otherwise.
1901  */
1902 static int zram_add(void)
1903 {
1904         struct zram *zram;
1905         int ret, device_id;
1906
1907         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1908         if (!zram)
1909                 return -ENOMEM;
1910
1911         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1912         if (ret < 0)
1913                 goto out_free_dev;
1914         device_id = ret;
1915
1916         init_rwsem(&zram->init_lock);
1917 #ifdef CONFIG_ZRAM_WRITEBACK
1918         spin_lock_init(&zram->wb_limit_lock);
1919 #endif
1920
1921         /* gendisk structure */
1922         zram->disk = blk_alloc_disk(NUMA_NO_NODE);
1923         if (!zram->disk) {
1924                 pr_err("Error allocating disk structure for device %d\n",
1925                         device_id);
1926                 ret = -ENOMEM;
1927                 goto out_free_idr;
1928         }
1929
1930         zram->disk->major = zram_major;
1931         zram->disk->first_minor = device_id;
1932         zram->disk->minors = 1;
1933         zram->disk->flags |= GENHD_FL_NO_PART;
1934         zram->disk->fops = &zram_devops;
1935         zram->disk->private_data = zram;
1936         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1937
1938         /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
1939         set_capacity(zram->disk, 0);
1940         /* zram devices sort of resemble non-rotational disks */
1941         blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1942         blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1943
1944         /*
1945          * To ensure that we always get PAGE_SIZE-aligned
1946          * and n*PAGE_SIZE-sized I/O requests.
1947          */
1948         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1949         blk_queue_logical_block_size(zram->disk->queue,
1950                                         ZRAM_LOGICAL_BLOCK_SIZE);
1951         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1952         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1953         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1954         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1955         blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1956
1957         /*
1958          * zram_bio_discard() will clear all logical blocks if logical block
1959          * size is identical to physical block size (PAGE_SIZE). But if it is
1960          * different, we will skip discarding the parts of logical blocks that
1961          * fall in the portion of the request range which isn't aligned to
1962          * physical block size.  So we can't ensure that all discarded logical
1963          * blocks are zeroed.
1964          */
1965         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1966                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1967
1968         blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
1969         ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
1970         if (ret)
1971                 goto out_cleanup_disk;
1972
1973         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1974
1975         zram_debugfs_register(zram);
1976         pr_info("Added device: %s\n", zram->disk->disk_name);
1977         return device_id;
1978
1979 out_cleanup_disk:
1980         blk_cleanup_disk(zram->disk);
1981 out_free_idr:
1982         idr_remove(&zram_index_idr, device_id);
1983 out_free_dev:
1984         kfree(zram);
1985         return ret;
1986 }
1987
1988 static int zram_remove(struct zram *zram)
1989 {
1990         struct block_device *bdev = zram->disk->part0;
1991         bool claimed;
1992
1993         mutex_lock(&bdev->bd_disk->open_mutex);
1994         if (bdev->bd_openers) {
1995                 mutex_unlock(&bdev->bd_disk->open_mutex);
1996                 return -EBUSY;
1997         }
1998
1999         claimed = zram->claim;
2000         if (!claimed)
2001                 zram->claim = true;
2002         mutex_unlock(&bdev->bd_disk->open_mutex);
2003
2004         zram_debugfs_unregister(zram);
2005
2006         if (claimed) {
2007                 /*
2008                  * If we were claimed by reset_store(), del_gendisk() will
2009                  * wait until reset_store() is done, so there is nothing to do.
2010                  */
2011                 ;
2012         } else {
2013                 /* Make sure all pending I/O is finished */
2014                 sync_blockdev(bdev);
2015                 zram_reset_device(zram);
2016         }
2017
2018         pr_info("Removed device: %s\n", zram->disk->disk_name);
2019
2020         del_gendisk(zram->disk);
2021
2022         /* del_gendisk drains pending reset_store */
2023         WARN_ON_ONCE(claimed && zram->claim);
2024
2025         /*
2026          * disksize_store() may be called in between zram_reset_device()
2027          * and del_gendisk(), so run a final reset to avoid leaking
2028          * anything allocated by disksize_store().
2029          */
2030         zram_reset_device(zram);
2031
2032         blk_cleanup_disk(zram->disk);
2033         kfree(zram);
2034         return 0;
2035 }
2036
2037 /* zram-control sysfs attributes */
2038
2039 /*
2040  * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
2041  * that reading from this file does alter the state of your system -- it
2042  * creates a new uninitialized zram device and returns that device's
2043  * device_id (or an error code if it fails to create a new device).
2044  */
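/*
 * Typical usage, per the zram admin documentation:
 *
 *     cat /sys/class/zram-control/hot_add
 *
 * which prints the id of the newly created device (e.g. "1" for /dev/zram1).
 */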
2045 static ssize_t hot_add_show(struct class *class,
2046                         struct class_attribute *attr,
2047                         char *buf)
2048 {
2049         int ret;
2050
2051         mutex_lock(&zram_index_mutex);
2052         ret = zram_add();
2053         mutex_unlock(&zram_index_mutex);
2054
2055         if (ret < 0)
2056                 return ret;
2057         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2058 }
2059 static struct class_attribute class_attr_hot_add =
2060         __ATTR(hot_add, 0400, hot_add_show, NULL);
2061
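/*
 * Typical usage, per the zram admin documentation (device id is illustrative):
 *
 *     echo 1 > /sys/class/zram-control/hot_remove
 *
 * which removes /dev/zram1 if it exists and is not in use.
 */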
2062 static ssize_t hot_remove_store(struct class *class,
2063                         struct class_attribute *attr,
2064                         const char *buf,
2065                         size_t count)
2066 {
2067         struct zram *zram;
2068         int ret, dev_id;
2069
2070         /* dev_id is gendisk->first_minor, which is `int' */
2071         ret = kstrtoint(buf, 10, &dev_id);
2072         if (ret)
2073                 return ret;
2074         if (dev_id < 0)
2075                 return -EINVAL;
2076
2077         mutex_lock(&zram_index_mutex);
2078
2079         zram = idr_find(&zram_index_idr, dev_id);
2080         if (zram) {
2081                 ret = zram_remove(zram);
2082                 if (!ret)
2083                         idr_remove(&zram_index_idr, dev_id);
2084         } else {
2085                 ret = -ENODEV;
2086         }
2087
2088         mutex_unlock(&zram_index_mutex);
2089         return ret ? ret : count;
2090 }
2091 static CLASS_ATTR_WO(hot_remove);
2092
2093 static struct attribute *zram_control_class_attrs[] = {
2094         &class_attr_hot_add.attr,
2095         &class_attr_hot_remove.attr,
2096         NULL,
2097 };
2098 ATTRIBUTE_GROUPS(zram_control_class);
2099
2100 static struct class zram_control_class = {
2101         .name           = "zram-control",
2102         .owner          = THIS_MODULE,
2103         .class_groups   = zram_control_class_groups,
2104 };
2105
2106 static int zram_remove_cb(int id, void *ptr, void *data)
2107 {
2108         WARN_ON_ONCE(zram_remove(ptr));
2109         return 0;
2110 }
2111
2112 static void destroy_devices(void)
2113 {
2114         class_unregister(&zram_control_class);
2115         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2116         zram_debugfs_destroy();
2117         idr_destroy(&zram_index_idr);
2118         unregister_blkdev(zram_major, "zram");
2119         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2120 }
2121
2122 static int __init zram_init(void)
2123 {
2124         int ret;
2125
2126         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2127                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
2128         if (ret < 0)
2129                 return ret;
2130
2131         ret = class_register(&zram_control_class);
2132         if (ret) {
2133                 pr_err("Unable to register zram-control class\n");
2134                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2135                 return ret;
2136         }
2137
2138         zram_debugfs_create();
2139         zram_major = register_blkdev(0, "zram");
2140         if (zram_major <= 0) {
2141                 pr_err("Unable to get major number\n");
2142                 class_unregister(&zram_control_class);
2143                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2144                 return -EBUSY;
2145         }
2146
2147         while (num_devices != 0) {
2148                 mutex_lock(&zram_index_mutex);
2149                 ret = zram_add();
2150                 mutex_unlock(&zram_index_mutex);
2151                 if (ret < 0)
2152                         goto out_error;
2153                 num_devices--;
2154         }
2155
2156         return 0;
2157
2158 out_error:
2159         destroy_devices();
2160         return ret;
2161 }
2162
2163 static void __exit zram_exit(void)
2164 {
2165         destroy_devices();
2166 }
2167
2168 module_init(zram_init);
2169 module_exit(zram_exit);
2170
2171 module_param(num_devices, uint, 0);
2172 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
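/*
 * Example (illustrative): pre-create four devices at module load time with
 *
 *     modprobe zram num_devices=4
 */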
2173
2174 MODULE_LICENSE("Dual BSD/GPL");
2175 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2176 MODULE_DESCRIPTION("Compressed RAM Block Device");