drivers/block/zram/zram_drv.c
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/backing-dev.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/debugfs.h>
34 #include <linux/cpuhotplug.h>
35 #include <linux/part_stat.h>
36
37 #include "zram_drv.h"
38
39 static DEFINE_IDR(zram_index_idr);
40 /* idr index must be protected */
41 static DEFINE_MUTEX(zram_index_mutex);
42
43 static int zram_major;
44 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
45
46 /* Module params (documentation at end) */
47 static unsigned int num_devices = 1;
48 /*
49  * Pages that compress to a size equal to or greater than this are stored
50  * uncompressed in memory.
51  */
52 static size_t huge_class_size;
53
54 static const struct block_device_operations zram_devops;
55
56 static void zram_free_page(struct zram *zram, size_t index);
57 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
58                                 u32 index, int offset, struct bio *bio);
59
60
61 static int zram_slot_trylock(struct zram *zram, u32 index)
62 {
63         return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
64 }
65
66 static void zram_slot_lock(struct zram *zram, u32 index)
67 {
68         bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
69 }
70
71 static void zram_slot_unlock(struct zram *zram, u32 index)
72 {
73         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
74 }
75
76 static inline bool init_done(struct zram *zram)
77 {
78         return zram->disksize;
79 }
80
81 static inline struct zram *dev_to_zram(struct device *dev)
82 {
83         return (struct zram *)dev_to_disk(dev)->private_data;
84 }
85
86 static unsigned long zram_get_handle(struct zram *zram, u32 index)
87 {
88         return zram->table[index].handle;
89 }
90
91 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
92 {
93         zram->table[index].handle = handle;
94 }
95
96 /* flag operations require the table entry's bit_spin_lock() to be held */
97 static bool zram_test_flag(struct zram *zram, u32 index,
98                         enum zram_pageflags flag)
99 {
100         return zram->table[index].flags & BIT(flag);
101 }
102
103 static void zram_set_flag(struct zram *zram, u32 index,
104                         enum zram_pageflags flag)
105 {
106         zram->table[index].flags |= BIT(flag);
107 }
108
109 static void zram_clear_flag(struct zram *zram, u32 index,
110                         enum zram_pageflags flag)
111 {
112         zram->table[index].flags &= ~BIT(flag);
113 }
114
115 static inline void zram_set_element(struct zram *zram, u32 index,
116                         unsigned long element)
117 {
118         zram->table[index].element = element;
119 }
120
121 static unsigned long zram_get_element(struct zram *zram, u32 index)
122 {
123         return zram->table[index].element;
124 }
125
126 static size_t zram_get_obj_size(struct zram *zram, u32 index)
127 {
128         return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
129 }
130
131 static void zram_set_obj_size(struct zram *zram,
132                                         u32 index, size_t size)
133 {
134         unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
135
136         zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
137 }
138
139 static inline bool zram_allocated(struct zram *zram, u32 index)
140 {
141         return zram_get_obj_size(zram, index) ||
142                         zram_test_flag(zram, index, ZRAM_SAME) ||
143                         zram_test_flag(zram, index, ZRAM_WB);
144 }
145
146 #if PAGE_SIZE != 4096
147 static inline bool is_partial_io(struct bio_vec *bvec)
148 {
149         return bvec->bv_len != PAGE_SIZE;
150 }
151 #else
152 static inline bool is_partial_io(struct bio_vec *bvec)
153 {
154         return false;
155 }
156 #endif
157
158 static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
159 {
160         prio &= ZRAM_COMP_PRIORITY_MASK;
161         /*
162          * Clear the previous priority value first, in case we are
163          * recompressing an already recompressed page.
164          */
165         zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
166                                       ZRAM_COMP_PRIORITY_BIT1);
167         zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
168 }
169
170 static inline u32 zram_get_priority(struct zram *zram, u32 index)
171 {
172         u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
173
174         return prio & ZRAM_COMP_PRIORITY_MASK;
175 }
176
177 /*
178  * Check if request is within bounds and aligned on zram logical blocks.
179  */
180 static inline bool valid_io_request(struct zram *zram,
181                 sector_t start, unsigned int size)
182 {
183         u64 end, bound;
184
185         /* unaligned request */
186         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
187                 return false;
188         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
189                 return false;
190
191         end = start + (size >> SECTOR_SHIFT);
192         bound = zram->disksize >> SECTOR_SHIFT;
193         /* out of range */
194         if (unlikely(start >= bound || end > bound || start > end))
195                 return false;
196
197         /* I/O request is valid */
198         return true;
199 }
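/*
 * Illustrative example (assuming the usual 4KiB ZRAM_LOGICAL_BLOCK_SIZE from
 * zram_drv.h, i.e. ZRAM_SECTOR_PER_LOGICAL_BLOCK == 8): a request starting at
 * sector 8 with a size of 8192 bytes passes both alignment checks and is valid
 * as long as it stays below disksize; a request starting at sector 3, or one
 * whose size is 512 bytes, is rejected as unaligned.
 */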
200
201 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
202 {
203         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
204         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
205 }
206
207 static inline void update_used_max(struct zram *zram,
208                                         const unsigned long pages)
209 {
210         unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
211
212         do {
213                 if (cur_max >= pages)
214                         return;
215         } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
216                                           &cur_max, pages));
217 }
218
219 static inline void zram_fill_page(void *ptr, unsigned long len,
220                                         unsigned long value)
221 {
222         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
223         memset_l(ptr, value, len / sizeof(unsigned long));
224 }
225
226 static bool page_same_filled(void *ptr, unsigned long *element)
227 {
228         unsigned long *page;
229         unsigned long val;
230         unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
231
232         page = (unsigned long *)ptr;
233         val = page[0];
234
235         if (val != page[last_pos])
236                 return false;
237
238         for (pos = 1; pos < last_pos; pos++) {
239                 if (val != page[pos])
240                         return false;
241         }
242
243         *element = val;
244
245         return true;
246 }
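/*
 * Illustrative example: a page filled entirely with zeroes, or with any
 * repeating unsigned long pattern, is detected here. Such a page is stored
 * only as the pattern value in table[index].element with ZRAM_SAME set (see
 * __zram_bvec_write below), so it consumes no zsmalloc memory at all.
 */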
247
248 static ssize_t initstate_show(struct device *dev,
249                 struct device_attribute *attr, char *buf)
250 {
251         u32 val;
252         struct zram *zram = dev_to_zram(dev);
253
254         down_read(&zram->init_lock);
255         val = init_done(zram);
256         up_read(&zram->init_lock);
257
258         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
259 }
260
261 static ssize_t disksize_show(struct device *dev,
262                 struct device_attribute *attr, char *buf)
263 {
264         struct zram *zram = dev_to_zram(dev);
265
266         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
267 }
268
269 static ssize_t mem_limit_store(struct device *dev,
270                 struct device_attribute *attr, const char *buf, size_t len)
271 {
272         u64 limit;
273         char *tmp;
274         struct zram *zram = dev_to_zram(dev);
275
276         limit = memparse(buf, &tmp);
277         if (buf == tmp) /* no chars parsed, invalid input */
278                 return -EINVAL;
279
280         down_write(&zram->init_lock);
281         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
282         up_write(&zram->init_lock);
283
284         return len;
285 }
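/*
 * Example usage (illustrative; the device name zram0 is just an example):
 * memparse() accepts the usual K/M/G suffixes, and writing 0 removes the
 * limit because __zram_bvec_write only enforces a non-zero limit_pages:
 *
 *   echo 512M > /sys/block/zram0/mem_limit
 *   echo 0    > /sys/block/zram0/mem_limit
 */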
286
287 static ssize_t mem_used_max_store(struct device *dev,
288                 struct device_attribute *attr, const char *buf, size_t len)
289 {
290         int err;
291         unsigned long val;
292         struct zram *zram = dev_to_zram(dev);
293
294         err = kstrtoul(buf, 10, &val);
295         if (err || val != 0)
296                 return -EINVAL;
297
298         down_read(&zram->init_lock);
299         if (init_done(zram)) {
300                 atomic_long_set(&zram->stats.max_used_pages,
301                                 zs_get_total_pages(zram->mem_pool));
302         }
303         up_read(&zram->init_lock);
304
305         return len;
306 }
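/*
 * Usage note: this attribute only accepts "0", which resets the
 * max_used_pages watermark to the pool size currently reported by
 * zs_get_total_pages(); any other value returns -EINVAL:
 *
 *   echo 0 > /sys/block/zram0/mem_used_max
 */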
307
308 /*
309  * Mark all pages that are older than or equal to cutoff as IDLE.
310  * Callers should hold the zram init lock in read mode.
311  */
312 static void mark_idle(struct zram *zram, ktime_t cutoff)
313 {
314         int is_idle = 1;
315         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
316         int index;
317
318         for (index = 0; index < nr_pages; index++) {
319                 /*
320                  * Do not mark ZRAM_UNDER_WB slots as ZRAM_IDLE, to close a race.
321                  * See the comment in writeback_store.
322                  */
323                 zram_slot_lock(zram, index);
324                 if (zram_allocated(zram, index) &&
325                                 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
326 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
327                         is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
328 #endif
329                         if (is_idle)
330                                 zram_set_flag(zram, index, ZRAM_IDLE);
331                 }
332                 zram_slot_unlock(zram, index);
333         }
334 }
335
336 static ssize_t idle_store(struct device *dev,
337                 struct device_attribute *attr, const char *buf, size_t len)
338 {
339         struct zram *zram = dev_to_zram(dev);
340         ktime_t cutoff_time = 0;
341         ssize_t rv = -EINVAL;
342
343         if (!sysfs_streq(buf, "all")) {
344                 /*
345                  * If it did not parse as 'all', try to treat it as an integer
346                  * number of seconds when memory tracking is enabled.
347                  */
348                 u64 age_sec;
349
350                 if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
351                         cutoff_time = ktime_sub(ktime_get_boottime(),
352                                         ns_to_ktime(age_sec * NSEC_PER_SEC));
353                 else
354                         goto out;
355         }
356
357         down_read(&zram->init_lock);
358         if (!init_done(zram))
359                 goto out_unlock;
360
361         /*
362          * A cutoff_time of 0 marks everything as idle; this is the
363          * "all" behavior.
364          */
365         mark_idle(zram, cutoff_time);
366         rv = len;
367
368 out_unlock:
369         up_read(&zram->init_lock);
370 out:
371         return rv;
372 }
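/*
 * Example usage (illustrative; the device name zram0 is just an example):
 * writing "all" marks every allocated, non-ZRAM_UNDER_WB slot idle. With
 * CONFIG_ZRAM_MEMORY_TRACKING an integer is treated as an age in seconds,
 * marking only slots not accessed for at least that long:
 *
 *   echo all  > /sys/block/zram0/idle
 *   echo 3600 > /sys/block/zram0/idle
 */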
373
374 #ifdef CONFIG_ZRAM_WRITEBACK
375 static ssize_t writeback_limit_enable_store(struct device *dev,
376                 struct device_attribute *attr, const char *buf, size_t len)
377 {
378         struct zram *zram = dev_to_zram(dev);
379         u64 val;
380         ssize_t ret = -EINVAL;
381
382         if (kstrtoull(buf, 10, &val))
383                 return ret;
384
385         down_read(&zram->init_lock);
386         spin_lock(&zram->wb_limit_lock);
387         zram->wb_limit_enable = val;
388         spin_unlock(&zram->wb_limit_lock);
389         up_read(&zram->init_lock);
390         ret = len;
391
392         return ret;
393 }
394
395 static ssize_t writeback_limit_enable_show(struct device *dev,
396                 struct device_attribute *attr, char *buf)
397 {
398         bool val;
399         struct zram *zram = dev_to_zram(dev);
400
401         down_read(&zram->init_lock);
402         spin_lock(&zram->wb_limit_lock);
403         val = zram->wb_limit_enable;
404         spin_unlock(&zram->wb_limit_lock);
405         up_read(&zram->init_lock);
406
407         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
408 }
409
410 static ssize_t writeback_limit_store(struct device *dev,
411                 struct device_attribute *attr, const char *buf, size_t len)
412 {
413         struct zram *zram = dev_to_zram(dev);
414         u64 val;
415         ssize_t ret = -EINVAL;
416
417         if (kstrtoull(buf, 10, &val))
418                 return ret;
419
420         down_read(&zram->init_lock);
421         spin_lock(&zram->wb_limit_lock);
422         zram->bd_wb_limit = val;
423         spin_unlock(&zram->wb_limit_lock);
424         up_read(&zram->init_lock);
425         ret = len;
426
427         return ret;
428 }
429
430 static ssize_t writeback_limit_show(struct device *dev,
431                 struct device_attribute *attr, char *buf)
432 {
433         u64 val;
434         struct zram *zram = dev_to_zram(dev);
435
436         down_read(&zram->init_lock);
437         spin_lock(&zram->wb_limit_lock);
438         val = zram->bd_wb_limit;
439         spin_unlock(&zram->wb_limit_lock);
440         up_read(&zram->init_lock);
441
442         return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
443 }
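/*
 * Usage sketch: the writeback budget is accounted in 4KiB units;
 * writeback_store() decrements bd_wb_limit by 1UL << (PAGE_SHIFT - 12) per
 * written page and refuses further writeback with -EIO once the budget is
 * exhausted while writeback_limit_enable is set:
 *
 *   echo 1     > /sys/block/zram0/writeback_limit_enable
 *   echo 25600 > /sys/block/zram0/writeback_limit    (100MiB in 4KiB units)
 */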
444
445 static void reset_bdev(struct zram *zram)
446 {
447         struct block_device *bdev;
448
449         if (!zram->backing_dev)
450                 return;
451
452         bdev = zram->bdev;
453         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
454         /* filp_close() is expected to flush all outstanding I/O */
455         filp_close(zram->backing_dev, NULL);
456         zram->backing_dev = NULL;
457         zram->bdev = NULL;
458         zram->disk->fops = &zram_devops;
459         kvfree(zram->bitmap);
460         zram->bitmap = NULL;
461 }
462
463 static ssize_t backing_dev_show(struct device *dev,
464                 struct device_attribute *attr, char *buf)
465 {
466         struct file *file;
467         struct zram *zram = dev_to_zram(dev);
468         char *p;
469         ssize_t ret;
470
471         down_read(&zram->init_lock);
472         file = zram->backing_dev;
473         if (!file) {
474                 memcpy(buf, "none\n", 5);
475                 up_read(&zram->init_lock);
476                 return 5;
477         }
478
479         p = file_path(file, buf, PAGE_SIZE - 1);
480         if (IS_ERR(p)) {
481                 ret = PTR_ERR(p);
482                 goto out;
483         }
484
485         ret = strlen(p);
486         memmove(buf, p, ret);
487         buf[ret++] = '\n';
488 out:
489         up_read(&zram->init_lock);
490         return ret;
491 }
492
493 static ssize_t backing_dev_store(struct device *dev,
494                 struct device_attribute *attr, const char *buf, size_t len)
495 {
496         char *file_name;
497         size_t sz;
498         struct file *backing_dev = NULL;
499         struct inode *inode;
500         struct address_space *mapping;
501         unsigned int bitmap_sz;
502         unsigned long nr_pages, *bitmap = NULL;
503         struct block_device *bdev = NULL;
504         int err;
505         struct zram *zram = dev_to_zram(dev);
506
507         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
508         if (!file_name)
509                 return -ENOMEM;
510
511         down_write(&zram->init_lock);
512         if (init_done(zram)) {
513                 pr_info("Can't setup backing device for initialized device\n");
514                 err = -EBUSY;
515                 goto out;
516         }
517
518         strscpy(file_name, buf, PATH_MAX);
519         /* ignore trailing newline */
520         sz = strlen(file_name);
521         if (sz > 0 && file_name[sz - 1] == '\n')
522                 file_name[sz - 1] = 0x00;
523
524         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
525         if (IS_ERR(backing_dev)) {
526                 err = PTR_ERR(backing_dev);
527                 backing_dev = NULL;
528                 goto out;
529         }
530
531         mapping = backing_dev->f_mapping;
532         inode = mapping->host;
533
534         /* Only block devices are supported at the moment */
535         if (!S_ISBLK(inode->i_mode)) {
536                 err = -ENOTBLK;
537                 goto out;
538         }
539
540         bdev = blkdev_get_by_dev(inode->i_rdev,
541                         FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
542         if (IS_ERR(bdev)) {
543                 err = PTR_ERR(bdev);
544                 bdev = NULL;
545                 goto out;
546         }
547
548         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
549         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
550         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
551         if (!bitmap) {
552                 err = -ENOMEM;
553                 goto out;
554         }
555
556         reset_bdev(zram);
557
558         zram->bdev = bdev;
559         zram->backing_dev = backing_dev;
560         zram->bitmap = bitmap;
561         zram->nr_pages = nr_pages;
562         up_write(&zram->init_lock);
563
564         pr_info("setup backing device %s\n", file_name);
565         kfree(file_name);
566
567         return len;
568 out:
569         kvfree(bitmap);
570
571         if (bdev)
572                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
573
574         if (backing_dev)
575                 filp_close(backing_dev, NULL);
576
577         up_write(&zram->init_lock);
578
579         kfree(file_name);
580
581         return err;
582 }
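/*
 * Usage sketch (the partition name below is illustrative): the backing device
 * must be a block device, is opened exclusively via blkdev_get_by_dev(), and
 * must be configured before disksize is set (init_done() makes this store
 * fail with -EBUSY afterwards):
 *
 *   echo /dev/nvme0n1p3 > /sys/block/zram0/backing_dev
 */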
583
584 static unsigned long alloc_block_bdev(struct zram *zram)
585 {
586         unsigned long blk_idx = 1;
587 retry:
588         /* skip bit 0 so that a block index of 0 is never confused with zram.handle == 0 */
589         blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
590         if (blk_idx == zram->nr_pages)
591                 return 0;
592
593         if (test_and_set_bit(blk_idx, zram->bitmap))
594                 goto retry;
595
596         atomic64_inc(&zram->stats.bd_count);
597         return blk_idx;
598 }
599
600 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
601 {
602         int was_set;
603
604         was_set = test_and_clear_bit(blk_idx, zram->bitmap);
605         WARN_ON_ONCE(!was_set);
606         atomic64_dec(&zram->stats.bd_count);
607 }
608
609 static void zram_page_end_io(struct bio *bio)
610 {
611         struct page *page = bio_first_page_all(bio);
612
613         page_endio(page, op_is_write(bio_op(bio)),
614                         blk_status_to_errno(bio->bi_status));
615         bio_put(bio);
616 }
617
618 /*
619  * Returns 1 if the submission is successful.
620  */
621 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
622                         unsigned long entry, struct bio *parent)
623 {
624         struct bio *bio;
625
626         bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
627                         GFP_NOIO);
628         if (!bio)
629                 return -ENOMEM;
630
631         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
632         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
633                 bio_put(bio);
634                 return -EIO;
635         }
636
637         if (!parent)
638                 bio->bi_end_io = zram_page_end_io;
639         else
640                 bio_chain(bio, parent);
641
642         submit_bio(bio);
643         return 1;
644 }
645
646 #define PAGE_WB_SIG "page_index="
647
648 #define PAGE_WRITEBACK                  0
649 #define HUGE_WRITEBACK                  (1<<0)
650 #define IDLE_WRITEBACK                  (1<<1)
651 #define INCOMPRESSIBLE_WRITEBACK        (1<<2)
652
653 static ssize_t writeback_store(struct device *dev,
654                 struct device_attribute *attr, const char *buf, size_t len)
655 {
656         struct zram *zram = dev_to_zram(dev);
657         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
658         unsigned long index = 0;
659         struct bio bio;
660         struct bio_vec bio_vec;
661         struct page *page;
662         ssize_t ret = len;
663         int mode, err;
664         unsigned long blk_idx = 0;
665
666         if (sysfs_streq(buf, "idle"))
667                 mode = IDLE_WRITEBACK;
668         else if (sysfs_streq(buf, "huge"))
669                 mode = HUGE_WRITEBACK;
670         else if (sysfs_streq(buf, "huge_idle"))
671                 mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
672         else if (sysfs_streq(buf, "incompressible"))
673                 mode = INCOMPRESSIBLE_WRITEBACK;
674         else {
675                 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
676                         return -EINVAL;
677
678                 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
679                                 index >= nr_pages)
680                         return -EINVAL;
681
682                 nr_pages = 1;
683                 mode = PAGE_WRITEBACK;
684         }
685
686         down_read(&zram->init_lock);
687         if (!init_done(zram)) {
688                 ret = -EINVAL;
689                 goto release_init_lock;
690         }
691
692         if (!zram->backing_dev) {
693                 ret = -ENODEV;
694                 goto release_init_lock;
695         }
696
697         page = alloc_page(GFP_KERNEL);
698         if (!page) {
699                 ret = -ENOMEM;
700                 goto release_init_lock;
701         }
702
703         for (; nr_pages != 0; index++, nr_pages--) {
704                 struct bio_vec bvec;
705
706                 bvec_set_page(&bvec, page, PAGE_SIZE, 0);
707
708                 spin_lock(&zram->wb_limit_lock);
709                 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
710                         spin_unlock(&zram->wb_limit_lock);
711                         ret = -EIO;
712                         break;
713                 }
714                 spin_unlock(&zram->wb_limit_lock);
715
716                 if (!blk_idx) {
717                         blk_idx = alloc_block_bdev(zram);
718                         if (!blk_idx) {
719                                 ret = -ENOSPC;
720                                 break;
721                         }
722                 }
723
724                 zram_slot_lock(zram, index);
725                 if (!zram_allocated(zram, index))
726                         goto next;
727
728                 if (zram_test_flag(zram, index, ZRAM_WB) ||
729                                 zram_test_flag(zram, index, ZRAM_SAME) ||
730                                 zram_test_flag(zram, index, ZRAM_UNDER_WB))
731                         goto next;
732
733                 if (mode & IDLE_WRITEBACK &&
734                     !zram_test_flag(zram, index, ZRAM_IDLE))
735                         goto next;
736                 if (mode & HUGE_WRITEBACK &&
737                     !zram_test_flag(zram, index, ZRAM_HUGE))
738                         goto next;
739                 if (mode & INCOMPRESSIBLE_WRITEBACK &&
740                     !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
741                         goto next;
742
743                 /*
744                  * Clearing ZRAM_UNDER_WB is the caller's duty.
745                  * IOW, zram_free_page() never clears it.
746                  */
747                 zram_set_flag(zram, index, ZRAM_UNDER_WB);
748                 /* Needed to close the hugepage writeback race */
749                 zram_set_flag(zram, index, ZRAM_IDLE);
750                 zram_slot_unlock(zram, index);
751                 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
752                         zram_slot_lock(zram, index);
753                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
754                         zram_clear_flag(zram, index, ZRAM_IDLE);
755                         zram_slot_unlock(zram, index);
756                         continue;
757                 }
758
759                 bio_init(&bio, zram->bdev, &bio_vec, 1,
760                          REQ_OP_WRITE | REQ_SYNC);
761                 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
762
763                 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
764                                 bvec.bv_offset);
765                 /*
766                  * XXX: Single-page IO is inefficient for writes, but it
767                  * is not bad as a starting point.
768                  */
769                 err = submit_bio_wait(&bio);
770                 if (err) {
771                         zram_slot_lock(zram, index);
772                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
773                         zram_clear_flag(zram, index, ZRAM_IDLE);
774                         zram_slot_unlock(zram, index);
775                         /*
776                          * BIO errors are not fatal, we continue and simply
777                          * attempt to writeback the remaining objects (pages).
778                          * At the same time we need to signal user-space that
779                          * some writes (at least one, but also could be all of
780                          * them) were not successful and we do so by returning
781                          * the most recent BIO error.
782                          */
783                         ret = err;
784                         continue;
785                 }
786
787                 atomic64_inc(&zram->stats.bd_writes);
788                 /*
789                  * We released zram_slot_lock, so we need to check whether the
790                  * slot was changed. If the slot was simply freed, zram_allocated
791                  * catches that easily.
792                  * A subtler case is the slot being freed, reallocated and marked
793                  * ZRAM_IDLE again. To close that race, idle_store does not mark
794                  * a slot ZRAM_IDLE once it finds it ZRAM_UNDER_WB. Thus, checking
795                  * the ZRAM_IDLE bit here closes the race.
796                  */
797                 zram_slot_lock(zram, index);
798                 if (!zram_allocated(zram, index) ||
799                           !zram_test_flag(zram, index, ZRAM_IDLE)) {
800                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
801                         zram_clear_flag(zram, index, ZRAM_IDLE);
802                         goto next;
803                 }
804
805                 zram_free_page(zram, index);
806                 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
807                 zram_set_flag(zram, index, ZRAM_WB);
808                 zram_set_element(zram, index, blk_idx);
809                 blk_idx = 0;
810                 atomic64_inc(&zram->stats.pages_stored);
811                 spin_lock(&zram->wb_limit_lock);
812                 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
813                         zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
814                 spin_unlock(&zram->wb_limit_lock);
815 next:
816                 zram_slot_unlock(zram, index);
817         }
818
819         if (blk_idx)
820                 free_block_bdev(zram, blk_idx);
821         __free_page(page);
822 release_init_lock:
823         up_read(&zram->init_lock);
824
825         return ret;
826 }
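/*
 * Usage sketch: accepted values are "idle", "huge", "huge_idle",
 * "incompressible" and "page_index=<n>"; the latter writes back a single
 * slot. Writeback requires a configured backing_dev and an initialized
 * device:
 *
 *   echo idle         > /sys/block/zram0/writeback
 *   echo page_index=5 > /sys/block/zram0/writeback
 */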
827
828 struct zram_work {
829         struct work_struct work;
830         struct zram *zram;
831         unsigned long entry;
832         struct bio *bio;
833         struct bio_vec bvec;
834 };
835
836 #if PAGE_SIZE != 4096
837 static void zram_sync_read(struct work_struct *work)
838 {
839         struct zram_work *zw = container_of(work, struct zram_work, work);
840         struct zram *zram = zw->zram;
841         unsigned long entry = zw->entry;
842         struct bio *bio = zw->bio;
843
844         read_from_bdev_async(zram, &zw->bvec, entry, bio);
845 }
846
847 /*
848  * The block layer wants one ->submit_bio to be active at a time, so chaining
849  * an IO to its parent IO in the same context would deadlock. To avoid that,
850  * use a worker thread context.
851  */
852 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
853                                 unsigned long entry, struct bio *bio)
854 {
855         struct zram_work work;
856
857         work.bvec = *bvec;
858         work.zram = zram;
859         work.entry = entry;
860         work.bio = bio;
861
862         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
863         queue_work(system_unbound_wq, &work.work);
864         flush_work(&work.work);
865         destroy_work_on_stack(&work.work);
866
867         return 1;
868 }
869 #else
870 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
871                                 unsigned long entry, struct bio *bio)
872 {
873         WARN_ON(1);
874         return -EIO;
875 }
876 #endif
877
878 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
879                         unsigned long entry, struct bio *parent, bool sync)
880 {
881         atomic64_inc(&zram->stats.bd_reads);
882         if (sync)
883                 return read_from_bdev_sync(zram, bvec, entry, parent);
884         else
885                 return read_from_bdev_async(zram, bvec, entry, parent);
886 }
887 #else
888 static inline void reset_bdev(struct zram *zram) {};
889 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
890                         unsigned long entry, struct bio *parent, bool sync)
891 {
892         return -EIO;
893 }
894
895 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
896 #endif
897
898 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
899
900 static struct dentry *zram_debugfs_root;
901
902 static void zram_debugfs_create(void)
903 {
904         zram_debugfs_root = debugfs_create_dir("zram", NULL);
905 }
906
907 static void zram_debugfs_destroy(void)
908 {
909         debugfs_remove_recursive(zram_debugfs_root);
910 }
911
912 static void zram_accessed(struct zram *zram, u32 index)
913 {
914         zram_clear_flag(zram, index, ZRAM_IDLE);
915         zram->table[index].ac_time = ktime_get_boottime();
916 }
917
918 static ssize_t read_block_state(struct file *file, char __user *buf,
919                                 size_t count, loff_t *ppos)
920 {
921         char *kbuf;
922         ssize_t index, written = 0;
923         struct zram *zram = file->private_data;
924         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
925         struct timespec64 ts;
926
927         kbuf = kvmalloc(count, GFP_KERNEL);
928         if (!kbuf)
929                 return -ENOMEM;
930
931         down_read(&zram->init_lock);
932         if (!init_done(zram)) {
933                 up_read(&zram->init_lock);
934                 kvfree(kbuf);
935                 return -EINVAL;
936         }
937
938         for (index = *ppos; index < nr_pages; index++) {
939                 int copied;
940
941                 zram_slot_lock(zram, index);
942                 if (!zram_allocated(zram, index))
943                         goto next;
944
945                 ts = ktime_to_timespec64(zram->table[index].ac_time);
946                 copied = snprintf(kbuf + written, count,
947                         "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
948                         index, (s64)ts.tv_sec,
949                         ts.tv_nsec / NSEC_PER_USEC,
950                         zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
951                         zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
952                         zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
953                         zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
954                         zram_get_priority(zram, index) ? 'r' : '.',
955                         zram_test_flag(zram, index,
956                                        ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
957
958                 if (count <= copied) {
959                         zram_slot_unlock(zram, index);
960                         break;
961                 }
962                 written += copied;
963                 count -= copied;
964 next:
965                 zram_slot_unlock(zram, index);
966                 *ppos += 1;
967         }
968
969         up_read(&zram->init_lock);
970         if (copy_to_user(buf, kbuf, written))
971                 written = -EFAULT;
972         kvfree(kbuf);
973
974         return written;
975 }
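/*
 * Output format note: each line of <debugfs>/zram/<device>/block_state is
 * "<index> <sec.usec> <flags>", where the flag characters are s (ZRAM_SAME),
 * w (ZRAM_WB), h (ZRAM_HUGE), i (ZRAM_IDLE), r (recompressed, i.e. non-zero
 * priority) and n (ZRAM_INCOMPRESSIBLE).
 */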
976
977 static const struct file_operations proc_zram_block_state_op = {
978         .open = simple_open,
979         .read = read_block_state,
980         .llseek = default_llseek,
981 };
982
983 static void zram_debugfs_register(struct zram *zram)
984 {
985         if (!zram_debugfs_root)
986                 return;
987
988         zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
989                                                 zram_debugfs_root);
990         debugfs_create_file("block_state", 0400, zram->debugfs_dir,
991                                 zram, &proc_zram_block_state_op);
992 }
993
994 static void zram_debugfs_unregister(struct zram *zram)
995 {
996         debugfs_remove_recursive(zram->debugfs_dir);
997 }
998 #else
999 static void zram_debugfs_create(void) {};
1000 static void zram_debugfs_destroy(void) {};
1001 static void zram_accessed(struct zram *zram, u32 index)
1002 {
1003         zram_clear_flag(zram, index, ZRAM_IDLE);
1004 };
1005 static void zram_debugfs_register(struct zram *zram) {};
1006 static void zram_debugfs_unregister(struct zram *zram) {};
1007 #endif
1008
1009 /*
1010  * We switched to per-cpu streams and this attr is not needed anymore.
1011  * However, we will keep it around for some time, because:
1012  * a) we may revert per-cpu streams in the future
1013  * b) it's visible to user space and we need to follow our 2 years
1014  *    retirement rule; but we already have a number of 'soon to be
1015  *    altered' attrs, so max_comp_streams needs to wait for the next
1016  *    layoff cycle.
1017  */
1018 static ssize_t max_comp_streams_show(struct device *dev,
1019                 struct device_attribute *attr, char *buf)
1020 {
1021         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
1022 }
1023
1024 static ssize_t max_comp_streams_store(struct device *dev,
1025                 struct device_attribute *attr, const char *buf, size_t len)
1026 {
1027         return len;
1028 }
1029
1030 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
1031 {
1032         /* Do not free statically defined compression algorithms */
1033         if (zram->comp_algs[prio] != default_compressor)
1034                 kfree(zram->comp_algs[prio]);
1035
1036         zram->comp_algs[prio] = alg;
1037 }
1038
1039 static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
1040 {
1041         ssize_t sz;
1042
1043         down_read(&zram->init_lock);
1044         sz = zcomp_available_show(zram->comp_algs[prio], buf);
1045         up_read(&zram->init_lock);
1046
1047         return sz;
1048 }
1049
1050 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
1051 {
1052         char *compressor;
1053         size_t sz;
1054
1055         sz = strlen(buf);
1056         if (sz >= CRYPTO_MAX_ALG_NAME)
1057                 return -E2BIG;
1058
1059         compressor = kstrdup(buf, GFP_KERNEL);
1060         if (!compressor)
1061                 return -ENOMEM;
1062
1063         /* ignore trailing newline */
1064         if (sz > 0 && compressor[sz - 1] == '\n')
1065                 compressor[sz - 1] = 0x00;
1066
1067         if (!zcomp_available_algorithm(compressor)) {
1068                 kfree(compressor);
1069                 return -EINVAL;
1070         }
1071
1072         down_write(&zram->init_lock);
1073         if (init_done(zram)) {
1074                 up_write(&zram->init_lock);
1075                 kfree(compressor);
1076                 pr_info("Can't change algorithm for initialized device\n");
1077                 return -EBUSY;
1078         }
1079
1080         comp_algorithm_set(zram, prio, compressor);
1081         up_write(&zram->init_lock);
1082         return 0;
1083 }
1084
1085 static ssize_t comp_algorithm_show(struct device *dev,
1086                                    struct device_attribute *attr,
1087                                    char *buf)
1088 {
1089         struct zram *zram = dev_to_zram(dev);
1090
1091         return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
1092 }
1093
1094 static ssize_t comp_algorithm_store(struct device *dev,
1095                                     struct device_attribute *attr,
1096                                     const char *buf,
1097                                     size_t len)
1098 {
1099         struct zram *zram = dev_to_zram(dev);
1100         int ret;
1101
1102         ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1103         return ret ? ret : len;
1104 }
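/*
 * Usage sketch ("zstd" is only an example and must be supported by the
 * kernel's zcomp backends): the primary algorithm can only be changed before
 * the device is initialized:
 *
 *   cat /sys/block/zram0/comp_algorithm
 *   echo zstd > /sys/block/zram0/comp_algorithm
 */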
1105
1106 #ifdef CONFIG_ZRAM_MULTI_COMP
1107 static ssize_t recomp_algorithm_show(struct device *dev,
1108                                      struct device_attribute *attr,
1109                                      char *buf)
1110 {
1111         struct zram *zram = dev_to_zram(dev);
1112         ssize_t sz = 0;
1113         u32 prio;
1114
1115         for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1116                 if (!zram->comp_algs[prio])
1117                         continue;
1118
1119                 sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
1120                 sz += __comp_algorithm_show(zram, prio, buf + sz);
1121         }
1122
1123         return sz;
1124 }
1125
1126 static ssize_t recomp_algorithm_store(struct device *dev,
1127                                       struct device_attribute *attr,
1128                                       const char *buf,
1129                                       size_t len)
1130 {
1131         struct zram *zram = dev_to_zram(dev);
1132         int prio = ZRAM_SECONDARY_COMP;
1133         char *args, *param, *val;
1134         char *alg = NULL;
1135         int ret;
1136
1137         args = skip_spaces(buf);
1138         while (*args) {
1139                 args = next_arg(args, &param, &val);
1140
1141                 if (!val || !*val)
1142                         return -EINVAL;
1143
1144                 if (!strcmp(param, "algo")) {
1145                         alg = val;
1146                         continue;
1147                 }
1148
1149                 if (!strcmp(param, "priority")) {
1150                         ret = kstrtoint(val, 10, &prio);
1151                         if (ret)
1152                                 return ret;
1153                         continue;
1154                 }
1155         }
1156
1157         if (!alg)
1158                 return -EINVAL;
1159
1160         if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1161                 return -EINVAL;
1162
1163         ret = __comp_algorithm_store(zram, prio, alg);
1164         return ret ? ret : len;
1165 }
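/*
 * Usage sketch (CONFIG_ZRAM_MULTI_COMP only; the algorithm name is an
 * example): recomp_algorithm takes "algo=<name>" and an optional
 * "priority=<n>" with ZRAM_SECONDARY_COMP <= n < ZRAM_MAX_COMPS:
 *
 *   echo "algo=zstd priority=1" > /sys/block/zram0/recomp_algorithm
 */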
1166 #endif
1167
1168 static ssize_t compact_store(struct device *dev,
1169                 struct device_attribute *attr, const char *buf, size_t len)
1170 {
1171         struct zram *zram = dev_to_zram(dev);
1172
1173         down_read(&zram->init_lock);
1174         if (!init_done(zram)) {
1175                 up_read(&zram->init_lock);
1176                 return -EINVAL;
1177         }
1178
1179         zs_compact(zram->mem_pool);
1180         up_read(&zram->init_lock);
1181
1182         return len;
1183 }
1184
1185 static ssize_t io_stat_show(struct device *dev,
1186                 struct device_attribute *attr, char *buf)
1187 {
1188         struct zram *zram = dev_to_zram(dev);
1189         ssize_t ret;
1190
1191         down_read(&zram->init_lock);
1192         ret = scnprintf(buf, PAGE_SIZE,
1193                         "%8llu %8llu %8llu %8llu\n",
1194                         (u64)atomic64_read(&zram->stats.failed_reads),
1195                         (u64)atomic64_read(&zram->stats.failed_writes),
1196                         (u64)atomic64_read(&zram->stats.invalid_io),
1197                         (u64)atomic64_read(&zram->stats.notify_free));
1198         up_read(&zram->init_lock);
1199
1200         return ret;
1201 }
1202
1203 static ssize_t mm_stat_show(struct device *dev,
1204                 struct device_attribute *attr, char *buf)
1205 {
1206         struct zram *zram = dev_to_zram(dev);
1207         struct zs_pool_stats pool_stats;
1208         u64 orig_size, mem_used = 0;
1209         long max_used;
1210         ssize_t ret;
1211
1212         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1213
1214         down_read(&zram->init_lock);
1215         if (init_done(zram)) {
1216                 mem_used = zs_get_total_pages(zram->mem_pool);
1217                 zs_pool_stats(zram->mem_pool, &pool_stats);
1218         }
1219
1220         orig_size = atomic64_read(&zram->stats.pages_stored);
1221         max_used = atomic_long_read(&zram->stats.max_used_pages);
1222
1223         ret = scnprintf(buf, PAGE_SIZE,
1224                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1225                         orig_size << PAGE_SHIFT,
1226                         (u64)atomic64_read(&zram->stats.compr_data_size),
1227                         mem_used << PAGE_SHIFT,
1228                         zram->limit_pages << PAGE_SHIFT,
1229                         max_used << PAGE_SHIFT,
1230                         (u64)atomic64_read(&zram->stats.same_pages),
1231                         atomic_long_read(&pool_stats.pages_compacted),
1232                         (u64)atomic64_read(&zram->stats.huge_pages),
1233                         (u64)atomic64_read(&zram->stats.huge_pages_since));
1234         up_read(&zram->init_lock);
1235
1236         return ret;
1237 }
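/*
 * Column note: mm_stat reports, in order, orig_data_size, compr_data_size,
 * mem_used_total, mem_limit and mem_used_max in bytes (the page counters are
 * shifted by PAGE_SHIFT; compr_data_size is already kept in bytes), followed
 * by same_pages, pages_compacted, huge_pages and huge_pages_since as page
 * counts.
 */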
1238
1239 #ifdef CONFIG_ZRAM_WRITEBACK
1240 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1241 static ssize_t bd_stat_show(struct device *dev,
1242                 struct device_attribute *attr, char *buf)
1243 {
1244         struct zram *zram = dev_to_zram(dev);
1245         ssize_t ret;
1246
1247         down_read(&zram->init_lock);
1248         ret = scnprintf(buf, PAGE_SIZE,
1249                 "%8llu %8llu %8llu\n",
1250                         FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1251                         FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1252                         FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1253         up_read(&zram->init_lock);
1254
1255         return ret;
1256 }
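/*
 * Column note: bd_stat reports bd_count, bd_reads and bd_writes, each scaled
 * by FOUR_K() so the numbers are in 4KiB units regardless of PAGE_SIZE.
 */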
1257 #endif
1258
1259 static ssize_t debug_stat_show(struct device *dev,
1260                 struct device_attribute *attr, char *buf)
1261 {
1262         int version = 1;
1263         struct zram *zram = dev_to_zram(dev);
1264         ssize_t ret;
1265
1266         down_read(&zram->init_lock);
1267         ret = scnprintf(buf, PAGE_SIZE,
1268                         "version: %d\n%8llu %8llu\n",
1269                         version,
1270                         (u64)atomic64_read(&zram->stats.writestall),
1271                         (u64)atomic64_read(&zram->stats.miss_free));
1272         up_read(&zram->init_lock);
1273
1274         return ret;
1275 }
1276
1277 static DEVICE_ATTR_RO(io_stat);
1278 static DEVICE_ATTR_RO(mm_stat);
1279 #ifdef CONFIG_ZRAM_WRITEBACK
1280 static DEVICE_ATTR_RO(bd_stat);
1281 #endif
1282 static DEVICE_ATTR_RO(debug_stat);
1283
1284 static void zram_meta_free(struct zram *zram, u64 disksize)
1285 {
1286         size_t num_pages = disksize >> PAGE_SHIFT;
1287         size_t index;
1288
1289         /* Free all pages that are still in this zram device */
1290         for (index = 0; index < num_pages; index++)
1291                 zram_free_page(zram, index);
1292
1293         zs_destroy_pool(zram->mem_pool);
1294         vfree(zram->table);
1295 }
1296
1297 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1298 {
1299         size_t num_pages;
1300
1301         num_pages = disksize >> PAGE_SHIFT;
1302         zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1303         if (!zram->table)
1304                 return false;
1305
1306         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1307         if (!zram->mem_pool) {
1308                 vfree(zram->table);
1309                 return false;
1310         }
1311
1312         if (!huge_class_size)
1313                 huge_class_size = zs_huge_class_size(zram->mem_pool);
1314         return true;
1315 }
1316
1317 /*
1318  * To protect against concurrent access to the same index entry, the
1319  * caller should hold the table entry's bit_spinlock, which indicates
1320  * that the entry is being accessed.
1321  */
1322 static void zram_free_page(struct zram *zram, size_t index)
1323 {
1324         unsigned long handle;
1325
1326 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1327         zram->table[index].ac_time = 0;
1328 #endif
1329         if (zram_test_flag(zram, index, ZRAM_IDLE))
1330                 zram_clear_flag(zram, index, ZRAM_IDLE);
1331
1332         if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1333                 zram_clear_flag(zram, index, ZRAM_HUGE);
1334                 atomic64_dec(&zram->stats.huge_pages);
1335         }
1336
1337         if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1338                 zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
1339
1340         zram_set_priority(zram, index, 0);
1341
1342         if (zram_test_flag(zram, index, ZRAM_WB)) {
1343                 zram_clear_flag(zram, index, ZRAM_WB);
1344                 free_block_bdev(zram, zram_get_element(zram, index));
1345                 goto out;
1346         }
1347
1348         /*
1349          * No memory is allocated for same-element-filled pages.
1350          * Simply clear the ZRAM_SAME flag.
1351          */
1352         if (zram_test_flag(zram, index, ZRAM_SAME)) {
1353                 zram_clear_flag(zram, index, ZRAM_SAME);
1354                 atomic64_dec(&zram->stats.same_pages);
1355                 goto out;
1356         }
1357
1358         handle = zram_get_handle(zram, index);
1359         if (!handle)
1360                 return;
1361
1362         zs_free(zram->mem_pool, handle);
1363
1364         atomic64_sub(zram_get_obj_size(zram, index),
1365                         &zram->stats.compr_data_size);
1366 out:
1367         atomic64_dec(&zram->stats.pages_stored);
1368         zram_set_handle(zram, index, 0);
1369         zram_set_obj_size(zram, index, 0);
1370         WARN_ON_ONCE(zram->table[index].flags &
1371                 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1372 }
1373
1374 /*
1375  * Reads a page from the writeback device. Corresponding ZRAM slot
1376  * should be unlocked.
1377  */
1378 static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
1379                                     u32 index, struct bio *bio, bool partial_io)
1380 {
1381         struct bio_vec bvec;
1382
1383         bvec_set_page(&bvec, page, PAGE_SIZE, 0);
1384         return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
1385                               partial_io);
1386 }
1387
1388 /*
1389  * Reads (decompresses if needed) a page from zspool (zsmalloc).
1390  * Corresponding ZRAM slot should be locked.
1391  */
1392 static int zram_read_from_zspool(struct zram *zram, struct page *page,
1393                                  u32 index)
1394 {
1395         struct zcomp_strm *zstrm;
1396         unsigned long handle;
1397         unsigned int size;
1398         void *src, *dst;
1399         u32 prio;
1400         int ret;
1401
1402         handle = zram_get_handle(zram, index);
1403         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1404                 unsigned long value;
1405                 void *mem;
1406
1407                 value = handle ? zram_get_element(zram, index) : 0;
1408                 mem = kmap_atomic(page);
1409                 zram_fill_page(mem, PAGE_SIZE, value);
1410                 kunmap_atomic(mem);
1411                 return 0;
1412         }
1413
1414         size = zram_get_obj_size(zram, index);
1415
1416         if (size != PAGE_SIZE) {
1417                 prio = zram_get_priority(zram, index);
1418                 zstrm = zcomp_stream_get(zram->comps[prio]);
1419         }
1420
1421         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1422         if (size == PAGE_SIZE) {
1423                 dst = kmap_atomic(page);
1424                 memcpy(dst, src, PAGE_SIZE);
1425                 kunmap_atomic(dst);
1426                 ret = 0;
1427         } else {
1428                 dst = kmap_atomic(page);
1429                 ret = zcomp_decompress(zstrm, src, size, dst);
1430                 kunmap_atomic(dst);
1431                 zcomp_stream_put(zram->comps[prio]);
1432         }
1433         zs_unmap_object(zram->mem_pool, handle);
1434         return ret;
1435 }
1436
1437 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1438                             struct bio *bio, bool partial_io)
1439 {
1440         int ret;
1441
1442         zram_slot_lock(zram, index);
1443         if (!zram_test_flag(zram, index, ZRAM_WB)) {
1444                 /* Slot should be locked throughout the function call */
1445                 ret = zram_read_from_zspool(zram, page, index);
1446                 zram_slot_unlock(zram, index);
1447         } else {
1448                 /* Slot should be unlocked before the function call */
1449                 zram_slot_unlock(zram, index);
1450
1451                 ret = zram_bvec_read_from_bdev(zram, page, index, bio,
1452                                                partial_io);
1453         }
1454
1455         /* Should NEVER happen. Return bio error if it does. */
1456         if (WARN_ON(ret < 0))
1457                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1458
1459         return ret;
1460 }
1461
1462 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1463                           u32 index, int offset, struct bio *bio)
1464 {
1465         int ret;
1466         struct page *page;
1467
1468         page = bvec->bv_page;
1469         if (is_partial_io(bvec)) {
1470                 /* Use a temporary buffer to decompress the page */
1471                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1472                 if (!page)
1473                         return -ENOMEM;
1474         }
1475
1476         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1477         if (unlikely(ret))
1478                 goto out;
1479
1480         if (is_partial_io(bvec)) {
1481                 void *src = kmap_atomic(page);
1482
1483                 memcpy_to_bvec(bvec, src + offset);
1484                 kunmap_atomic(src);
1485         }
1486 out:
1487         if (is_partial_io(bvec))
1488                 __free_page(page);
1489
1490         return ret;
1491 }
1492
1493 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1494                                 u32 index, struct bio *bio)
1495 {
1496         int ret = 0;
1497         unsigned long alloced_pages;
1498         unsigned long handle = -ENOMEM;
1499         unsigned int comp_len = 0;
1500         void *src, *dst, *mem;
1501         struct zcomp_strm *zstrm;
1502         struct page *page = bvec->bv_page;
1503         unsigned long element = 0;
1504         enum zram_pageflags flags = 0;
1505
1506         mem = kmap_atomic(page);
1507         if (page_same_filled(mem, &element)) {
1508                 kunmap_atomic(mem);
1509                 /* Free memory associated with this sector now. */
1510                 flags = ZRAM_SAME;
1511                 atomic64_inc(&zram->stats.same_pages);
1512                 goto out;
1513         }
1514         kunmap_atomic(mem);
1515
1516 compress_again:
1517         zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
1518         src = kmap_atomic(page);
1519         ret = zcomp_compress(zstrm, src, &comp_len);
1520         kunmap_atomic(src);
1521
1522         if (unlikely(ret)) {
1523                 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
1524                 pr_err("Compression failed! err=%d\n", ret);
1525                 zs_free(zram->mem_pool, handle);
1526                 return ret;
1527         }
1528
1529         if (comp_len >= huge_class_size)
1530                 comp_len = PAGE_SIZE;
1531         /*
1532          * handle allocation has 2 paths:
1533          * a) fast path is executed with preemption disabled (for
1534          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1535          *  since we can't sleep;
1536          * b) slow path enables preemption and attempts to allocate
1537          *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
1538          *  put per-cpu compression stream and, thus, to re-do
1539          *  the compression once handle is allocated.
1540          *
1541          * if we have a 'non-null' handle here then we are coming
1542          * from the slow path and handle has already been allocated.
1543          */
1544         if (IS_ERR_VALUE(handle))
1545                 handle = zs_malloc(zram->mem_pool, comp_len,
1546                                 __GFP_KSWAPD_RECLAIM |
1547                                 __GFP_NOWARN |
1548                                 __GFP_HIGHMEM |
1549                                 __GFP_MOVABLE);
1550         if (IS_ERR_VALUE(handle)) {
1551                 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
1552                 atomic64_inc(&zram->stats.writestall);
1553                 handle = zs_malloc(zram->mem_pool, comp_len,
1554                                 GFP_NOIO | __GFP_HIGHMEM |
1555                                 __GFP_MOVABLE);
1556                 if (IS_ERR_VALUE(handle))
1557                         return PTR_ERR((void *)handle);
1558
1559                 if (comp_len != PAGE_SIZE)
1560                         goto compress_again;
1561                 /*
1562                  * If the page is not compressible, we still need to re-acquire
1563                  * the compression stream and execute the code below. The
1564                  * zcomp_stream_get() call disables CPU hotplug and grabs the
1565                  * zstrm buffer back, so that the dereference of the zstrm
1566                  * variable below is valid.
1567                  */
1568                 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
1569         }
1570
1571         alloced_pages = zs_get_total_pages(zram->mem_pool);
1572         update_used_max(zram, alloced_pages);
1573
1574         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1575                 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
1576                 zs_free(zram->mem_pool, handle);
1577                 return -ENOMEM;
1578         }
1579
1580         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1581
1582         src = zstrm->buffer;
1583         if (comp_len == PAGE_SIZE)
1584                 src = kmap_atomic(page);
1585         memcpy(dst, src, comp_len);
1586         if (comp_len == PAGE_SIZE)
1587                 kunmap_atomic(src);
1588
1589         zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
1590         zs_unmap_object(zram->mem_pool, handle);
1591         atomic64_add(comp_len, &zram->stats.compr_data_size);
1592 out:
1593         /*
1594          * Free memory associated with this sector
1595          * before overwriting unused sectors.
1596          */
1597         zram_slot_lock(zram, index);
1598         zram_free_page(zram, index);
1599
1600         if (comp_len == PAGE_SIZE) {
1601                 zram_set_flag(zram, index, ZRAM_HUGE);
1602                 atomic64_inc(&zram->stats.huge_pages);
1603                 atomic64_inc(&zram->stats.huge_pages_since);
1604         }
1605
1606         if (flags) {
1607                 zram_set_flag(zram, index, flags);
1608                 zram_set_element(zram, index, element);
1609         } else {
1610                 zram_set_handle(zram, index, handle);
1611                 zram_set_obj_size(zram, index, comp_len);
1612         }
1613         zram_slot_unlock(zram, index);
1614
1615         /* Update stats */
1616         atomic64_inc(&zram->stats.pages_stored);
1617         return ret;
1618 }
1619
1620 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1621                                 u32 index, int offset, struct bio *bio)
1622 {
1623         int ret;
1624         struct page *page = NULL;
1625         struct bio_vec vec;
1626
1627         vec = *bvec;
1628         if (is_partial_io(bvec)) {
1629                 void *dst;
1630                 /*
1631                  * This is a partial IO. We need to read the full page
1632                  * before writing the changes.
1633                  */
1634                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1635                 if (!page)
1636                         return -ENOMEM;
1637
1638                 ret = __zram_bvec_read(zram, page, index, bio, true);
1639                 if (ret)
1640                         goto out;
1641
1642                 dst = kmap_atomic(page);
1643                 memcpy_from_bvec(dst + offset, bvec);
1644                 kunmap_atomic(dst);
1645
1646                 bvec_set_page(&vec, page, PAGE_SIZE, 0);
1647         }
1648
1649         ret = __zram_bvec_write(zram, &vec, index, bio);
1650 out:
1651         if (is_partial_io(bvec))
1652                 __free_page(page);
1653         return ret;
1654 }
1655
1656 #ifdef CONFIG_ZRAM_MULTI_COMP
1657 /*
1658  * This function will decompress (unless it's ZRAM_HUGE) the page and then
1659  * attempt to compress it using provided compression algorithm priority
1660  * (which is potentially more effective).
1661  *
1662  * Corresponding ZRAM slot should be locked.
1663  */
1664 static int zram_recompress(struct zram *zram, u32 index, struct page *page,
1665                            u32 threshold, u32 prio, u32 prio_max)
1666 {
1667         struct zcomp_strm *zstrm = NULL;
1668         unsigned long handle_old;
1669         unsigned long handle_new;
1670         unsigned int comp_len_old;
1671         unsigned int comp_len_new;
1672         unsigned int class_index_old;
1673         unsigned int class_index_new;
1674         u32 num_recomps = 0;
1675         void *src, *dst;
1676         int ret;
1677
1678         handle_old = zram_get_handle(zram, index);
1679         if (!handle_old)
1680                 return -EINVAL;
1681
1682         comp_len_old = zram_get_obj_size(zram, index);
1683         /*
1684          * Do not recompress objects that are already "small enough".
1685          */
1686         if (comp_len_old < threshold)
1687                 return 0;
1688
1689         ret = zram_read_from_zspool(zram, page, index);
1690         if (ret)
1691                 return ret;
1692
1693         class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
1694         /*
1695          * Iterate the secondary comp algorithms list (in order of priority)
1696          * and try to recompress the page.
1697          */
1698         for (; prio < prio_max; prio++) {
1699                 if (!zram->comps[prio])
1700                         continue;
1701
1702                 /*
1703                  * Skip if the object is already re-compressed with a higher
1704                  * priority algorithm (or same algorithm).
1705                  */
1706                 if (prio <= zram_get_priority(zram, index))
1707                         continue;
1708
1709                 num_recomps++;
1710                 zstrm = zcomp_stream_get(zram->comps[prio]);
1711                 src = kmap_atomic(page);
1712                 ret = zcomp_compress(zstrm, src, &comp_len_new);
1713                 kunmap_atomic(src);
1714
1715                 if (ret) {
1716                         zcomp_stream_put(zram->comps[prio]);
1717                         return ret;
1718                 }
1719
1720                 class_index_new = zs_lookup_class_index(zram->mem_pool,
1721                                                         comp_len_new);
1722
1723                 /* Continue until we make progress */
1724                 if (class_index_new >= class_index_old ||
1725                     (threshold && comp_len_new >= threshold)) {
1726                         zcomp_stream_put(zram->comps[prio]);
1727                         continue;
1728                 }
1729
1730                 /* Recompression was successful so break out */
1731                 break;
1732         }
1733
1734         /*
1735          * We did not try to recompress, e.g., when we have only one
1736          * secondary algorithm and the page is already recompressed
1737          * using that algorithm.
1738          */
1739         if (!zstrm)
1740                 return 0;
1741
1742         if (class_index_new >= class_index_old) {
1743                 /*
1744                  * Secondary algorithms failed to re-compress the page
1745                  * in a way that would save memory, mark the object as
1746                  * incompressible so that we will not try to compress
1747                  * it again.
1748                  *
1749                  * We need to make sure that all secondary algorithms have
1750                  * failed, so we test if the number of recompressions matches
1751                  * the number of active secondary algorithms.
1752                  */
1753                 if (num_recomps == zram->num_active_comps - 1)
1754                         zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
1755                 return 0;
1756         }
1757
1758         /* Successful recompression but above threshold */
1759         if (threshold && comp_len_new >= threshold)
1760                 return 0;
1761
1762         /*
1763          * No direct reclaim (slow path) for handle allocation and no
1764          * re-compression attempt (unlike in __zram_bvec_write()) since
1765          * we have already stored that object in zsmalloc. If we cannot
1766          * allocate memory for the recompressed object then we bail out
1767          * and simply keep the old (existing) object in zsmalloc.
1768          */
1769         handle_new = zs_malloc(zram->mem_pool, comp_len_new,
1770                                __GFP_KSWAPD_RECLAIM |
1771                                __GFP_NOWARN |
1772                                __GFP_HIGHMEM |
1773                                __GFP_MOVABLE);
1774         if (IS_ERR_VALUE(handle_new)) {
1775                 zcomp_stream_put(zram->comps[prio]);
1776                 return PTR_ERR((void *)handle_new);
1777         }
1778
1779         dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
1780         memcpy(dst, zstrm->buffer, comp_len_new);
1781         zcomp_stream_put(zram->comps[prio]);
1782
1783         zs_unmap_object(zram->mem_pool, handle_new);
1784
1785         zram_free_page(zram, index);
1786         zram_set_handle(zram, index, handle_new);
1787         zram_set_obj_size(zram, index, comp_len_new);
1788         zram_set_priority(zram, index, prio);
1789
1790         atomic64_add(comp_len_new, &zram->stats.compr_data_size);
1791         atomic64_inc(&zram->stats.pages_stored);
1792
1793         return 0;
1794 }
1795
1796 #define RECOMPRESS_IDLE         (1 << 0)
1797 #define RECOMPRESS_HUGE         (1 << 1)
1798
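/*
 * Illustrative only (not part of the driver): the recompress attribute
 * parsed below accepts space-separated key=value pairs. Assuming a device
 * named zram0 and that a secondary algorithm (e.g. zstd) has been
 * configured via recomp_algorithm, usage is expected to look roughly like:
 *
 *   echo "type=huge_idle" > /sys/block/zram0/recompress
 *   echo "type=idle threshold=3000" > /sys/block/zram0/recompress
 *   echo "type=huge algo=zstd" > /sys/block/zram0/recompress
 *
 * "threshold" is in bytes and must be smaller than PAGE_SIZE; "algo" must
 * match one of the configured secondary compression algorithms.
 */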
1799 static ssize_t recompress_store(struct device *dev,
1800                                 struct device_attribute *attr,
1801                                 const char *buf, size_t len)
1802 {
1803         u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
1804         struct zram *zram = dev_to_zram(dev);
1805         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1806         char *args, *param, *val, *algo = NULL;
1807         u32 mode = 0, threshold = 0;
1808         unsigned long index;
1809         struct page *page;
1810         ssize_t ret;
1811
1812         args = skip_spaces(buf);
1813         while (*args) {
1814                 args = next_arg(args, &param, &val);
1815
1816                 if (!val || !*val)
1817                         return -EINVAL;
1818
1819                 if (!strcmp(param, "type")) {
1820                         if (!strcmp(val, "idle"))
1821                                 mode = RECOMPRESS_IDLE;
1822                         if (!strcmp(val, "huge"))
1823                                 mode = RECOMPRESS_HUGE;
1824                         if (!strcmp(val, "huge_idle"))
1825                                 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
1826                         continue;
1827                 }
1828
1829                 if (!strcmp(param, "threshold")) {
1830                         /*
1831                          * We will re-compress only idle objects equal to or
1832                          * greater in size than the watermark.
1833                          */
1834                         ret = kstrtouint(val, 10, &threshold);
1835                         if (ret)
1836                                 return ret;
1837                         continue;
1838                 }
1839
1840                 if (!strcmp(param, "algo")) {
1841                         algo = val;
1842                         continue;
1843                 }
1844         }
1845
1846         if (threshold >= PAGE_SIZE)
1847                 return -EINVAL;
1848
1849         down_read(&zram->init_lock);
1850         if (!init_done(zram)) {
1851                 ret = -EINVAL;
1852                 goto release_init_lock;
1853         }
1854
1855         if (algo) {
1856                 bool found = false;
1857
1858                 for (; prio < ZRAM_MAX_COMPS; prio++) {
1859                         if (!zram->comp_algs[prio])
1860                                 continue;
1861
1862                         if (!strcmp(zram->comp_algs[prio], algo)) {
1863                                 prio_max = min(prio + 1, ZRAM_MAX_COMPS);
1864                                 found = true;
1865                                 break;
1866                         }
1867                 }
1868
1869                 if (!found) {
1870                         ret = -EINVAL;
1871                         goto release_init_lock;
1872                 }
1873         }
1874
1875         page = alloc_page(GFP_KERNEL);
1876         if (!page) {
1877                 ret = -ENOMEM;
1878                 goto release_init_lock;
1879         }
1880
1881         ret = len;
1882         for (index = 0; index < nr_pages; index++) {
1883                 int err = 0;
1884
1885                 zram_slot_lock(zram, index);
1886
1887                 if (!zram_allocated(zram, index))
1888                         goto next;
1889
1890                 if (mode & RECOMPRESS_IDLE &&
1891                     !zram_test_flag(zram, index, ZRAM_IDLE))
1892                         goto next;
1893
1894                 if (mode & RECOMPRESS_HUGE &&
1895                     !zram_test_flag(zram, index, ZRAM_HUGE))
1896                         goto next;
1897
1898                 if (zram_test_flag(zram, index, ZRAM_WB) ||
1899                     zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
1900                     zram_test_flag(zram, index, ZRAM_SAME) ||
1901                     zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1902                         goto next;
1903
1904                 err = zram_recompress(zram, index, page, threshold,
1905                                       prio, prio_max);
1906 next:
1907                 zram_slot_unlock(zram, index);
1908                 if (err) {
1909                         ret = err;
1910                         break;
1911                 }
1912
1913                 cond_resched();
1914         }
1915
1916         __free_page(page);
1917
1918 release_init_lock:
1919         up_read(&zram->init_lock);
1920         return ret;
1921 }
1922 #endif
1923
1924 /*
1925  * zram_bio_discard - handler for discard requests
1926  * @index: physical block index in PAGE_SIZE units
1927  * @offset: byte offset within physical block
1928  */
1929 static void zram_bio_discard(struct zram *zram, u32 index,
1930                              int offset, struct bio *bio)
1931 {
1932         size_t n = bio->bi_iter.bi_size;
1933
1934         /*
1935          * zram manages data in physical block size units. Because the
1936          * logical block size isn't identical to the physical block size
1937          * on some architectures, we could get a discard request pointing
1938          * to a specific offset within a certain physical block.  Although
1939          * we could handle such a request by reading that physical block,
1940          * decompressing, partially zeroing, re-compressing and re-storing
1941          * it, this isn't reasonable because our intent with a discard
1942          * request is to save memory.  So we skip this logical block.
1943          */
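        /*
         * Worked example (illustrative, assuming 4K pages): a 6K discard
         * starting at byte offset 2K within a page covers the trailing 2K
         * of the first page and all of the next page. The partial 2K head
         * is skipped (offset != 0), so only the one fully covered page is
         * freed by the loop below.
         */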
1944         if (offset) {
1945                 if (n <= (PAGE_SIZE - offset))
1946                         return;
1947
1948                 n -= (PAGE_SIZE - offset);
1949                 index++;
1950         }
1951
1952         while (n >= PAGE_SIZE) {
1953                 zram_slot_lock(zram, index);
1954                 zram_free_page(zram, index);
1955                 zram_slot_unlock(zram, index);
1956                 atomic64_inc(&zram->stats.notify_free);
1957                 index++;
1958                 n -= PAGE_SIZE;
1959         }
1960 }
1961
1962 /*
1963  * Returns a negative errno if there is a problem. Otherwise returns 0 or 1:
1964  * 0 if the I/O request was completed synchronously,
1965  * 1 if the I/O request was successfully submitted.
1966  */
1967 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1968                         int offset, enum req_op op, struct bio *bio)
1969 {
1970         int ret;
1971
1972         if (!op_is_write(op)) {
1973                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1974                 flush_dcache_page(bvec->bv_page);
1975         } else {
1976                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1977         }
1978
1979         zram_slot_lock(zram, index);
1980         zram_accessed(zram, index);
1981         zram_slot_unlock(zram, index);
1982
1983         if (unlikely(ret < 0)) {
1984                 if (!op_is_write(op))
1985                         atomic64_inc(&zram->stats.failed_reads);
1986                 else
1987                         atomic64_inc(&zram->stats.failed_writes);
1988         }
1989
1990         return ret;
1991 }
1992
1993 static void __zram_make_request(struct zram *zram, struct bio *bio)
1994 {
1995         int offset;
1996         u32 index;
1997         struct bio_vec bvec;
1998         struct bvec_iter iter;
1999         unsigned long start_time;
2000
2001         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2002         offset = (bio->bi_iter.bi_sector &
2003                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
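        /*
         * Illustrative example (assuming 4K pages and 512-byte sectors,
         * i.e. SECTORS_PER_PAGE == 8): a bio starting at sector 13 maps to
         * index 1 (13 >> 3) and offset 2560 ((13 & 7) << 9).
         */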
2004
2005         switch (bio_op(bio)) {
2006         case REQ_OP_DISCARD:
2007         case REQ_OP_WRITE_ZEROES:
2008                 zram_bio_discard(zram, index, offset, bio);
2009                 bio_endio(bio);
2010                 return;
2011         default:
2012                 break;
2013         }
2014
2015         start_time = bio_start_io_acct(bio);
2016         bio_for_each_segment(bvec, bio, iter) {
2017                 struct bio_vec bv = bvec;
2018                 unsigned int unwritten = bvec.bv_len;
2019
2020                 do {
2021                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
2022                                                         unwritten);
2023                         if (zram_bvec_rw(zram, &bv, index, offset,
2024                                          bio_op(bio), bio) < 0) {
2025                                 bio->bi_status = BLK_STS_IOERR;
2026                                 break;
2027                         }
2028
2029                         bv.bv_offset += bv.bv_len;
2030                         unwritten -= bv.bv_len;
2031
2032                         update_position(&index, &offset, &bv);
2033                 } while (unwritten);
2034         }
2035         bio_end_io_acct(bio, start_time);
2036         bio_endio(bio);
2037 }
2038
2039 /*
2040  * Handler function for all zram I/O requests.
2041  */
2042 static void zram_submit_bio(struct bio *bio)
2043 {
2044         struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2045
2046         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
2047                                         bio->bi_iter.bi_size)) {
2048                 atomic64_inc(&zram->stats.invalid_io);
2049                 bio_io_error(bio);
2050                 return;
2051         }
2052
2053         __zram_make_request(zram, bio);
2054 }
2055
2056 static void zram_slot_free_notify(struct block_device *bdev,
2057                                 unsigned long index)
2058 {
2059         struct zram *zram;
2060
2061         zram = bdev->bd_disk->private_data;
2062
2063         atomic64_inc(&zram->stats.notify_free);
2064         if (!zram_slot_trylock(zram, index)) {
2065                 atomic64_inc(&zram->stats.miss_free);
2066                 return;
2067         }
2068
2069         zram_free_page(zram, index);
2070         zram_slot_unlock(zram, index);
2071 }
2072
2073 static void zram_destroy_comps(struct zram *zram)
2074 {
2075         u32 prio;
2076
2077         for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
2078                 struct zcomp *comp = zram->comps[prio];
2079
2080                 zram->comps[prio] = NULL;
2081                 if (!comp)
2082                         continue;
2083                 zcomp_destroy(comp);
2084                 zram->num_active_comps--;
2085         }
2086 }
2087
2088 static void zram_reset_device(struct zram *zram)
2089 {
2090         down_write(&zram->init_lock);
2091
2092         zram->limit_pages = 0;
2093
2094         if (!init_done(zram)) {
2095                 up_write(&zram->init_lock);
2096                 return;
2097         }
2098
2099         set_capacity_and_notify(zram->disk, 0);
2100         part_stat_set_all(zram->disk->part0, 0);
2101
2102         /* I/O operations on all CPUs are done, so it is safe to free */
2103         zram_meta_free(zram, zram->disksize);
2104         zram->disksize = 0;
2105         zram_destroy_comps(zram);
2106         memset(&zram->stats, 0, sizeof(zram->stats));
2107         reset_bdev(zram);
2108
2109         comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2110         up_write(&zram->init_lock);
2111 }
2112
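/*
 * Illustrative only: disksize is typically set from userspace before the
 * device is used; memparse() accepts plain byte counts as well as K/M/G
 * suffixes. Assuming a device named zram0, for example:
 *
 *   echo 1G > /sys/block/zram0/disksize
 *
 * The value is rounded up to a PAGE_SIZE multiple, and changing it again
 * requires a prior reset.
 */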
2113 static ssize_t disksize_store(struct device *dev,
2114                 struct device_attribute *attr, const char *buf, size_t len)
2115 {
2116         u64 disksize;
2117         struct zcomp *comp;
2118         struct zram *zram = dev_to_zram(dev);
2119         int err;
2120         u32 prio;
2121
2122         disksize = memparse(buf, NULL);
2123         if (!disksize)
2124                 return -EINVAL;
2125
2126         down_write(&zram->init_lock);
2127         if (init_done(zram)) {
2128                 pr_info("Cannot change disksize for initialized device\n");
2129                 err = -EBUSY;
2130                 goto out_unlock;
2131         }
2132
2133         disksize = PAGE_ALIGN(disksize);
2134         if (!zram_meta_alloc(zram, disksize)) {
2135                 err = -ENOMEM;
2136                 goto out_unlock;
2137         }
2138
2139         for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
2140                 if (!zram->comp_algs[prio])
2141                         continue;
2142
2143                 comp = zcomp_create(zram->comp_algs[prio]);
2144                 if (IS_ERR(comp)) {
2145                         pr_err("Cannot initialise %s compressing backend\n",
2146                                zram->comp_algs[prio]);
2147                         err = PTR_ERR(comp);
2148                         goto out_free_comps;
2149                 }
2150
2151                 zram->comps[prio] = comp;
2152                 zram->num_active_comps++;
2153         }
2154         zram->disksize = disksize;
2155         set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2156         up_write(&zram->init_lock);
2157
2158         return len;
2159
2160 out_free_comps:
2161         zram_destroy_comps(zram);
2162         zram_meta_free(zram, disksize);
2163 out_unlock:
2164         up_write(&zram->init_lock);
2165         return err;
2166 }
2167
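/*
 * Illustrative only: resetting frees all stored data and returns the
 * device to an uninitialized state. Assuming a device named zram0 that is
 * no longer in use (e.g. after swapoff/umount):
 *
 *   echo 1 > /sys/block/zram0/reset
 *
 * The write fails with -EBUSY while the device still has openers.
 */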
2168 static ssize_t reset_store(struct device *dev,
2169                 struct device_attribute *attr, const char *buf, size_t len)
2170 {
2171         int ret;
2172         unsigned short do_reset;
2173         struct zram *zram;
2174         struct gendisk *disk;
2175
2176         ret = kstrtou16(buf, 10, &do_reset);
2177         if (ret)
2178                 return ret;
2179
2180         if (!do_reset)
2181                 return -EINVAL;
2182
2183         zram = dev_to_zram(dev);
2184         disk = zram->disk;
2185
2186         mutex_lock(&disk->open_mutex);
2187         /* Do not reset an active or claimed device */
2188         if (disk_openers(disk) || zram->claim) {
2189                 mutex_unlock(&disk->open_mutex);
2190                 return -EBUSY;
2191         }
2192
2193         /* From now on, no one can open /dev/zram[0-9] */
2194         zram->claim = true;
2195         mutex_unlock(&disk->open_mutex);
2196
2197         /* Make sure all the pending I/O are finished */
2198         sync_blockdev(disk->part0);
2199         zram_reset_device(zram);
2200
2201         mutex_lock(&disk->open_mutex);
2202         zram->claim = false;
2203         mutex_unlock(&disk->open_mutex);
2204
2205         return len;
2206 }
2207
2208 static int zram_open(struct block_device *bdev, fmode_t mode)
2209 {
2210         int ret = 0;
2211         struct zram *zram;
2212
2213         WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
2214
2215         zram = bdev->bd_disk->private_data;
2216         /* zram was claimed for reset, so the open request fails */
2217         if (zram->claim)
2218                 ret = -EBUSY;
2219
2220         return ret;
2221 }
2222
2223 static const struct block_device_operations zram_devops = {
2224         .open = zram_open,
2225         .submit_bio = zram_submit_bio,
2226         .swap_slot_free_notify = zram_slot_free_notify,
2227         .owner = THIS_MODULE
2228 };
2229
2230 static DEVICE_ATTR_WO(compact);
2231 static DEVICE_ATTR_RW(disksize);
2232 static DEVICE_ATTR_RO(initstate);
2233 static DEVICE_ATTR_WO(reset);
2234 static DEVICE_ATTR_WO(mem_limit);
2235 static DEVICE_ATTR_WO(mem_used_max);
2236 static DEVICE_ATTR_WO(idle);
2237 static DEVICE_ATTR_RW(max_comp_streams);
2238 static DEVICE_ATTR_RW(comp_algorithm);
2239 #ifdef CONFIG_ZRAM_WRITEBACK
2240 static DEVICE_ATTR_RW(backing_dev);
2241 static DEVICE_ATTR_WO(writeback);
2242 static DEVICE_ATTR_RW(writeback_limit);
2243 static DEVICE_ATTR_RW(writeback_limit_enable);
2244 #endif
2245 #ifdef CONFIG_ZRAM_MULTI_COMP
2246 static DEVICE_ATTR_RW(recomp_algorithm);
2247 static DEVICE_ATTR_WO(recompress);
2248 #endif
2249
2250 static struct attribute *zram_disk_attrs[] = {
2251         &dev_attr_disksize.attr,
2252         &dev_attr_initstate.attr,
2253         &dev_attr_reset.attr,
2254         &dev_attr_compact.attr,
2255         &dev_attr_mem_limit.attr,
2256         &dev_attr_mem_used_max.attr,
2257         &dev_attr_idle.attr,
2258         &dev_attr_max_comp_streams.attr,
2259         &dev_attr_comp_algorithm.attr,
2260 #ifdef CONFIG_ZRAM_WRITEBACK
2261         &dev_attr_backing_dev.attr,
2262         &dev_attr_writeback.attr,
2263         &dev_attr_writeback_limit.attr,
2264         &dev_attr_writeback_limit_enable.attr,
2265 #endif
2266         &dev_attr_io_stat.attr,
2267         &dev_attr_mm_stat.attr,
2268 #ifdef CONFIG_ZRAM_WRITEBACK
2269         &dev_attr_bd_stat.attr,
2270 #endif
2271         &dev_attr_debug_stat.attr,
2272 #ifdef CONFIG_ZRAM_MULTI_COMP
2273         &dev_attr_recomp_algorithm.attr,
2274         &dev_attr_recompress.attr,
2275 #endif
2276         NULL,
2277 };
2278
2279 ATTRIBUTE_GROUPS(zram_disk);
2280
2281 /*
2282  * Allocate and initialize a new zram device. The function returns
2283  * a '>= 0' device_id upon success, and a negative value otherwise.
2284  */
2285 static int zram_add(void)
2286 {
2287         struct zram *zram;
2288         int ret, device_id;
2289
2290         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
2291         if (!zram)
2292                 return -ENOMEM;
2293
2294         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
2295         if (ret < 0)
2296                 goto out_free_dev;
2297         device_id = ret;
2298
2299         init_rwsem(&zram->init_lock);
2300 #ifdef CONFIG_ZRAM_WRITEBACK
2301         spin_lock_init(&zram->wb_limit_lock);
2302 #endif
2303
2304         /* gendisk structure */
2305         zram->disk = blk_alloc_disk(NUMA_NO_NODE);
2306         if (!zram->disk) {
2307                 pr_err("Error allocating disk structure for device %d\n",
2308                         device_id);
2309                 ret = -ENOMEM;
2310                 goto out_free_idr;
2311         }
2312
2313         zram->disk->major = zram_major;
2314         zram->disk->first_minor = device_id;
2315         zram->disk->minors = 1;
2316         zram->disk->flags |= GENHD_FL_NO_PART;
2317         zram->disk->fops = &zram_devops;
2318         zram->disk->private_data = zram;
2319         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
2320
2321         /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
2322         set_capacity(zram->disk, 0);
2323         /* zram devices sort of resemble non-rotational disks */
2324         blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
2325         blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
2326         blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
2327
2328         /*
2329          * To ensure that we always get PAGE_SIZE-aligned
2330          * and n*PAGE_SIZE-sized I/O requests.
2331          */
2332         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
2333         blk_queue_logical_block_size(zram->disk->queue,
2334                                         ZRAM_LOGICAL_BLOCK_SIZE);
2335         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
2336         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
2337         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
2338         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
2339
2340         /*
2341          * zram_bio_discard() will clear all logical blocks if the logical
2342          * block size is identical to the physical block size (PAGE_SIZE).
2343          * But if it is different, we will skip discarding the parts of
2344          * logical blocks that lie in the portion of the request range which
2345          * isn't aligned to the physical block size.  So we can't ensure
2346          * that all discarded logical blocks are zeroed.
2347          */
2348         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
2349                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
2350
2351         blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
2352         ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
2353         if (ret)
2354                 goto out_cleanup_disk;
2355
2356         comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2357
2358         zram_debugfs_register(zram);
2359         pr_info("Added device: %s\n", zram->disk->disk_name);
2360         return device_id;
2361
2362 out_cleanup_disk:
2363         put_disk(zram->disk);
2364 out_free_idr:
2365         idr_remove(&zram_index_idr, device_id);
2366 out_free_dev:
2367         kfree(zram);
2368         return ret;
2369 }
2370
2371 static int zram_remove(struct zram *zram)
2372 {
2373         bool claimed;
2374
2375         mutex_lock(&zram->disk->open_mutex);
2376         if (disk_openers(zram->disk)) {
2377                 mutex_unlock(&zram->disk->open_mutex);
2378                 return -EBUSY;
2379         }
2380
2381         claimed = zram->claim;
2382         if (!claimed)
2383                 zram->claim = true;
2384         mutex_unlock(&zram->disk->open_mutex);
2385
2386         zram_debugfs_unregister(zram);
2387
2388         if (claimed) {
2389                 /*
2390                  * If we were claimed by reset_store(), del_gendisk() will
2391                  * wait until reset_store() is done, so there is nothing to do.
2392                  */
2393                 ;
2394         } else {
2395                 /* Make sure all the pending I/O are finished */
2396                 sync_blockdev(zram->disk->part0);
2397                 zram_reset_device(zram);
2398         }
2399
2400         pr_info("Removed device: %s\n", zram->disk->disk_name);
2401
2402         del_gendisk(zram->disk);
2403
2404         /* del_gendisk drains pending reset_store */
2405         WARN_ON_ONCE(claimed && zram->claim);
2406
2407         /*
2408          * disksize_store() may be called in between zram_reset_device()
2409          * and del_gendisk(), so run the last reset to avoid leaking
2410          * anything allocated with disksize_store()
2411          * anything allocated with disksize_store().
2412         zram_reset_device(zram);
2413
2414         put_disk(zram->disk);
2415         kfree(zram);
2416         return 0;
2417 }
2418
2419 /* zram-control sysfs attributes */
2420
2421 /*
2422  * NOTE: the hot_add attribute is not the usual read-only sysfs attribute, in
2423  * the sense that reading from this file does alter the state of your system:
2424  * it creates a new un-initialized zram device and returns that device's
2425  * device_id (or an error code if it fails to create a new device).
2426  */
2427 static ssize_t hot_add_show(struct class *class,
2428                         struct class_attribute *attr,
2429                         char *buf)
2430 {
2431         int ret;
2432
2433         mutex_lock(&zram_index_mutex);
2434         ret = zram_add();
2435         mutex_unlock(&zram_index_mutex);
2436
2437         if (ret < 0)
2438                 return ret;
2439         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2440 }
2441 static struct class_attribute class_attr_hot_add =
2442         __ATTR(hot_add, 0400, hot_add_show, NULL);
2443
2444 static ssize_t hot_remove_store(struct class *class,
2445                         struct class_attribute *attr,
2446                         const char *buf,
2447                         size_t count)
2448 {
2449         struct zram *zram;
2450         int ret, dev_id;
2451
2452         /* dev_id is gendisk->first_minor, which is `int' */
2453         ret = kstrtoint(buf, 10, &dev_id);
2454         if (ret)
2455                 return ret;
2456         if (dev_id < 0)
2457                 return -EINVAL;
2458
2459         mutex_lock(&zram_index_mutex);
2460
2461         zram = idr_find(&zram_index_idr, dev_id);
2462         if (zram) {
2463                 ret = zram_remove(zram);
2464                 if (!ret)
2465                         idr_remove(&zram_index_idr, dev_id);
2466         } else {
2467                 ret = -ENODEV;
2468         }
2469
2470         mutex_unlock(&zram_index_mutex);
2471         return ret ? ret : count;
2472 }
2473 static CLASS_ATTR_WO(hot_remove);
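/*
 * Illustrative only: the zram-control class exposes hot_add and hot_remove.
 * Reading hot_add allocates a new device and prints its id; writing an id
 * to hot_remove deletes that device (it must not be busy). For example:
 *
 *   cat /sys/class/zram-control/hot_add       (prints e.g. "1")
 *   echo 1 > /sys/class/zram-control/hot_remove
 */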
2474
2475 static struct attribute *zram_control_class_attrs[] = {
2476         &class_attr_hot_add.attr,
2477         &class_attr_hot_remove.attr,
2478         NULL,
2479 };
2480 ATTRIBUTE_GROUPS(zram_control_class);
2481
2482 static struct class zram_control_class = {
2483         .name           = "zram-control",
2484         .class_groups   = zram_control_class_groups,
2485 };
2486
2487 static int zram_remove_cb(int id, void *ptr, void *data)
2488 {
2489         WARN_ON_ONCE(zram_remove(ptr));
2490         return 0;
2491 }
2492
2493 static void destroy_devices(void)
2494 {
2495         class_unregister(&zram_control_class);
2496         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2497         zram_debugfs_destroy();
2498         idr_destroy(&zram_index_idr);
2499         unregister_blkdev(zram_major, "zram");
2500         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2501 }
2502
2503 static int __init zram_init(void)
2504 {
2505         int ret;
2506
2507         BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);
2508
2509         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2510                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
2511         if (ret < 0)
2512                 return ret;
2513
2514         ret = class_register(&zram_control_class);
2515         if (ret) {
2516                 pr_err("Unable to register zram-control class\n");
2517                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2518                 return ret;
2519         }
2520
2521         zram_debugfs_create();
2522         zram_major = register_blkdev(0, "zram");
2523         if (zram_major <= 0) {
2524                 pr_err("Unable to get major number\n");
2525                 class_unregister(&zram_control_class);
2526                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2527                 return -EBUSY;
2528         }
2529
2530         while (num_devices != 0) {
2531                 mutex_lock(&zram_index_mutex);
2532                 ret = zram_add();
2533                 mutex_unlock(&zram_index_mutex);
2534                 if (ret < 0)
2535                         goto out_error;
2536                 num_devices--;
2537         }
2538
2539         return 0;
2540
2541 out_error:
2542         destroy_devices();
2543         return ret;
2544 }
2545
2546 static void __exit zram_exit(void)
2547 {
2548         destroy_devices();
2549 }
2550
2551 module_init(zram_init);
2552 module_exit(zram_exit);
2553
2554 module_param(num_devices, uint, 0);
2555 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
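/*
 * Illustrative only: num_devices controls how many devices are created at
 * module load time (additional ones can be added later via hot_add), e.g.:
 *
 *   modprobe zram num_devices=4
 */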
2556
2557 MODULE_LICENSE("Dual BSD/GPL");
2558 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2559 MODULE_DESCRIPTION("Compressed RAM Block Device");