RDMA/mlx5: Verify that DM operation is reasonable
[linux-2.6-microblaze.git] / drivers / mtd / mtdswap.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Swap block device support for MTDs
4  * Turns an MTD device into a swap device with block wear leveling
5  *
6  * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
7  *
8  * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
9  *
10  * Based on Richard Purdie's earlier implementation in 2007. Background
11  * support and lock-less operation written by Adrian Hunter.
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/module.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/mtd/blktrans.h>
18 #include <linux/rbtree.h>
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include <linux/vmalloc.h>
22 #include <linux/genhd.h>
23 #include <linux/swap.h>
24 #include <linux/debugfs.h>
25 #include <linux/seq_file.h>
26 #include <linux/device.h>
27 #include <linux/math64.h>
28
#define MTDSWAP_PREFIX "mtdswap"

/*
 * The number of free eraseblocks when GC should stop
 */
#define CLEAN_BLOCK_THRESHOLD   20

/*
 * Number of free eraseblocks below which GC can also collect low frag
 * blocks.
 */
#define LOW_FRAG_GC_THRESHOLD   5

/*
 * Wear level cost amortization. We want to do wear leveling on the background
 * without disturbing gc too much. This is made by defining max GC frequency.
 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based
 * on the biggest wear difference rather than the biggest dirtiness.
 *
 * The lower freq2 should be chosen so that it makes sure the maximum erase
 * difference will decrease even if a malicious application is deliberately
 * trying to make erase differences large.
 */
#define MAX_ERASE_DIFF          4000
#define COLLECT_NONDIRTY_BASE   MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1  6
#define COLLECT_NONDIRTY_FREQ2  4

/*
 * Sentinel values for the mapping tables. PAGE_UNDEF is stored in revmap[]
 * for a flash block that holds no swap page; BLOCK_ERROR is stored in
 * page_data[] when moving a page failed. BLOCK_UNDEF and BLOCK_MAX are not
 * used in this part of the file (presumably "page not mapped" and the highest
 * valid block number - confirm against the rest of the driver).
 */
#define PAGE_UNDEF              UINT_MAX
#define BLOCK_UNDEF             UINT_MAX
#define BLOCK_ERROR             (UINT_MAX - 1)
#define BLOCK_MAX               (UINT_MAX - 2)

/*
 * Status bits kept in swap_eb.flags. During the initial scan the index of the
 * tree an erase block belongs to is additionally stashed in the bits from
 * EBLOCK_IDX_SHIFT upwards.
 */
#define EBLOCK_BAD              (1 << 0)
#define EBLOCK_NOMAGIC          (1 << 1)
#define EBLOCK_BITFLIP          (1 << 2)
#define EBLOCK_FAILED           (1 << 3)
#define EBLOCK_READERR          (1 << 4)
#define EBLOCK_IDX_SHIFT        5
68
/*
 * Per erase block bookkeeping. One entry exists for every erase block of the
 * device (array mtdswap_dev.eb_data); the entry's index in that array is the
 * erase block number.
 */
struct swap_eb {
        struct rb_node rb;              /* link in one of the state trees */
        struct rb_root *root;           /* tree currently holding us, or NULL */

        unsigned int flags;             /* EBLOCK_* bits */
        unsigned int active_count;      /* pages in this block holding mapped data */
        unsigned int erase_count;       /* number of erases, key of the trees */
        unsigned int pad;               /* speeds up pointer decrement */
};
78
/*
 * Smallest / largest erase count stored in a tree. The trees are ordered by
 * erase_count, so these are the first and last nodes. The tree must not be
 * empty: rb_first()/rb_last() would return NULL.
 */
#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
                                rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
                                rb)->erase_count)
83
/* A red-black tree of erase blocks together with its element count. */
struct mtdswap_tree {
        struct rb_root root;
        unsigned int count;
};

/*
 * Indices into mtdswap_dev.trees[]. Every usable erase block that is not the
 * current write block sits in exactly one of these trees. The order matters:
 * the GC tree selection walks the indices downwards from MTDSWAP_BITFLIP and
 * the wear-levelling selection walks them upwards to MTDSWAP_DIRTY.
 */
enum {
        MTDSWAP_CLEAN,
        MTDSWAP_USED,
        MTDSWAP_LOWFRAG,
        MTDSWAP_HIFRAG,
        MTDSWAP_DIRTY,
        MTDSWAP_BITFLIP,
        MTDSWAP_FAILING,
        MTDSWAP_TREE_CNT,
};
99
/* State of one mtdswap device. */
struct mtdswap_dev {
        struct mtd_blktrans_dev *mbd_dev;
        struct mtd_info *mtd;           /* underlying MTD device */
        struct device *dev;             /* used for dev_warn()/dev_err() */

        unsigned int *page_data;        /* swap page -> flash block (or BLOCK_*) */
        unsigned int *revmap;           /* flash block -> swap page (or PAGE_UNDEF) */

        unsigned int eblks;             /* number of entries in eb_data */
        unsigned int spare_eblks;       /* decremented for every block gone bad */
        unsigned int pages_per_eblk;
        unsigned int max_erase_count;   /* highest erase count seen so far */
        struct swap_eb *eb_data;        /* one entry per erase block */

        struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

        /* Counters; they are not updated in this part of the file. */
        unsigned long long sect_read_count;
        unsigned long long sect_write_count;
        unsigned long long mtd_write_count;
        unsigned long long mtd_read_count;
        unsigned long long discard_count;
        unsigned long long discard_page_count;

        unsigned int curr_write_pos;    /* next free page index in curr_write */
        struct swap_eb *curr_write;     /* block currently being filled, or NULL */

        char *page_buf;                 /* scratch buffer for page data */
        char *oob_buf;                  /* scratch buffer for OOB data */
};
129
/*
 * Marker written to the OOB area of an erase block: a little-endian magic
 * (MTDSWAP_MAGIC_CLEAN or MTDSWAP_MAGIC_DIRTY) and, for the clean marker, the
 * block's erase count.
 */
struct mtdswap_oobdata {
        __le16 magic;
        __le32 count;
} __packed;
134
/* Magic values stored in mtdswap_oobdata.magic. */
#define MTDSWAP_MAGIC_CLEAN     0x2095
#define MTDSWAP_MAGIC_DIRTY     (MTDSWAP_MAGIC_CLEAN + 1)
/* Marker selector passed to mtdswap_write_marker(). */
#define MTDSWAP_TYPE_CLEAN      0
#define MTDSWAP_TYPE_DIRTY      1
#define MTDSWAP_OOBSIZE         sizeof(struct mtdswap_oobdata)

#define MTDSWAP_ERASE_RETRIES   3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES      3

/* Non-negative results of mtdswap_read_markers(); negative means read error. */
enum {
        MTDSWAP_SCANNED_CLEAN,
        MTDSWAP_SCANNED_DIRTY,
        MTDSWAP_SCANNED_BITFLIP,
        MTDSWAP_SCANNED_BAD,
};
150
/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 */
#define MIN_SPARE_EBLOCKS       2
#define MIN_ERASE_BLOCKS        (MIN_SPARE_EBLOCKS + 1)

/*
 * Accessors for the state trees of device @d. @name is the enum constant
 * without its MTDSWAP_ prefix, e.g. TREE_EMPTY(d, CLEAN).
 */
#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)

/* The blktrans device keeps the mtdswap_dev pointer in its priv field. */
#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)
166
/* Module parameters; all of them are read-only (0444) once loaded. */
static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
                "partitions=\"1,3,5\"");

static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
                "garbage collection (default 10%)");

static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
                "Include builtin swap header (default 0, without header)");

/* Needed early: mtdswap_write_block() calls the garbage collector. */
static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
183
184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
185 {
186         return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
187 }
188
189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
190 {
191         unsigned int oldidx;
192         struct mtdswap_tree *tp;
193
194         if (eb->root) {
195                 tp = container_of(eb->root, struct mtdswap_tree, root);
196                 oldidx = tp - &d->trees[0];
197
198                 d->trees[oldidx].count--;
199                 rb_erase(&eb->rb, eb->root);
200         }
201 }
202
203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
204 {
205         struct rb_node **p, *parent = NULL;
206         struct swap_eb *cur;
207
208         p = &root->rb_node;
209         while (*p) {
210                 parent = *p;
211                 cur = rb_entry(parent, struct swap_eb, rb);
212                 if (eb->erase_count > cur->erase_count)
213                         p = &(*p)->rb_right;
214                 else
215                         p = &(*p)->rb_left;
216         }
217
218         rb_link_node(&eb->rb, parent, p);
219         rb_insert_color(&eb->rb, root);
220 }
221
222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
223 {
224         struct rb_root *root;
225
226         if (eb->root == &d->trees[idx].root)
227                 return;
228
229         mtdswap_eb_detach(d, eb);
230         root = &d->trees[idx].root;
231         __mtdswap_rb_add(root, eb);
232         eb->root = root;
233         d->trees[idx].count++;
234 }
235
/*
 * Return the @idx'th node of @root in ascending order (0 is the first node),
 * or NULL when the tree holds @idx nodes or fewer.
 */
static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
        struct rb_node *node = rb_first(root);
        unsigned int pos;

        for (pos = 0; pos < idx && node; pos++)
                node = rb_next(node);

        return node;
}
250
251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
252 {
253         int ret;
254         loff_t offset;
255
256         d->spare_eblks--;
257         eb->flags |= EBLOCK_BAD;
258         mtdswap_eb_detach(d, eb);
259         eb->root = NULL;
260
261         /* badblocks not supported */
262         if (!mtd_can_have_bb(d->mtd))
263                 return 1;
264
265         offset = mtdswap_eb_offset(d, eb);
266         dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
267         ret = mtd_block_markbad(d->mtd, offset);
268
269         if (ret) {
270                 dev_warn(d->dev, "Mark block bad failed for block at %08llx "
271                         "error %d\n", offset, ret);
272                 return ret;
273         }
274
275         return 1;
276
277 }
278
279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
280 {
281         unsigned int marked = eb->flags & EBLOCK_FAILED;
282         struct swap_eb *curr_write = d->curr_write;
283
284         eb->flags |= EBLOCK_FAILED;
285         if (curr_write == eb) {
286                 d->curr_write = NULL;
287
288                 if (!marked && d->curr_write_pos != 0) {
289                         mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
290                         return 0;
291                 }
292         }
293
294         return mtdswap_handle_badblock(d, eb);
295 }
296
297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
298                         struct mtd_oob_ops *ops)
299 {
300         int ret = mtd_read_oob(d->mtd, from, ops);
301
302         if (mtd_is_bitflip(ret))
303                 return ret;
304
305         if (ret) {
306                 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
307                         ret, from);
308                 return ret;
309         }
310
311         if (ops->oobretlen < ops->ooblen) {
312                 dev_warn(d->dev, "Read OOB return short read (%zd bytes not "
313                         "%zd) for block at %08llx\n",
314                         ops->oobretlen, ops->ooblen, from);
315                 return -EIO;
316         }
317
318         return 0;
319 }
320
321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
322 {
323         struct mtdswap_oobdata *data, *data2;
324         int ret;
325         loff_t offset;
326         struct mtd_oob_ops ops;
327
328         offset = mtdswap_eb_offset(d, eb);
329
330         /* Check first if the block is bad. */
331         if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
332                 return MTDSWAP_SCANNED_BAD;
333
334         ops.ooblen = 2 * d->mtd->oobavail;
335         ops.oobbuf = d->oob_buf;
336         ops.ooboffs = 0;
337         ops.datbuf = NULL;
338         ops.mode = MTD_OPS_AUTO_OOB;
339
340         ret = mtdswap_read_oob(d, offset, &ops);
341
342         if (ret && !mtd_is_bitflip(ret))
343                 return ret;
344
345         data = (struct mtdswap_oobdata *)d->oob_buf;
346         data2 = (struct mtdswap_oobdata *)
347                 (d->oob_buf + d->mtd->oobavail);
348
349         if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
350                 eb->erase_count = le32_to_cpu(data->count);
351                 if (mtd_is_bitflip(ret))
352                         ret = MTDSWAP_SCANNED_BITFLIP;
353                 else {
354                         if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
355                                 ret = MTDSWAP_SCANNED_DIRTY;
356                         else
357                                 ret = MTDSWAP_SCANNED_CLEAN;
358                 }
359         } else {
360                 eb->flags |= EBLOCK_NOMAGIC;
361                 ret = MTDSWAP_SCANNED_DIRTY;
362         }
363
364         return ret;
365 }
366
367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
368                                 u16 marker)
369 {
370         struct mtdswap_oobdata n;
371         int ret;
372         loff_t offset;
373         struct mtd_oob_ops ops;
374
375         ops.ooboffs = 0;
376         ops.oobbuf = (uint8_t *)&n;
377         ops.mode = MTD_OPS_AUTO_OOB;
378         ops.datbuf = NULL;
379
380         if (marker == MTDSWAP_TYPE_CLEAN) {
381                 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
382                 n.count = cpu_to_le32(eb->erase_count);
383                 ops.ooblen = MTDSWAP_OOBSIZE;
384                 offset = mtdswap_eb_offset(d, eb);
385         } else {
386                 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
387                 ops.ooblen = sizeof(n.magic);
388                 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
389         }
390
391         ret = mtd_write_oob(d->mtd, offset, &ops);
392
393         if (ret) {
394                 dev_warn(d->dev, "Write OOB failed for block at %08llx "
395                         "error %d\n", offset, ret);
396                 if (ret == -EIO || mtd_is_eccerr(ret))
397                         mtdswap_handle_write_error(d, eb);
398                 return ret;
399         }
400
401         if (ops.oobretlen != ops.ooblen) {
402                 dev_warn(d->dev, "Short OOB write for block at %08llx: "
403                         "%zd not %zd\n",
404                         offset, ops.oobretlen, ops.ooblen);
405                 return ret;
406         }
407
408         return 0;
409 }
410
411 /*
412  * Are there any erase blocks without MAGIC_CLEAN header, presumably
413  * because power was cut off after erase but before header write? We
414  * need to guestimate the erase count.
415  */
416 static void mtdswap_check_counts(struct mtdswap_dev *d)
417 {
418         struct rb_root hist_root = RB_ROOT;
419         struct rb_node *medrb;
420         struct swap_eb *eb;
421         unsigned int i, cnt, median;
422
423         cnt = 0;
424         for (i = 0; i < d->eblks; i++) {
425                 eb = d->eb_data + i;
426
427                 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
428                         continue;
429
430                 __mtdswap_rb_add(&hist_root, eb);
431                 cnt++;
432         }
433
434         if (cnt == 0)
435                 return;
436
437         medrb = mtdswap_rb_index(&hist_root, cnt / 2);
438         median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
439
440         d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
441
442         for (i = 0; i < d->eblks; i++) {
443                 eb = d->eb_data + i;
444
445                 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
446                         eb->erase_count = median;
447
448                 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
449                         continue;
450
451                 rb_erase(&eb->rb, &hist_root);
452         }
453 }
454
455 static void mtdswap_scan_eblks(struct mtdswap_dev *d)
456 {
457         int status;
458         unsigned int i, idx;
459         struct swap_eb *eb;
460
461         for (i = 0; i < d->eblks; i++) {
462                 eb = d->eb_data + i;
463
464                 status = mtdswap_read_markers(d, eb);
465                 if (status < 0)
466                         eb->flags |= EBLOCK_READERR;
467                 else if (status == MTDSWAP_SCANNED_BAD) {
468                         eb->flags |= EBLOCK_BAD;
469                         continue;
470                 }
471
472                 switch (status) {
473                 case MTDSWAP_SCANNED_CLEAN:
474                         idx = MTDSWAP_CLEAN;
475                         break;
476                 case MTDSWAP_SCANNED_DIRTY:
477                 case MTDSWAP_SCANNED_BITFLIP:
478                         idx = MTDSWAP_DIRTY;
479                         break;
480                 default:
481                         idx = MTDSWAP_FAILING;
482                 }
483
484                 eb->flags |= (idx << EBLOCK_IDX_SHIFT);
485         }
486
487         mtdswap_check_counts(d);
488
489         for (i = 0; i < d->eblks; i++) {
490                 eb = d->eb_data + i;
491
492                 if (eb->flags & EBLOCK_BAD)
493                         continue;
494
495                 idx = eb->flags >> EBLOCK_IDX_SHIFT;
496                 mtdswap_rb_add(d, eb, idx);
497         }
498 }
499
500 /*
501  * Place eblk into a tree corresponding to its number of active blocks
502  * it contains.
503  */
504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
505 {
506         unsigned int weight = eb->active_count;
507         unsigned int maxweight = d->pages_per_eblk;
508
509         if (eb == d->curr_write)
510                 return;
511
512         if (eb->flags & EBLOCK_BITFLIP)
513                 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
514         else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
515                 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
516         if (weight == maxweight)
517                 mtdswap_rb_add(d, eb, MTDSWAP_USED);
518         else if (weight == 0)
519                 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
520         else if (weight > (maxweight/2))
521                 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
522         else
523                 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
524 }
525
526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
527 {
528         struct mtd_info *mtd = d->mtd;
529         struct erase_info erase;
530         unsigned int retries = 0;
531         int ret;
532
533         eb->erase_count++;
534         if (eb->erase_count > d->max_erase_count)
535                 d->max_erase_count = eb->erase_count;
536
537 retry:
538         memset(&erase, 0, sizeof(struct erase_info));
539         erase.addr      = mtdswap_eb_offset(d, eb);
540         erase.len       = mtd->erasesize;
541
542         ret = mtd_erase(mtd, &erase);
543         if (ret) {
544                 if (retries++ < MTDSWAP_ERASE_RETRIES) {
545                         dev_warn(d->dev,
546                                 "erase of erase block %#llx on %s failed",
547                                 erase.addr, mtd->name);
548                         yield();
549                         goto retry;
550                 }
551
552                 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
553                         erase.addr, mtd->name);
554
555                 mtdswap_handle_badblock(d, eb);
556                 return -EIO;
557         }
558
559         return 0;
560 }
561
562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
563                                 unsigned int *block)
564 {
565         int ret;
566         struct swap_eb *old_eb = d->curr_write;
567         struct rb_root *clean_root;
568         struct swap_eb *eb;
569
570         if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
571                 do {
572                         if (TREE_EMPTY(d, CLEAN))
573                                 return -ENOSPC;
574
575                         clean_root = TREE_ROOT(d, CLEAN);
576                         eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
577                         rb_erase(&eb->rb, clean_root);
578                         eb->root = NULL;
579                         TREE_COUNT(d, CLEAN)--;
580
581                         ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
582                 } while (ret == -EIO || mtd_is_eccerr(ret));
583
584                 if (ret)
585                         return ret;
586
587                 d->curr_write_pos = 0;
588                 d->curr_write = eb;
589                 if (old_eb)
590                         mtdswap_store_eb(d, old_eb);
591         }
592
593         *block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
594                 d->curr_write_pos;
595
596         d->curr_write->active_count++;
597         d->revmap[*block] = page;
598         d->curr_write_pos++;
599
600         return 0;
601 }
602
603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
604 {
605         return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
606                 d->pages_per_eblk - d->curr_write_pos;
607 }
608
609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
610 {
611         return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
612 }
613
614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
615                         unsigned int page, unsigned int *bp, int gc_context)
616 {
617         struct mtd_info *mtd = d->mtd;
618         struct swap_eb *eb;
619         size_t retlen;
620         loff_t writepos;
621         int ret;
622
623 retry:
624         if (!gc_context)
625                 while (!mtdswap_enough_free_pages(d))
626                         if (mtdswap_gc(d, 0) > 0)
627                                 return -ENOSPC;
628
629         ret = mtdswap_map_free_block(d, page, bp);
630         eb = d->eb_data + (*bp / d->pages_per_eblk);
631
632         if (ret == -EIO || mtd_is_eccerr(ret)) {
633                 d->curr_write = NULL;
634                 eb->active_count--;
635                 d->revmap[*bp] = PAGE_UNDEF;
636                 goto retry;
637         }
638
639         if (ret < 0)
640                 return ret;
641
642         writepos = (loff_t)*bp << PAGE_SHIFT;
643         ret =  mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
644         if (ret == -EIO || mtd_is_eccerr(ret)) {
645                 d->curr_write_pos--;
646                 eb->active_count--;
647                 d->revmap[*bp] = PAGE_UNDEF;
648                 mtdswap_handle_write_error(d, eb);
649                 goto retry;
650         }
651
652         if (ret < 0) {
653                 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
654                         ret, retlen);
655                 goto err;
656         }
657
658         if (retlen != PAGE_SIZE) {
659                 dev_err(d->dev, "Short write to MTD device: %zd written",
660                         retlen);
661                 ret = -EIO;
662                 goto err;
663         }
664
665         return ret;
666
667 err:
668         d->curr_write_pos--;
669         eb->active_count--;
670         d->revmap[*bp] = PAGE_UNDEF;
671
672         return ret;
673 }
674
675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
676                 unsigned int *newblock)
677 {
678         struct mtd_info *mtd = d->mtd;
679         struct swap_eb *eb, *oldeb;
680         int ret;
681         size_t retlen;
682         unsigned int page, retries;
683         loff_t readpos;
684
685         page = d->revmap[oldblock];
686         readpos = (loff_t) oldblock << PAGE_SHIFT;
687         retries = 0;
688
689 retry:
690         ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
691
692         if (ret < 0 && !mtd_is_bitflip(ret)) {
693                 oldeb = d->eb_data + oldblock / d->pages_per_eblk;
694                 oldeb->flags |= EBLOCK_READERR;
695
696                 dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
697                         oldblock);
698                 retries++;
699                 if (retries < MTDSWAP_IO_RETRIES)
700                         goto retry;
701
702                 goto read_error;
703         }
704
705         if (retlen != PAGE_SIZE) {
706                 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
707                        oldblock);
708                 ret = -EIO;
709                 goto read_error;
710         }
711
712         ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
713         if (ret < 0) {
714                 d->page_data[page] = BLOCK_ERROR;
715                 dev_err(d->dev, "Write error: %d\n", ret);
716                 return ret;
717         }
718
719         eb = d->eb_data + *newblock / d->pages_per_eblk;
720         d->page_data[page] = *newblock;
721         d->revmap[oldblock] = PAGE_UNDEF;
722         eb = d->eb_data + oldblock / d->pages_per_eblk;
723         eb->active_count--;
724
725         return 0;
726
727 read_error:
728         d->page_data[page] = BLOCK_ERROR;
729         d->revmap[oldblock] = PAGE_UNDEF;
730         return ret;
731 }
732
733 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
734 {
735         unsigned int i, block, eblk_base, newblock;
736         int ret, errcode;
737
738         errcode = 0;
739         eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
740
741         for (i = 0; i < d->pages_per_eblk; i++) {
742                 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
743                         return -ENOSPC;
744
745                 block = eblk_base + i;
746                 if (d->revmap[block] == PAGE_UNDEF)
747                         continue;
748
749                 ret = mtdswap_move_block(d, block, &newblock);
750                 if (ret < 0 && !errcode)
751                         errcode = ret;
752         }
753
754         return errcode;
755 }
756
757 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
758 {
759         int idx, stopat;
760
761         if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD)
762                 stopat = MTDSWAP_LOWFRAG;
763         else
764                 stopat = MTDSWAP_HIFRAG;
765
766         for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
767                 if (d->trees[idx].root.rb_node != NULL)
768                         return idx;
769
770         return -1;
771 }
772
773 static int mtdswap_wlfreq(unsigned int maxdiff)
774 {
775         unsigned int h, x, y, dist, base;
776
777         /*
778          * Calculate linear ramp down from f1 to f2 when maxdiff goes from
779          * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE.  Similar
780          * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE.
781          */
782
783         dist = maxdiff - MAX_ERASE_DIFF;
784         if (dist > COLLECT_NONDIRTY_BASE)
785                 dist = COLLECT_NONDIRTY_BASE;
786
787         /*
788          * Modelling the slop as right angular triangle with base
789          * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
790          * equal to the ratio h/base.
791          */
792         h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
793         base = COLLECT_NONDIRTY_BASE;
794
795         x = dist - base;
796         y = (x * h + base / 2) / base;
797
798         return COLLECT_NONDIRTY_FREQ2 + y;
799 }
800
801 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
802 {
803         static unsigned int pick_cnt;
804         unsigned int i, idx = -1, wear, max;
805         struct rb_root *root;
806
807         max = 0;
808         for (i = 0; i <= MTDSWAP_DIRTY; i++) {
809                 root = &d->trees[i].root;
810                 if (root->rb_node == NULL)
811                         continue;
812
813                 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
814                 if (wear > max) {
815                         max = wear;
816                         idx = i;
817                 }
818         }
819
820         if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
821                 pick_cnt = 0;
822                 return idx;
823         }
824
825         pick_cnt++;
826         return -1;
827 }
828
829 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
830                                 unsigned int background)
831 {
832         int idx;
833
834         if (TREE_NONEMPTY(d, FAILING) &&
835                 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
836                 return MTDSWAP_FAILING;
837
838         idx = mtdswap_choose_wl_tree(d);
839         if (idx >= MTDSWAP_CLEAN)
840                 return idx;
841
842         return __mtdswap_choose_gc_tree(d);
843 }
844
845 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
846                                         unsigned int background)
847 {
848         struct rb_root *rp = NULL;
849         struct swap_eb *eb = NULL;
850         int idx;
851
852         if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
853                 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
854                 return NULL;
855
856         idx = mtdswap_choose_gc_tree(d, background);
857         if (idx < 0)
858                 return NULL;
859
860         rp = &d->trees[idx].root;
861         eb = rb_entry(rb_first(rp), struct swap_eb, rb);
862
863         rb_erase(&eb->rb, rp);
864         eb->root = NULL;
865         d->trees[idx].count--;
866         return eb;
867 }
868
/*
 * Test pattern for step @i: alternating-bit words, 0xAAAAAAAA for even @i and
 * 0x55555555 for odd @i.
 */
static unsigned int mtdswap_test_patt(unsigned int i)
{
        if (i & 1)
                return 0x55555555;

        return 0xAAAAAAAA;
}
873
874 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
875                                         struct swap_eb *eb)
876 {
877         struct mtd_info *mtd = d->mtd;
878         unsigned int test, i, j, patt, mtd_pages;
879         loff_t base, pos;
880         unsigned int *p1 = (unsigned int *)d->page_buf;
881         unsigned char *p2 = (unsigned char *)d->oob_buf;
882         struct mtd_oob_ops ops;
883         int ret;
884
885         ops.mode = MTD_OPS_AUTO_OOB;
886         ops.len = mtd->writesize;
887         ops.ooblen = mtd->oobavail;
888         ops.ooboffs = 0;
889         ops.datbuf = d->page_buf;
890         ops.oobbuf = d->oob_buf;
891         base = mtdswap_eb_offset(d, eb);
892         mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
893
894         for (test = 0; test < 2; test++) {
895                 pos = base;
896                 for (i = 0; i < mtd_pages; i++) {
897                         patt = mtdswap_test_patt(test + i);
898                         memset(d->page_buf, patt, mtd->writesize);
899                         memset(d->oob_buf, patt, mtd->oobavail);
900                         ret = mtd_write_oob(mtd, pos, &ops);
901                         if (ret)
902                                 goto error;
903
904                         pos += mtd->writesize;
905                 }
906
907                 pos = base;
908                 for (i = 0; i < mtd_pages; i++) {
909                         ret = mtd_read_oob(mtd, pos, &ops);
910                         if (ret)
911                                 goto error;
912
913                         patt = mtdswap_test_patt(test + i);
914                         for (j = 0; j < mtd->writesize/sizeof(int); j++)
915                                 if (p1[j] != patt)
916                                         goto error;
917
918                         for (j = 0; j < mtd->oobavail; j++)
919                                 if (p2[j] != (unsigned char)patt)
920                                         goto error;
921
922                         pos += mtd->writesize;
923                 }
924
925                 ret = mtdswap_erase_block(d, eb);
926                 if (ret)
927                         goto error;
928         }
929
930         eb->flags &= ~EBLOCK_READERR;
931         return 1;
932
933 error:
934         mtdswap_handle_badblock(d, eb);
935         return 0;
936 }
937
938 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
939 {
940         struct swap_eb *eb;
941         int ret;
942
943         if (d->spare_eblks < MIN_SPARE_EBLOCKS)
944                 return 1;
945
946         eb = mtdswap_pick_gc_eblk(d, background);
947         if (!eb)
948                 return 1;
949
950         ret = mtdswap_gc_eblock(d, eb);
951         if (ret == -ENOSPC)
952                 return 1;
953
954         if (eb->flags & EBLOCK_FAILED) {
955                 mtdswap_handle_badblock(d, eb);
956                 return 0;
957         }
958
959         eb->flags &= ~EBLOCK_BITFLIP;
960         ret = mtdswap_erase_block(d, eb);
961         if ((eb->flags & EBLOCK_READERR) &&
962                 (ret || !mtdswap_eblk_passes(d, eb)))
963                 return 0;
964
965         if (ret == 0)
966                 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);
967
968         if (ret == 0)
969                 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
970         else if (ret != -EIO && !mtd_is_eccerr(ret))
971                 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
972
973         return 0;
974 }
975
/*
 * Background hook: keep running GC passes in background mode until
 * mtdswap_gc() reports there is nothing more to do or
 * mtd_blktrans_cease_background() asks us to stop.
 */
static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

	for (;;) {
		if (mtdswap_gc(d, 1))
			return;

		if (mtd_blktrans_cease_background(dev))
			return;
	}
}
987
988 static void mtdswap_cleanup(struct mtdswap_dev *d)
989 {
990         vfree(d->eb_data);
991         vfree(d->revmap);
992         vfree(d->page_data);
993         kfree(d->oob_buf);
994         kfree(d->page_buf);
995 }
996
997 static int mtdswap_flush(struct mtd_blktrans_dev *dev)
998 {
999         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1000
1001         mtd_sync(d->mtd);
1002         return 0;
1003 }
1004
1005 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
1006 {
1007         loff_t offset;
1008         unsigned int badcnt;
1009
1010         badcnt = 0;
1011
1012         if (mtd_can_have_bb(mtd))
1013                 for (offset = 0; offset < size; offset += mtd->erasesize)
1014                         if (mtd_block_isbad(mtd, offset))
1015                                 badcnt++;
1016
1017         return badcnt;
1018 }
1019
1020 static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
1021                         unsigned long page, char *buf)
1022 {
1023         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1024         unsigned int newblock, mapped;
1025         struct swap_eb *eb;
1026         int ret;
1027
1028         d->sect_write_count++;
1029
1030         if (d->spare_eblks < MIN_SPARE_EBLOCKS)
1031                 return -ENOSPC;
1032
1033         if (header) {
1034                 /* Ignore writes to the header page */
1035                 if (unlikely(page == 0))
1036                         return 0;
1037
1038                 page--;
1039         }
1040
1041         mapped = d->page_data[page];
1042         if (mapped <= BLOCK_MAX) {
1043                 eb = d->eb_data + (mapped / d->pages_per_eblk);
1044                 eb->active_count--;
1045                 mtdswap_store_eb(d, eb);
1046                 d->page_data[page] = BLOCK_UNDEF;
1047                 d->revmap[mapped] = PAGE_UNDEF;
1048         }
1049
1050         ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1051         d->mtd_write_count++;
1052
1053         if (ret < 0)
1054                 return ret;
1055
1056         d->page_data[page] = newblock;
1057
1058         return 0;
1059 }
1060
1061 /* Provide a dummy swap header for the kernel */
1062 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
1063 {
1064         union swap_header *hd = (union swap_header *)(buf);
1065
1066         memset(buf, 0, PAGE_SIZE - 10);
1067
1068         hd->info.version = 1;
1069         hd->info.last_page = d->mbd_dev->size - 1;
1070         hd->info.nr_badpages = 0;
1071
1072         memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
1073
1074         return 0;
1075 }
1076
1077 static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
1078                         unsigned long page, char *buf)
1079 {
1080         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1081         struct mtd_info *mtd = d->mtd;
1082         unsigned int realblock, retries;
1083         loff_t readpos;
1084         struct swap_eb *eb;
1085         size_t retlen;
1086         int ret;
1087
1088         d->sect_read_count++;
1089
1090         if (header) {
1091                 if (unlikely(page == 0))
1092                         return mtdswap_auto_header(d, buf);
1093
1094                 page--;
1095         }
1096
1097         realblock = d->page_data[page];
1098         if (realblock > BLOCK_MAX) {
1099                 memset(buf, 0x0, PAGE_SIZE);
1100                 if (realblock == BLOCK_UNDEF)
1101                         return 0;
1102                 else
1103                         return -EIO;
1104         }
1105
1106         eb = d->eb_data + (realblock / d->pages_per_eblk);
1107         BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
1108
1109         readpos = (loff_t)realblock << PAGE_SHIFT;
1110         retries = 0;
1111
1112 retry:
1113         ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);
1114
1115         d->mtd_read_count++;
1116         if (mtd_is_bitflip(ret)) {
1117                 eb->flags |= EBLOCK_BITFLIP;
1118                 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
1119                 ret = 0;
1120         }
1121
1122         if (ret < 0) {
1123                 dev_err(d->dev, "Read error %d\n", ret);
1124                 eb->flags |= EBLOCK_READERR;
1125                 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
1126                 retries++;
1127                 if (retries < MTDSWAP_IO_RETRIES)
1128                         goto retry;
1129
1130                 return ret;
1131         }
1132
1133         if (retlen != PAGE_SIZE) {
1134                 dev_err(d->dev, "Short read %zd\n", retlen);
1135                 return -EIO;
1136         }
1137
1138         return 0;
1139 }
1140
1141 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
1142                         unsigned nr_pages)
1143 {
1144         struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1145         unsigned long page;
1146         struct swap_eb *eb;
1147         unsigned int mapped;
1148
1149         d->discard_count++;
1150
1151         for (page = first; page < first + nr_pages; page++) {
1152                 mapped = d->page_data[page];
1153                 if (mapped <= BLOCK_MAX) {
1154                         eb = d->eb_data + (mapped / d->pages_per_eblk);
1155                         eb->active_count--;
1156                         mtdswap_store_eb(d, eb);
1157                         d->page_data[page] = BLOCK_UNDEF;
1158                         d->revmap[mapped] = PAGE_UNDEF;
1159                         d->discard_page_count++;
1160                 } else if (mapped == BLOCK_ERROR) {
1161                         d->page_data[page] = BLOCK_UNDEF;
1162                         d->discard_page_count++;
1163                 }
1164         }
1165
1166         return 0;
1167 }
1168
1169 static int mtdswap_show(struct seq_file *s, void *data)
1170 {
1171         struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
1172         unsigned long sum;
1173         unsigned int count[MTDSWAP_TREE_CNT];
1174         unsigned int min[MTDSWAP_TREE_CNT];
1175         unsigned int max[MTDSWAP_TREE_CNT];
1176         unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
1177         uint64_t use_size;
1178         static const char * const name[] = {
1179                 "clean", "used", "low", "high", "dirty", "bitflip", "failing"
1180         };
1181
1182         mutex_lock(&d->mbd_dev->lock);
1183
1184         for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1185                 struct rb_root *root = &d->trees[i].root;
1186
1187                 if (root->rb_node) {
1188                         count[i] = d->trees[i].count;
1189                         min[i] = MTDSWAP_ECNT_MIN(root);
1190                         max[i] = MTDSWAP_ECNT_MAX(root);
1191                 } else
1192                         count[i] = 0;
1193         }
1194
1195         if (d->curr_write) {
1196                 cw = 1;
1197                 cwp = d->curr_write_pos;
1198                 cwecount = d->curr_write->erase_count;
1199         }
1200
1201         sum = 0;
1202         for (i = 0; i < d->eblks; i++)
1203                 sum += d->eb_data[i].erase_count;
1204
1205         use_size = (uint64_t)d->eblks * d->mtd->erasesize;
1206         bb_cnt = mtdswap_badblocks(d->mtd, use_size);
1207
1208         mapped = 0;
1209         pages = d->mbd_dev->size;
1210         for (i = 0; i < pages; i++)
1211                 if (d->page_data[i] != BLOCK_UNDEF)
1212                         mapped++;
1213
1214         mutex_unlock(&d->mbd_dev->lock);
1215
1216         for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1217                 if (!count[i])
1218                         continue;
1219
1220                 if (min[i] != max[i])
1221                         seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
1222                                 "max %d times\n",
1223                                 name[i], count[i], min[i], max[i]);
1224                 else
1225                         seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
1226                                 "times\n", name[i], count[i], min[i]);
1227         }
1228
1229         if (bb_cnt)
1230                 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);
1231
1232         if (cw)
1233                 seq_printf(s, "current erase block: %u pages used, %u free, "
1234                         "erased %u times\n",
1235                         cwp, d->pages_per_eblk - cwp, cwecount);
1236
1237         seq_printf(s, "total erasures: %lu\n", sum);
1238
1239         seq_puts(s, "\n");
1240
1241         seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
1242         seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
1243         seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
1244         seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
1245         seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
1246         seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);
1247
1248         seq_puts(s, "\n");
1249         seq_printf(s, "total pages: %u\n", pages);
1250         seq_printf(s, "pages mapped: %u\n", mapped);
1251
1252         return 0;
1253 }
1254 DEFINE_SHOW_ATTRIBUTE(mtdswap);
1255
1256 static int mtdswap_add_debugfs(struct mtdswap_dev *d)
1257 {
1258         struct dentry *root = d->mtd->dbg.dfs_dir;
1259
1260         if (!IS_ENABLED(CONFIG_DEBUG_FS))
1261                 return 0;
1262
1263         if (IS_ERR_OR_NULL(root))
1264                 return -1;
1265
1266         debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops);
1267
1268         return 0;
1269 }
1270
1271 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
1272                         unsigned int spare_cnt)
1273 {
1274         struct mtd_info *mtd = d->mbd_dev->mtd;
1275         unsigned int i, eblk_bytes, pages, blocks;
1276         int ret = -ENOMEM;
1277
1278         d->mtd = mtd;
1279         d->eblks = eblocks;
1280         d->spare_eblks = spare_cnt;
1281         d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
1282
1283         pages = d->mbd_dev->size;
1284         blocks = eblocks * d->pages_per_eblk;
1285
1286         for (i = 0; i < MTDSWAP_TREE_CNT; i++)
1287                 d->trees[i].root = RB_ROOT;
1288
1289         d->page_data = vmalloc(array_size(pages, sizeof(int)));
1290         if (!d->page_data)
1291                 goto page_data_fail;
1292
1293         d->revmap = vmalloc(array_size(blocks, sizeof(int)));
1294         if (!d->revmap)
1295                 goto revmap_fail;
1296
1297         eblk_bytes = sizeof(struct swap_eb)*d->eblks;
1298         d->eb_data = vzalloc(eblk_bytes);
1299         if (!d->eb_data)
1300                 goto eb_data_fail;
1301
1302         for (i = 0; i < pages; i++)
1303                 d->page_data[i] = BLOCK_UNDEF;
1304
1305         for (i = 0; i < blocks; i++)
1306                 d->revmap[i] = PAGE_UNDEF;
1307
1308         d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1309         if (!d->page_buf)
1310                 goto page_buf_fail;
1311
1312         d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
1313         if (!d->oob_buf)
1314                 goto oob_buf_fail;
1315
1316         mtdswap_scan_eblks(d);
1317
1318         return 0;
1319
1320 oob_buf_fail:
1321         kfree(d->page_buf);
1322 page_buf_fail:
1323         vfree(d->eb_data);
1324 eb_data_fail:
1325         vfree(d->revmap);
1326 revmap_fail:
1327         vfree(d->page_data);
1328 page_data_fail:
1329         printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
1330         return ret;
1331 }
1332
1333 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1334 {
1335         struct mtdswap_dev *d;
1336         struct mtd_blktrans_dev *mbd_dev;
1337         char *parts;
1338         char *this_opt;
1339         unsigned long part;
1340         unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1341         uint64_t swap_size, use_size, size_limit;
1342         int ret;
1343
1344         parts = &partitions[0];
1345         if (!*parts)
1346                 return;
1347
1348         while ((this_opt = strsep(&parts, ",")) != NULL) {
1349                 if (kstrtoul(this_opt, 0, &part) < 0)
1350                         return;
1351
1352                 if (mtd->index == part)
1353                         break;
1354         }
1355
1356         if (mtd->index != part)
1357                 return;
1358
1359         if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
1360                 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
1361                         "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
1362                 return;
1363         }
1364
1365         if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
1366                 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
1367                         " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
1368                 return;
1369         }
1370
1371         if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
1372                 printk(KERN_ERR "%s: Not enough free bytes in OOB, "
1373                         "%d available, %zu needed.\n",
1374                         MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
1375                 return;
1376         }
1377
1378         if (spare_eblocks > 100)
1379                 spare_eblocks = 100;
1380
1381         use_size = mtd->size;
1382         size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
1383
1384         if (mtd->size > size_limit) {
1385                 printk(KERN_WARNING "%s: Device too large. Limiting size to "
1386                         "%llu bytes\n", MTDSWAP_PREFIX, size_limit);
1387                 use_size = size_limit;
1388         }
1389
1390         eblocks = mtd_div_by_eb(use_size, mtd);
1391         use_size = (uint64_t)eblocks * mtd->erasesize;
1392         bad_blocks = mtdswap_badblocks(mtd, use_size);
1393         eavailable = eblocks - bad_blocks;
1394
1395         if (eavailable < MIN_ERASE_BLOCKS) {
1396                 printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
1397                         "%d needed\n", MTDSWAP_PREFIX, eavailable,
1398                         MIN_ERASE_BLOCKS);
1399                 return;
1400         }
1401
1402         spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
1403
1404         if (spare_cnt < MIN_SPARE_EBLOCKS)
1405                 spare_cnt = MIN_SPARE_EBLOCKS;
1406
1407         if (spare_cnt > eavailable - 1)
1408                 spare_cnt = eavailable - 1;
1409
1410         swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
1411                 (header ? PAGE_SIZE : 0);
1412
1413         printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1414                 "%u spare, %u bad blocks\n",
1415                 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
1416
1417         d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
1418         if (!d)
1419                 return;
1420
1421         mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
1422         if (!mbd_dev) {
1423                 kfree(d);
1424                 return;
1425         }
1426
1427         d->mbd_dev = mbd_dev;
1428         mbd_dev->priv = d;
1429
1430         mbd_dev->mtd = mtd;
1431         mbd_dev->devnum = mtd->index;
1432         mbd_dev->size = swap_size >> PAGE_SHIFT;
1433         mbd_dev->tr = tr;
1434
1435         if (!(mtd->flags & MTD_WRITEABLE))
1436                 mbd_dev->readonly = 1;
1437
1438         if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1439                 goto init_failed;
1440
1441         if (add_mtd_blktrans_dev(mbd_dev) < 0)
1442                 goto cleanup;
1443
1444         d->dev = disk_to_dev(mbd_dev->disk);
1445
1446         ret = mtdswap_add_debugfs(d);
1447         if (ret < 0)
1448                 goto debugfs_failed;
1449
1450         return;
1451
1452 debugfs_failed:
1453         del_mtd_blktrans_dev(mbd_dev);
1454
1455 cleanup:
1456         mtdswap_cleanup(d);
1457
1458 init_failed:
1459         kfree(mbd_dev);
1460         kfree(d);
1461 }
1462
/*
 * blktrans remove_dev hook: unregister the block device, then release the
 * mtdswap buffers and the mtdswap device structure. @dev itself is not
 * freed here.
 */
static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *swap = MTDSWAP_MBD_TO_MTDSWAP(dev);

	del_mtd_blktrans_dev(dev);

	mtdswap_cleanup(swap);
	kfree(swap);
}
1471
1472 static struct mtd_blktrans_ops mtdswap_ops = {
1473         .name           = "mtdswap",
1474         .major          = 0,
1475         .part_bits      = 0,
1476         .blksize        = PAGE_SIZE,
1477         .flush          = mtdswap_flush,
1478         .readsect       = mtdswap_readsect,
1479         .writesect      = mtdswap_writesect,
1480         .discard        = mtdswap_discard,
1481         .background     = mtdswap_background,
1482         .add_mtd        = mtdswap_add_mtd,
1483         .remove_dev     = mtdswap_remove_dev,
1484         .owner          = THIS_MODULE,
1485 };
1486
1487 module_mtd_blktrans(mtdswap_ops);
1488
1489 MODULE_LICENSE("GPL");
1490 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
1491 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
1492                 "swap space");