4 * - Heavily based on MD badblocks code from Neil Brown
6 * Copyright (c) 2015, Intel Corporation.
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 #include <linux/badblocks.h>
19 #include <linux/seqlock.h>
20 #include <linux/device.h>
21 #include <linux/kernel.h>
22 #include <linux/module.h>
23 #include <linux/stddef.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
28 * badblocks_check() - check a given range for bad sectors
29 * @bb: the badblocks structure that holds all badblock information
30 * @s: sector (start) at which to check for badblocks
31 * @sectors: number of sectors to check for badblocks
32 * @first_bad: pointer to store location of the first badblock
33 * @bad_sectors: pointer to store number of badblocks after @first_bad
35 * We can record which blocks on each device are 'bad' and so just
36 * fail those blocks, or that stripe, rather than the whole device.
37 * Entries in the bad-block table are 64bits wide. This comprises:
38 * Length of bad-range, in sectors: 0-511 for lengths 1-512
39 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
40 * A 'shift' can be set so that larger blocks are tracked and
41 * consequently larger devices can be covered.
42 * 'Acknowledged' flag - 1 bit. - the most significant bit.
44 * Locking of the bad-block table uses a seqlock so badblocks_check
45 * might need to retry if it is very unlucky.
46 * We will sometimes want to check for bad blocks in a bi_end_io function,
47 * so we use the write_seqlock_irq variant.
49 * When looking for a bad block we specify a range and want to
50 * know if any block in the range is bad. So we binary-search
51 * to the last range that starts at-or-before the given endpoint,
52 * (or "before the sector after the target range")
53 * then see if it ends after the given start.
56 * 0: there are no known bad blocks in the range
57 * 1: there are known bad blocks, all of which are acknowledged
58 * -1: there are bad blocks which have not yet been acknowledged in metadata.
59 * plus the start/length of the first bad section we overlap.
61 int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
62 sector_t *first_bad, int *bad_sectors)
68 sector_t target = s + sectors;
72 /* round the start down, and the end up */
74 target += (1<<bb->shift) - 1;
78 /* 'target' is now the first block after the bad range */
81 seq = read_seqbegin(&bb->lock);
86 /* Binary search between lo and hi for 'target'
87 * i.e. for the last range that starts before 'target'
89 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
90 * are known not to be the last range before target.
91 * VARIANT: hi-lo is the number of possible
92 * ranges, and decreases until it reaches 1
95 int mid = (lo + hi) / 2;
96 sector_t a = BB_OFFSET(p[mid]);
99 /* This could still be the one, earlier ranges
104 /* This and later ranges are definitely out. */
107 /* 'lo' might be the last that started before target, but 'hi' isn't */
109 /* need to check all ranges that end after 's' to see if
110 * any are unacknowledged.
113 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
114 if (BB_OFFSET(p[lo]) < target) {
115 /* starts before the end, and finishes after
116 * the start, so they must overlap
118 if (rv != -1 && BB_ACK(p[lo]))
122 *first_bad = BB_OFFSET(p[lo]);
123 *bad_sectors = BB_LEN(p[lo]);
129 if (read_seqretry(&bb->lock, seq))
134 EXPORT_SYMBOL_GPL(badblocks_check);
136 static void badblocks_update_acked(struct badblocks *bb)
140 bool unacked = false;
142 if (!bb->unacked_exist)
145 for (i = 0; i < bb->count ; i++) {
153 bb->unacked_exist = 0;
157 * badblocks_set() - Add a range of bad blocks to the table.
158 * @bb: the badblocks structure that holds all badblock information
159 * @s: first sector to mark as bad
160 * @sectors: number of sectors to mark as bad
161 * @acknowledged: whether to mark the bad sectors as acknowledged
163 * This might extend the table, or might contract it if two adjacent ranges
164 * can be merged. We binary-search to find the 'insertion' point, then
165 * decide how best to handle it.
169 * 1: failed to set badblocks (out of space)
171 int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
180 /* badblocks are disabled */
184 /* round the start down, and the end up */
185 sector_t next = s + sectors;
188 next += (1<<bb->shift) - 1;
193 write_seqlock_irqsave(&bb->lock, flags);
198 /* Find the last range that starts at-or-before 's' */
199 while (hi - lo > 1) {
200 int mid = (lo + hi) / 2;
201 sector_t a = BB_OFFSET(p[mid]);
208 if (hi > lo && BB_OFFSET(p[lo]) > s)
212 /* we found a range that might merge with the start
215 sector_t a = BB_OFFSET(p[lo]);
216 sector_t e = a + BB_LEN(p[lo]);
217 int ack = BB_ACK(p[lo]);
220 /* Yes, we can merge with a previous range */
221 if (s == a && s + sectors >= e)
222 /* new range covers old */
225 ack = ack && acknowledged;
229 if (e - a <= BB_MAX_LEN) {
230 p[lo] = BB_MAKE(a, e-a, ack);
233 /* does not all fit in one range,
236 if (BB_LEN(p[lo]) != BB_MAX_LEN)
237 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
243 if (sectors && hi < bb->count) {
244 /* 'hi' points to the first range that starts after 's'.
245 * Maybe we can merge with the start of that range
247 sector_t a = BB_OFFSET(p[hi]);
248 sector_t e = a + BB_LEN(p[hi]);
249 int ack = BB_ACK(p[hi]);
251 if (a <= s + sectors) {
252 /* merging is possible */
253 if (e <= s + sectors) {
258 ack = ack && acknowledged;
261 if (e - a <= BB_MAX_LEN) {
262 p[hi] = BB_MAKE(a, e-a, ack);
265 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
273 if (sectors == 0 && hi < bb->count) {
274 /* we might be able to combine lo and hi */
275 /* Note: 's' is at the end of 'lo' */
276 sector_t a = BB_OFFSET(p[hi]);
277 int lolen = BB_LEN(p[lo]);
278 int hilen = BB_LEN(p[hi]);
279 int newlen = lolen + hilen - (s - a);
281 if (s >= a && newlen < BB_MAX_LEN) {
282 /* yes, we can combine them */
283 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
285 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
286 memmove(p + hi, p + hi + 1,
287 (bb->count - hi - 1) * 8);
292 /* didn't merge (it all).
293 * Need to add a range just before 'hi'
295 if (bb->count >= MAX_BADBLOCKS) {
296 /* No room for more */
300 int this_sectors = sectors;
302 memmove(p + hi + 1, p + hi,
303 (bb->count - hi) * 8);
306 if (this_sectors > BB_MAX_LEN)
307 this_sectors = BB_MAX_LEN;
308 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
309 sectors -= this_sectors;
316 bb->unacked_exist = 1;
318 badblocks_update_acked(bb);
319 write_sequnlock_irqrestore(&bb->lock, flags);
323 EXPORT_SYMBOL_GPL(badblocks_set);
326 * badblocks_clear() - Remove a range of bad blocks from the table.
327 * @bb: the badblocks structure that holds all badblock information
328 * @s: first sector to clear (no longer mark as bad)
329 * @sectors: number of sectors to clear
331 * This may involve extending the table if we split a region,
332 * but it must not fail. So if the table becomes full, we just
333 * drop the remove request.
337 * 1: failed to clear badblocks
339 int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
343 sector_t target = s + sectors;
347 /* When clearing we round the start up and the end down.
348 * This should not matter as the shift should align with
349 * the block size and no rounding should ever be needed.
350 * However it is better to think a block is bad when it
351 * isn't than to think a block is not bad when it is.
353 s += (1<<bb->shift) - 1;
355 target >>= bb->shift;
356 sectors = target - s;
359 write_seqlock_irq(&bb->lock);
364 /* Find the last range that starts before 'target' */
365 while (hi - lo > 1) {
366 int mid = (lo + hi) / 2;
367 sector_t a = BB_OFFSET(p[mid]);
375 /* p[lo] is the last range that could overlap the
376 * current range. Earlier ranges could also overlap,
377 * but only this one can overlap the end of the range.
379 if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
380 (BB_OFFSET(p[lo]) < target)) {
381 /* Partial overlap, leave the tail of this range */
382 int ack = BB_ACK(p[lo]);
383 sector_t a = BB_OFFSET(p[lo]);
384 sector_t end = a + BB_LEN(p[lo]);
387 /* we need to split this range */
388 if (bb->count >= MAX_BADBLOCKS) {
392 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
394 p[lo] = BB_MAKE(a, s-a, ack);
397 p[lo] = BB_MAKE(target, end - target, ack);
398 /* there is no longer an overlap */
403 (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
404 (BB_OFFSET(p[lo]) < target)) {
405 /* This range does overlap */
406 if (BB_OFFSET(p[lo]) < s) {
407 /* Keep the early parts of this range. */
408 int ack = BB_ACK(p[lo]);
409 sector_t start = BB_OFFSET(p[lo]);
411 p[lo] = BB_MAKE(start, s - start, ack);
412 /* now low doesn't overlap, so.. */
417 /* 'lo' is strictly before, 'hi' is strictly after,
418 * anything between needs to be discarded
421 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
422 bb->count -= (hi - lo - 1);
426 badblocks_update_acked(bb);
429 write_sequnlock_irq(&bb->lock);
432 EXPORT_SYMBOL_GPL(badblocks_clear);
435 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
436 * @bb: the badblocks structure that holds all badblock information
438 * This only succeeds if ->changed is clear. It is used by
439 * in-kernel metadata updates
441 void ack_all_badblocks(struct badblocks *bb)
443 if (bb->page == NULL || bb->changed)
444 /* no point even trying */
446 write_seqlock_irq(&bb->lock);
448 if (bb->changed == 0 && bb->unacked_exist) {
452 for (i = 0; i < bb->count ; i++) {
454 sector_t start = BB_OFFSET(p[i]);
455 int len = BB_LEN(p[i]);
457 p[i] = BB_MAKE(start, len, 1);
460 bb->unacked_exist = 0;
462 write_sequnlock_irq(&bb->lock);
464 EXPORT_SYMBOL_GPL(ack_all_badblocks);
467 * badblocks_show() - sysfs access to bad-blocks list
468 * @bb: the badblocks structure that holds all badblock information
469 * @page: buffer received from sysfs
470 * @unack: whether to show unacknowledged badblocks
473 * Length of returned data
475 ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
486 seq = read_seqbegin(&bb->lock);
491 while (len < PAGE_SIZE && i < bb->count) {
492 sector_t s = BB_OFFSET(p[i]);
493 unsigned int length = BB_LEN(p[i]);
494 int ack = BB_ACK(p[i]);
501 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
502 (unsigned long long)s << bb->shift,
503 length << bb->shift);
505 if (unack && len == 0)
506 bb->unacked_exist = 0;
508 if (read_seqretry(&bb->lock, seq))
513 EXPORT_SYMBOL_GPL(badblocks_show);
516 * badblocks_store() - sysfs access to bad-blocks list
517 * @bb: the badblocks structure that holds all badblock information
518 * @page: buffer received from sysfs
519 * @len: length of data received from sysfs
520 * @unack: whether to record the bad blocks as unacknowledged
523 * Length of the buffer processed or -ve error.
525 ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
528 unsigned long long sector;
532 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
544 if (badblocks_set(bb, sector, length, !unack))
549 EXPORT_SYMBOL_GPL(badblocks_store);
551 static int __badblocks_init(struct device *dev, struct badblocks *bb,
561 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
563 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
568 seqlock_init(&bb->lock);
574 * badblocks_init() - initialize the badblocks structure
575 * @bb: the badblocks structure that holds all badblock information
576 * @enable: whether to enable badblocks accounting
580 * -ve errno: on error
582 int badblocks_init(struct badblocks *bb, int enable)
584 return __badblocks_init(NULL, bb, enable);
586 EXPORT_SYMBOL_GPL(badblocks_init);
588 int devm_init_badblocks(struct device *dev, struct badblocks *bb)
592 return __badblocks_init(dev, bb, 1);
594 EXPORT_SYMBOL_GPL(devm_init_badblocks);
597 * badblocks_exit() - free the badblocks structure
598 * @bb: the badblocks structure that holds all badblock information
600 void badblocks_exit(struct badblocks *bb)
605 devm_kfree(bb->dev, bb->page);
610 EXPORT_SYMBOL_GPL(badblocks_exit);