1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * This file contains an ECC algorithm that detects and corrects 1 bit
4 * errors in a 256 byte block of data.
6 * Copyright © 2008 Koninklijke Philips Electronics NV.
7 * Author: Frans Meulenbroeks
9 * Completely replaces the previous ECC implementation which was written by:
10 * Steven J. Hill (sjhill@realitydiluted.com)
11 * Thomas Gleixner (tglx@linutronix.de)
13 * Information on how this algorithm works and how it was developed
14 * can be found in Documentation/driver-api/mtd/nand_ecc.rst
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/mtd/rawnand.h>
22 #include <linux/mtd/nand-ecc-sw-hamming.h>
23 #include <asm/byteorder.h>
26 * invparity is a 256 byte table that contains the odd parity
27 * for each byte. So if the number of bits in a byte is even,
28 * the array element is 1, and when the number of bits is odd
29 * the array eleemnt is 0.
31 static const char invparity[256] = {
32 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
33 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
34 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
35 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
36 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
37 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
38 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
39 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
40 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
41 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
42 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
43 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
44 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
45 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
46 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
47 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
51 * bitsperbyte contains the number of bits per byte
52 * this is only used for testing and repairing parity
53 * (a precalculated value slightly improves performance)
55 static const char bitsperbyte[256] = {
56 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
57 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
58 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
59 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
60 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
61 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
62 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
63 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
64 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
65 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
66 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
67 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
68 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
69 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
70 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
71 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
75 * addressbits is a lookup table to filter out the bits from the xor-ed
76 * ECC data that identify the faulty location.
77 * this is only used for repairing parity
78 * see the comments in nand_ecc_sw_hamming_correct for more details
80 static const char addressbits[256] = {
81 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
82 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
83 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
84 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
85 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
86 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
87 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
88 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
89 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
90 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
91 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
92 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
93 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
94 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
95 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
96 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
97 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
98 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
99 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
100 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
101 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
102 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
103 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
104 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
105 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
106 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
107 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
108 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
109 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
110 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
111 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
112 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
115 int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
116 unsigned char *code, bool sm_order)
118 const u32 *bp = (uint32_t *)buf;
119 const u32 eccsize_mult = step_size >> 8;
120 /* current value in buffer */
122 /* rp0..rp17 are the various accumulated parities (per byte) */
123 u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
124 rp13, rp14, rp15, rp16, rp17;
125 /* Cumulative parity for all data */
127 /* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
141 * The loop is unrolled a number of times;
142 * This avoids if statements to decide on which rp value to update
143 * Also we process the data by longwords.
144 * Note: passing unaligned data might give a performance penalty.
145 * It is assumed that the buffers are aligned.
146 * tmppar is the cumulative sum of this iteration.
147 * needed for calculating rp12, rp14, rp16 and par
148 * also used as a performance improvement for rp6, rp8 and rp10
150 for (i = 0; i < eccsize_mult << 2; i++) {
213 if (eccsize_mult == 2 && (i & 0x4) == 0)
218 * handle the fact that we use longword operations
219 * we'll bring rp4..rp14..rp16 back to single byte entities by
220 * shifting and xoring first fold the upper and lower 16 bits,
221 * then the upper and lower 8 bits.
232 rp10 ^= (rp10 >> 16);
235 rp12 ^= (rp12 >> 16);
238 rp14 ^= (rp14 >> 16);
241 if (eccsize_mult == 2) {
242 rp16 ^= (rp16 >> 16);
248 * we also need to calculate the row parity for rp0..rp3
249 * This is present in par, because par is now
250 * rp3 rp3 rp2 rp2 in little endian and
251 * rp2 rp2 rp3 rp3 in big endian
253 * rp1 rp0 rp1 rp0 in little endian and
254 * rp0 rp1 rp0 rp1 in big endian
255 * First calculate rp2 and rp3
273 /* reduce par to 16 bits then calculate rp1 and rp0 */
276 rp0 = (par >> 8) & 0xff;
279 rp1 = (par >> 8) & 0xff;
283 /* finally reduce par to 8 bits */
288 * and calculate rp5..rp15..rp17
289 * note that par = rp4 ^ rp5 and due to the commutative property
290 * of the ^ operator we can say:
292 * The & 0xff seems superfluous, but benchmarking learned that
293 * leaving it out gives slightly worse results. No idea why, probably
294 * it has to do with the way the pipeline in pentium is organized.
296 rp5 = (par ^ rp4) & 0xff;
297 rp7 = (par ^ rp6) & 0xff;
298 rp9 = (par ^ rp8) & 0xff;
299 rp11 = (par ^ rp10) & 0xff;
300 rp13 = (par ^ rp12) & 0xff;
301 rp15 = (par ^ rp14) & 0xff;
302 if (eccsize_mult == 2)
303 rp17 = (par ^ rp16) & 0xff;
306 * Finally calculate the ECC bits.
307 * Again here it might seem that there are performance optimisations
308 * possible, but benchmarks showed that on the system this is developed
309 * the code below is the fastest
312 code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
313 (invparity[rp5] << 5) | (invparity[rp4] << 4) |
314 (invparity[rp3] << 3) | (invparity[rp2] << 2) |
315 (invparity[rp1] << 1) | (invparity[rp0]);
316 code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
317 (invparity[rp13] << 5) | (invparity[rp12] << 4) |
318 (invparity[rp11] << 3) | (invparity[rp10] << 2) |
319 (invparity[rp9] << 1) | (invparity[rp8]);
321 code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
322 (invparity[rp5] << 5) | (invparity[rp4] << 4) |
323 (invparity[rp3] << 3) | (invparity[rp2] << 2) |
324 (invparity[rp1] << 1) | (invparity[rp0]);
325 code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
326 (invparity[rp13] << 5) | (invparity[rp12] << 4) |
327 (invparity[rp11] << 3) | (invparity[rp10] << 2) |
328 (invparity[rp9] << 1) | (invparity[rp8]);
331 if (eccsize_mult == 1)
333 (invparity[par & 0xf0] << 7) |
334 (invparity[par & 0x0f] << 6) |
335 (invparity[par & 0xcc] << 5) |
336 (invparity[par & 0x33] << 4) |
337 (invparity[par & 0xaa] << 3) |
338 (invparity[par & 0x55] << 2) |
342 (invparity[par & 0xf0] << 7) |
343 (invparity[par & 0x0f] << 6) |
344 (invparity[par & 0xcc] << 5) |
345 (invparity[par & 0x33] << 4) |
346 (invparity[par & 0xaa] << 3) |
347 (invparity[par & 0x55] << 2) |
348 (invparity[rp17] << 1) |
349 (invparity[rp16] << 0);
353 EXPORT_SYMBOL(ecc_sw_hamming_calculate);
356 * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
358 * @buf: Input buffer with raw data
359 * @code: Output buffer with ECC
361 int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
362 const unsigned char *buf, unsigned char *code)
364 struct nand_chip *chip = mtd_to_nand(nanddev_to_mtd(nand));
365 bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
367 return ecc_sw_hamming_calculate(buf, chip->ecc.size, code, sm_order);
369 EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
371 int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
372 unsigned char *calc_ecc, unsigned int step_size,
375 const u32 eccsize_mult = step_size >> 8;
376 unsigned char b0, b1, b2, bit_addr;
377 unsigned int byte_addr;
380 * b0 to b2 indicate which bit is faulty (if any)
381 * we might need the xor result more than once,
382 * so keep them in a local var
385 b0 = read_ecc[0] ^ calc_ecc[0];
386 b1 = read_ecc[1] ^ calc_ecc[1];
388 b0 = read_ecc[1] ^ calc_ecc[1];
389 b1 = read_ecc[0] ^ calc_ecc[0];
392 b2 = read_ecc[2] ^ calc_ecc[2];
394 /* check if there are any bitfaults */
396 /* repeated if statements are slightly more efficient than switch ... */
397 /* ordered in order of likelihood */
399 if ((b0 | b1 | b2) == 0)
400 return 0; /* no error */
402 if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
403 (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
404 ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
405 (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
406 /* single bit error */
408 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
409 * byte, cp 5/3/1 indicate the faulty bit.
410 * A lookup table (called addressbits) is used to filter
411 * the bits from the byte they are in.
412 * A marginal optimisation is possible by having three
413 * different lookup tables.
414 * One as we have now (for b0), one for b2
415 * (that would avoid the >> 1), and one for b1 (with all values
416 * << 4). However it was felt that introducing two more tables
417 * hardly justify the gain.
419 * The b2 shift is there to get rid of the lowest two bits.
420 * We could also do addressbits[b2] >> 1 but for the
421 * performance it does not make any difference
423 if (eccsize_mult == 1)
424 byte_addr = (addressbits[b1] << 4) + addressbits[b0];
426 byte_addr = (addressbits[b2 & 0x3] << 8) +
427 (addressbits[b1] << 4) + addressbits[b0];
428 bit_addr = addressbits[b2 >> 2];
430 buf[byte_addr] ^= (1 << bit_addr);
434 /* count nr of bits; use table lookup, faster than calculating it */
435 if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
436 return 1; /* error in ECC data; no action needed */
438 pr_err("%s: uncorrectable ECC error\n", __func__);
441 EXPORT_SYMBOL(ecc_sw_hamming_correct);
444 * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
446 * @buf: Raw data read from the chip
447 * @read_ecc: ECC bytes read from the chip
448 * @calc_ecc: ECC calculated from the raw data
450 * Detect and correct up to 1 bit error per 256/512-byte block.
452 int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
453 unsigned char *read_ecc,
454 unsigned char *calc_ecc)
456 struct nand_chip *chip = mtd_to_nand(nanddev_to_mtd(nand));
457 bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
459 return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, chip->ecc.size,
462 EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
464 MODULE_LICENSE("GPL");
465 MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
466 MODULE_DESCRIPTION("NAND software Hamming ECC support");