drivers/mtd/nand/ecc-sw-hamming.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * This file contains an ECC algorithm that detects and corrects 1 bit
   4  * errors in a 256 byte block of data.
   5  *
   6  * Copyright © 2008 Koninklijke Philips Electronics NV.
   7  *                  Author: Frans Meulenbroeks
   8  *
   9  * Completely replaces the previous ECC implementation which was written by:
  10  *   Steven J. Hill (sjhill@realitydiluted.com)
  11  *   Thomas Gleixner (tglx@linutronix.de)
  12  *
  13  * Information on how this algorithm works and how it was developed
  14  * can be found in Documentation/driver-api/mtd/nand_ecc.rst
  15  */
  16
  17 #include <linux/types.h>
  18 #include <linux/kernel.h>
  19 #include <linux/module.h>
  20 #include <linux/mtd/nand.h>
  21 #include <linux/mtd/nand-ecc-sw-hamming.h>
  22 #include <linux/slab.h>
  23 #include <asm/byteorder.h>
  24
  25 /*
  26  * invparity is a 256 byte table that contains the odd parity
  27  * for each byte. So if the number of bits in a byte is even,
  28  * the array element is 1, and when the number of bits is odd
  29  * the array eleemnt is 0.
  30  */
  31 static const char invparity[256] = {
  32         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  33         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  34         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  35         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  36         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  37         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  38         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  39         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  40         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  41         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  42         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  43         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  44         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  45         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  46         0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  47         1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
  48 };
  49
  50 /*
  51  * bitsperbyte contains the number of bits per byte
  52  * this is only used for testing and repairing parity
  53  * (a precalculated value slightly improves performance)
  54  */
  55 static const char bitsperbyte[256] = {
  56         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
  57         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  58         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  59         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  60         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  61         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  62         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  63         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  64         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  65         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  66         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  67         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  68         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  69         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  70         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  71         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
  72 };
  73
  74 /*
  75  * addressbits is a lookup table to filter out the bits from the xor-ed
  76  * ECC data that identify the faulty location.
  77  * this is only used for repairing parity
  78  * see the comments in nand_ecc_sw_hamming_correct for more details
  79  */
  80 static const char addressbits[256] = {
  81         0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  82         0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  83         0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  84         0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  85         0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  86         0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  87         0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  88         0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  89         0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  90         0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  91         0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  92         0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  93         0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  94         0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  95         0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  96         0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  97         0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
  98         0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
  99         0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
 100         0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
 101         0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 102         0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
 103         0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 104         0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
 105         0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
 106         0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
 107         0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
 108         0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
 109         0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 110         0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
 111         0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 112         0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
 113 };
 114
 115 int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
 116                              unsigned char *code, bool sm_order)
 117 {
 118         const u32 *bp = (uint32_t *)buf;
 119         const u32 eccsize_mult = (step_size == 256) ? 1 : 2;
 120         /* current value in buffer */
 121         u32 cur;
 122         /* rp0..rp17 are the various accumulated parities (per byte) */
 123         u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
 124                 rp13, rp14, rp15, rp16, rp17;
 125         /* Cumulative parity for all data */
 126         u32 par;
 127         /* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
 128         u32 tmppar;
 129         int i;
 130
 131         par = 0;
 132         rp4 = 0;
 133         rp6 = 0;
 134         rp8 = 0;
 135         rp10 = 0;
 136         rp12 = 0;
 137         rp14 = 0;
 138         rp16 = 0;
 139         rp17 = 0;
 140
 141         /*
 142          * The loop is unrolled a number of times;
 143          * This avoids if statements to decide on which rp value to update
 144          * Also we process the data by longwords.
 145          * Note: passing unaligned data might give a performance penalty.
 146          * It is assumed that the buffers are aligned.
 147          * tmppar is the cumulative sum of this iteration.
 148          * needed for calculating rp12, rp14, rp16 and par
 149          * also used as a performance improvement for rp6, rp8 and rp10
 150          */
 151         for (i = 0; i < eccsize_mult << 2; i++) {
 152                 cur = *bp++;
 153                 tmppar = cur;
 154                 rp4 ^= cur;
 155                 cur = *bp++;
 156                 tmppar ^= cur;
 157                 rp6 ^= tmppar;
 158                 cur = *bp++;
 159                 tmppar ^= cur;
 160                 rp4 ^= cur;
 161                 cur = *bp++;
 162                 tmppar ^= cur;
 163                 rp8 ^= tmppar;
 164
 165                 cur = *bp++;
 166                 tmppar ^= cur;
 167                 rp4 ^= cur;
 168                 rp6 ^= cur;
 169                 cur = *bp++;
 170                 tmppar ^= cur;
 171                 rp6 ^= cur;
 172                 cur = *bp++;
 173                 tmppar ^= cur;
 174                 rp4 ^= cur;
 175                 cur = *bp++;
 176                 tmppar ^= cur;
 177                 rp10 ^= tmppar;
 178
 179                 cur = *bp++;
 180                 tmppar ^= cur;
 181                 rp4 ^= cur;
 182                 rp6 ^= cur;
 183                 rp8 ^= cur;
 184                 cur = *bp++;
 185                 tmppar ^= cur;
 186                 rp6 ^= cur;
 187                 rp8 ^= cur;
 188                 cur = *bp++;
 189                 tmppar ^= cur;
 190                 rp4 ^= cur;
 191                 rp8 ^= cur;
 192                 cur = *bp++;
 193                 tmppar ^= cur;
 194                 rp8 ^= cur;
 195
 196                 cur = *bp++;
 197                 tmppar ^= cur;
 198                 rp4 ^= cur;
 199                 rp6 ^= cur;
 200                 cur = *bp++;
 201                 tmppar ^= cur;
 202                 rp6 ^= cur;
 203                 cur = *bp++;
 204                 tmppar ^= cur;
 205                 rp4 ^= cur;
 206                 cur = *bp++;
 207                 tmppar ^= cur;
 208
 209                 par ^= tmppar;
 210                 if ((i & 0x1) == 0)
 211                         rp12 ^= tmppar;
 212                 if ((i & 0x2) == 0)
 213                         rp14 ^= tmppar;
 214                 if (eccsize_mult == 2 && (i & 0x4) == 0)
 215                         rp16 ^= tmppar;
 216         }
 217
 218         /*
 219          * handle the fact that we use longword operations
 220          * we'll bring rp4..rp14..rp16 back to single byte entities by
 221          * shifting and xoring first fold the upper and lower 16 bits,
 222          * then the upper and lower 8 bits.
 223          */
 224         rp4 ^= (rp4 >> 16);
 225         rp4 ^= (rp4 >> 8);
 226         rp4 &= 0xff;
 227         rp6 ^= (rp6 >> 16);
 228         rp6 ^= (rp6 >> 8);
 229         rp6 &= 0xff;
 230         rp8 ^= (rp8 >> 16);
 231         rp8 ^= (rp8 >> 8);
 232         rp8 &= 0xff;
 233         rp10 ^= (rp10 >> 16);
 234         rp10 ^= (rp10 >> 8);
 235         rp10 &= 0xff;
 236         rp12 ^= (rp12 >> 16);
 237         rp12 ^= (rp12 >> 8);
 238         rp12 &= 0xff;
 239         rp14 ^= (rp14 >> 16);
 240         rp14 ^= (rp14 >> 8);
 241         rp14 &= 0xff;
 242         if (eccsize_mult == 2) {
 243                 rp16 ^= (rp16 >> 16);
 244                 rp16 ^= (rp16 >> 8);
 245                 rp16 &= 0xff;
 246         }
 247
 248         /*
 249          * we also need to calculate the row parity for rp0..rp3
 250          * This is present in par, because par is now
 251          * rp3 rp3 rp2 rp2 in little endian and
 252          * rp2 rp2 rp3 rp3 in big endian
 253          * as well as
 254          * rp1 rp0 rp1 rp0 in little endian and
 255          * rp0 rp1 rp0 rp1 in big endian
 256          * First calculate rp2 and rp3
 257          */
 258 #ifdef __BIG_ENDIAN
 259         rp2 = (par >> 16);
 260         rp2 ^= (rp2 >> 8);
 261         rp2 &= 0xff;
 262         rp3 = par & 0xffff;
 263         rp3 ^= (rp3 >> 8);
 264         rp3 &= 0xff;
 265 #else
 266         rp3 = (par >> 16);
 267         rp3 ^= (rp3 >> 8);
 268         rp3 &= 0xff;
 269         rp2 = par & 0xffff;
 270         rp2 ^= (rp2 >> 8);
 271         rp2 &= 0xff;
 272 #endif
 273
 274         /* reduce par to 16 bits then calculate rp1 and rp0 */
 275         par ^= (par >> 16);
 276 #ifdef __BIG_ENDIAN
 277         rp0 = (par >> 8) & 0xff;
 278         rp1 = (par & 0xff);
 279 #else
 280         rp1 = (par >> 8) & 0xff;
 281         rp0 = (par & 0xff);
 282 #endif
 283
 284         /* finally reduce par to 8 bits */
 285         par ^= (par >> 8);
 286         par &= 0xff;
 287
 288         /*
 289          * and calculate rp5..rp15..rp17
 290          * note that par = rp4 ^ rp5 and due to the commutative property
 291          * of the ^ operator we can say:
 292          * rp5 = (par ^ rp4);
 293          * The & 0xff seems superfluous, but benchmarking learned that
 294          * leaving it out gives slightly worse results. No idea why, probably
 295          * it has to do with the way the pipeline in pentium is organized.
 296          */
 297         rp5 = (par ^ rp4) & 0xff;
 298         rp7 = (par ^ rp6) & 0xff;
 299         rp9 = (par ^ rp8) & 0xff;
 300         rp11 = (par ^ rp10) & 0xff;
 301         rp13 = (par ^ rp12) & 0xff;
 302         rp15 = (par ^ rp14) & 0xff;
 303         if (eccsize_mult == 2)
 304                 rp17 = (par ^ rp16) & 0xff;
 305
 306         /*
 307          * Finally calculate the ECC bits.
 308          * Again here it might seem that there are performance optimisations
 309          * possible, but benchmarks showed that on the system this is developed
 310          * the code below is the fastest
 311          */
 312         if (sm_order) {
 313                 code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
 314                           (invparity[rp5] << 5) | (invparity[rp4] << 4) |
 315                           (invparity[rp3] << 3) | (invparity[rp2] << 2) |
 316                           (invparity[rp1] << 1) | (invparity[rp0]);
 317                 code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
 318                           (invparity[rp13] << 5) | (invparity[rp12] << 4) |
 319                           (invparity[rp11] << 3) | (invparity[rp10] << 2) |
 320                           (invparity[rp9] << 1) | (invparity[rp8]);
 321         } else {
 322                 code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
 323                           (invparity[rp5] << 5) | (invparity[rp4] << 4) |
 324                           (invparity[rp3] << 3) | (invparity[rp2] << 2) |
 325                           (invparity[rp1] << 1) | (invparity[rp0]);
 326                 code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
 327                           (invparity[rp13] << 5) | (invparity[rp12] << 4) |
 328                           (invparity[rp11] << 3) | (invparity[rp10] << 2) |
 329                           (invparity[rp9] << 1) | (invparity[rp8]);
 330         }
 331
 332         if (eccsize_mult == 1)
 333                 code[2] =
 334                     (invparity[par & 0xf0] << 7) |
 335                     (invparity[par & 0x0f] << 6) |
 336                     (invparity[par & 0xcc] << 5) |
 337                     (invparity[par & 0x33] << 4) |
 338                     (invparity[par & 0xaa] << 3) |
 339                     (invparity[par & 0x55] << 2) |
 340                     3;
 341         else
 342                 code[2] =
 343                     (invparity[par & 0xf0] << 7) |
 344                     (invparity[par & 0x0f] << 6) |
 345                     (invparity[par & 0xcc] << 5) |
 346                     (invparity[par & 0x33] << 4) |
 347                     (invparity[par & 0xaa] << 3) |
 348                     (invparity[par & 0x55] << 2) |
 349                     (invparity[rp17] << 1) |
 350                     (invparity[rp16] << 0);
 351
 352         return 0;
 353 }
 354 EXPORT_SYMBOL(ecc_sw_hamming_calculate);
 355
 356 /**
 357  * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
 358  * @nand: NAND device
 359  * @buf: Input buffer with raw data
 360  * @code: Output buffer with ECC
 361  */
 362 int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
 363                                   const unsigned char *buf, unsigned char *code)
 364 {
 365         struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 366         unsigned int step_size = nand->ecc.ctx.conf.step_size;
 367
 368         return ecc_sw_hamming_calculate(buf, step_size, code,
 369                                         engine_conf->sm_order);
 370 }
 371 EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
 372
 373 int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
 374                            unsigned char *calc_ecc, unsigned int step_size,
 375                            bool sm_order)
 376 {
 377         const u32 eccsize_mult = step_size >> 8;
 378         unsigned char b0, b1, b2, bit_addr;
 379         unsigned int byte_addr;
 380
 381         /*
 382          * b0 to b2 indicate which bit is faulty (if any)
 383          * we might need the xor result  more than once,
 384          * so keep them in a local var
 385         */
 386         if (sm_order) {
 387                 b0 = read_ecc[0] ^ calc_ecc[0];
 388                 b1 = read_ecc[1] ^ calc_ecc[1];
 389         } else {
 390                 b0 = read_ecc[1] ^ calc_ecc[1];
 391                 b1 = read_ecc[0] ^ calc_ecc[0];
 392         }
 393
 394         b2 = read_ecc[2] ^ calc_ecc[2];
 395
 396         /* check if there are any bitfaults */
 397
 398         /* repeated if statements are slightly more efficient than switch ... */
 399         /* ordered in order of likelihood */
 400
 401         if ((b0 | b1 | b2) == 0)
 402                 return 0;       /* no error */
 403
 404         if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
 405             (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
 406             ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
 407              (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
 408         /* single bit error */
 409                 /*
 410                  * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
 411                  * byte, cp 5/3/1 indicate the faulty bit.
 412                  * A lookup table (called addressbits) is used to filter
 413                  * the bits from the byte they are in.
 414                  * A marginal optimisation is possible by having three
 415                  * different lookup tables.
 416                  * One as we have now (for b0), one for b2
 417                  * (that would avoid the >> 1), and one for b1 (with all values
 418                  * << 4). However it was felt that introducing two more tables
 419                  * hardly justify the gain.
 420                  *
 421                  * The b2 shift is there to get rid of the lowest two bits.
 422                  * We could also do addressbits[b2] >> 1 but for the
 423                  * performance it does not make any difference
 424                  */
 425                 if (eccsize_mult == 1)
 426                         byte_addr = (addressbits[b1] << 4) + addressbits[b0];
 427                 else
 428                         byte_addr = (addressbits[b2 & 0x3] << 8) +
 429                                     (addressbits[b1] << 4) + addressbits[b0];
 430                 bit_addr = addressbits[b2 >> 2];
 431                 /* flip the bit */
 432                 buf[byte_addr] ^= (1 << bit_addr);
 433                 return 1;
 434
 435         }
 436         /* count nr of bits; use table lookup, faster than calculating it */
 437         if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
 438                 return 1;       /* error in ECC data; no action needed */
 439
 440         pr_err("%s: uncorrectable ECC error\n", __func__);
 441         return -EBADMSG;
 442 }
 443 EXPORT_SYMBOL(ecc_sw_hamming_correct);
 444
 445 /**
 446  * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
 447  * @nand: NAND device
 448  * @buf: Raw data read from the chip
 449  * @read_ecc: ECC bytes read from the chip
 450  * @calc_ecc: ECC calculated from the raw data
 451  *
 452  * Detect and correct up to 1 bit error per 256/512-byte block.
 453  */
 454 int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
 455                                 unsigned char *read_ecc,
 456                                 unsigned char *calc_ecc)
 457 {
 458         struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 459         unsigned int step_size = nand->ecc.ctx.conf.step_size;
 460
 461         return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, step_size,
 462                                       engine_conf->sm_order);
 463 }
 464 EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
 465
 466 int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand)
 467 {
 468         struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
 469         struct nand_ecc_sw_hamming_conf *engine_conf;
 470         struct mtd_info *mtd = nanddev_to_mtd(nand);
 471         int ret;
 472
 473         if (!mtd->ooblayout) {
 474                 switch (mtd->oobsize) {
 475                 case 8:
 476                 case 16:
 477                         mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
 478                         break;
 479                 case 64:
 480                 case 128:
 481                         mtd_set_ooblayout(mtd,
 482                                           nand_get_large_page_hamming_ooblayout());
 483                         break;
 484                 default:
 485                         return -ENOTSUPP;
 486                 }
 487         }
 488
 489         conf->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
 490         conf->algo = NAND_ECC_ALGO_HAMMING;
 491         conf->step_size = nand->ecc.user_conf.step_size;
 492         conf->strength = 1;
 493
 494         /* Use the strongest configuration by default */
 495         if (conf->step_size != 256 && conf->step_size != 512)
 496                 conf->step_size = 256;
 497
 498         engine_conf = kzalloc(sizeof(*engine_conf), GFP_KERNEL);
 499         if (!engine_conf)
 500                 return -ENOMEM;
 501
 502         ret = nand_ecc_init_req_tweaking(&engine_conf->req_ctx, nand);
 503         if (ret)
 504                 goto free_engine_conf;
 505
 506         engine_conf->code_size = 3;
 507         engine_conf->calc_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
 508         engine_conf->code_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
 509         if (!engine_conf->calc_buf || !engine_conf->code_buf) {
 510                 ret = -ENOMEM;
 511                 goto free_bufs;
 512         }
 513
 514         nand->ecc.ctx.priv = engine_conf;
 515         nand->ecc.ctx.nsteps = mtd->writesize / conf->step_size;
 516         nand->ecc.ctx.total = nand->ecc.ctx.nsteps * engine_conf->code_size;
 517
 518         return 0;
 519
 520 free_bufs:
 521         nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
 522         kfree(engine_conf->calc_buf);
 523         kfree(engine_conf->code_buf);
 524 free_engine_conf:
 525         kfree(engine_conf);
 526
 527         return ret;
 528 }
 529 EXPORT_SYMBOL(nand_ecc_sw_hamming_init_ctx);
 530
 531 void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand)
 532 {
 533         struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 534
 535         if (engine_conf) {
 536                 nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
 537                 kfree(engine_conf->calc_buf);
 538                 kfree(engine_conf->code_buf);
 539                 kfree(engine_conf);
 540         }
 541 }
 542 EXPORT_SYMBOL(nand_ecc_sw_hamming_cleanup_ctx);
 543
 544 static int nand_ecc_sw_hamming_prepare_io_req(struct nand_device *nand,
 545                                               struct nand_page_io_req *req)
 546 {
 547         struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 548         struct mtd_info *mtd = nanddev_to_mtd(nand);
 549         int eccsize = nand->ecc.ctx.conf.step_size;
 550         int eccbytes = engine_conf->code_size;
 551         int eccsteps = nand->ecc.ctx.nsteps;
 552         int total = nand->ecc.ctx.total;
 553         u8 *ecccalc = engine_conf->calc_buf;
 554         const u8 *data;
 555         int i;
 556
 557         /* Nothing to do for a raw operation */
 558         if (req->mode == MTD_OPS_RAW)
 559                 return 0;
 560
 561         /* This engine does not provide BBM/free OOB bytes protection */
 562         if (!req->datalen)
 563                 return 0;
 564
 565         nand_ecc_tweak_req(&engine_conf->req_ctx, req);
 566
 567         /* No more preparation for page read */
 568         if (req->type == NAND_PAGE_READ)
 569                 return 0;
 570
 571         /* Preparation for page write: derive the ECC bytes and place them */
 572         for (i = 0, data = req->databuf.out;
 573              eccsteps;
 574              eccsteps--, i += eccbytes, data += eccsize)
 575                 nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
 576
 577         return mtd_ooblayout_set_eccbytes(mtd, ecccalc, (void *)req->oobbuf.out,
 578                                           0, total);
 579 }
 580
 581 static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand,
 582                                              struct nand_page_io_req *req)
 583 {
 584         struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 585         struct mtd_info *mtd = nanddev_to_mtd(nand);
 586         int eccsize = nand->ecc.ctx.conf.step_size;
 587         int total = nand->ecc.ctx.total;
 588         int eccbytes = engine_conf->code_size;
 589         int eccsteps = nand->ecc.ctx.nsteps;
 590         u8 *ecccalc = engine_conf->calc_buf;
 591         u8 *ecccode = engine_conf->code_buf;
 592         unsigned int max_bitflips = 0;
 593         u8 *data = req->databuf.in;
 594         int i, ret;
 595
 596         /* Nothing to do for a raw operation */
 597         if (req->mode == MTD_OPS_RAW)
 598                 return 0;
 599
 600         /* This engine does not provide BBM/free OOB bytes protection */
 601         if (!req->datalen)
 602                 return 0;
 603
 604         /* No more preparation for page write */
 605         if (req->type == NAND_PAGE_WRITE) {
 606                 nand_ecc_restore_req(&engine_conf->req_ctx, req);
 607                 return 0;
 608         }
 609
 610         /* Finish a page read: retrieve the (raw) ECC bytes*/
 611         ret = mtd_ooblayout_get_eccbytes(mtd, ecccode, req->oobbuf.in, 0,
 612                                          total);
 613         if (ret)
 614                 return ret;
 615
 616         /* Calculate the ECC bytes */
 617         for (i = 0; eccsteps; eccsteps--, i += eccbytes, data += eccsize)
 618                 nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
 619
 620         /* Finish a page read: compare and correct */
 621         for (eccsteps = nand->ecc.ctx.nsteps, i = 0, data = req->databuf.in;
 622              eccsteps;
 623              eccsteps--, i += eccbytes, data += eccsize) {
 624                 int stat =  nand_ecc_sw_hamming_correct(nand, data,
 625                                                         &ecccode[i],
 626                                                         &ecccalc[i]);
 627                 if (stat < 0) {
 628                         mtd->ecc_stats.failed++;
 629                 } else {
 630                         mtd->ecc_stats.corrected += stat;
 631                         max_bitflips = max_t(unsigned int, max_bitflips, stat);
 632                 }
 633         }
 634
 635         nand_ecc_restore_req(&engine_conf->req_ctx, req);
 636
 637         return max_bitflips;
 638 }
 639
 640 static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = {
 641         .init_ctx = nand_ecc_sw_hamming_init_ctx,
 642         .cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx,
 643         .prepare_io_req = nand_ecc_sw_hamming_prepare_io_req,
 644         .finish_io_req = nand_ecc_sw_hamming_finish_io_req,
 645 };
 646
 647 static struct nand_ecc_engine nand_ecc_sw_hamming_engine = {
 648         .ops = &nand_ecc_sw_hamming_engine_ops,
 649 };
 650
 651 struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void)
 652 {
 653         return &nand_ecc_sw_hamming_engine;
 654 }
 655 EXPORT_SYMBOL(nand_ecc_sw_hamming_get_engine);
 656
 657 MODULE_LICENSE("GPL");
 658 MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
 659 MODULE_DESCRIPTION("NAND software Hamming ECC support");