crypto: powerpc - move files to fix build error
author Kim Phillips <kim.phillips@freescale.com>
Sat, 7 Mar 2015 00:46:21 +0000 (18:46 -0600)
committer Herbert Xu <herbert@gondor.apana.org.au>
Mon, 9 Mar 2015 10:06:19 +0000 (21:06 +1100)
The current cryptodev-2.6 tree commits:

d9850fc529ef ("crypto: powerpc/sha1 - kernel config")
50ba29aaa7b0 ("crypto: powerpc/sha1 - glue")

failed to properly place files under arch/powerpc/crypto, which
leads to build errors:

make[1]: *** No rule to make target 'arch/powerpc/crypto/sha1-spe-asm.o', needed by 'arch/powerpc/crypto/sha1-ppc-spe.o'.  Stop.
make[1]: *** No rule to make target 'arch/powerpc/crypto/sha1_spe_glue.o', needed by 'arch/powerpc/crypto/sha1-ppc-spe.o'.  Stop.
Makefile:947: recipe for target 'arch/powerpc/crypto' failed

Move the two sha1 spe files under crypto/, and whilst there, rename
other powerpc crypto files with underscores to use dashes for
consistency.

Cc: Markus Stockhausen <stockhausen@collogia.de>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/powerpc/crypto/Makefile
arch/powerpc/crypto/aes-spe-glue.c [new file with mode: 0644]
arch/powerpc/crypto/aes_spe_glue.c [deleted file]
arch/powerpc/crypto/md5-glue.c [new file with mode: 0644]
arch/powerpc/crypto/md5_glue.c [deleted file]
arch/powerpc/crypto/sha1-spe-asm.S [new file with mode: 0644]
arch/powerpc/crypto/sha1-spe-glue.c [new file with mode: 0644]
arch/powerpc/crypto/sha256-spe-glue.c [new file with mode: 0644]
arch/powerpc/crypto/sha256_spe_glue.c [deleted file]
arch/powerpc/sha1-spe-asm.S [deleted file]
arch/powerpc/sha1_spe_glue.c [deleted file]

index c6b25cb..9c221b6 100644 (file)
@@ -10,8 +10,8 @@ obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
 obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
 
-aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes_spe_glue.o
-md5-ppc-y := md5-asm.o md5_glue.o
+aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
+md5-ppc-y := md5-asm.o md5-glue.o
 sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
-sha1-ppc-spe-y := sha1-spe-asm.o sha1_spe_glue.o
-sha256-ppc-spe-y := sha256-spe-asm.o sha256_spe_glue.o
+sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
+sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644 (file)
index 0000000..bd5e63f
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * Glue code for AES implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/aes.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <crypto/algapi.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). e500 cores can issue two
+ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
+ * bit unit (SU2). One of these can be a memory access that is executed via
+ * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
+ * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
+ * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
+ * included. Even with the low end model clocked at 667 MHz this equals to a
+ * critical time window of less than 30us. The value has been chosen to
+ * process a 512 byte disk block in one or a large 1400 bytes IPsec network
+ * packet in two runs.
+ *
+ */
+#define MAX_BYTES 768
+
+struct ppc_aes_ctx {   /* context of the aes, ecb(aes), cbc(aes) and ctr(aes) algorithms */
+       u32 key_enc[AES_MAX_KEYLENGTH_U32];     /* filled by ppc_expand_key_*() */
+       u32 key_dec[AES_MAX_KEYLENGTH_U32];     /* filled by ppc_generate_decrypt_key() */
+       u32 rounds;     /* set to 4, 5 or 6 for 128, 192 or 256 bit keys */
+};
+
+struct ppc_xts_ctx {   /* context of the xts(aes) algorithm */
+       u32 key_enc[AES_MAX_KEYLENGTH_U32];     /* expanded from the first half of the key */
+       u32 key_dec[AES_MAX_KEYLENGTH_U32];     /* filled by ppc_generate_decrypt_key() */
+       u32 key_twk[AES_MAX_KEYLENGTH_U32];     /* expanded from the second half of the key */
+       u32 rounds;     /* set to 4, 5 or 6 for 2 * 128, 192 or 256 bit keys */
+};
+
+/*
+ * Routines defined outside this file.
+ * NOTE(review): presumably provided by the aes-spe-core/keys/modes assembler
+ * objects that the Makefile links into aes-ppc-spe - confirm.
+ *
+ * Single block and bulk (ECB/CBC/CTR/XTS) primitives. "bytes" is the amount
+ * of data to process in one call; the glue code below never passes more than
+ * MAX_BYTES.
+ */
+extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
+extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
+extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+                           u32 bytes);
+extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+                           u32 bytes);
+extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+                           u32 bytes, u8 *iv);
+extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+                           u32 bytes, u8 *iv);
+extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+                           u32 bytes, u8 *iv);
+extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+                           u32 bytes, u8 *iv, u32 *key_twk);
+extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+                           u32 bytes, u8 *iv, u32 *key_twk);
+
+/* key schedule expansion, one routine per supported key size */
+extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
+
+/* fills key_dec from an already expanded key_enc; key_len is in bytes */
+extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
+                                    unsigned int key_len);
+
+static void spe_begin(void)
+{
+       /*
+        * Disable preemption and save the user's SPE registers if required.
+        * Every call must be paired with spe_end(), which re-enables
+        * preemption.
+        */
+       preempt_disable();
+       enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+       /* re-enable the preemption that spe_begin() disabled */
+       preempt_enable();
+}
+
+/*
+ * Install an AES key for the aes/ecb/cbc/ctr algorithms.
+ *
+ * Expands @in_key into ctx->key_enc, records the round parameter handed to
+ * the assembler routines and derives ctx->key_dec from the expanded key.
+ *
+ * Returns 0 on success. Any @key_len other than 16, 24 or 32 bytes sets
+ * CRYPTO_TFM_RES_BAD_KEY_LEN on @tfm and returns -EINVAL without touching
+ * the context.
+ */
+static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+               unsigned int key_len)
+{
+       struct ppc_aes_ctx *aes = crypto_tfm_ctx(tfm);
+
+       switch (key_len) {
+       case AES_KEYSIZE_128:
+               aes->rounds = 4;
+               ppc_expand_key_128(aes->key_enc, in_key);
+               break;
+       case AES_KEYSIZE_192:
+               aes->rounds = 5;
+               ppc_expand_key_192(aes->key_enc, in_key);
+               break;
+       case AES_KEYSIZE_256:
+               aes->rounds = 6;
+               ppc_expand_key_256(aes->key_enc, in_key);
+               break;
+       default:
+               /* unsupported key size */
+               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+
+       /* the decryption schedule is always derived from the encryption one */
+       ppc_generate_decrypt_key(aes->key_dec, aes->key_enc, key_len);
+
+       return 0;
+}
+
+/*
+ * Install an XTS key, which is two AES keys of equal size back to back.
+ *
+ * The first half of @in_key is expanded into ctx->key_enc (and from there
+ * into ctx->key_dec), the second half into the tweak key ctx->key_twk.
+ *
+ * Returns 0 on success. If @key_len is odd, or half of it is not 16, 24 or
+ * 32 bytes, CRYPTO_TFM_RES_BAD_KEY_LEN is set on @tfm and -EINVAL is
+ * returned without touching the context.
+ */
+static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+                  unsigned int key_len)
+{
+       struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       /*
+        * Check for an odd length before halving. Otherwise e.g. a 33 byte
+        * key would be silently accepted as 2 * 16 bytes and its last byte
+        * ignored.
+        */
+       if (key_len & 1)
+               goto bad_key_len;
+
+       key_len >>= 1;
+
+       switch (key_len) {
+       case AES_KEYSIZE_128:
+               ctx->rounds = 4;
+               ppc_expand_key_128(ctx->key_enc, in_key);
+               ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
+               break;
+       case AES_KEYSIZE_192:
+               ctx->rounds = 5;
+               ppc_expand_key_192(ctx->key_enc, in_key);
+               ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
+               break;
+       case AES_KEYSIZE_256:
+               ctx->rounds = 6;
+               ppc_expand_key_256(ctx->key_enc, in_key);
+               ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
+               break;
+       default:
+               goto bad_key_len;
+       }
+
+       /* only the data key needs a decryption schedule */
+       ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+       return 0;
+
+bad_key_len:
+       tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+       return -EINVAL;
+}
+
+/*
+ * Cipher callback: encrypt one block from @in to @out with the expanded
+ * encryption key of @tfm. The assembler call runs between spe_begin() and
+ * spe_end().
+ */
+static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+       struct ppc_aes_ctx *aes = crypto_tfm_ctx(tfm);
+
+       spe_begin();
+       ppc_encrypt_aes(out, in, aes->key_enc, aes->rounds);
+       spe_end();
+}
+
+/*
+ * Cipher callback: decrypt one block from @in to @out with the expanded
+ * decryption key of @tfm. The assembler call runs between spe_begin() and
+ * spe_end().
+ */
+static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+       struct ppc_aes_ctx *aes = crypto_tfm_ctx(tfm);
+
+       spe_begin();
+       ppc_decrypt_aes(out, in, aes->key_dec, aes->rounds);
+       spe_end();
+}
+
+/*
+ * ECB encryption over a scatterlist walk.
+ *
+ * Each pass hands at most MAX_BYTES to the assembler routine so that the
+ * preemption-disabled window stays short. Whatever is not processed (the
+ * excess over MAX_BYTES, or a trailing partial block) is given back to the
+ * walk and shows up again in the next pass.
+ *
+ * Returns the status of the last blkcipher_walk_virt()/_done() call.
+ */
+static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                          struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int avail, rest;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt(desc, &walk);
+
+       while (walk.nbytes) {
+               avail = walk.nbytes;
+               if (avail > MAX_BYTES)
+                       rest = avail - MAX_BYTES;
+               else
+                       rest = avail & (AES_BLOCK_SIZE - 1);
+
+               spe_begin();
+               ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+                               ctx->key_enc, ctx->rounds, avail - rest);
+               spe_end();
+
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * ECB decryption over a scatterlist walk.
+ *
+ * Each pass hands at most MAX_BYTES to the assembler routine so that the
+ * preemption-disabled window stays short. Whatever is not processed (the
+ * excess over MAX_BYTES, or a trailing partial block) is given back to the
+ * walk and shows up again in the next pass.
+ *
+ * Returns the status of the last blkcipher_walk_virt()/_done() call.
+ */
+static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                          struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int avail, rest;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt(desc, &walk);
+
+       while (walk.nbytes) {
+               avail = walk.nbytes;
+               if (avail > MAX_BYTES)
+                       rest = avail - MAX_BYTES;
+               else
+                       rest = avail & (AES_BLOCK_SIZE - 1);
+
+               spe_begin();
+               ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+                               ctx->key_dec, ctx->rounds, avail - rest);
+               spe_end();
+
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * CBC encryption over a scatterlist walk, chained through walk.iv.
+ *
+ * Each pass hands at most MAX_BYTES to the assembler routine so that the
+ * preemption-disabled window stays short. Whatever is not processed (the
+ * excess over MAX_BYTES, or a trailing partial block) is given back to the
+ * walk and shows up again in the next pass.
+ *
+ * Returns the status of the last blkcipher_walk_virt()/_done() call.
+ */
+static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                          struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int avail, rest;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt(desc, &walk);
+
+       while (walk.nbytes) {
+               avail = walk.nbytes;
+               if (avail > MAX_BYTES)
+                       rest = avail - MAX_BYTES;
+               else
+                       rest = avail & (AES_BLOCK_SIZE - 1);
+
+               spe_begin();
+               ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+                               ctx->key_enc, ctx->rounds, avail - rest,
+                               walk.iv);
+               spe_end();
+
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * CBC decryption over a scatterlist walk, chained through walk.iv.
+ *
+ * Each pass hands at most MAX_BYTES to the assembler routine so that the
+ * preemption-disabled window stays short. Whatever is not processed (the
+ * excess over MAX_BYTES, or a trailing partial block) is given back to the
+ * walk and shows up again in the next pass.
+ *
+ * Returns the status of the last blkcipher_walk_virt()/_done() call.
+ */
+static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                          struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int avail, rest;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt(desc, &walk);
+
+       while (walk.nbytes) {
+               avail = walk.nbytes;
+               if (avail > MAX_BYTES)
+                       rest = avail - MAX_BYTES;
+               else
+                       rest = avail & (AES_BLOCK_SIZE - 1);
+
+               spe_begin();
+               ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+                               ctx->key_dec, ctx->rounds, avail - rest,
+                               walk.iv);
+               spe_end();
+
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * CTR en-/decryption (the same routine serves both directions).
+ *
+ * The walk is set up with blkcipher_walk_virt_block() and AES_BLOCK_SIZE.
+ * Each pass processes at most MAX_BYTES, rounded down to whole blocks
+ * unless the chunk covers everything that is still outstanding, so only
+ * the final chunk may end in a partial block. @nbytes tracks the
+ * outstanding total across passes.
+ *
+ * Returns the status of the last walk call.
+ */
+static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                        struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int chunk, rest;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+       while (walk.nbytes) {
+               chunk = walk.nbytes;
+               if (chunk > MAX_BYTES)
+                       chunk = MAX_BYTES;
+               /* anything but the final chunk must be a block multiple */
+               if (chunk != nbytes)
+                       chunk &= ~(AES_BLOCK_SIZE - 1);
+               rest = walk.nbytes - chunk;
+
+               spe_begin();
+               ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
+                             ctx->key_enc, ctx->rounds, chunk, walk.iv);
+               spe_end();
+
+               nbytes -= chunk;
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * XTS encryption over a scatterlist walk.
+ *
+ * Each pass hands at most MAX_BYTES to the assembler routine; the
+ * unprocessed remainder is given back to the walk. The tweak key is only
+ * passed on the first pass, later passes pass NULL.
+ * NOTE(review): presumably the assembler then continues from the tweak
+ * kept in walk.iv - confirm against ppc_encrypt_xts.
+ *
+ * Returns the status of the last blkcipher_walk_virt()/_done() call.
+ */
+static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                          struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int avail, rest;
+       u32 *tweak_key = ctx->key_twk;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt(desc, &walk);
+
+       while (walk.nbytes) {
+               avail = walk.nbytes;
+               if (avail > MAX_BYTES)
+                       rest = avail - MAX_BYTES;
+               else
+                       rest = avail & (AES_BLOCK_SIZE - 1);
+
+               spe_begin();
+               ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+                               ctx->key_enc, ctx->rounds, avail - rest,
+                               walk.iv, tweak_key);
+               spe_end();
+
+               /* the tweak key is handed over once only */
+               tweak_key = NULL;
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * XTS decryption over a scatterlist walk.
+ *
+ * Each pass hands at most MAX_BYTES to the assembler routine; the
+ * unprocessed remainder is given back to the walk. The tweak key is only
+ * passed on the first pass, later passes pass NULL.
+ * NOTE(review): presumably the assembler then continues from the tweak
+ * kept in walk.iv - confirm against ppc_decrypt_xts.
+ *
+ * Returns the status of the last blkcipher_walk_virt()/_done() call.
+ */
+static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                          struct scatterlist *src, unsigned int nbytes)
+{
+       struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct blkcipher_walk walk;
+       unsigned int avail, rest;
+       u32 *tweak_key = ctx->key_twk;
+       int err;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt(desc, &walk);
+
+       while (walk.nbytes) {
+               avail = walk.nbytes;
+               if (avail > MAX_BYTES)
+                       rest = avail - MAX_BYTES;
+               else
+                       rest = avail & (AES_BLOCK_SIZE - 1);
+
+               spe_begin();
+               ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+                               ctx->key_dec, ctx->rounds, avail - rest,
+                               walk.iv, tweak_key);
+               spe_end();
+
+               /* the tweak key is handed over once only */
+               tweak_key = NULL;
+               err = blkcipher_walk_done(desc, &walk, rest);
+       }
+
+       return err;
+}
+
+/*
+ * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
+ * because the e500 platform can handle unaligned reads/writes very efficiently.
+ * This improves IPsec throughput by another few percent. Additionally we assume
+ * that AES context is always aligned to at least 8 bytes because it is created
+ * with kmalloc() in the crypto infrastructure
+ *
+ */
+/*
+ * One entry per algorithm: the bare cipher plus the ECB, CBC, CTR and XTS
+ * modes. All of them are registered with priority 300 and no alignment
+ * mask. XTS uses its own context and setkey because it carries an
+ * additional tweak key.
+ */
+static struct crypto_alg aes_algs[] = { {
+       .cra_name               =       "aes",
+       .cra_driver_name        =       "aes-ppc-spe",
+       .cra_priority           =       300,
+       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
+       .cra_alignmask          =       0,
+       .cra_module             =       THIS_MODULE,
+       .cra_u                  =       {
+               .cipher = {
+                       .cia_min_keysize        =       AES_MIN_KEY_SIZE,
+                       .cia_max_keysize        =       AES_MAX_KEY_SIZE,
+                       .cia_setkey             =       ppc_aes_setkey,
+                       .cia_encrypt            =       ppc_aes_encrypt,
+                       .cia_decrypt            =       ppc_aes_decrypt,
+               }
+       }
+}, {
+       .cra_name               =       "ecb(aes)",
+       .cra_driver_name        =       "ecb-ppc-spe",
+       .cra_priority           =       300,
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
+       .cra_alignmask          =       0,
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_u = {
+               .blkcipher = {
+                       /*
+                        * No ivsize: the ECB routines never read walk.iv,
+                        * so no IV must be advertised to callers.
+                        */
+                       .min_keysize            =       AES_MIN_KEY_SIZE,
+                       .max_keysize            =       AES_MAX_KEY_SIZE,
+                       .setkey                 =       ppc_aes_setkey,
+                       .encrypt                =       ppc_ecb_encrypt,
+                       .decrypt                =       ppc_ecb_decrypt,
+               }
+       }
+}, {
+       .cra_name               =       "cbc(aes)",
+       .cra_driver_name        =       "cbc-ppc-spe",
+       .cra_priority           =       300,
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
+       .cra_alignmask          =       0,
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_u = {
+               .blkcipher = {
+                       .min_keysize            =       AES_MIN_KEY_SIZE,
+                       .max_keysize            =       AES_MAX_KEY_SIZE,
+                       .ivsize                 =       AES_BLOCK_SIZE,
+                       .setkey                 =       ppc_aes_setkey,
+                       .encrypt                =       ppc_cbc_encrypt,
+                       .decrypt                =       ppc_cbc_decrypt,
+               }
+       }
+}, {
+       .cra_name               =       "ctr(aes)",
+       .cra_driver_name        =       "ctr-ppc-spe",
+       .cra_priority           =       300,
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       /* blocksize 1: CTR accepts requests of any length */
+       .cra_blocksize          =       1,
+       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
+       .cra_alignmask          =       0,
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_u = {
+               .blkcipher = {
+                       .min_keysize            =       AES_MIN_KEY_SIZE,
+                       .max_keysize            =       AES_MAX_KEY_SIZE,
+                       .ivsize                 =       AES_BLOCK_SIZE,
+                       .setkey                 =       ppc_aes_setkey,
+                       .encrypt                =       ppc_ctr_crypt,
+                       .decrypt                =       ppc_ctr_crypt,
+               }
+       }
+}, {
+       .cra_name               =       "xts(aes)",
+       .cra_driver_name        =       "xts-ppc-spe",
+       .cra_priority           =       300,
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct ppc_xts_ctx),
+       .cra_alignmask          =       0,
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_u = {
+               .blkcipher = {
+                       /* two AES keys back to back: data key and tweak key */
+                       .min_keysize            =       AES_MIN_KEY_SIZE * 2,
+                       .max_keysize            =       AES_MAX_KEY_SIZE * 2,
+                       .ivsize                 =       AES_BLOCK_SIZE,
+                       .setkey                 =       ppc_xts_setkey,
+                       .encrypt                =       ppc_xts_encrypt,
+                       .decrypt                =       ppc_xts_decrypt,
+               }
+       }
+} };
+
+static int __init ppc_aes_mod_init(void)
+{
+       /* register every entry of aes_algs[]; the result is passed straight back */
+       return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit ppc_aes_mod_fini(void)
+{
+       /* unregister the same set of algorithms that ppc_aes_mod_init() registered */
+       crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(ppc_aes_mod_init);
+module_exit(ppc_aes_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
+
+/* one alias per cra_name in aes_algs[], plus the driver name of the bare cipher */
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes_spe_glue.c b/arch/powerpc/crypto/aes_spe_glue.c
deleted file mode 100644 (file)
index bd5e63f..0000000
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Glue code for AES implementation for SPE instructions (PPC)
- *
- * Based on generic implementation. The assembler module takes care
- * about the SPE registers so it can run from interrupt context.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/aes.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include <asm/byteorder.h>
-#include <asm/switch_to.h>
-#include <crypto/algapi.h>
-
-/*
- * MAX_BYTES defines the number of bytes that are allowed to be processed
- * between preempt_disable() and preempt_enable(). e500 cores can issue two
- * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
- * bit unit (SU2). One of these can be a memory access that is executed via
- * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
- * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data
- * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
- * included. Even with the low end model clocked at 667 MHz this equals to a
- * critical time window of less than 30us. The value has been choosen to
- * process a 512 byte disk block in one or a large 1400 bytes IPsec network
- * packet in two runs.
- *
- */
-#define MAX_BYTES 768
-
-struct ppc_aes_ctx {
-       u32 key_enc[AES_MAX_KEYLENGTH_U32];
-       u32 key_dec[AES_MAX_KEYLENGTH_U32];
-       u32 rounds;
-};
-
-struct ppc_xts_ctx {
-       u32 key_enc[AES_MAX_KEYLENGTH_U32];
-       u32 key_dec[AES_MAX_KEYLENGTH_U32];
-       u32 key_twk[AES_MAX_KEYLENGTH_U32];
-       u32 rounds;
-};
-
-extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
-extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
-extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
-                           u32 bytes);
-extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
-                           u32 bytes);
-extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
-                           u32 bytes, u8 *iv);
-extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
-                           u32 bytes, u8 *iv);
-extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
-                           u32 bytes, u8 *iv);
-extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
-                           u32 bytes, u8 *iv, u32 *key_twk);
-extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
-                           u32 bytes, u8 *iv, u32 *key_twk);
-
-extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
-extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
-extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
-
-extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
-                                    unsigned int key_len);
-
-static void spe_begin(void)
-{
-       /* disable preemption and save users SPE registers if required */
-       preempt_disable();
-       enable_kernel_spe();
-}
-
-static void spe_end(void)
-{
-       /* reenable preemption */
-       preempt_enable();
-}
-
-static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-               unsigned int key_len)
-{
-       struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       if (key_len != AES_KEYSIZE_128 &&
-           key_len != AES_KEYSIZE_192 &&
-           key_len != AES_KEYSIZE_256) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-
-       switch (key_len) {
-       case AES_KEYSIZE_128:
-               ctx->rounds = 4;
-               ppc_expand_key_128(ctx->key_enc, in_key);
-               break;
-       case AES_KEYSIZE_192:
-               ctx->rounds = 5;
-               ppc_expand_key_192(ctx->key_enc, in_key);
-               break;
-       case AES_KEYSIZE_256:
-               ctx->rounds = 6;
-               ppc_expand_key_256(ctx->key_enc, in_key);
-               break;
-       }
-
-       ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
-
-       return 0;
-}
-
-static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-                  unsigned int key_len)
-{
-       struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       key_len >>= 1;
-
-       if (key_len != AES_KEYSIZE_128 &&
-           key_len != AES_KEYSIZE_192 &&
-           key_len != AES_KEYSIZE_256) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-
-       switch (key_len) {
-       case AES_KEYSIZE_128:
-               ctx->rounds = 4;
-               ppc_expand_key_128(ctx->key_enc, in_key);
-               ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
-               break;
-       case AES_KEYSIZE_192:
-               ctx->rounds = 5;
-               ppc_expand_key_192(ctx->key_enc, in_key);
-               ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
-               break;
-       case AES_KEYSIZE_256:
-               ctx->rounds = 6;
-               ppc_expand_key_256(ctx->key_enc, in_key);
-               ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
-               break;
-       }
-
-       ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
-
-       return 0;
-}
-
-static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-       struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       spe_begin();
-       ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
-       spe_end();
-}
-
-static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-       struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       spe_begin();
-       ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
-       spe_end();
-}
-
-static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                          struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int ubytes;
-       int err;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-
-       while ((nbytes = walk.nbytes)) {
-               ubytes = nbytes > MAX_BYTES ?
-                        nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-               nbytes -= ubytes;
-
-               spe_begin();
-               ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
-                               ctx->key_enc, ctx->rounds, nbytes);
-               spe_end();
-
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                          struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int ubytes;
-       int err;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-
-       while ((nbytes = walk.nbytes)) {
-               ubytes = nbytes > MAX_BYTES ?
-                        nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-               nbytes -= ubytes;
-
-               spe_begin();
-               ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
-                               ctx->key_dec, ctx->rounds, nbytes);
-               spe_end();
-
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                          struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int ubytes;
-       int err;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-
-       while ((nbytes = walk.nbytes)) {
-               ubytes = nbytes > MAX_BYTES ?
-                        nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-               nbytes -= ubytes;
-
-               spe_begin();
-               ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
-                               ctx->key_enc, ctx->rounds, nbytes, walk.iv);
-               spe_end();
-
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                          struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int ubytes;
-       int err;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-
-       while ((nbytes = walk.nbytes)) {
-               ubytes = nbytes > MAX_BYTES ?
-                        nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-               nbytes -= ubytes;
-
-               spe_begin();
-               ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
-                               ctx->key_dec, ctx->rounds, nbytes, walk.iv);
-               spe_end();
-
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                        struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int pbytes, ubytes;
-       int err;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
-
-       while ((pbytes = walk.nbytes)) {
-               pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
-               pbytes = pbytes == nbytes ?
-                        nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
-               ubytes = walk.nbytes - pbytes;
-
-               spe_begin();
-               ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
-                             ctx->key_enc, ctx->rounds, pbytes , walk.iv);
-               spe_end();
-
-               nbytes -= pbytes;
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                          struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int ubytes;
-       int err;
-       u32 *twk;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-       twk = ctx->key_twk;
-
-       while ((nbytes = walk.nbytes)) {
-               ubytes = nbytes > MAX_BYTES ?
-                        nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-               nbytes -= ubytes;
-
-               spe_begin();
-               ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
-                               ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
-               spe_end();
-
-               twk = NULL;
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                          struct scatterlist *src, unsigned int nbytes)
-{
-       struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       unsigned int ubytes;
-       int err;
-       u32 *twk;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-       twk = ctx->key_twk;
-
-       while ((nbytes = walk.nbytes)) {
-               ubytes = nbytes > MAX_BYTES ?
-                        nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
-               nbytes -= ubytes;
-
-               spe_begin();
-               ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
-                               ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
-               spe_end();
-
-               twk = NULL;
-               err = blkcipher_walk_done(desc, &walk, ubytes);
-       }
-
-       return err;
-}
-
-/*
- * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
- * because the e500 platform can handle unaligned reads/writes very efficiently.
- * This improves IPsec throughput by another few percent. Additionally we assume
- * that AES context is always aligned to at least 8 bytes because it is created
- * with kmalloc() in the crypto infrastructure
- *
- */
-static struct crypto_alg aes_algs[] = { {
-       .cra_name               =       "aes",
-       .cra_driver_name        =       "aes-ppc-spe",
-       .cra_priority           =       300,
-       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
-       .cra_alignmask          =       0,
-       .cra_module             =       THIS_MODULE,
-       .cra_u                  =       {
-               .cipher = {
-                       .cia_min_keysize        =       AES_MIN_KEY_SIZE,
-                       .cia_max_keysize        =       AES_MAX_KEY_SIZE,
-                       .cia_setkey             =       ppc_aes_setkey,
-                       .cia_encrypt            =       ppc_aes_encrypt,
-                       .cia_decrypt            =       ppc_aes_decrypt
-               }
-       }
-}, {
-       .cra_name               =       "ecb(aes)",
-       .cra_driver_name        =       "ecb-ppc-spe",
-       .cra_priority           =       300,
-       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
-       .cra_alignmask          =       0,
-       .cra_type               =       &crypto_blkcipher_type,
-       .cra_module             =       THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize            =       AES_MIN_KEY_SIZE,
-                       .max_keysize            =       AES_MAX_KEY_SIZE,
-                       .ivsize                 =       AES_BLOCK_SIZE,
-                       .setkey                 =       ppc_aes_setkey,
-                       .encrypt                =       ppc_ecb_encrypt,
-                       .decrypt                =       ppc_ecb_decrypt,
-               }
-       }
-}, {
-       .cra_name               =       "cbc(aes)",
-       .cra_driver_name        =       "cbc-ppc-spe",
-       .cra_priority           =       300,
-       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
-       .cra_alignmask          =       0,
-       .cra_type               =       &crypto_blkcipher_type,
-       .cra_module             =       THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize            =       AES_MIN_KEY_SIZE,
-                       .max_keysize            =       AES_MAX_KEY_SIZE,
-                       .ivsize                 =       AES_BLOCK_SIZE,
-                       .setkey                 =       ppc_aes_setkey,
-                       .encrypt                =       ppc_cbc_encrypt,
-                       .decrypt                =       ppc_cbc_decrypt,
-               }
-       }
-}, {
-       .cra_name               =       "ctr(aes)",
-       .cra_driver_name        =       "ctr-ppc-spe",
-       .cra_priority           =       300,
-       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
-       .cra_blocksize          =       1,
-       .cra_ctxsize            =       sizeof(struct ppc_aes_ctx),
-       .cra_alignmask          =       0,
-       .cra_type               =       &crypto_blkcipher_type,
-       .cra_module             =       THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize            =       AES_MIN_KEY_SIZE,
-                       .max_keysize            =       AES_MAX_KEY_SIZE,
-                       .ivsize                 =       AES_BLOCK_SIZE,
-                       .setkey                 =       ppc_aes_setkey,
-                       .encrypt                =       ppc_ctr_crypt,
-                       .decrypt                =       ppc_ctr_crypt,
-               }
-       }
-}, {
-       .cra_name               =       "xts(aes)",
-       .cra_driver_name        =       "xts-ppc-spe",
-       .cra_priority           =       300,
-       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct ppc_xts_ctx),
-       .cra_alignmask          =       0,
-       .cra_type               =       &crypto_blkcipher_type,
-       .cra_module             =       THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize            =       AES_MIN_KEY_SIZE * 2,
-                       .max_keysize            =       AES_MAX_KEY_SIZE * 2,
-                       .ivsize                 =       AES_BLOCK_SIZE,
-                       .setkey                 =       ppc_xts_setkey,
-                       .encrypt                =       ppc_xts_encrypt,
-                       .decrypt                =       ppc_xts_decrypt,
-               }
-       }
-} };
-
-static int __init ppc_aes_mod_init(void)
-{
-       return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
-}
-
-static void __exit ppc_aes_mod_fini(void)
-{
-       crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
-}
-
-module_init(ppc_aes_mod_init);
-module_exit(ppc_aes_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
-
-MODULE_ALIAS_CRYPTO("aes");
-MODULE_ALIAS_CRYPTO("ecb(aes)");
-MODULE_ALIAS_CRYPTO("cbc(aes)");
-MODULE_ALIAS_CRYPTO("ctr(aes)");
-MODULE_ALIAS_CRYPTO("xts(aes)");
-MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
new file mode 100644 (file)
index 0000000..452fb4d
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * Glue code for MD5 implementation for PPC assembler
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/md5.h>
+#include <asm/byteorder.h>
+
+extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
+
+static inline void ppc_md5_clear_context(struct md5_state *sctx)
+{
+       int count = sizeof(struct md5_state) >> 2;
+       u32 *ptr = (u32 *)sctx;
+
+       /* make sure we can clear the fast way */
+       BUILD_BUG_ON(sizeof(struct md5_state) % 4);
+       do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_md5_init(struct shash_desc *desc)
+{
+       struct md5_state *sctx = shash_desc_ctx(desc);
+
+       sctx->hash[0] = 0x67452301;
+       sctx->hash[1] = 0xefcdab89;
+       sctx->hash[2] = 0x98badcfe;
+       sctx->hash[3] = 0x10325476;
+       sctx->byte_count = 0;
+
+       return 0;
+}
+
+static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
+                       unsigned int len)
+{
+       struct md5_state *sctx = shash_desc_ctx(desc);
+       const unsigned int offset = sctx->byte_count & 0x3f;
+       unsigned int avail = 64 - offset;
+       const u8 *src = data;
+
+       sctx->byte_count += len;
+
+       if (avail > len) {
+               memcpy((char *)sctx->block + offset, src, len);
+               return 0;
+       }
+
+       if (offset) {
+               memcpy((char *)sctx->block + offset, src, avail);
+               ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
+               len -= avail;
+               src += avail;
+       }
+
+       if (len > 63) {
+               ppc_md5_transform(sctx->hash, src, len >> 6);
+               src += len & ~0x3f;
+               len &= 0x3f;
+       }
+
+       memcpy((char *)sctx->block, src, len);
+       return 0;
+}
+
+static int ppc_md5_final(struct shash_desc *desc, u8 *out)
+{
+       struct md5_state *sctx = shash_desc_ctx(desc);
+       const unsigned int offset = sctx->byte_count & 0x3f;
+       const u8 *src = (const u8 *)sctx->block;
+       u8 *p = (u8 *)src + offset;
+       int padlen = 55 - offset;
+       __le64 *pbits = (__le64 *)((char *)sctx->block + 56);
+       __le32 *dst = (__le32 *)out;
+
+       *p++ = 0x80;
+
+       if (padlen < 0) {
+               memset(p, 0x00, padlen + sizeof (u64));
+               ppc_md5_transform(sctx->hash, src, 1);
+               p = (char *)sctx->block;
+               padlen = 56;
+       }
+
+       memset(p, 0, padlen);
+       *pbits = cpu_to_le64(sctx->byte_count << 3);
+       ppc_md5_transform(sctx->hash, src, 1);
+
+       dst[0] = cpu_to_le32(sctx->hash[0]);
+       dst[1] = cpu_to_le32(sctx->hash[1]);
+       dst[2] = cpu_to_le32(sctx->hash[2]);
+       dst[3] = cpu_to_le32(sctx->hash[3]);
+
+       ppc_md5_clear_context(sctx);
+       return 0;
+}
+
+static int ppc_md5_export(struct shash_desc *desc, void *out)
+{
+       struct md5_state *sctx = shash_desc_ctx(desc);
+
+       memcpy(out, sctx, sizeof(*sctx));
+       return 0;
+}
+
+static int ppc_md5_import(struct shash_desc *desc, const void *in)
+{
+       struct md5_state *sctx = shash_desc_ctx(desc);
+
+       memcpy(sctx, in, sizeof(*sctx));
+       return 0;
+}
+
+static struct shash_alg alg = {
+       .digestsize     =       MD5_DIGEST_SIZE,
+       .init           =       ppc_md5_init,
+       .update         =       ppc_md5_update,
+       .final          =       ppc_md5_final,
+       .export         =       ppc_md5_export,
+       .import         =       ppc_md5_import,
+       .descsize       =       sizeof(struct md5_state),
+       .statesize      =       sizeof(struct md5_state),
+       .base           =       {
+               .cra_name       =       "md5",
+               .cra_driver_name=       "md5-ppc",
+               .cra_priority   =       200,
+               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
+               .cra_blocksize  =       MD5_HMAC_BLOCK_SIZE,
+               .cra_module     =       THIS_MODULE,
+       }
+};
+
+static int __init ppc_md5_mod_init(void)
+{
+       return crypto_register_shash(&alg);
+}
+
+static void __exit ppc_md5_mod_fini(void)
+{
+       crypto_unregister_shash(&alg);
+}
+
+module_init(ppc_md5_mod_init);
+module_exit(ppc_md5_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
+
+MODULE_ALIAS_CRYPTO("md5");
+MODULE_ALIAS_CRYPTO("md5-ppc");
diff --git a/arch/powerpc/crypto/md5_glue.c b/arch/powerpc/crypto/md5_glue.c
deleted file mode 100644 (file)
index 452fb4d..0000000
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Glue code for MD5 implementation for PPC assembler
- *
- * Based on generic implementation.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <crypto/md5.h>
-#include <asm/byteorder.h>
-
-extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
-
-static inline void ppc_md5_clear_context(struct md5_state *sctx)
-{
-       int count = sizeof(struct md5_state) >> 2;
-       u32 *ptr = (u32 *)sctx;
-
-       /* make sure we can clear the fast way */
-       BUILD_BUG_ON(sizeof(struct md5_state) % 4);
-       do { *ptr++ = 0; } while (--count);
-}
-
-static int ppc_md5_init(struct shash_desc *desc)
-{
-       struct md5_state *sctx = shash_desc_ctx(desc);
-
-       sctx->hash[0] = 0x67452301;
-       sctx->hash[1] = 0xefcdab89;
-       sctx->hash[2] = 0x98badcfe;
-       sctx->hash[3] = 0x10325476;
-       sctx->byte_count = 0;
-
-       return 0;
-}
-
-static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
-                       unsigned int len)
-{
-       struct md5_state *sctx = shash_desc_ctx(desc);
-       const unsigned int offset = sctx->byte_count & 0x3f;
-       unsigned int avail = 64 - offset;
-       const u8 *src = data;
-
-       sctx->byte_count += len;
-
-       if (avail > len) {
-               memcpy((char *)sctx->block + offset, src, len);
-               return 0;
-       }
-
-       if (offset) {
-               memcpy((char *)sctx->block + offset, src, avail);
-               ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
-               len -= avail;
-               src += avail;
-       }
-
-       if (len > 63) {
-               ppc_md5_transform(sctx->hash, src, len >> 6);
-               src += len & ~0x3f;
-               len &= 0x3f;
-       }
-
-       memcpy((char *)sctx->block, src, len);
-       return 0;
-}
-
-static int ppc_md5_final(struct shash_desc *desc, u8 *out)
-{
-       struct md5_state *sctx = shash_desc_ctx(desc);
-       const unsigned int offset = sctx->byte_count & 0x3f;
-       const u8 *src = (const u8 *)sctx->block;
-       u8 *p = (u8 *)src + offset;
-       int padlen = 55 - offset;
-       __le64 *pbits = (__le64 *)((char *)sctx->block + 56);
-       __le32 *dst = (__le32 *)out;
-
-       *p++ = 0x80;
-
-       if (padlen < 0) {
-               memset(p, 0x00, padlen + sizeof (u64));
-               ppc_md5_transform(sctx->hash, src, 1);
-               p = (char *)sctx->block;
-               padlen = 56;
-       }
-
-       memset(p, 0, padlen);
-       *pbits = cpu_to_le64(sctx->byte_count << 3);
-       ppc_md5_transform(sctx->hash, src, 1);
-
-       dst[0] = cpu_to_le32(sctx->hash[0]);
-       dst[1] = cpu_to_le32(sctx->hash[1]);
-       dst[2] = cpu_to_le32(sctx->hash[2]);
-       dst[3] = cpu_to_le32(sctx->hash[3]);
-
-       ppc_md5_clear_context(sctx);
-       return 0;
-}
-
-static int ppc_md5_export(struct shash_desc *desc, void *out)
-{
-       struct md5_state *sctx = shash_desc_ctx(desc);
-
-       memcpy(out, sctx, sizeof(*sctx));
-       return 0;
-}
-
-static int ppc_md5_import(struct shash_desc *desc, const void *in)
-{
-       struct md5_state *sctx = shash_desc_ctx(desc);
-
-       memcpy(sctx, in, sizeof(*sctx));
-       return 0;
-}
-
-static struct shash_alg alg = {
-       .digestsize     =       MD5_DIGEST_SIZE,
-       .init           =       ppc_md5_init,
-       .update         =       ppc_md5_update,
-       .final          =       ppc_md5_final,
-       .export         =       ppc_md5_export,
-       .import         =       ppc_md5_import,
-       .descsize       =       sizeof(struct md5_state),
-       .statesize      =       sizeof(struct md5_state),
-       .base           =       {
-               .cra_name       =       "md5",
-               .cra_driver_name=       "md5-ppc",
-               .cra_priority   =       200,
-               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
-               .cra_blocksize  =       MD5_HMAC_BLOCK_SIZE,
-               .cra_module     =       THIS_MODULE,
-       }
-};
-
-static int __init ppc_md5_mod_init(void)
-{
-       return crypto_register_shash(&alg);
-}
-
-static void __exit ppc_md5_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(ppc_md5_mod_init);
-module_exit(ppc_md5_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
-
-MODULE_ALIAS_CRYPTO("md5");
-MODULE_ALIAS_CRYPTO("md5-ppc");
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S
new file mode 100644 (file)
index 0000000..fcb6cf0
--- /dev/null
@@ -0,0 +1,299 @@
+/*
+ * Fast SHA-1 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP    r3      /* pointer to hash value                        */
+#define rWP    r4      /* pointer to input                             */
+#define rKP    r5      /* pointer to constants                         */
+
+#define rW0    r14     /* 64 bit round words                           */
+#define rW1    r15
+#define rW2    r16
+#define rW3    r17
+#define rW4    r18
+#define rW5    r19
+#define rW6    r20
+#define rW7    r21
+
+#define rH0    r6      /* 32 bit hash values                           */
+#define rH1    r7
+#define rH2    r8
+#define rH3    r9
+#define rH4    r10
+
+#define rT0    r22     /* 64 bit temporary                             */
+#define rT1    r0      /* 32 bit temporaries                           */
+#define rT2    r11
+#define rT3    r12
+
+#define rK     r23     /* 64 bit constant in non-volatile register     */
+
+#define LOAD_K01
+
+#define LOAD_K11 \
+       evlwwsplat      rK,0(rKP);
+
+#define LOAD_K21 \
+       evlwwsplat      rK,4(rKP);
+
+#define LOAD_K31 \
+       evlwwsplat      rK,8(rKP);
+
+#define LOAD_K41 \
+       evlwwsplat      rK,12(rKP);
+
+#define INITIALIZE \
+       stwu            r1,-128(r1);    /* create stack frame           */ \
+       evstdw          r14,8(r1);      /* We must save non volatile    */ \
+       evstdw          r15,16(r1);     /* registers. Take the chance   */ \
+       evstdw          r16,24(r1);     /* and save the SPE part too    */ \
+       evstdw          r17,32(r1);                                        \
+       evstdw          r18,40(r1);                                        \
+       evstdw          r19,48(r1);                                        \
+       evstdw          r20,56(r1);                                        \
+       evstdw          r21,64(r1);                                        \
+       evstdw          r22,72(r1);                                        \
+       evstdw          r23,80(r1);
+
+
+#define FINALIZE \
+       evldw           r14,8(r1);      /* restore SPE registers        */ \
+       evldw           r15,16(r1);                                        \
+       evldw           r16,24(r1);                                        \
+       evldw           r17,32(r1);                                        \
+       evldw           r18,40(r1);                                        \
+       evldw           r19,48(r1);                                        \
+       evldw           r20,56(r1);                                        \
+       evldw           r21,64(r1);                                        \
+       evldw           r22,72(r1);                                        \
+       evldw           r23,80(r1);                                        \
+       xor             r0,r0,r0;                                          \
+       stw             r0,8(r1);       /* Delete sensitive data        */ \
+       stw             r0,16(r1);      /* that we might have pushed    */ \
+       stw             r0,24(r1);      /* from other context that runs */ \
+       stw             r0,32(r1);      /* the same code. Assume that   */ \
+       stw             r0,40(r1);      /* the lower part of the GPRs   */ \
+       stw             r0,48(r1);      /* were already overwritten on  */ \
+       stw             r0,56(r1);      /* the way down to here         */ \
+       stw             r0,64(r1);                                         \
+       stw             r0,72(r1);                                         \
+       stw             r0,80(r1);                                         \
+       addi            r1,r1,128;      /* cleanup stack frame          */
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+       lwz             reg,off(rWP);   /* load data                    */
+#define NEXT_BLOCK \
+       addi            rWP,rWP,64;     /* increment per block          */
+#else
+#define LOAD_DATA(reg, off) \
+       lwbrx           reg,0,rWP;      /* load data                    */ \
+       addi            rWP,rWP,4;      /* increment per word           */
+#define NEXT_BLOCK                     /* nothing to do                */
+#endif
+
+#define        R_00_15(a, b, c, d, e, w0, w1, k, off) \
+       LOAD_DATA(w0, off)              /* 1: W                         */ \
+       and             rT2,b,c;        /* 1: F' = B and C              */ \
+       LOAD_K##k##1                                                       \
+       andc            rT1,d,b;        /* 1: F" = ~B and D             */ \
+       rotrwi          rT0,a,27;       /* 1: A' = A rotl 5             */ \
+       or              rT2,rT2,rT1;    /* 1: F = F' or F"              */ \
+       add             e,e,rT0;        /* 1: E = E + A'                */ \
+       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
+       add             e,e,w0;         /* 1: E = E + W                 */ \
+       LOAD_DATA(w1, off+4)            /* 2: W                         */ \
+       add             e,e,rT2;        /* 1: E = E + F                 */ \
+       and             rT1,a,b;        /* 2: F' = B and C              */ \
+       add             e,e,rK;         /* 1: E = E + K                 */ \
+       andc            rT2,c,a;        /* 2: F" = ~B and D             */ \
+       add             d,d,rK;         /* 2: E = E + K                 */ \
+       or              rT2,rT2,rT1;    /* 2: F = F' or F"              */ \
+       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
+       add             d,d,w1;         /* 2: E = E + W                 */ \
+       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
+       add             d,d,rT0;        /* 2: E = E + A'                */ \
+       evmergelo       w1,w1,w0;       /*    mix W[0]/W[1]             */ \
+       add             d,d,rT2         /* 2: E = E + F                 */
+
+#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+       and             rT2,b,c;        /* 1: F' = B and C              */ \
+       evmergelohi     rT0,w7,w6;      /*    W[-3]                     */ \
+       andc            rT1,d,b;        /* 1: F" = ~B and D             */ \
+       evxor           w0,w0,rT0;      /*    W = W[-16] xor W[-3]      */ \
+       or              rT1,rT1,rT2;    /* 1: F = F' or F"              */ \
+       evxor           w0,w0,w4;       /*    W = W xor W[-8]           */ \
+       add             e,e,rT1;        /* 1: E = E + F                 */ \
+       evxor           w0,w0,w1;       /*    W = W xor W[-14]          */ \
+       rotrwi          rT2,a,27;       /* 1: A' = A rotl 5             */ \
+       evrlwi          w0,w0,1;        /*    W = W rotl 1              */ \
+       add             e,e,rT2;        /* 1: E = E + A'                */ \
+       evaddw          rT0,w0,rK;      /*    WK = W + K                */ \
+       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
+       LOAD_K##k##1                                                       \
+       evmergehi       rT1,rT1,rT0;    /*    WK1/WK2                   */ \
+       add             e,e,rT0;        /* 1: E = E + WK                */ \
+       add             d,d,rT1;        /* 2: E = E + WK                */ \
+       and             rT2,a,b;        /* 2: F' = B and C              */ \
+       andc            rT1,c,a;        /* 2: F" = ~B and D             */ \
+       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
+       or              rT1,rT1,rT2;    /* 2: F = F' or F"              */ \
+       add             d,d,rT0;        /* 2: E = E + A'                */ \
+       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
+       add             d,d,rT1         /* 2: E = E + F                 */
+
+#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+       evmergelohi     rT0,w7,w6;      /*    W[-3]                     */ \
+       xor             rT2,b,c;        /* 1: F' = B xor C              */ \
+       evxor           w0,w0,rT0;      /*    W = W[-16] xor W[-3]      */ \
+       xor             rT2,rT2,d;      /* 1: F = F' xor D              */ \
+       evxor           w0,w0,w4;       /*    W = W xor W[-8]           */ \
+       add             e,e,rT2;        /* 1: E = E + F                 */ \
+       evxor           w0,w0,w1;       /*    W = W xor W[-14]          */ \
+       rotrwi          rT2,a,27;       /* 1: A' = A rotl 5             */ \
+       evrlwi          w0,w0,1;        /*    W = W rotl 1              */ \
+       add             e,e,rT2;        /* 1: E = E + A'                */ \
+       evaddw          rT0,w0,rK;      /*    WK = W + K                */ \
+       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
+       LOAD_K##k##1                                                       \
+       evmergehi       rT1,rT1,rT0;    /*    WK1/WK2                   */ \
+       add             e,e,rT0;        /* 1: E = E + WK                */ \
+       xor             rT2,a,b;        /* 2: F' = B xor C              */ \
+       add             d,d,rT1;        /* 2: E = E + WK                */ \
+       xor             rT2,rT2,c;      /* 2: F = F' xor D              */ \
+       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
+       add             d,d,rT2;        /* 2: E = E + F                 */ \
+       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
+       add             d,d,rT0         /* 2: E = E + A'                */
+
+#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+       and             rT2,b,c;        /* 1: F' = B and C              */ \
+       evmergelohi     rT0,w7,w6;      /*    W[-3]                     */ \
+       or              rT1,b,c;        /* 1: F" = B or C               */ \
+       evxor           w0,w0,rT0;      /*    W = W[-16] xor W[-3]      */ \
+       and             rT1,d,rT1;      /* 1: F" = F" and D             */ \
+       evxor           w0,w0,w4;       /*    W = W xor W[-8]           */ \
+       or              rT2,rT2,rT1;    /* 1: F = F' or F"              */ \
+       evxor           w0,w0,w1;       /*    W = W xor W[-14]          */ \
+       add             e,e,rT2;        /* 1: E = E + F                 */ \
+       evrlwi          w0,w0,1;        /*    W = W rotl 1              */ \
+       rotrwi          rT2,a,27;       /* 1: A' = A rotl 5             */ \
+       evaddw          rT0,w0,rK;      /*    WK = W + K                */ \
+       add             e,e,rT2;        /* 1: E = E + A'                */ \
+       LOAD_K##k##1                                                       \
+       evmergehi       rT1,rT1,rT0;    /*    WK1/WK2                   */ \
+       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
+       add             e,e,rT0;        /* 1: E = E + WK                */ \
+       and             rT2,a,b;        /* 2: F' = B and C              */ \
+       or              rT0,a,b;        /* 2: F" = B or C               */ \
+       add             d,d,rT1;        /* 2: E = E + WK                */ \
+       and             rT0,c,rT0;      /* 2: F" = F" and D             */ \
+       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
+       or              rT2,rT2,rT0;    /* 2: F = F' or F"              */ \
+       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
+       add             d,d,rT2;        /* 2: E = E + F                 */ \
+       add             d,d,rT0         /* 2: E = E + A'                */
+
+#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+       R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) /* same body as R_20_39 */
+
+_GLOBAL(ppc_spe_sha1_transform)        /* C side: (u32 *state, const u8 *src, u32 blocks) */
+       INITIALIZE
+
+       lwz             rH0,0(rHP)      /* load the five current hash words */
+       lwz             rH1,4(rHP)
+       mtctr           r5              /* CTR = block count; r5 then becomes rKP */
+       lwz             rH2,8(rHP)
+       lis             rKP,PPC_SPE_SHA1_K@h    /* rKP = address of PPC_SPE_SHA1_K */
+       lwz             rH3,12(rHP)
+       ori             rKP,rKP,PPC_SPE_SHA1_K@l
+       lwz             rH4,16(rHP)
+
+ppc_spe_sha1_main:                     /* loop body, executed once per block */
+       R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
+       R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
+       R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
+       R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
+       R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
+       R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
+       R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
+       R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
+
+       R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
+       R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
+
+       R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
+       R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
+       R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
+       R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
+       R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
+       R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
+       R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
+       R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
+       R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
+       R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
+
+       R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
+       R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
+       R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
+       R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
+       R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
+       R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
+       R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
+       R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
+       R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
+       R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
+
+       R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
+       R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
+       R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
+       R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
+       R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
+       R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
+       R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
+       lwz             rT3,0(rHP)      /* reload the old hash words between the last rounds */
+       R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
+       lwz             rW1,4(rHP)
+       R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
+       lwz             rW2,8(rHP)
+       R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
+       lwz             rW3,12(rHP)
+       NEXT_BLOCK
+       lwz             rW4,16(rHP)
+
+       add             rH0,rH0,rT3     /* new hash = old hash + round result, stored back */
+       stw             rH0,0(rHP)
+       add             rH1,rH1,rW1
+       stw             rH1,4(rHP)
+       add             rH2,rH2,rW2
+       stw             rH2,8(rHP)
+       add             rH3,rH3,rW3
+       stw             rH3,12(rHP)
+       add             rH4,rH4,rW4
+       stw             rH4,16(rHP)
+
+       bdnz            ppc_spe_sha1_main       /* decrement CTR, loop while blocks remain */
+
+       FINALIZE
+       blr
+
+.data
+.align 4
+PPC_SPE_SHA1_K:                /* the four SHA-1 round constants, addressed through rKP */
+       .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
new file mode 100644 (file)
index 0000000..3e1d222
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Glue code for SHA-1 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses is included. Even on the low-end model clocked
+ * at 667 MHz this amounts to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 2048
+
+extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+       /* We just start SPE operations and will save SPE registers later. */
+       preempt_disable();      /* undone by preempt_enable() in spe_end() */
+       enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+       /* close the section opened by spe_begin(): reenable preemption */
+       preempt_enable();
+}
+
+static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
+{
+       int count = sizeof(struct sha1_state) >> 2;     /* size in u32 words */
+       u32 *ptr = (u32 *)sctx;
+
+       /* the word-wise loop below needs a size that is a multiple of 4 */
+       BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
+       do { *ptr++ = 0; } while (--count);     /* zero the whole context */
+}
+
+static int ppc_spe_sha1_init(struct shash_desc *desc)
+{
+       struct sha1_state *sctx = shash_desc_ctx(desc);
+
+       sctx->state[0] = SHA1_H0;       /* seed the state with SHA1_H0..SHA1_H4 */
+       sctx->state[1] = SHA1_H1;
+       sctx->state[2] = SHA1_H2;
+       sctx->state[3] = SHA1_H3;
+       sctx->state[4] = SHA1_H4;
+       sctx->count = 0;                /* no bytes hashed yet */
+
+       return 0;
+}
+
+/* Absorb len bytes: partial blocks are buffered, full ones hashed via SPE. */
+static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
+                       unsigned int len)
+{
+       struct sha1_state *sctx = shash_desc_ctx(desc);
+       const unsigned int offset = sctx->count & 0x3f; /* bytes buffered */
+       const unsigned int avail = 64 - offset;         /* room in buffer */
+       unsigned int bytes;
+       const u8 *src = data;
+
+       sctx->count += len;     /* offset/avail above hold the old position */
+
+       if (avail > len) {      /* still no full block: only buffer */
+               memcpy((char *)sctx->buffer + offset, src, len);
+               return 0;
+       }
+
+       if (offset) {           /* complete and hash the buffered block */
+               memcpy((char *)sctx->buffer + offset, src, avail);
+
+               spe_begin();
+               ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
+               spe_end();
+
+               len -= avail;
+               src += avail;
+       }
+
+       while (len > 63) {
+               bytes = (len > MAX_BYTES) ? MAX_BYTES : len;    /* cap chunk */
+               bytes = bytes & ~0x3f;          /* whole 64-byte blocks only */
+
+               spe_begin();
+               ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
+               spe_end();
+
+               src += bytes;
+               len -= bytes;
+       }
+
+       memcpy((char *)sctx->buffer, src, len);         /* keep the tail */
+       return 0;
+}
+
+static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
+{
+       struct sha1_state *sctx = shash_desc_ctx(desc);
+       const unsigned int offset = sctx->count & 0x3f; /* bytes in buffer */
+       char *p = (char *)sctx->buffer + offset;
+       int padlen;
+       __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
+       __be32 *dst = (__be32 *)out;
+
+       padlen = 55 - offset;   /* zero bytes between the marker and byte 56 */
+       *p++ = 0x80;            /* padding marker byte */
+
+       spe_begin();
+
+       if (padlen < 0) {       /* length field does not fit in this block */
+               memset(p, 0x00, padlen + sizeof (u64)); /* zero to block end */
+               ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+               p = (char *)sctx->buffer;
+               padlen = 56;    /* extra block: zeros up to the length field */
+       }
+
+       memset(p, 0, padlen);
+       *pbits = cpu_to_be64(sctx->count << 3); /* bit count in last 8 bytes */
+       ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+
+       spe_end();
+
+       dst[0] = cpu_to_be32(sctx->state[0]);   /* digest as big-endian words */
+       dst[1] = cpu_to_be32(sctx->state[1]);
+       dst[2] = cpu_to_be32(sctx->state[2]);
+       dst[3] = cpu_to_be32(sctx->state[3]);
+       dst[4] = cpu_to_be32(sctx->state[4]);
+
+       ppc_sha1_clear_context(sctx);   /* zero state, count and buffer */
+       return 0;
+}
+
+static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
+{
+       struct sha1_state *sctx = shash_desc_ctx(desc);
+
+       memcpy(out, sctx, sizeof(*sctx));       /* raw copy of the whole state */
+       return 0;
+}
+
+static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
+{
+       struct sha1_state *sctx = shash_desc_ctx(desc);
+
+       memcpy(sctx, in, sizeof(*sctx));        /* overwrite the whole state */
+       return 0;
+}
+
+static struct shash_alg alg = {        /* (un)registered by the module init/exit hooks */
+       .digestsize     =       SHA1_DIGEST_SIZE,
+       .init           =       ppc_spe_sha1_init,
+       .update         =       ppc_spe_sha1_update,
+       .final          =       ppc_spe_sha1_final,
+       .export         =       ppc_spe_sha1_export,
+       .import         =       ppc_spe_sha1_import,
+       .descsize       =       sizeof(struct sha1_state),      /* type read via shash_desc_ctx() */
+       .statesize      =       sizeof(struct sha1_state),      /* size copied by export/import */
+       .base           =       {
+               .cra_name       =       "sha1",
+               .cra_driver_name=       "sha1-ppc-spe",
+               .cra_priority   =       300,
+               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
+               .cra_blocksize  =       SHA1_BLOCK_SIZE,
+               .cra_module     =       THIS_MODULE,
+       }
+};
+
+static int __init ppc_spe_sha1_mod_init(void)
+{
+       return crypto_register_shash(&alg);     /* status is passed through */
+}
+
+static void __exit ppc_spe_sha1_mod_fini(void)
+{
+       crypto_unregister_shash(&alg);  /* drop what mod_init registered */
+}
+
+module_init(ppc_spe_sha1_mod_init);
+module_exit(ppc_spe_sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-ppc-spe");
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
new file mode 100644 (file)
index 0000000..f4a616f
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Glue code for SHA-256 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * of the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses is included. Even on the low-end model clocked
+ * at 667 MHz this amounts to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 1024
+
+extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+       /* We just start SPE operations and will save SPE registers later. */
+       preempt_disable();      /* undone by preempt_enable() in spe_end() */
+       enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+       /* close the section opened by spe_begin(): reenable preemption */
+       preempt_enable();
+}
+
+static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
+{
+       int count = sizeof(struct sha256_state) >> 2;   /* size in u32 words */
+       u32 *ptr = (u32 *)sctx;
+
+       /* the word-wise loop below needs a size that is a multiple of 4 */
+       BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
+       do { *ptr++ = 0; } while (--count);     /* zero the whole context */
+}
+
+static int ppc_spe_sha256_init(struct shash_desc *desc)
+{
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+
+       sctx->state[0] = SHA256_H0;     /* seed the state with SHA256_H0..H7 */
+       sctx->state[1] = SHA256_H1;
+       sctx->state[2] = SHA256_H2;
+       sctx->state[3] = SHA256_H3;
+       sctx->state[4] = SHA256_H4;
+       sctx->state[5] = SHA256_H5;
+       sctx->state[6] = SHA256_H6;
+       sctx->state[7] = SHA256_H7;
+       sctx->count = 0;                /* no bytes hashed yet */
+
+       return 0;
+}
+
+static int ppc_spe_sha224_init(struct shash_desc *desc)
+{
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+
+       sctx->state[0] = SHA224_H0;     /* seed the state with SHA224_H0..H7 */
+       sctx->state[1] = SHA224_H1;
+       sctx->state[2] = SHA224_H2;
+       sctx->state[3] = SHA224_H3;
+       sctx->state[4] = SHA224_H4;
+       sctx->state[5] = SHA224_H5;
+       sctx->state[6] = SHA224_H6;
+       sctx->state[7] = SHA224_H7;
+       sctx->count = 0;                /* no bytes hashed yet */
+
+       return 0;
+}
+
+/* Absorb len bytes: partial blocks are buffered, full ones hashed via SPE. */
+static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
+                       unsigned int len)
+{
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+       const unsigned int offset = sctx->count & 0x3f; /* bytes buffered */
+       const unsigned int avail = 64 - offset;         /* room in buffer */
+       unsigned int bytes;
+       const u8 *src = data;
+
+       sctx->count += len;     /* offset/avail above hold the old position */
+
+       if (avail > len) {      /* still no full block: only buffer */
+               memcpy((char *)sctx->buf + offset, src, len);
+               return 0;
+       }
+
+       if (offset) {           /* complete and hash the buffered block */
+               memcpy((char *)sctx->buf + offset, src, avail);
+
+               spe_begin();
+               ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
+               spe_end();
+
+               len -= avail;
+               src += avail;
+       }
+
+       while (len > 63) {
+               /* cut input data into smaller blocks */
+               bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
+               bytes = bytes & ~0x3f;          /* whole 64-byte blocks only */
+
+               spe_begin();
+               ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
+               spe_end();
+
+               src += bytes;
+               len -= bytes;
+       }
+
+       memcpy((char *)sctx->buf, src, len);            /* keep the tail */
+       return 0;
+}
+
+static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
+{
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+       const unsigned int offset = sctx->count & 0x3f; /* bytes in buffer */
+       char *p = (char *)sctx->buf + offset;
+       int padlen;
+       __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
+       __be32 *dst = (__be32 *)out;
+
+       padlen = 55 - offset;   /* zero bytes between the marker and byte 56 */
+       *p++ = 0x80;            /* padding marker byte */
+
+       spe_begin();
+
+       if (padlen < 0) {       /* length field does not fit in this block */
+               memset(p, 0x00, padlen + sizeof (u64)); /* zero to block end */
+               ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+               p = (char *)sctx->buf;
+               padlen = 56;    /* extra block: zeros up to the length field */
+       }
+
+       memset(p, 0, padlen);
+       *pbits = cpu_to_be64(sctx->count << 3); /* bit count in last 8 bytes */
+       ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+
+       spe_end();
+
+       dst[0] = cpu_to_be32(sctx->state[0]);   /* digest as big-endian words */
+       dst[1] = cpu_to_be32(sctx->state[1]);
+       dst[2] = cpu_to_be32(sctx->state[2]);
+       dst[3] = cpu_to_be32(sctx->state[3]);
+       dst[4] = cpu_to_be32(sctx->state[4]);
+       dst[5] = cpu_to_be32(sctx->state[5]);
+       dst[6] = cpu_to_be32(sctx->state[6]);
+       dst[7] = cpu_to_be32(sctx->state[7]);
+
+       ppc_sha256_clear_context(sctx); /* zero state, count and buffer */
+       return 0;
+}
+
+static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
+{
+       u32 D[SHA256_DIGEST_SIZE >> 2]; /* scratch for the full-size digest */
+       __be32 *dst = (__be32 *)out;
+
+       ppc_spe_sha256_final(desc, (u8 *)D);    /* finalize into D, not out */
+
+       /* avoid bytewise memcpy */
+       dst[0] = D[0];
+       dst[1] = D[1];
+       dst[2] = D[2];
+       dst[3] = D[3];
+       dst[4] = D[4];
+       dst[5] = D[5];
+       dst[6] = D[6];          /* only the first seven words are output */
+
+       /* clear sensitive data */
+       memzero_explicit(D, SHA256_DIGEST_SIZE);
+       return 0;
+}
+
+static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
+{
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+
+       memcpy(out, sctx, sizeof(*sctx));       /* raw copy of the whole state */
+       return 0;
+}
+
+static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
+{
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+
+       memcpy(sctx, in, sizeof(*sctx));        /* overwrite the whole state */
+       return 0;
+}
+
+static struct shash_alg algs[2] = { {  /* [0] = sha256, [1] = sha224 */
+       .digestsize     =       SHA256_DIGEST_SIZE,
+       .init           =       ppc_spe_sha256_init,
+       .update         =       ppc_spe_sha256_update,
+       .final          =       ppc_spe_sha256_final,
+       .export         =       ppc_spe_sha256_export,
+       .import         =       ppc_spe_sha256_import,
+       .descsize       =       sizeof(struct sha256_state),    /* type read via shash_desc_ctx() */
+       .statesize      =       sizeof(struct sha256_state),    /* size copied by export/import */
+       .base           =       {
+               .cra_name       =       "sha256",
+               .cra_driver_name=       "sha256-ppc-spe",
+               .cra_priority   =       300,
+               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
+               .cra_blocksize  =       SHA256_BLOCK_SIZE,
+               .cra_module     =       THIS_MODULE,
+       }
+}, {
+       .digestsize     =       SHA224_DIGEST_SIZE,
+       .init           =       ppc_spe_sha224_init,
+       .update         =       ppc_spe_sha256_update,  /* shared with sha256 */
+       .final          =       ppc_spe_sha224_final,
+       .export         =       ppc_spe_sha256_export,  /* shared with sha256 */
+       .import         =       ppc_spe_sha256_import,  /* shared with sha256 */
+       .descsize       =       sizeof(struct sha256_state),
+       .statesize      =       sizeof(struct sha256_state),
+       .base           =       {
+               .cra_name       =       "sha224",
+               .cra_driver_name=       "sha224-ppc-spe",
+               .cra_priority   =       300,
+               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
+               .cra_blocksize  =       SHA224_BLOCK_SIZE,
+               .cra_module     =       THIS_MODULE,
+       }
+} };
+
+static int __init ppc_spe_sha256_mod_init(void)
+{
+       return crypto_register_shashes(algs, ARRAY_SIZE(algs)); /* both entries */
+}
+
+static void __exit ppc_spe_sha256_mod_fini(void)
+{
+       crypto_unregister_shashes(algs, ARRAY_SIZE(algs));      /* both entries */
+}
+
+module_init(ppc_spe_sha256_mod_init);
+module_exit(ppc_spe_sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
diff --git a/arch/powerpc/crypto/sha256_spe_glue.c b/arch/powerpc/crypto/sha256_spe_glue.c
deleted file mode 100644 (file)
index f4a616f..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Glue code for SHA-256 implementation for SPE instructions (PPC)
- *
- * Based on generic implementation. The assembler module takes care 
- * about the SPE registers so it can run from interrupt context.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-#include <asm/switch_to.h>
-#include <linux/hardirq.h>
-
-/*
- * MAX_BYTES defines the number of bytes that are allowed to be processed
- * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
- * operations per 64 bytes. e500 cores can issue two arithmetic instructions
- * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
- * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
- * Headroom for cache misses included. Even with the low end model clocked
- * at 667 MHz this equals to a critical time window of less than 27us.
- *
- */
-#define MAX_BYTES 1024
-
-extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
-
-static void spe_begin(void)
-{
-       /* We just start SPE operations and will save SPE registers later. */
-       preempt_disable();
-       enable_kernel_spe();
-}
-
-static void spe_end(void)
-{
-       /* reenable preemption */
-       preempt_enable();
-}
-
-static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
-{
-       int count = sizeof(struct sha256_state) >> 2;
-       u32 *ptr = (u32 *)sctx;
-
-       /* make sure we can clear the fast way */
-       BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
-       do { *ptr++ = 0; } while (--count);
-}
-
-static int ppc_spe_sha256_init(struct shash_desc *desc)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-
-       sctx->state[0] = SHA256_H0;
-       sctx->state[1] = SHA256_H1;
-       sctx->state[2] = SHA256_H2;
-       sctx->state[3] = SHA256_H3;
-       sctx->state[4] = SHA256_H4;
-       sctx->state[5] = SHA256_H5;
-       sctx->state[6] = SHA256_H6;
-       sctx->state[7] = SHA256_H7;
-       sctx->count = 0;
-
-       return 0;
-}
-
-static int ppc_spe_sha224_init(struct shash_desc *desc)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-
-       sctx->state[0] = SHA224_H0;
-       sctx->state[1] = SHA224_H1;
-       sctx->state[2] = SHA224_H2;
-       sctx->state[3] = SHA224_H3;
-       sctx->state[4] = SHA224_H4;
-       sctx->state[5] = SHA224_H5;
-       sctx->state[6] = SHA224_H6;
-       sctx->state[7] = SHA224_H7;
-       sctx->count = 0;
-
-       return 0;
-}
-
-static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
-                       unsigned int len)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       const unsigned int offset = sctx->count & 0x3f;
-       const unsigned int avail = 64 - offset;
-       unsigned int bytes;
-       const u8 *src = data;
-
-       if (avail > len) {
-               sctx->count += len;
-               memcpy((char *)sctx->buf + offset, src, len);
-               return 0;
-       }
-
-       sctx->count += len;
-
-       if (offset) {
-               memcpy((char *)sctx->buf + offset, src, avail);
-
-               spe_begin();
-               ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
-               spe_end();
-
-               len -= avail;
-               src += avail;
-       }
-
-       while (len > 63) {
-               /* cut input data into smaller blocks */
-               bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
-               bytes = bytes & ~0x3f;
-
-               spe_begin();
-               ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
-               spe_end();
-
-               src += bytes;
-               len -= bytes;
-       };
-
-       memcpy((char *)sctx->buf, src, len);
-       return 0;
-}
-
-static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       const unsigned int offset = sctx->count & 0x3f;
-       char *p = (char *)sctx->buf + offset;
-       int padlen;
-       __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
-       __be32 *dst = (__be32 *)out;
-
-       padlen = 55 - offset;
-       *p++ = 0x80;
-
-       spe_begin();
-
-       if (padlen < 0) {
-               memset(p, 0x00, padlen + sizeof (u64));
-               ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
-               p = (char *)sctx->buf;
-               padlen = 56;
-       }
-
-       memset(p, 0, padlen);
-       *pbits = cpu_to_be64(sctx->count << 3);
-       ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
-
-       spe_end();
-
-       dst[0] = cpu_to_be32(sctx->state[0]);
-       dst[1] = cpu_to_be32(sctx->state[1]);
-       dst[2] = cpu_to_be32(sctx->state[2]);
-       dst[3] = cpu_to_be32(sctx->state[3]);
-       dst[4] = cpu_to_be32(sctx->state[4]);
-       dst[5] = cpu_to_be32(sctx->state[5]);
-       dst[6] = cpu_to_be32(sctx->state[6]);
-       dst[7] = cpu_to_be32(sctx->state[7]);
-
-       ppc_sha256_clear_context(sctx);
-       return 0;
-}
-
-static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
-{
-       u32 D[SHA256_DIGEST_SIZE >> 2];
-       __be32 *dst = (__be32 *)out;
-
-       ppc_spe_sha256_final(desc, (u8 *)D);
-
-       /* avoid bytewise memcpy */
-       dst[0] = D[0];
-       dst[1] = D[1];
-       dst[2] = D[2];
-       dst[3] = D[3];
-       dst[4] = D[4];
-       dst[5] = D[5];
-       dst[6] = D[6];
-
-       /* clear sensitive data */
-       memzero_explicit(D, SHA256_DIGEST_SIZE);
-       return 0;
-}
-
-static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-
-       memcpy(out, sctx, sizeof(*sctx));
-       return 0;
-}
-
-static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-
-       memcpy(sctx, in, sizeof(*sctx));
-       return 0;
-}
-
-static struct shash_alg algs[2] = { {
-       .digestsize     =       SHA256_DIGEST_SIZE,
-       .init           =       ppc_spe_sha256_init,
-       .update         =       ppc_spe_sha256_update,
-       .final          =       ppc_spe_sha256_final,
-       .export         =       ppc_spe_sha256_export,
-       .import         =       ppc_spe_sha256_import,
-       .descsize       =       sizeof(struct sha256_state),
-       .statesize      =       sizeof(struct sha256_state),
-       .base           =       {
-               .cra_name       =       "sha256",
-               .cra_driver_name=       "sha256-ppc-spe",
-               .cra_priority   =       300,
-               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
-               .cra_blocksize  =       SHA256_BLOCK_SIZE,
-               .cra_module     =       THIS_MODULE,
-       }
-}, {
-       .digestsize     =       SHA224_DIGEST_SIZE,
-       .init           =       ppc_spe_sha224_init,
-       .update         =       ppc_spe_sha256_update,
-       .final          =       ppc_spe_sha224_final,
-       .export         =       ppc_spe_sha256_export,
-       .import         =       ppc_spe_sha256_import,
-       .descsize       =       sizeof(struct sha256_state),
-       .statesize      =       sizeof(struct sha256_state),
-       .base           =       {
-               .cra_name       =       "sha224",
-               .cra_driver_name=       "sha224-ppc-spe",
-               .cra_priority   =       300,
-               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
-               .cra_blocksize  =       SHA224_BLOCK_SIZE,
-               .cra_module     =       THIS_MODULE,
-       }
-} };
-
-static int __init ppc_spe_sha256_mod_init(void)
-{
-       return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit ppc_spe_sha256_mod_fini(void)
-{
-       crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_init(ppc_spe_sha256_mod_init);
-module_exit(ppc_spe_sha256_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
-
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
diff --git a/arch/powerpc/sha1-spe-asm.S b/arch/powerpc/sha1-spe-asm.S
deleted file mode 100644 (file)
index fcb6cf0..0000000
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Fast SHA-1 implementation for SPE instruction set (PPC)
- *
- * This code makes use of the SPE SIMD instruction set as defined in
- * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
- * Implementation is based on optimization guide notes from
- * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-
-#define rHP    r3      /* pointer to hash value                        */
-#define rWP    r4      /* pointer to input                             */
-#define rKP    r5      /* pointer to constants                         */
-
-#define rW0    r14     /* 64 bit round words                           */
-#define rW1    r15
-#define rW2    r16
-#define rW3    r17
-#define rW4    r18
-#define rW5    r19
-#define rW6    r20
-#define rW7    r21
-
-#define rH0    r6      /* 32 bit hash values                           */
-#define rH1    r7
-#define rH2    r8
-#define rH3    r9
-#define rH4    r10
-
-#define rT0    r22     /* 64 bit temporary                             */
-#define rT1    r0      /* 32 bit temporaries                           */
-#define rT2    r11
-#define rT3    r12
-
-#define rK     r23     /* 64 bit constant in volatile register         */
-
-#define LOAD_K01
-
-#define LOAD_K11 \
-       evlwwsplat      rK,0(rKP);
-
-#define LOAD_K21 \
-       evlwwsplat      rK,4(rKP);
-
-#define LOAD_K31 \
-       evlwwsplat      rK,8(rKP);
-
-#define LOAD_K41 \
-       evlwwsplat      rK,12(rKP);
-
-#define INITIALIZE \
-       stwu            r1,-128(r1);    /* create stack frame           */ \
-       evstdw          r14,8(r1);      /* We must save non volatile    */ \
-       evstdw          r15,16(r1);     /* registers. Take the chance   */ \
-       evstdw          r16,24(r1);     /* and save the SPE part too    */ \
-       evstdw          r17,32(r1);                                        \
-       evstdw          r18,40(r1);                                        \
-       evstdw          r19,48(r1);                                        \
-       evstdw          r20,56(r1);                                        \
-       evstdw          r21,64(r1);                                        \
-       evstdw          r22,72(r1);                                        \
-       evstdw          r23,80(r1);
-
-
-#define FINALIZE \
-       evldw           r14,8(r1);      /* restore SPE registers        */ \
-       evldw           r15,16(r1);                                        \
-       evldw           r16,24(r1);                                        \
-       evldw           r17,32(r1);                                        \
-       evldw           r18,40(r1);                                        \
-       evldw           r19,48(r1);                                        \
-       evldw           r20,56(r1);                                        \
-       evldw           r21,64(r1);                                        \
-       evldw           r22,72(r1);                                        \
-       evldw           r23,80(r1);                                        \
-       xor             r0,r0,r0;                                          \
-       stw             r0,8(r1);       /* Delete sensitive data        */ \
-       stw             r0,16(r1);      /* that we might have pushed    */ \
-       stw             r0,24(r1);      /* from other context that runs */ \
-       stw             r0,32(r1);      /* the same code. Assume that   */ \
-       stw             r0,40(r1);      /* the lower part of the GPRs   */ \
-       stw             r0,48(r1);      /* were already overwritten on  */ \
-       stw             r0,56(r1);      /* the way down to here         */ \
-       stw             r0,64(r1);                                         \
-       stw             r0,72(r1);                                         \
-       stw             r0,80(r1);                                         \
-       addi            r1,r1,128;      /* cleanup stack frame          */
-
-#ifdef __BIG_ENDIAN__
-#define LOAD_DATA(reg, off) \
-       lwz             reg,off(rWP);   /* load data                    */
-#define NEXT_BLOCK \
-       addi            rWP,rWP,64;     /* increment per block          */
-#else
-#define LOAD_DATA(reg, off) \
-       lwbrx           reg,0,rWP;      /* load data                    */ \
-       addi            rWP,rWP,4;      /* increment per word           */
-#define NEXT_BLOCK                     /* nothing to do                */
-#endif
-
-#define        R_00_15(a, b, c, d, e, w0, w1, k, off) \
-       LOAD_DATA(w0, off)              /* 1: W                         */ \
-       and             rT2,b,c;        /* 1: F' = B and C              */ \
-       LOAD_K##k##1                                                       \
-       andc            rT1,d,b;        /* 1: F" = ~B and D             */ \
-       rotrwi          rT0,a,27;       /* 1: A' = A rotl 5             */ \
-       or              rT2,rT2,rT1;    /* 1: F = F' or F"              */ \
-       add             e,e,rT0;        /* 1: E = E + A'                */ \
-       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
-       add             e,e,w0;         /* 1: E = E + W                 */ \
-       LOAD_DATA(w1, off+4)            /* 2: W                         */ \
-       add             e,e,rT2;        /* 1: E = E + F                 */ \
-       and             rT1,a,b;        /* 2: F' = B and C              */ \
-       add             e,e,rK;         /* 1: E = E + K                 */ \
-       andc            rT2,c,a;        /* 2: F" = ~B and D             */ \
-       add             d,d,rK;         /* 2: E = E + K                 */ \
-       or              rT2,rT2,rT1;    /* 2: F = F' or F"              */ \
-       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
-       add             d,d,w1;         /* 2: E = E + W                 */ \
-       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
-       add             d,d,rT0;        /* 2: E = E + A'                */ \
-       evmergelo       w1,w1,w0;       /*    mix W[0]/W[1]             */ \
-       add             d,d,rT2         /* 2: E = E + F                 */
-
-#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
-       and             rT2,b,c;        /* 1: F' = B and C              */ \
-       evmergelohi     rT0,w7,w6;      /*    W[-3]                     */ \
-       andc            rT1,d,b;        /* 1: F" = ~B and D             */ \
-       evxor           w0,w0,rT0;      /*    W = W[-16] xor W[-3]      */ \
-       or              rT1,rT1,rT2;    /* 1: F = F' or F"              */ \
-       evxor           w0,w0,w4;       /*    W = W xor W[-8]           */ \
-       add             e,e,rT1;        /* 1: E = E + F                 */ \
-       evxor           w0,w0,w1;       /*    W = W xor W[-14]          */ \
-       rotrwi          rT2,a,27;       /* 1: A' = A rotl 5             */ \
-       evrlwi          w0,w0,1;        /*    W = W rotl 1              */ \
-       add             e,e,rT2;        /* 1: E = E + A'                */ \
-       evaddw          rT0,w0,rK;      /*    WK = W + K                */ \
-       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
-       LOAD_K##k##1                                                       \
-       evmergehi       rT1,rT1,rT0;    /*    WK1/WK2                   */ \
-       add             e,e,rT0;        /* 1: E = E + WK                */ \
-       add             d,d,rT1;        /* 2: E = E + WK                */ \
-       and             rT2,a,b;        /* 2: F' = B and C              */ \
-       andc            rT1,c,a;        /* 2: F" = ~B and D             */ \
-       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
-       or              rT1,rT1,rT2;    /* 2: F = F' or F"              */ \
-       add             d,d,rT0;        /* 2: E = E + A'                */ \
-       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
-       add             d,d,rT1         /* 2: E = E + F                 */
-
-#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
-       evmergelohi     rT0,w7,w6;      /*    W[-3]                     */ \
-       xor             rT2,b,c;        /* 1: F' = B xor C              */ \
-       evxor           w0,w0,rT0;      /*    W = W[-16] xor W[-3]      */ \
-       xor             rT2,rT2,d;      /* 1: F = F' xor D              */ \
-       evxor           w0,w0,w4;       /*    W = W xor W[-8]           */ \
-       add             e,e,rT2;        /* 1: E = E + F                 */ \
-       evxor           w0,w0,w1;       /*    W = W xor W[-14]          */ \
-       rotrwi          rT2,a,27;       /* 1: A' = A rotl 5             */ \
-       evrlwi          w0,w0,1;        /*    W = W rotl 1              */ \
-       add             e,e,rT2;        /* 1: E = E + A'                */ \
-       evaddw          rT0,w0,rK;      /*    WK = W + K                */ \
-       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
-       LOAD_K##k##1                                                       \
-       evmergehi       rT1,rT1,rT0;    /*    WK1/WK2                   */ \
-       add             e,e,rT0;        /* 1: E = E + WK                */ \
-       xor             rT2,a,b;        /* 2: F' = B xor C              */ \
-       add             d,d,rT1;        /* 2: E = E + WK                */ \
-       xor             rT2,rT2,c;      /* 2: F = F' xor D              */ \
-       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
-       add             d,d,rT2;        /* 2: E = E + F                 */ \
-       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
-       add             d,d,rT0         /* 2: E = E + A'                */
-
-#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
-       and             rT2,b,c;        /* 1: F' = B and C              */ \
-       evmergelohi     rT0,w7,w6;      /*    W[-3]                     */ \
-       or              rT1,b,c;        /* 1: F" = B or C               */ \
-       evxor           w0,w0,rT0;      /*    W = W[-16] xor W[-3]      */ \
-       and             rT1,d,rT1;      /* 1: F" = F" and D             */ \
-       evxor           w0,w0,w4;       /*    W = W xor W[-8]           */ \
-       or              rT2,rT2,rT1;    /* 1: F = F' or F"              */ \
-       evxor           w0,w0,w1;       /*    W = W xor W[-14]          */ \
-       add             e,e,rT2;        /* 1: E = E + F                 */ \
-       evrlwi          w0,w0,1;        /*    W = W rotl 1              */ \
-       rotrwi          rT2,a,27;       /* 1: A' = A rotl 5             */ \
-       evaddw          rT0,w0,rK;      /*    WK = W + K                */ \
-       add             e,e,rT2;        /* 1: E = E + A'                */ \
-       LOAD_K##k##1                                                       \
-       evmergehi       rT1,rT1,rT0;    /*    WK1/WK2                   */ \
-       rotrwi          b,b,2;          /* 1: B = B rotl 30             */ \
-       add             e,e,rT0;        /* 1: E = E + WK                */ \
-       and             rT2,a,b;        /* 2: F' = B and C              */ \
-       or              rT0,a,b;        /* 2: F" = B or C               */ \
-       add             d,d,rT1;        /* 2: E = E + WK                */ \
-       and             rT0,c,rT0;      /* 2: F" = F" and D             */ \
-       rotrwi          a,a,2;          /* 2: B = B rotl 30             */ \
-       or              rT2,rT2,rT0;    /* 2: F = F' or F"              */ \
-       rotrwi          rT0,e,27;       /* 2: A' = A rotl 5             */ \
-       add             d,d,rT2;        /* 2: E = E + F                 */ \
-       add             d,d,rT0         /* 2: E = E + A'                */
-
-#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
-       R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
-
-_GLOBAL(ppc_spe_sha1_transform)
-       INITIALIZE
-
-       lwz             rH0,0(rHP)
-       lwz             rH1,4(rHP)
-       mtctr           r5
-       lwz             rH2,8(rHP)
-       lis             rKP,PPC_SPE_SHA1_K@h
-       lwz             rH3,12(rHP)
-       ori             rKP,rKP,PPC_SPE_SHA1_K@l
-       lwz             rH4,16(rHP)
-
-ppc_spe_sha1_main:
-       R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
-       R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
-       R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
-       R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
-       R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
-       R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
-       R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
-       R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
-
-       R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
-       R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
-
-       R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
-       R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
-       R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
-       R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
-       R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
-       R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
-       R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
-       R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
-       R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
-       R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
-
-       R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
-       R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
-       R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
-       R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
-       R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
-       R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
-       R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
-       R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
-       R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
-       R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
-
-       R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
-       R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
-       R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
-       R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
-       R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
-       R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
-       R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
-       lwz             rT3,0(rHP)
-       R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
-       lwz             rW1,4(rHP)
-       R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
-       lwz             rW2,8(rHP)
-       R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
-       lwz             rW3,12(rHP)
-       NEXT_BLOCK
-       lwz             rW4,16(rHP)
-
-       add             rH0,rH0,rT3
-       stw             rH0,0(rHP)
-       add             rH1,rH1,rW1
-       stw             rH1,4(rHP)
-       add             rH2,rH2,rW2
-       stw             rH2,8(rHP)
-       add             rH3,rH3,rW3
-       stw             rH3,12(rHP)
-       add             rH4,rH4,rW4
-       stw             rH4,16(rHP)
-
-       bdnz            ppc_spe_sha1_main
-
-       FINALIZE
-       blr
-
-.data
-.align 4
-PPC_SPE_SHA1_K:
-       .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
diff --git a/arch/powerpc/sha1_spe_glue.c b/arch/powerpc/sha1_spe_glue.c
deleted file mode 100644 (file)
index 3e1d222..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Glue code for SHA-1 implementation for SPE instructions (PPC)
- *
- * Based on generic implementation.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-#include <asm/switch_to.h>
-#include <linux/hardirq.h>
-
-/*
- * MAX_BYTES defines the number of bytes that are allowed to be processed
- * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
- * operations per 64 bytes. e500 cores can issue two arithmetic instructions
- * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
- * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
- * Headroom for cache misses included. Even with the low end model clocked
- * at 667 MHz this equals to a critical time window of less than 27us.
- *
- */
-#define MAX_BYTES 2048
-
-extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
-
-static void spe_begin(void)
-{
-       /* We just start SPE operations and will save SPE registers later. */
-       preempt_disable();
-       enable_kernel_spe();
-}
-
-static void spe_end(void)
-{
-       /* reenable preemption */
-       preempt_enable();
-}
-
-static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
-{
-       int count = sizeof(struct sha1_state) >> 2;
-       u32 *ptr = (u32 *)sctx;
-
-       /* make sure we can clear the fast way */
-       BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
-       do { *ptr++ = 0; } while (--count);
-}
-
-static int ppc_spe_sha1_init(struct shash_desc *desc)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-
-       sctx->state[0] = SHA1_H0;
-       sctx->state[1] = SHA1_H1;
-       sctx->state[2] = SHA1_H2;
-       sctx->state[3] = SHA1_H3;
-       sctx->state[4] = SHA1_H4;
-       sctx->count = 0;
-
-       return 0;
-}
-
-static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
-                       unsigned int len)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       const unsigned int offset = sctx->count & 0x3f;
-       const unsigned int avail = 64 - offset;
-       unsigned int bytes;
-       const u8 *src = data;
-
-       if (avail > len) {
-               sctx->count += len;
-               memcpy((char *)sctx->buffer + offset, src, len);
-               return 0;
-       }
-
-       sctx->count += len;
-
-       if (offset) {
-               memcpy((char *)sctx->buffer + offset, src, avail);
-
-               spe_begin();
-               ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
-               spe_end();
-
-               len -= avail;
-               src += avail;
-       }
-
-       while (len > 63) {
-               bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
-               bytes = bytes & ~0x3f;
-
-               spe_begin();
-               ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
-               spe_end();
-
-               src += bytes;
-               len -= bytes;
-       };
-
-       memcpy((char *)sctx->buffer, src, len);
-       return 0;
-}
-
-static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       const unsigned int offset = sctx->count & 0x3f;
-       char *p = (char *)sctx->buffer + offset;
-       int padlen;
-       __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
-       __be32 *dst = (__be32 *)out;
-
-       padlen = 55 - offset;
-       *p++ = 0x80;
-
-       spe_begin();
-
-       if (padlen < 0) {
-               memset(p, 0x00, padlen + sizeof (u64));
-               ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
-               p = (char *)sctx->buffer;
-               padlen = 56;
-       }
-
-       memset(p, 0, padlen);
-       *pbits = cpu_to_be64(sctx->count << 3);
-       ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
-
-       spe_end();
-
-       dst[0] = cpu_to_be32(sctx->state[0]);
-       dst[1] = cpu_to_be32(sctx->state[1]);
-       dst[2] = cpu_to_be32(sctx->state[2]);
-       dst[3] = cpu_to_be32(sctx->state[3]);
-       dst[4] = cpu_to_be32(sctx->state[4]);
-
-       ppc_sha1_clear_context(sctx);
-       return 0;
-}
-
-static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-
-       memcpy(out, sctx, sizeof(*sctx));
-       return 0;
-}
-
-static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-
-       memcpy(sctx, in, sizeof(*sctx));
-       return 0;
-}
-
-static struct shash_alg alg = {
-       .digestsize     =       SHA1_DIGEST_SIZE,
-       .init           =       ppc_spe_sha1_init,
-       .update         =       ppc_spe_sha1_update,
-       .final          =       ppc_spe_sha1_final,
-       .export         =       ppc_spe_sha1_export,
-       .import         =       ppc_spe_sha1_import,
-       .descsize       =       sizeof(struct sha1_state),
-       .statesize      =       sizeof(struct sha1_state),
-       .base           =       {
-               .cra_name       =       "sha1",
-               .cra_driver_name=       "sha1-ppc-spe",
-               .cra_priority   =       300,
-               .cra_flags      =       CRYPTO_ALG_TYPE_SHASH,
-               .cra_blocksize  =       SHA1_BLOCK_SIZE,
-               .cra_module     =       THIS_MODULE,
-       }
-};
-
-static int __init ppc_spe_sha1_mod_init(void)
-{
-       return crypto_register_shash(&alg);
-}
-
-static void __exit ppc_spe_sha1_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(ppc_spe_sha1_mod_init);
-module_exit(ppc_spe_sha1_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
-
-MODULE_ALIAS_CRYPTO("sha1");
-MODULE_ALIAS_CRYPTO("sha1-ppc-spe");