config CRYPTO_AES_PPC_SPE
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
depends on SPE
+ select CRYPTO_LIB_AES
select CRYPTO_SKCIPHER
help
- Block ciphers: AES cipher algorithms (FIPS-197)
Length-preserving ciphers: AES with ECB, CBC, CTR, and XTS modes
Architecture: powerpc using:
obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
-aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
+aes-ppc-spe-y := aes-spe-glue.o
aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Fast AES implementation for SPE instruction set (PPC)
- *
- * This code makes use of the SPE SIMD instruction set as defined in
- * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
- * Implementation is based on optimization guide notes from
- * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#include <asm/ppc_asm.h>
-#include "aes-spe-regs.h"
-
-#define EAD(in, bpos) \
- rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
-
-#define DAD(in, bpos) \
- rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
-
-#define LWH(out, off) \
- evlwwsplat out,off(rT0); /* load word high */
-
-#define LWL(out, off) \
- lwz out,off(rT0); /* load word low */
-
-#define LBZ(out, tab, off) \
- lbz out,off(tab); /* load byte */
-
-#define LAH(out, in, bpos, off) \
- EAD(in, bpos) /* calc addr + load word high */ \
- LWH(out, off)
-
-#define LAL(out, in, bpos, off) \
- EAD(in, bpos) /* calc addr + load word low */ \
- LWL(out, off)
-
-#define LAE(out, in, bpos) \
- EAD(in, bpos) /* calc addr + load enc byte */ \
- LBZ(out, rT0, 8)
-
-#define LBE(out) \
- LBZ(out, rT0, 8) /* load enc byte */
-
-#define LAD(out, in, bpos) \
- DAD(in, bpos) /* calc addr + load dec byte */ \
- LBZ(out, rT1, 0)
-
-#define LBD(out) \
- LBZ(out, rT1, 0)
-
-/*
- * ppc_encrypt_block: The central encryption function for a single 16 bytes
- * block. It does no stack handling or register saving to support fast calls
- * via bl/blr. It expects that caller has pre-xored input data with first
- * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
- * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
- * and rW0-rW3 and caller must execute a final xor on the output registers.
- * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
- *
- */
-_GLOBAL(ppc_encrypt_block)
- LAH(rW4, rD1, 2, 4)
- LAH(rW6, rD0, 3, 0)
- LAH(rW3, rD0, 1, 8)
-ppc_encrypt_block_loop:
- LAH(rW0, rD3, 0, 12)
- LAL(rW0, rD0, 0, 12)
- LAH(rW1, rD1, 0, 12)
- LAH(rW2, rD2, 1, 8)
- LAL(rW2, rD3, 1, 8)
- LAL(rW3, rD1, 1, 8)
- LAL(rW4, rD2, 2, 4)
- LAL(rW6, rD1, 3, 0)
- LAH(rW5, rD3, 2, 4)
- LAL(rW5, rD0, 2, 4)
- LAH(rW7, rD2, 3, 0)
- evldw rD1,16(rKP)
- EAD(rD3, 3)
- evxor rW2,rW2,rW4
- LWL(rW7, 0)
- evxor rW2,rW2,rW6
- EAD(rD2, 0)
- evxor rD1,rD1,rW2
- LWL(rW1, 12)
- evxor rD1,rD1,rW0
- evldw rD3,24(rKP)
- evmergehi rD0,rD0,rD1
- EAD(rD1, 2)
- evxor rW3,rW3,rW5
- LWH(rW4, 4)
- evxor rW3,rW3,rW7
- EAD(rD0, 3)
- evxor rD3,rD3,rW3
- LWH(rW6, 0)
- evxor rD3,rD3,rW1
- EAD(rD0, 1)
- evmergehi rD2,rD2,rD3
- LWH(rW3, 8)
- LAH(rW0, rD3, 0, 12)
- LAL(rW0, rD0, 0, 12)
- LAH(rW1, rD1, 0, 12)
- LAH(rW2, rD2, 1, 8)
- LAL(rW2, rD3, 1, 8)
- LAL(rW3, rD1, 1, 8)
- LAL(rW4, rD2, 2, 4)
- LAL(rW6, rD1, 3, 0)
- LAH(rW5, rD3, 2, 4)
- LAL(rW5, rD0, 2, 4)
- LAH(rW7, rD2, 3, 0)
- evldw rD1,32(rKP)
- EAD(rD3, 3)
- evxor rW2,rW2,rW4
- LWL(rW7, 0)
- evxor rW2,rW2,rW6
- EAD(rD2, 0)
- evxor rD1,rD1,rW2
- LWL(rW1, 12)
- evxor rD1,rD1,rW0
- evldw rD3,40(rKP)
- evmergehi rD0,rD0,rD1
- EAD(rD1, 2)
- evxor rW3,rW3,rW5
- LWH(rW4, 4)
- evxor rW3,rW3,rW7
- EAD(rD0, 3)
- evxor rD3,rD3,rW3
- LWH(rW6, 0)
- evxor rD3,rD3,rW1
- EAD(rD0, 1)
- evmergehi rD2,rD2,rD3
- LWH(rW3, 8)
- addi rKP,rKP,32
- bdnz ppc_encrypt_block_loop
- LAH(rW0, rD3, 0, 12)
- LAL(rW0, rD0, 0, 12)
- LAH(rW1, rD1, 0, 12)
- LAH(rW2, rD2, 1, 8)
- LAL(rW2, rD3, 1, 8)
- LAL(rW3, rD1, 1, 8)
- LAL(rW4, rD2, 2, 4)
- LAH(rW5, rD3, 2, 4)
- LAL(rW6, rD1, 3, 0)
- LAL(rW5, rD0, 2, 4)
- LAH(rW7, rD2, 3, 0)
- evldw rD1,16(rKP)
- EAD(rD3, 3)
- evxor rW2,rW2,rW4
- LWL(rW7, 0)
- evxor rW2,rW2,rW6
- EAD(rD2, 0)
- evxor rD1,rD1,rW2
- LWL(rW1, 12)
- evxor rD1,rD1,rW0
- evldw rD3,24(rKP)
- evmergehi rD0,rD0,rD1
- EAD(rD1, 0)
- evxor rW3,rW3,rW5
- LBE(rW2)
- evxor rW3,rW3,rW7
- EAD(rD0, 1)
- evxor rD3,rD3,rW3
- LBE(rW6)
- evxor rD3,rD3,rW1
- EAD(rD0, 0)
- evmergehi rD2,rD2,rD3
- LBE(rW1)
- LAE(rW0, rD3, 0)
- LAE(rW1, rD0, 0)
- LAE(rW4, rD2, 1)
- LAE(rW5, rD3, 1)
- LAE(rW3, rD2, 0)
- LAE(rW7, rD1, 1)
- rlwimi rW0,rW4,8,16,23
- rlwimi rW1,rW5,8,16,23
- LAE(rW4, rD1, 2)
- LAE(rW5, rD2, 2)
- rlwimi rW2,rW6,8,16,23
- rlwimi rW3,rW7,8,16,23
- LAE(rW6, rD3, 2)
- LAE(rW7, rD0, 2)
- rlwimi rW0,rW4,16,8,15
- rlwimi rW1,rW5,16,8,15
- LAE(rW4, rD0, 3)
- LAE(rW5, rD1, 3)
- rlwimi rW2,rW6,16,8,15
- lwz rD0,32(rKP)
- rlwimi rW3,rW7,16,8,15
- lwz rD1,36(rKP)
- LAE(rW6, rD2, 3)
- LAE(rW7, rD3, 3)
- rlwimi rW0,rW4,24,0,7
- lwz rD2,40(rKP)
- rlwimi rW1,rW5,24,0,7
- lwz rD3,44(rKP)
- rlwimi rW2,rW6,24,0,7
- rlwimi rW3,rW7,24,0,7
- blr
-
-/*
- * ppc_decrypt_block: The central decryption function for a single 16 bytes
- * block. It does no stack handling or register saving to support fast calls
- * via bl/blr. It expects that caller has pre-xored input data with first
- * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
- * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
- * and rW0-rW3 and caller must execute a final xor on the output registers.
- * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
- *
- */
-_GLOBAL(ppc_decrypt_block)
- LAH(rW0, rD1, 0, 12)
- LAH(rW6, rD0, 3, 0)
- LAH(rW3, rD0, 1, 8)
-ppc_decrypt_block_loop:
- LAH(rW1, rD3, 0, 12)
- LAL(rW0, rD2, 0, 12)
- LAH(rW2, rD2, 1, 8)
- LAL(rW2, rD3, 1, 8)
- LAH(rW4, rD3, 2, 4)
- LAL(rW4, rD0, 2, 4)
- LAL(rW6, rD1, 3, 0)
- LAH(rW5, rD1, 2, 4)
- LAH(rW7, rD2, 3, 0)
- LAL(rW7, rD3, 3, 0)
- LAL(rW3, rD1, 1, 8)
- evldw rD1,16(rKP)
- EAD(rD0, 0)
- evxor rW4,rW4,rW6
- LWL(rW1, 12)
- evxor rW0,rW0,rW4
- EAD(rD2, 2)
- evxor rW0,rW0,rW2
- LWL(rW5, 4)
- evxor rD1,rD1,rW0
- evldw rD3,24(rKP)
- evmergehi rD0,rD0,rD1
- EAD(rD1, 0)
- evxor rW3,rW3,rW7
- LWH(rW0, 12)
- evxor rW3,rW3,rW1
- EAD(rD0, 3)
- evxor rD3,rD3,rW3
- LWH(rW6, 0)
- evxor rD3,rD3,rW5
- EAD(rD0, 1)
- evmergehi rD2,rD2,rD3
- LWH(rW3, 8)
- LAH(rW1, rD3, 0, 12)
- LAL(rW0, rD2, 0, 12)
- LAH(rW2, rD2, 1, 8)
- LAL(rW2, rD3, 1, 8)
- LAH(rW4, rD3, 2, 4)
- LAL(rW4, rD0, 2, 4)
- LAL(rW6, rD1, 3, 0)
- LAH(rW5, rD1, 2, 4)
- LAH(rW7, rD2, 3, 0)
- LAL(rW7, rD3, 3, 0)
- LAL(rW3, rD1, 1, 8)
- evldw rD1,32(rKP)
- EAD(rD0, 0)
- evxor rW4,rW4,rW6
- LWL(rW1, 12)
- evxor rW0,rW0,rW4
- EAD(rD2, 2)
- evxor rW0,rW0,rW2
- LWL(rW5, 4)
- evxor rD1,rD1,rW0
- evldw rD3,40(rKP)
- evmergehi rD0,rD0,rD1
- EAD(rD1, 0)
- evxor rW3,rW3,rW7
- LWH(rW0, 12)
- evxor rW3,rW3,rW1
- EAD(rD0, 3)
- evxor rD3,rD3,rW3
- LWH(rW6, 0)
- evxor rD3,rD3,rW5
- EAD(rD0, 1)
- evmergehi rD2,rD2,rD3
- LWH(rW3, 8)
- addi rKP,rKP,32
- bdnz ppc_decrypt_block_loop
- LAH(rW1, rD3, 0, 12)
- LAL(rW0, rD2, 0, 12)
- LAH(rW2, rD2, 1, 8)
- LAL(rW2, rD3, 1, 8)
- LAH(rW4, rD3, 2, 4)
- LAL(rW4, rD0, 2, 4)
- LAL(rW6, rD1, 3, 0)
- LAH(rW5, rD1, 2, 4)
- LAH(rW7, rD2, 3, 0)
- LAL(rW7, rD3, 3, 0)
- LAL(rW3, rD1, 1, 8)
- evldw rD1,16(rKP)
- EAD(rD0, 0)
- evxor rW4,rW4,rW6
- LWL(rW1, 12)
- evxor rW0,rW0,rW4
- EAD(rD2, 2)
- evxor rW0,rW0,rW2
- LWL(rW5, 4)
- evxor rD1,rD1,rW0
- evldw rD3,24(rKP)
- evmergehi rD0,rD0,rD1
- DAD(rD1, 0)
- evxor rW3,rW3,rW7
- LBD(rW0)
- evxor rW3,rW3,rW1
- DAD(rD0, 1)
- evxor rD3,rD3,rW3
- LBD(rW6)
- evxor rD3,rD3,rW5
- DAD(rD0, 0)
- evmergehi rD2,rD2,rD3
- LBD(rW3)
- LAD(rW2, rD3, 0)
- LAD(rW1, rD2, 0)
- LAD(rW4, rD2, 1)
- LAD(rW5, rD3, 1)
- LAD(rW7, rD1, 1)
- rlwimi rW0,rW4,8,16,23
- rlwimi rW1,rW5,8,16,23
- LAD(rW4, rD3, 2)
- LAD(rW5, rD0, 2)
- rlwimi rW2,rW6,8,16,23
- rlwimi rW3,rW7,8,16,23
- LAD(rW6, rD1, 2)
- LAD(rW7, rD2, 2)
- rlwimi rW0,rW4,16,8,15
- rlwimi rW1,rW5,16,8,15
- LAD(rW4, rD0, 3)
- LAD(rW5, rD1, 3)
- rlwimi rW2,rW6,16,8,15
- lwz rD0,32(rKP)
- rlwimi rW3,rW7,16,8,15
- lwz rD1,36(rKP)
- LAD(rW6, rD2, 3)
- LAD(rW7, rD3, 3)
- rlwimi rW0,rW4,24,0,7
- lwz rD2,40(rKP)
- rlwimi rW1,rW5,24,0,7
- lwz rD3,44(rKP)
- rlwimi rW2,rW6,24,0,7
- rlwimi rW3,rW7,24,0,7
- blr
u32 rounds;
};
-extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
-extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
-extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
- u32 bytes);
-extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
- u32 bytes);
-extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
- u32 bytes, u8 *iv);
-extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
- u32 bytes, u8 *iv);
-extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
- u32 bytes, u8 *iv);
-extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
- u32 bytes, u8 *iv, u32 *key_twk);
-extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
- u32 bytes, u8 *iv, u32 *key_twk);
-
-extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
-extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
-extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
-
-extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
- unsigned int key_len);
-
static void spe_begin(void)
{
/* disable preemption and save users SPE registers if required */
preempt_enable();
}
-static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
{
- struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
switch (key_len) {
case AES_KEYSIZE_128:
return 0;
}
-static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
- const u8 *in_key, unsigned int key_len)
-{
- return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
-}
-
static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
return 0;
}
-static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- spe_begin();
- ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
- spe_end();
-}
-
-static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- spe_begin();
- ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
- spe_end();
-}
-
static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
* with kmalloc() in the crypto infrastructure
*/
-static struct crypto_alg aes_cipher_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = ppc_aes_setkey,
- .cia_encrypt = ppc_aes_encrypt,
- .cia_decrypt = ppc_aes_decrypt
- }
- }
-};
-
static struct skcipher_alg aes_skcipher_algs[] = {
{
.base.cra_name = "ecb(aes)",
static int __init ppc_aes_mod_init(void)
{
- int err;
-
- err = crypto_register_alg(&aes_cipher_alg);
- if (err)
- return err;
-
- err = crypto_register_skciphers(aes_skcipher_algs,
- ARRAY_SIZE(aes_skcipher_algs));
- if (err)
- crypto_unregister_alg(&aes_cipher_alg);
- return err;
+ return crypto_register_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
}
static void __exit ppc_aes_mod_fini(void)
{
- crypto_unregister_alg(&aes_cipher_alg);
crypto_unregister_skciphers(aes_skcipher_algs,
ARRAY_SIZE(aes_skcipher_algs));
}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Key handling functions for PPC AES implementation
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#include <asm/ppc_asm.h>
-
-#ifdef __BIG_ENDIAN__
-#define LOAD_KEY(d, s, off) \
- lwz d,off(s);
-#else
-#define LOAD_KEY(d, s, off) \
- li r0,off; \
- lwbrx d,s,r0;
-#endif
-
-#define INITIALIZE_KEY \
- stwu r1,-32(r1); /* create stack frame */ \
- stw r14,8(r1); /* save registers */ \
- stw r15,12(r1); \
- stw r16,16(r1);
-
-#define FINALIZE_KEY \
- lwz r14,8(r1); /* restore registers */ \
- lwz r15,12(r1); \
- lwz r16,16(r1); \
- xor r5,r5,r5; /* clear sensitive data */ \
- xor r6,r6,r6; \
- xor r7,r7,r7; \
- xor r8,r8,r8; \
- xor r9,r9,r9; \
- xor r10,r10,r10; \
- xor r11,r11,r11; \
- xor r12,r12,r12; \
- addi r1,r1,32; /* cleanup stack */
-
-#define LS_BOX(r, t1, t2) \
- lis t2,PPC_AES_4K_ENCTAB@h; \
- ori t2,t2,PPC_AES_4K_ENCTAB@l; \
- rlwimi t2,r,4,20,27; \
- lbz t1,8(t2); \
- rlwimi r,t1,0,24,31; \
- rlwimi t2,r,28,20,27; \
- lbz t1,8(t2); \
- rlwimi r,t1,8,16,23; \
- rlwimi t2,r,20,20,27; \
- lbz t1,8(t2); \
- rlwimi r,t1,16,8,15; \
- rlwimi t2,r,12,20,27; \
- lbz t1,8(t2); \
- rlwimi r,t1,24,0,7;
-
-#define GF8_MUL(out, in, t1, t2) \
- lis t1,0x8080; /* multiplication in GF8 */ \
- ori t1,t1,0x8080; \
- and t1,t1,in; \
- srwi t1,t1,7; \
- mulli t1,t1,0x1b; \
- lis t2,0x7f7f; \
- ori t2,t2,0x7f7f; \
- and t2,t2,in; \
- slwi t2,t2,1; \
- xor out,t1,t2;
-
-/*
- * ppc_expand_key_128(u32 *key_enc, const u8 *key)
- *
- * Expand 128 bit key into 176 bytes encryption key. It consists of
- * key itself plus 10 rounds with 16 bytes each
- *
- */
-_GLOBAL(ppc_expand_key_128)
- INITIALIZE_KEY
- LOAD_KEY(r5,r4,0)
- LOAD_KEY(r6,r4,4)
- LOAD_KEY(r7,r4,8)
- LOAD_KEY(r8,r4,12)
- stw r5,0(r3) /* key[0..3] = input data */
- stw r6,4(r3)
- stw r7,8(r3)
- stw r8,12(r3)
- li r16,10 /* 10 expansion rounds */
- lis r0,0x0100 /* RCO(1) */
-ppc_expand_128_loop:
- addi r3,r3,16
- mr r14,r8 /* apply LS_BOX to 4th temp */
- rotlwi r14,r14,8
- LS_BOX(r14, r15, r4)
- xor r14,r14,r0
- xor r5,r5,r14 /* xor next 4 keys */
- xor r6,r6,r5
- xor r7,r7,r6
- xor r8,r8,r7
- stw r5,0(r3) /* store next 4 keys */
- stw r6,4(r3)
- stw r7,8(r3)
- stw r8,12(r3)
- GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
- subi r16,r16,1
- cmpwi r16,0
- bt eq,ppc_expand_128_end
- b ppc_expand_128_loop
-ppc_expand_128_end:
- FINALIZE_KEY
- blr
-
-/*
- * ppc_expand_key_192(u32 *key_enc, const u8 *key)
- *
- * Expand 192 bit key into 208 bytes encryption key. It consists of key
- * itself plus 12 rounds with 16 bytes each
- *
- */
-_GLOBAL(ppc_expand_key_192)
- INITIALIZE_KEY
- LOAD_KEY(r5,r4,0)
- LOAD_KEY(r6,r4,4)
- LOAD_KEY(r7,r4,8)
- LOAD_KEY(r8,r4,12)
- LOAD_KEY(r9,r4,16)
- LOAD_KEY(r10,r4,20)
- stw r5,0(r3)
- stw r6,4(r3)
- stw r7,8(r3)
- stw r8,12(r3)
- stw r9,16(r3)
- stw r10,20(r3)
- li r16,8 /* 8 expansion rounds */
- lis r0,0x0100 /* RCO(1) */
-ppc_expand_192_loop:
- addi r3,r3,24
- mr r14,r10 /* apply LS_BOX to 6th temp */
- rotlwi r14,r14,8
- LS_BOX(r14, r15, r4)
- xor r14,r14,r0
- xor r5,r5,r14 /* xor next 6 keys */
- xor r6,r6,r5
- xor r7,r7,r6
- xor r8,r8,r7
- xor r9,r9,r8
- xor r10,r10,r9
- stw r5,0(r3)
- stw r6,4(r3)
- stw r7,8(r3)
- stw r8,12(r3)
- subi r16,r16,1
- cmpwi r16,0 /* last round early kick out */
- bt eq,ppc_expand_192_end
- stw r9,16(r3)
- stw r10,20(r3)
- GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
- b ppc_expand_192_loop
-ppc_expand_192_end:
- FINALIZE_KEY
- blr
-
-/*
- * ppc_expand_key_256(u32 *key_enc, const u8 *key)
- *
- * Expand 256 bit key into 240 bytes encryption key. It consists of key
- * itself plus 14 rounds with 16 bytes each
- *
- */
-_GLOBAL(ppc_expand_key_256)
- INITIALIZE_KEY
- LOAD_KEY(r5,r4,0)
- LOAD_KEY(r6,r4,4)
- LOAD_KEY(r7,r4,8)
- LOAD_KEY(r8,r4,12)
- LOAD_KEY(r9,r4,16)
- LOAD_KEY(r10,r4,20)
- LOAD_KEY(r11,r4,24)
- LOAD_KEY(r12,r4,28)
- stw r5,0(r3)
- stw r6,4(r3)
- stw r7,8(r3)
- stw r8,12(r3)
- stw r9,16(r3)
- stw r10,20(r3)
- stw r11,24(r3)
- stw r12,28(r3)
- li r16,7 /* 7 expansion rounds */
- lis r0,0x0100 /* RCO(1) */
-ppc_expand_256_loop:
- addi r3,r3,32
- mr r14,r12 /* apply LS_BOX to 8th temp */
- rotlwi r14,r14,8
- LS_BOX(r14, r15, r4)
- xor r14,r14,r0
- xor r5,r5,r14 /* xor 4 keys */
- xor r6,r6,r5
- xor r7,r7,r6
- xor r8,r8,r7
- mr r14,r8
- LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
- xor r9,r9,r14 /* xor 4 keys */
- xor r10,r10,r9
- xor r11,r11,r10
- xor r12,r12,r11
- stw r5,0(r3)
- stw r6,4(r3)
- stw r7,8(r3)
- stw r8,12(r3)
- subi r16,r16,1
- cmpwi r16,0 /* last round early kick out */
- bt eq,ppc_expand_256_end
- stw r9,16(r3)
- stw r10,20(r3)
- stw r11,24(r3)
- stw r12,28(r3)
- GF8_MUL(r0, r0, r4, r14)
- b ppc_expand_256_loop
-ppc_expand_256_end:
- FINALIZE_KEY
- blr
-
-/*
- * ppc_generate_decrypt_key: derive decryption key from encryption key
- * number of bytes to handle are calculated from length of key (16/24/32)
- *
- */
-_GLOBAL(ppc_generate_decrypt_key)
- addi r6,r5,24
- slwi r6,r6,2
- lwzx r7,r4,r6 /* first/last 4 words are same */
- stw r7,0(r3)
- lwz r7,0(r4)
- stwx r7,r3,r6
- addi r6,r6,4
- lwzx r7,r4,r6
- stw r7,4(r3)
- lwz r7,4(r4)
- stwx r7,r3,r6
- addi r6,r6,4
- lwzx r7,r4,r6
- stw r7,8(r3)
- lwz r7,8(r4)
- stwx r7,r3,r6
- addi r6,r6,4
- lwzx r7,r4,r6
- stw r7,12(r3)
- lwz r7,12(r4)
- stwx r7,r3,r6
- addi r3,r3,16
- add r4,r4,r6
- subi r4,r4,28
- addi r5,r5,20
- srwi r5,r5,2
-ppc_generate_decrypt_block:
- li r6,4
- mtctr r6
-ppc_generate_decrypt_word:
- lwz r6,0(r4)
- GF8_MUL(r7, r6, r0, r7)
- GF8_MUL(r8, r7, r0, r8)
- GF8_MUL(r9, r8, r0, r9)
- xor r10,r9,r6
- xor r11,r7,r8
- xor r11,r11,r9
- xor r12,r7,r10
- rotrwi r12,r12,24
- xor r11,r11,r12
- xor r12,r8,r10
- rotrwi r12,r12,16
- xor r11,r11,r12
- rotrwi r12,r10,8
- xor r11,r11,r12
- stw r11,0(r3)
- addi r3,r3,4
- addi r4,r4,4
- bdnz ppc_generate_decrypt_word
- subi r4,r4,32
- subi r5,r5,1
- cmpwi r5,0
- bt gt,ppc_generate_decrypt_block
- blr
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#include <asm/ppc_asm.h>
-#include "aes-spe-regs.h"
-
-#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
-
-#define LOAD_DATA(reg, off) \
- lwz reg,off(rSP); /* load with offset */
-#define SAVE_DATA(reg, off) \
- stw reg,off(rDP); /* save with offset */
-#define NEXT_BLOCK \
- addi rSP,rSP,16; /* increment pointers per bloc */ \
- addi rDP,rDP,16;
-#define LOAD_IV(reg, off) \
- lwz reg,off(rIP); /* IV loading with offset */
-#define SAVE_IV(reg, off) \
- stw reg,off(rIP); /* IV saving with offset */
-#define START_IV /* nothing to reset */
-#define CBC_DEC 16 /* CBC decrement per block */
-#define CTR_DEC 1 /* CTR decrement one byte */
-
-#else /* Macros for little endian */
-
-#define LOAD_DATA(reg, off) \
- lwbrx reg,0,rSP; /* load reversed */ \
- addi rSP,rSP,4; /* and increment pointer */
-#define SAVE_DATA(reg, off) \
- stwbrx reg,0,rDP; /* save reversed */ \
- addi rDP,rDP,4; /* and increment pointer */
-#define NEXT_BLOCK /* nothing todo */
-#define LOAD_IV(reg, off) \
- lwbrx reg,0,rIP; /* load reversed */ \
- addi rIP,rIP,4; /* and increment pointer */
-#define SAVE_IV(reg, off) \
- stwbrx reg,0,rIP; /* load reversed */ \
- addi rIP,rIP,4; /* and increment pointer */
-#define START_IV \
- subi rIP,rIP,16; /* must reset pointer */
-#define CBC_DEC 32 /* 2 blocks because of incs */
-#define CTR_DEC 17 /* 1 block because of incs */
-
-#endif
-
-#define SAVE_0_REGS
-#define LOAD_0_REGS
-
-#define SAVE_4_REGS \
- stw rI0,96(r1); /* save 32 bit registers */ \
- stw rI1,100(r1); \
- stw rI2,104(r1); \
- stw rI3,108(r1);
-
-#define LOAD_4_REGS \
- lwz rI0,96(r1); /* restore 32 bit registers */ \
- lwz rI1,100(r1); \
- lwz rI2,104(r1); \
- lwz rI3,108(r1);
-
-#define SAVE_8_REGS \
- SAVE_4_REGS \
- stw rG0,112(r1); /* save 32 bit registers */ \
- stw rG1,116(r1); \
- stw rG2,120(r1); \
- stw rG3,124(r1);
-
-#define LOAD_8_REGS \
- LOAD_4_REGS \
- lwz rG0,112(r1); /* restore 32 bit registers */ \
- lwz rG1,116(r1); \
- lwz rG2,120(r1); \
- lwz rG3,124(r1);
-
-#define INITIALIZE_CRYPT(tab,nr32bitregs) \
- mflr r0; \
- stwu r1,-160(r1); /* create stack frame */ \
- lis rT0,tab@h; /* en-/decryption table pointer */ \
- stw r0,8(r1); /* save link register */ \
- ori rT0,rT0,tab@l; \
- evstdw r14,16(r1); \
- mr rKS,rKP; \
- evstdw r15,24(r1); /* We must save non volatile */ \
- evstdw r16,32(r1); /* registers. Take the chance */ \
- evstdw r17,40(r1); /* and save the SPE part too */ \
- evstdw r18,48(r1); \
- evstdw r19,56(r1); \
- evstdw r20,64(r1); \
- evstdw r21,72(r1); \
- evstdw r22,80(r1); \
- evstdw r23,88(r1); \
- SAVE_##nr32bitregs##_REGS
-
-#define FINALIZE_CRYPT(nr32bitregs) \
- lwz r0,8(r1); \
- evldw r14,16(r1); /* restore SPE registers */ \
- evldw r15,24(r1); \
- evldw r16,32(r1); \
- evldw r17,40(r1); \
- evldw r18,48(r1); \
- evldw r19,56(r1); \
- evldw r20,64(r1); \
- evldw r21,72(r1); \
- evldw r22,80(r1); \
- evldw r23,88(r1); \
- LOAD_##nr32bitregs##_REGS \
- mtlr r0; /* restore link register */ \
- xor r0,r0,r0; \
- stw r0,16(r1); /* delete sensitive data */ \
- stw r0,24(r1); /* that we might have pushed */ \
- stw r0,32(r1); /* from other context that runs */ \
- stw r0,40(r1); /* the same code */ \
- stw r0,48(r1); \
- stw r0,56(r1); \
- stw r0,64(r1); \
- stw r0,72(r1); \
- stw r0,80(r1); \
- stw r0,88(r1); \
- addi r1,r1,160; /* cleanup stack frame */
-
-#define ENDIAN_SWAP(t0, t1, s0, s1) \
- rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
- rotrwi t1,s1,8; \
- rlwimi t0,s0,8,8,15; \
- rlwimi t1,s1,8,8,15; \
- rlwimi t0,s0,8,24,31; \
- rlwimi t1,s1,8,24,31;
-
-#define GF128_MUL(d0, d1, d2, d3, t0) \
- li t0,0x87; /* multiplication in GF128 */ \
- cmpwi d3,-1; \
- iselgt t0,0,t0; \
- rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
- rotlwi d3,d3,1; \
- rlwimi d2,d1,0,0,0; \
- rotlwi d2,d2,1; \
- rlwimi d1,d0,0,0,0; \
- slwi d0,d0,1; /* shift left 128 bit */ \
- rotlwi d1,d1,1; \
- xor d0,d0,t0;
-
-#define START_KEY(d0, d1, d2, d3) \
- lwz rW0,0(rKP); \
- mtctr rRR; \
- lwz rW1,4(rKP); \
- lwz rW2,8(rKP); \
- lwz rW3,12(rKP); \
- xor rD0,d0,rW0; \
- xor rD1,d1,rW1; \
- xor rD2,d2,rW2; \
- xor rD3,d3,rW3;
-
-/*
- * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
- * u32 rounds)
- *
- * called from glue layer to encrypt a single 16 byte block
- * round values are AES128 = 4, AES192 = 5, AES256 = 6
- *
- */
-_GLOBAL(ppc_encrypt_aes)
- INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
- LOAD_DATA(rD0, 0)
- LOAD_DATA(rD1, 4)
- LOAD_DATA(rD2, 8)
- LOAD_DATA(rD3, 12)
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_encrypt_block
- xor rD0,rD0,rW0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rW1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rW2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rW3
- SAVE_DATA(rD3, 12)
- FINALIZE_CRYPT(0)
- blr
-
-/*
- * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
- * u32 rounds)
- *
- * called from glue layer to decrypt a single 16 byte block
- * round values are AES128 = 4, AES192 = 5, AES256 = 6
- *
- */
-_GLOBAL(ppc_decrypt_aes)
- INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
- LOAD_DATA(rD0, 0)
- addi rT1,rT0,4096
- LOAD_DATA(rD1, 4)
- LOAD_DATA(rD2, 8)
- LOAD_DATA(rD3, 12)
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_decrypt_block
- xor rD0,rD0,rW0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rW1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rW2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rW3
- SAVE_DATA(rD3, 12)
- FINALIZE_CRYPT(0)
- blr
-
-/*
- * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
- * u32 rounds, u32 bytes);
- *
- * called from glue layer to encrypt multiple blocks via ECB
- * Bytes must be larger or equal 16 and only whole blocks are
- * processed. round values are AES128 = 4, AES192 = 5 and
- * AES256 = 6
- *
- */
-_GLOBAL(ppc_encrypt_ecb)
- INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
-ppc_encrypt_ecb_loop:
- LOAD_DATA(rD0, 0)
- mr rKP,rKS
- LOAD_DATA(rD1, 4)
- subi rLN,rLN,16
- LOAD_DATA(rD2, 8)
- cmpwi rLN,15
- LOAD_DATA(rD3, 12)
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_encrypt_block
- xor rD0,rD0,rW0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rW1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rW2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rW3
- SAVE_DATA(rD3, 12)
- NEXT_BLOCK
- bt gt,ppc_encrypt_ecb_loop
- FINALIZE_CRYPT(0)
- blr
-
-/*
- * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
- * u32 rounds, u32 bytes);
- *
- * called from glue layer to decrypt multiple blocks via ECB
- * Bytes must be larger or equal 16 and only whole blocks are
- * processed. round values are AES128 = 4, AES192 = 5 and
- * AES256 = 6
- *
- */
-_GLOBAL(ppc_decrypt_ecb)
- INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
- addi rT1,rT0,4096
-ppc_decrypt_ecb_loop:
- LOAD_DATA(rD0, 0)
- mr rKP,rKS
- LOAD_DATA(rD1, 4)
- subi rLN,rLN,16
- LOAD_DATA(rD2, 8)
- cmpwi rLN,15
- LOAD_DATA(rD3, 12)
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_decrypt_block
- xor rD0,rD0,rW0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rW1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rW2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rW3
- SAVE_DATA(rD3, 12)
- NEXT_BLOCK
- bt gt,ppc_decrypt_ecb_loop
- FINALIZE_CRYPT(0)
- blr
-
-/*
- * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
- * 32 rounds, u32 bytes, u8 *iv);
- *
- * called from glue layer to encrypt multiple blocks via CBC
- * Bytes must be larger or equal 16 and only whole blocks are
- * processed. round values are AES128 = 4, AES192 = 5 and
- * AES256 = 6
- *
- */
-_GLOBAL(ppc_encrypt_cbc)
- INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
- LOAD_IV(rI0, 0)
- LOAD_IV(rI1, 4)
- LOAD_IV(rI2, 8)
- LOAD_IV(rI3, 12)
-ppc_encrypt_cbc_loop:
- LOAD_DATA(rD0, 0)
- mr rKP,rKS
- LOAD_DATA(rD1, 4)
- subi rLN,rLN,16
- LOAD_DATA(rD2, 8)
- cmpwi rLN,15
- LOAD_DATA(rD3, 12)
- xor rD0,rD0,rI0
- xor rD1,rD1,rI1
- xor rD2,rD2,rI2
- xor rD3,rD3,rI3
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_encrypt_block
- xor rI0,rD0,rW0
- SAVE_DATA(rI0, 0)
- xor rI1,rD1,rW1
- SAVE_DATA(rI1, 4)
- xor rI2,rD2,rW2
- SAVE_DATA(rI2, 8)
- xor rI3,rD3,rW3
- SAVE_DATA(rI3, 12)
- NEXT_BLOCK
- bt gt,ppc_encrypt_cbc_loop
- START_IV
- SAVE_IV(rI0, 0)
- SAVE_IV(rI1, 4)
- SAVE_IV(rI2, 8)
- SAVE_IV(rI3, 12)
- FINALIZE_CRYPT(4)
- blr
-
-/*
- * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
- * u32 rounds, u32 bytes, u8 *iv);
- *
- * called from glue layer to decrypt multiple blocks via CBC
- * round values are AES128 = 4, AES192 = 5, AES256 = 6
- *
- */
-_GLOBAL(ppc_decrypt_cbc)
- INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
- li rT1,15
- LOAD_IV(rI0, 0)
- andc rLN,rLN,rT1
- LOAD_IV(rI1, 4)
- subi rLN,rLN,16
- LOAD_IV(rI2, 8)
- add rSP,rSP,rLN /* reverse processing */
- LOAD_IV(rI3, 12)
- add rDP,rDP,rLN
- LOAD_DATA(rD0, 0)
- addi rT1,rT0,4096
- LOAD_DATA(rD1, 4)
- LOAD_DATA(rD2, 8)
- LOAD_DATA(rD3, 12)
- START_IV
- SAVE_IV(rD0, 0)
- SAVE_IV(rD1, 4)
- SAVE_IV(rD2, 8)
- cmpwi rLN,16
- SAVE_IV(rD3, 12)
- bt lt,ppc_decrypt_cbc_end
-ppc_decrypt_cbc_loop:
- mr rKP,rKS
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_decrypt_block
- subi rLN,rLN,16
- subi rSP,rSP,CBC_DEC
- xor rW0,rD0,rW0
- LOAD_DATA(rD0, 0)
- xor rW1,rD1,rW1
- LOAD_DATA(rD1, 4)
- xor rW2,rD2,rW2
- LOAD_DATA(rD2, 8)
- xor rW3,rD3,rW3
- LOAD_DATA(rD3, 12)
- xor rW0,rW0,rD0
- SAVE_DATA(rW0, 0)
- xor rW1,rW1,rD1
- SAVE_DATA(rW1, 4)
- xor rW2,rW2,rD2
- SAVE_DATA(rW2, 8)
- xor rW3,rW3,rD3
- SAVE_DATA(rW3, 12)
- cmpwi rLN,15
- subi rDP,rDP,CBC_DEC
- bt gt,ppc_decrypt_cbc_loop
-ppc_decrypt_cbc_end:
- mr rKP,rKS
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_decrypt_block
- xor rW0,rW0,rD0
- xor rW1,rW1,rD1
- xor rW2,rW2,rD2
- xor rW3,rW3,rD3
- xor rW0,rW0,rI0 /* decrypt with initial IV */
- SAVE_DATA(rW0, 0)
- xor rW1,rW1,rI1
- SAVE_DATA(rW1, 4)
- xor rW2,rW2,rI2
- SAVE_DATA(rW2, 8)
- xor rW3,rW3,rI3
- SAVE_DATA(rW3, 12)
- FINALIZE_CRYPT(4)
- blr
-
-/*
- * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
- * u32 rounds, u32 bytes, u8 *iv);
- *
- * called from glue layer to encrypt/decrypt multiple blocks
- * via CTR. Number of bytes does not need to be a multiple of
- * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
- *
- */
-_GLOBAL(ppc_crypt_ctr)
- INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
- LOAD_IV(rI0, 0)
- LOAD_IV(rI1, 4)
- LOAD_IV(rI2, 8)
- cmpwi rLN,16
- LOAD_IV(rI3, 12)
- START_IV
- bt lt,ppc_crypt_ctr_partial
-ppc_crypt_ctr_loop:
- mr rKP,rKS
- START_KEY(rI0, rI1, rI2, rI3)
- bl ppc_encrypt_block
- xor rW0,rD0,rW0
- xor rW1,rD1,rW1
- xor rW2,rD2,rW2
- xor rW3,rD3,rW3
- LOAD_DATA(rD0, 0)
- subi rLN,rLN,16
- LOAD_DATA(rD1, 4)
- LOAD_DATA(rD2, 8)
- LOAD_DATA(rD3, 12)
- xor rD0,rD0,rW0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rW1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rW2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rW3
- SAVE_DATA(rD3, 12)
- addic rI3,rI3,1 /* increase counter */
- addze rI2,rI2
- addze rI1,rI1
- addze rI0,rI0
- NEXT_BLOCK
- cmpwi rLN,15
- bt gt,ppc_crypt_ctr_loop
-ppc_crypt_ctr_partial:
- cmpwi rLN,0
- bt eq,ppc_crypt_ctr_end
- mr rKP,rKS
- START_KEY(rI0, rI1, rI2, rI3)
- bl ppc_encrypt_block
- xor rW0,rD0,rW0
- SAVE_IV(rW0, 0)
- xor rW1,rD1,rW1
- SAVE_IV(rW1, 4)
- xor rW2,rD2,rW2
- SAVE_IV(rW2, 8)
- xor rW3,rD3,rW3
- SAVE_IV(rW3, 12)
- mtctr rLN
- subi rIP,rIP,CTR_DEC
- subi rSP,rSP,1
- subi rDP,rDP,1
-ppc_crypt_ctr_xorbyte:
- lbzu rW4,1(rIP) /* bytewise xor for partial block */
- lbzu rW5,1(rSP)
- xor rW4,rW4,rW5
- stbu rW4,1(rDP)
- bdnz ppc_crypt_ctr_xorbyte
- subf rIP,rLN,rIP
- addi rIP,rIP,1
- addic rI3,rI3,1
- addze rI2,rI2
- addze rI1,rI1
- addze rI0,rI0
-ppc_crypt_ctr_end:
- SAVE_IV(rI0, 0)
- SAVE_IV(rI1, 4)
- SAVE_IV(rI2, 8)
- SAVE_IV(rI3, 12)
- FINALIZE_CRYPT(4)
- blr
-
-/*
- * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
- * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
- *
- * called from glue layer to encrypt multiple blocks via XTS
- * If key_twk is given, the initial IV encryption will be
- * processed too. Round values are AES128 = 4, AES192 = 5,
- * AES256 = 6
- *
- */
-_GLOBAL(ppc_encrypt_xts)
- INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
- LOAD_IV(rI0, 0)
- LOAD_IV(rI1, 4)
- LOAD_IV(rI2, 8)
- cmpwi rKT,0
- LOAD_IV(rI3, 12)
- bt eq,ppc_encrypt_xts_notweak
- mr rKP,rKT
- START_KEY(rI0, rI1, rI2, rI3)
- bl ppc_encrypt_block
- xor rI0,rD0,rW0
- xor rI1,rD1,rW1
- xor rI2,rD2,rW2
- xor rI3,rD3,rW3
-ppc_encrypt_xts_notweak:
- ENDIAN_SWAP(rG0, rG1, rI0, rI1)
- ENDIAN_SWAP(rG2, rG3, rI2, rI3)
-ppc_encrypt_xts_loop:
- LOAD_DATA(rD0, 0)
- mr rKP,rKS
- LOAD_DATA(rD1, 4)
- subi rLN,rLN,16
- LOAD_DATA(rD2, 8)
- LOAD_DATA(rD3, 12)
- xor rD0,rD0,rI0
- xor rD1,rD1,rI1
- xor rD2,rD2,rI2
- xor rD3,rD3,rI3
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_encrypt_block
- xor rD0,rD0,rW0
- xor rD1,rD1,rW1
- xor rD2,rD2,rW2
- xor rD3,rD3,rW3
- xor rD0,rD0,rI0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rI1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rI2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rI3
- SAVE_DATA(rD3, 12)
- GF128_MUL(rG0, rG1, rG2, rG3, rW0)
- ENDIAN_SWAP(rI0, rI1, rG0, rG1)
- ENDIAN_SWAP(rI2, rI3, rG2, rG3)
- cmpwi rLN,0
- NEXT_BLOCK
- bt gt,ppc_encrypt_xts_loop
- START_IV
- SAVE_IV(rI0, 0)
- SAVE_IV(rI1, 4)
- SAVE_IV(rI2, 8)
- SAVE_IV(rI3, 12)
- FINALIZE_CRYPT(8)
- blr
-
-/*
- * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
- * u32 rounds, u32 blocks, u8 *iv, u32 *key_twk);
- *
- * called from glue layer to decrypt multiple blocks via XTS
- * If key_twk is given, the initial IV encryption will be
- * processed too. Round values are AES128 = 4, AES192 = 5,
- * AES256 = 6
- *
- */
-_GLOBAL(ppc_decrypt_xts)
- INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
- LOAD_IV(rI0, 0)
- addi rT1,rT0,4096
- LOAD_IV(rI1, 4)
- LOAD_IV(rI2, 8)
- cmpwi rKT,0
- LOAD_IV(rI3, 12)
- bt eq,ppc_decrypt_xts_notweak
- subi rT0,rT0,4096
- mr rKP,rKT
- START_KEY(rI0, rI1, rI2, rI3)
- bl ppc_encrypt_block
- xor rI0,rD0,rW0
- xor rI1,rD1,rW1
- xor rI2,rD2,rW2
- xor rI3,rD3,rW3
- addi rT0,rT0,4096
-ppc_decrypt_xts_notweak:
- ENDIAN_SWAP(rG0, rG1, rI0, rI1)
- ENDIAN_SWAP(rG2, rG3, rI2, rI3)
-ppc_decrypt_xts_loop:
- LOAD_DATA(rD0, 0)
- mr rKP,rKS
- LOAD_DATA(rD1, 4)
- subi rLN,rLN,16
- LOAD_DATA(rD2, 8)
- LOAD_DATA(rD3, 12)
- xor rD0,rD0,rI0
- xor rD1,rD1,rI1
- xor rD2,rD2,rI2
- xor rD3,rD3,rI3
- START_KEY(rD0, rD1, rD2, rD3)
- bl ppc_decrypt_block
- xor rD0,rD0,rW0
- xor rD1,rD1,rW1
- xor rD2,rD2,rW2
- xor rD3,rD3,rW3
- xor rD0,rD0,rI0
- SAVE_DATA(rD0, 0)
- xor rD1,rD1,rI1
- SAVE_DATA(rD1, 4)
- xor rD2,rD2,rI2
- SAVE_DATA(rD2, 8)
- xor rD3,rD3,rI3
- SAVE_DATA(rD3, 12)
- GF128_MUL(rG0, rG1, rG2, rG3, rW0)
- ENDIAN_SWAP(rI0, rI1, rG0, rG1)
- ENDIAN_SWAP(rI2, rI3, rG2, rG3)
- cmpwi rLN,0
- NEXT_BLOCK
- bt gt,ppc_decrypt_xts_loop
- START_IV
- SAVE_IV(rI0, 0)
- SAVE_IV(rI1, 4)
- SAVE_IV(rI2, 8)
- SAVE_IV(rI3, 12)
- FINALIZE_CRYPT(8)
- blr
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Common registers for PPC AES implementation
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#define rKS r0 /* copy of en-/decryption key pointer */
-#define rDP r3 /* destination pointer */
-#define rSP r4 /* source pointer */
-#define rKP r5 /* pointer to en-/decryption key pointer */
-#define rRR r6 /* en-/decryption rounds */
-#define rLN r7 /* length of data to be processed */
-#define rIP r8 /* potiner to IV (CBC/CTR/XTS modes) */
-#define rKT r9 /* pointer to tweak key (XTS mode) */
-#define rT0 r11 /* pointers to en-/decryption tables */
-#define rT1 r10
-#define rD0 r9 /* data */
-#define rD1 r14
-#define rD2 r12
-#define rD3 r15
-#define rW0 r16 /* working registers */
-#define rW1 r17
-#define rW2 r18
-#define rW3 r19
-#define rW4 r20
-#define rW5 r21
-#define rW6 r22
-#define rW7 r23
-#define rI0 r24 /* IV */
-#define rI1 r25
-#define rI2 r26
-#define rI3 r27
-#define rG0 r28 /* endian reversed tweak (XTS mode) */
-#define rG1 r29
-#define rG2 r30
-#define rG3 r31
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * 4K AES tables for PPC AES implementation
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-/*
- * These big endian AES encryption/decryption tables have been taken from
- * crypto/aes_generic.c and are designed to be simply accessed by a combination
- * of rlwimi/lwz instructions with a minimum of table registers (usually only
- * one required). Thus they are aligned to 4K. The locality of rotated values
- * is derived from the reduced offsets that are available in the SPE load
- * instructions. E.g. evldw, evlwwsplat, ...
- *
- * For the safety-conscious it has to be noted that they might be vulnerable
- * to cache timing attacks because of their size. Nevertheless in contrast to
- * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
- * This is a quite good tradeoff for low power devices (e.g. routers) without
- * dedicated encryption hardware where we usually have no multiuser
- * environment.
- *
- */
-
-#define R(a, b, c, d) \
- 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
-
-.data
-.align 12
-.globl PPC_AES_4K_ENCTAB
-PPC_AES_4K_ENCTAB:
-/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
- .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
- .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
- .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
- .long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
- .long R(60, 30, 30, 50), R(02, 01, 01, 03)
- .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
- .long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
- .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
- .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
- .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
- .long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
- .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
- .long R(41, ad, ad, ec), R(b3, d4, d4, 67)
- .long R(5f, a2, a2, fd), R(45, af, af, ea)
- .long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
- .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
- .long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
- .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
- .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
- .long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
- .long R(68, 34, 34, 5c), R(51, a5, a5, f4)
- .long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
- .long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
- .long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
- .long R(08, 04, 04, 0c), R(95, c7, c7, 52)
- .long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
- .long R(30, 18, 18, 28), R(37, 96, 96, a1)
- .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
- .long R(0e, 07, 07, 09), R(24, 12, 12, 36)
- .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
- .long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
- .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
- .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
- .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
- .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
- .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
- .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
- .long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
- .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
- .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
- .long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
- .long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
- .long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
- .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
- .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
- .long R(67, be, be, d9), R(72, 39, 39, 4b)
- .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
- .long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
- .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
- .long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
- .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
- .long R(66, 33, 33, 55), R(11, 85, 85, 94)
- .long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
- .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
- .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
- .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
- .long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
- .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
- .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
- .long R(70, 38, 38, 48), R(f1, f5, f5, 04)
- .long R(63, bc, bc, df), R(77, b6, b6, c1)
- .long R(af, da, da, 75), R(42, 21, 21, 63)
- .long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
- .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
- .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
- .long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
- .long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
- .long R(88, 44, 44, cc), R(2e, 17, 17, 39)
- .long R(93, c4, c4, 57), R(55, a7, a7, f2)
- .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
- .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
- .long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
- .long R(c0, 60, 60, a0), R(19, 81, 81, 98)
- .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
- .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
- .long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
- .long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
- .long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
- .long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
- .long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
- .long R(db, e0, e0, 3b), R(64, 32, 32, 56)
- .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
- .long R(92, 49, 49, db), R(0c, 06, 06, 0a)
- .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
- .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
- .long R(43, ac, ac, ef), R(c4, 62, 62, a6)
- .long R(39, 91, 91, a8), R(31, 95, 95, a4)
- .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
- .long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
- .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
- .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
- .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
- .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
- .long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
- .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
- .long R(47, ae, ae, e9), R(10, 08, 08, 18)
- .long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
- .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
- .long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
- .long R(73, b4, b4, c7), R(97, c6, c6, 51)
- .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
- .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
- .long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
- .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
- .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
- .long R(71, b5, b5, c4), R(cc, 66, 66, aa)
- .long R(90, 48, 48, d8), R(06, 03, 03, 05)
- .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
- .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
- .long R(ae, 57, 57, f9), R(69, b9, b9, d0)
- .long R(17, 86, 86, 91), R(99, c1, c1, 58)
- .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
- .long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
- .long R(2b, 98, 98, b3), R(22, 11, 11, 33)
- .long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
- .long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
- .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
- .long R(15, 87, 87, 92), R(c9, e9, e9, 20)
- .long R(87, ce, ce, 49), R(aa, 55, 55, ff)
- .long R(50, 28, 28, 78), R(a5, df, df, 7a)
- .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
- .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
- .long R(65, bf, bf, da), R(d7, e6, e6, 31)
- .long R(84, 42, 42, c6), R(d0, 68, 68, b8)
- .long R(82, 41, 41, c3), R(29, 99, 99, b0)
- .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
- .long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
- .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
-.globl PPC_AES_4K_DECTAB
-PPC_AES_4K_DECTAB:
-/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
- .long R(51, f4, a7, 50), R(7e, 41, 65, 53)
- .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
- .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
- .long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
- .long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
- .long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
- .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
- .long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
- .long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
- .long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
- .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
- .long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
- .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
- .long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
- .long R(d4, be, 83, 2d), R(58, 74, 21, d3)
- .long R(49, e0, 69, 29), R(8e, c9, c8, 44)
- .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
- .long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
- .long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
- .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
- .long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
- .long R(97, 51, 33, 60), R(62, 53, 7f, 45)
- .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
- .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
- .long R(70, 48, 68, 58), R(8f, 45, fd, 19)
- .long R(94, de, 6c, 87), R(52, 7b, f8, b7)
- .long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
- .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
- .long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
- .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
- .long R(30, 28, 87, f2), R(23, bf, a5, b2)
- .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
- .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
- .long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
- .long R(65, da, f4, cd), R(06, 05, be, d5)
- .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
- .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
- .long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
- .long R(0b, 83, ec, 39), R(40, 60, ef, aa)
- .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
- .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
- .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
- .long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
- .long R(04, 06, d4, 6f), R(60, 50, 15, ff)
- .long R(19, 98, fb, 24), R(d6, bd, e9, 97)
- .long R(89, 40, 43, cc), R(67, d9, 9e, 77)
- .long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
- .long R(e7, 19, 5b, 38), R(79, c8, ee, db)
- .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
- .long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
- .long R(09, 80, 86, 83), R(32, 2b, ed, 48)
- .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
- .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
- .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
- .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
- .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
- .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
- .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
- .long R(80, c0, c5, 4f), R(61, dc, 20, a2)
- .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
- .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
- .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
- .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
- .long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
- .long R(57, f1, 19, 85), R(af, 75, 07, 4c)
- .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
- .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
- .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
- .long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
- .long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
- .long R(d7, 31, dc, ca), R(42, 63, 85, 10)
- .long R(13, 97, 22, 40), R(84, c6, 11, 20)
- .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
- .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
- .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
- .long R(0d, 86, 52, ec), R(77, c1, e3, d0)
- .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
- .long R(11, 94, 48, fa), R(47, e9, 64, 22)
- .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
- .long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
- .long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
- .long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
- .long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
- .long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
- .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
- .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
- .long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
- .long R(2e, 39, f7, 5e), R(82, c3, af, f5)
- .long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
- .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
- .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
- .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
- .long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
- .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
- .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
- .long R(21, bc, cf, 08), R(ef, 15, e8, e6)
- .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
- .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
- .long R(31, a4, b2, af), R(2a, 3f, 23, 31)
- .long R(c6, a5, 94, 30), R(35, a2, 66, c0)
- .long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
- .long R(e0, 90, d0, b0), R(33, a7, d8, 15)
- .long R(f1, 04, 98, 4a), R(41, ec, da, f7)
- .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
- .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
- .long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
- .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
- .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
- .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
- .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
- .long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
- .long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
- .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
- .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
- .long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
- .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
- .long R(9c, d2, df, 59), R(55, f2, 73, 3f)
- .long R(18, 14, ce, 79), R(73, c7, 37, bf)
- .long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
- .long R(df, 3d, 6f, 14), R(78, 44, db, 86)
- .long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
- .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
- .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
- .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
- .long R(39, a8, 01, 71), R(08, 0c, b3, de)
- .long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
- .long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
- .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
-.globl PPC_AES_4K_DECTAB2
-PPC_AES_4K_DECTAB2:
-/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
- .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
- .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
- .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
- .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
- .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
- .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
- .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
- .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
- .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
- .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
- .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
- .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
- .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
- .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
- .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
- .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
- .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
- .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
- .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
- .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
- .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
- .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
- .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
- .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
- .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
- .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
- .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
- .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
- .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
- .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
- .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
- .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
union aes_enckey_arch {
u32 rndkeys[AES_MAX_KEYLENGTH_U32];
+#ifdef CONFIG_CRYPTO_LIB_AES_ARCH
+#if defined(CONFIG_PPC) && defined(CONFIG_SPE)
+ /* Used unconditionally (when SPE AES code is enabled in kconfig) */
+ u32 spe_enc_key[AES_MAX_KEYLENGTH_U32] __aligned(8);
+#endif
+#endif /* CONFIG_CRYPTO_LIB_AES_ARCH */
};
union aes_invkey_arch {
u32 inv_rndkeys[AES_MAX_KEYLENGTH_U32];
+#ifdef CONFIG_CRYPTO_LIB_AES_ARCH
+#if defined(CONFIG_PPC) && defined(CONFIG_SPE)
+ /* Used unconditionally (when SPE AES code is enabled in kconfig) */
+ u32 spe_dec_key[AES_MAX_KEYLENGTH_U32] __aligned(8);
+#endif
+#endif /* CONFIG_CRYPTO_LIB_AES_ARCH */
};
/**
#ifdef CONFIG_ARM64
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len);
+#elif defined(CONFIG_PPC)
+void ppc_expand_key_128(u32 *key_enc, const u8 *key);
+void ppc_expand_key_192(u32 *key_enc, const u8 *key);
+void ppc_expand_key_256(u32 *key_enc, const u8 *key);
+void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc, unsigned int key_len);
+void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes);
+void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes);
+void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, u32 bytes,
+ u8 *iv);
+void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, u32 bytes,
+ u8 *iv);
+void ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, u32 bytes,
+ u8 *iv);
+void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, u32 bytes,
+ u8 *iv, u32 *key_twk);
+void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, u32 bytes,
+ u8 *iv, u32 *key_twk);
#endif
/**
depends on CRYPTO_LIB_AES && !UML && !KMSAN
default y if ARM
default y if ARM64
+ default y if PPC && SPE
config CRYPTO_LIB_AESCFB
tristate
libaes-$(CONFIG_KERNEL_MODE_NEON) += arm64/aes-ce-core.o
endif
+ifeq ($(CONFIG_PPC),y)
+ifeq ($(CONFIG_SPE),y)
+libaes-y += powerpc/aes-spe-core.o \
+ powerpc/aes-spe-keys.o \
+ powerpc/aes-spe-modes.o \
+ powerpc/aes-tab-4k.o
+endif
+endif # CONFIG_PPC
+
endif # CONFIG_CRYPTO_LIB_AES_ARCH
################################################################################
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Fast AES implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#define EAD(in, bpos) \
+ rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
+
+#define DAD(in, bpos) \
+ rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
+
+#define LWH(out, off) \
+ evlwwsplat out,off(rT0); /* load word high */
+
+#define LWL(out, off) \
+ lwz out,off(rT0); /* load word low */
+
+#define LBZ(out, tab, off) \
+ lbz out,off(tab); /* load byte */
+
+#define LAH(out, in, bpos, off) \
+ EAD(in, bpos) /* calc addr + load word high */ \
+ LWH(out, off)
+
+#define LAL(out, in, bpos, off) \
+ EAD(in, bpos) /* calc addr + load word low */ \
+ LWL(out, off)
+
+#define LAE(out, in, bpos) \
+ EAD(in, bpos) /* calc addr + load enc byte */ \
+ LBZ(out, rT0, 8)
+
+#define LBE(out) \
+ LBZ(out, rT0, 8) /* load enc byte */
+
+#define LAD(out, in, bpos) \
+ DAD(in, bpos) /* calc addr + load dec byte */ \
+ LBZ(out, rT1, 0)
+
+#define LBD(out) \
+ LBZ(out, rT1, 0)
+
+/*
+ * ppc_encrypt_block: The central encryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_encrypt_block)
+ LAH(rW4, rD1, 2, 4)
+ LAH(rW6, rD0, 3, 0)
+ LAH(rW3, rD0, 1, 8)
+ppc_encrypt_block_loop:
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,16(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 2)
+ evxor rW3,rW3,rW5
+ LWH(rW4, 4)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,32(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,40(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 2)
+ evxor rW3,rW3,rW5
+ LWH(rW4, 4)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ addi rKP,rKP,32
+ bdnz ppc_encrypt_block_loop
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,16(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW5
+ LBE(rW2)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 1)
+ evxor rD3,rD3,rW3
+ LBE(rW6)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 0)
+ evmergehi rD2,rD2,rD3
+ LBE(rW1)
+ LAE(rW0, rD3, 0)
+ LAE(rW1, rD0, 0)
+ LAE(rW4, rD2, 1)
+ LAE(rW5, rD3, 1)
+ LAE(rW3, rD2, 0)
+ LAE(rW7, rD1, 1)
+ rlwimi rW0,rW4,8,16,23
+ rlwimi rW1,rW5,8,16,23
+ LAE(rW4, rD1, 2)
+ LAE(rW5, rD2, 2)
+ rlwimi rW2,rW6,8,16,23
+ rlwimi rW3,rW7,8,16,23
+ LAE(rW6, rD3, 2)
+ LAE(rW7, rD0, 2)
+ rlwimi rW0,rW4,16,8,15
+ rlwimi rW1,rW5,16,8,15
+ LAE(rW4, rD0, 3)
+ LAE(rW5, rD1, 3)
+ rlwimi rW2,rW6,16,8,15
+ lwz rD0,32(rKP)
+ rlwimi rW3,rW7,16,8,15
+ lwz rD1,36(rKP)
+ LAE(rW6, rD2, 3)
+ LAE(rW7, rD3, 3)
+ rlwimi rW0,rW4,24,0,7
+ lwz rD2,40(rKP)
+ rlwimi rW1,rW5,24,0,7
+ lwz rD3,44(rKP)
+ rlwimi rW2,rW6,24,0,7
+ rlwimi rW3,rW7,24,0,7
+ blr
+
+/*
+ * ppc_decrypt_block: The central decryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_decrypt_block)
+ LAH(rW0, rD1, 0, 12)
+ LAH(rW6, rD0, 3, 0)
+ LAH(rW3, rD0, 1, 8)
+ppc_decrypt_block_loop:
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,16(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LWH(rW0, 12)
+ evxor rW3,rW3,rW1
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW5
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,32(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,40(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LWH(rW0, 12)
+ evxor rW3,rW3,rW1
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW5
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ addi rKP,rKP,32
+ bdnz ppc_decrypt_block_loop
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,16(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ DAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LBD(rW0)
+ evxor rW3,rW3,rW1
+ DAD(rD0, 1)
+ evxor rD3,rD3,rW3
+ LBD(rW6)
+ evxor rD3,rD3,rW5
+ DAD(rD0, 0)
+ evmergehi rD2,rD2,rD3
+ LBD(rW3)
+ LAD(rW2, rD3, 0)
+ LAD(rW1, rD2, 0)
+ LAD(rW4, rD2, 1)
+ LAD(rW5, rD3, 1)
+ LAD(rW7, rD1, 1)
+ rlwimi rW0,rW4,8,16,23
+ rlwimi rW1,rW5,8,16,23
+ LAD(rW4, rD3, 2)
+ LAD(rW5, rD0, 2)
+ rlwimi rW2,rW6,8,16,23
+ rlwimi rW3,rW7,8,16,23
+ LAD(rW6, rD1, 2)
+ LAD(rW7, rD2, 2)
+ rlwimi rW0,rW4,16,8,15
+ rlwimi rW1,rW5,16,8,15
+ LAD(rW4, rD0, 3)
+ LAD(rW5, rD1, 3)
+ rlwimi rW2,rW6,16,8,15
+ lwz rD0,32(rKP)
+ rlwimi rW3,rW7,16,8,15
+ lwz rD1,36(rKP)
+ LAD(rW6, rD2, 3)
+ LAD(rW7, rD3, 3)
+ rlwimi rW0,rW4,24,0,7
+ lwz rD2,40(rKP)
+ rlwimi rW1,rW5,24,0,7
+ lwz rD3,44(rKP)
+ rlwimi rW2,rW6,24,0,7
+ rlwimi rW3,rW7,24,0,7
+ blr
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Key handling functions for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_KEY(d, s, off) \
+ lwz d,off(s);
+#else
+#define LOAD_KEY(d, s, off) \
+ li r0,off; \
+ lwbrx d,s,r0;
+#endif
+
+#define INITIALIZE_KEY \
+ stwu r1,-32(r1); /* create stack frame */ \
+ stw r14,8(r1); /* save registers */ \
+ stw r15,12(r1); \
+ stw r16,16(r1);
+
+#define FINALIZE_KEY \
+ lwz r14,8(r1); /* restore registers */ \
+ lwz r15,12(r1); \
+ lwz r16,16(r1); \
+ xor r5,r5,r5; /* clear sensitive data */ \
+ xor r6,r6,r6; \
+ xor r7,r7,r7; \
+ xor r8,r8,r8; \
+ xor r9,r9,r9; \
+ xor r10,r10,r10; \
+ xor r11,r11,r11; \
+ xor r12,r12,r12; \
+ addi r1,r1,32; /* cleanup stack */
+
+#define LS_BOX(r, t1, t2) \
+ lis t2,PPC_AES_4K_ENCTAB@h; \
+ ori t2,t2,PPC_AES_4K_ENCTAB@l; \
+ rlwimi t2,r,4,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,0,24,31; \
+ rlwimi t2,r,28,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,8,16,23; \
+ rlwimi t2,r,20,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,16,8,15; \
+ rlwimi t2,r,12,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,24,0,7;
+
+#define GF8_MUL(out, in, t1, t2) \
+ lis t1,0x8080; /* multiplication in GF8 */ \
+ ori t1,t1,0x8080; \
+ and t1,t1,in; \
+ srwi t1,t1,7; \
+ mulli t1,t1,0x1b; \
+ lis t2,0x7f7f; \
+ ori t2,t2,0x7f7f; \
+ and t2,t2,in; \
+ slwi t2,t2,1; \
+ xor out,t1,t2;
+
+/*
+ * ppc_expand_key_128(u32 *key_enc, const u8 *key)
+ *
+ * Expand 128 bit key into 176 bytes encryption key. It consists of
+ * key itself plus 10 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_128)
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0)
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ stw r5,0(r3) /* key[0..3] = input data */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ li r16,10 /* 10 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_128_loop:
+ addi r3,r3,16
+ mr r14,r8 /* apply LS_BOX to 4th temp */
+ rotlwi r14,r14,8
+ LS_BOX(r14, r15, r4)
+ xor r14,r14,r0
+ xor r5,r5,r14 /* xor next 4 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ stw r5,0(r3) /* store next 4 keys */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
+ subi r16,r16,1
+ cmpwi r16,0
+ bt eq,ppc_expand_128_end
+ b ppc_expand_128_loop
+ppc_expand_128_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_expand_key_192(u32 *key_enc, const u8 *key)
+ *
+ * Expand 192 bit key into 208 bytes encryption key. It consists of key
+ * itself plus 12 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_192)
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0)
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ LOAD_KEY(r9,r4,16)
+ LOAD_KEY(r10,r4,20)
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ stw r9,16(r3)
+ stw r10,20(r3)
+ li r16,8 /* 8 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_192_loop:
+ addi r3,r3,24
+ mr r14,r10 /* apply LS_BOX to 6th temp */
+ rotlwi r14,r14,8
+ LS_BOX(r14, r15, r4)
+ xor r14,r14,r0
+ xor r5,r5,r14 /* xor next 6 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ xor r9,r9,r8
+ xor r10,r10,r9
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ subi r16,r16,1
+ cmpwi r16,0 /* last round early kick out */
+ bt eq,ppc_expand_192_end
+ stw r9,16(r3)
+ stw r10,20(r3)
+ GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
+ b ppc_expand_192_loop
+ppc_expand_192_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_expand_key_256(u32 *key_enc, const u8 *key)
+ *
+ * Expand 256 bit key into 240 bytes encryption key. It consists of key
+ * itself plus 14 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_256)
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0)
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ LOAD_KEY(r9,r4,16)
+ LOAD_KEY(r10,r4,20)
+ LOAD_KEY(r11,r4,24)
+ LOAD_KEY(r12,r4,28)
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ stw r9,16(r3)
+ stw r10,20(r3)
+ stw r11,24(r3)
+ stw r12,28(r3)
+ li r16,7 /* 7 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_256_loop:
+ addi r3,r3,32
+ mr r14,r12 /* apply LS_BOX to 8th temp */
+ rotlwi r14,r14,8
+ LS_BOX(r14, r15, r4)
+ xor r14,r14,r0
+ xor r5,r5,r14 /* xor 4 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ mr r14,r8
+ LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
+ xor r9,r9,r14 /* xor 4 keys */
+ xor r10,r10,r9
+ xor r11,r11,r10
+ xor r12,r12,r11
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ subi r16,r16,1
+ cmpwi r16,0 /* last round early kick out */
+ bt eq,ppc_expand_256_end
+ stw r9,16(r3)
+ stw r10,20(r3)
+ stw r11,24(r3)
+ stw r12,28(r3)
+ GF8_MUL(r0, r0, r4, r14)
+ b ppc_expand_256_loop
+ppc_expand_256_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_generate_decrypt_key: derive decryption key from encryption key
+ * number of bytes to handle are calculated from length of key (16/24/32)
+ *
+ */
+_GLOBAL(ppc_generate_decrypt_key)
+ addi r6,r5,24
+ slwi r6,r6,2
+ lwzx r7,r4,r6 /* first/last 4 words are same */
+ stw r7,0(r3)
+ lwz r7,0(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4
+ lwzx r7,r4,r6
+ stw r7,4(r3)
+ lwz r7,4(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4
+ lwzx r7,r4,r6
+ stw r7,8(r3)
+ lwz r7,8(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4
+ lwzx r7,r4,r6
+ stw r7,12(r3)
+ lwz r7,12(r4)
+ stwx r7,r3,r6
+ addi r3,r3,16
+ add r4,r4,r6
+ subi r4,r4,28
+ addi r5,r5,20
+ srwi r5,r5,2
+ppc_generate_decrypt_block:
+ li r6,4
+ mtctr r6
+ppc_generate_decrypt_word:
+ lwz r6,0(r4)
+ GF8_MUL(r7, r6, r0, r7)
+ GF8_MUL(r8, r7, r0, r8)
+ GF8_MUL(r9, r8, r0, r9)
+ xor r10,r9,r6
+ xor r11,r7,r8
+ xor r11,r11,r9
+ xor r12,r7,r10
+ rotrwi r12,r12,24
+ xor r11,r11,r12
+ xor r12,r8,r10
+ rotrwi r12,r12,16
+ xor r11,r11,r12
+ rotrwi r12,r10,8
+ xor r11,r11,r12
+ stw r11,0(r3)
+ addi r3,r3,4
+ addi r4,r4,4
+ bdnz ppc_generate_decrypt_word
+ subi r4,r4,32
+ subi r5,r5,1
+ cmpwi r5,0
+ bt gt,ppc_generate_decrypt_block
+ blr
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
+
+#define LOAD_DATA(reg, off) \
+ lwz reg,off(rSP); /* load with offset */
+#define SAVE_DATA(reg, off) \
+ stw reg,off(rDP); /* save with offset */
+#define NEXT_BLOCK \
+ addi rSP,rSP,16; /* increment pointers per bloc */ \
+ addi rDP,rDP,16;
+#define LOAD_IV(reg, off) \
+ lwz reg,off(rIP); /* IV loading with offset */
+#define SAVE_IV(reg, off) \
+ stw reg,off(rIP); /* IV saving with offset */
+#define START_IV /* nothing to reset */
+#define CBC_DEC 16 /* CBC decrement per block */
+#define CTR_DEC 1 /* CTR decrement one byte */
+
+#else /* Macros for little endian */
+
+#define LOAD_DATA(reg, off) \
+ lwbrx reg,0,rSP; /* load reversed */ \
+ addi rSP,rSP,4; /* and increment pointer */
+#define SAVE_DATA(reg, off) \
+ stwbrx reg,0,rDP; /* save reversed */ \
+ addi rDP,rDP,4; /* and increment pointer */
+#define NEXT_BLOCK /* nothing todo */
+#define LOAD_IV(reg, off) \
+ lwbrx reg,0,rIP; /* load reversed */ \
+ addi rIP,rIP,4; /* and increment pointer */
+#define SAVE_IV(reg, off) \
+ stwbrx reg,0,rIP; /* load reversed */ \
+ addi rIP,rIP,4; /* and increment pointer */
+#define START_IV \
+ subi rIP,rIP,16; /* must reset pointer */
+#define CBC_DEC 32 /* 2 blocks because of incs */
+#define CTR_DEC 17 /* 1 block because of incs */
+
+#endif
+
+#define SAVE_0_REGS
+#define LOAD_0_REGS
+
+#define SAVE_4_REGS \
+ stw rI0,96(r1); /* save 32 bit registers */ \
+ stw rI1,100(r1); \
+ stw rI2,104(r1); \
+ stw rI3,108(r1);
+
+#define LOAD_4_REGS \
+ lwz rI0,96(r1); /* restore 32 bit registers */ \
+ lwz rI1,100(r1); \
+ lwz rI2,104(r1); \
+ lwz rI3,108(r1);
+
+#define SAVE_8_REGS \
+ SAVE_4_REGS \
+ stw rG0,112(r1); /* save 32 bit registers */ \
+ stw rG1,116(r1); \
+ stw rG2,120(r1); \
+ stw rG3,124(r1);
+
+#define LOAD_8_REGS \
+ LOAD_4_REGS \
+ lwz rG0,112(r1); /* restore 32 bit registers */ \
+ lwz rG1,116(r1); \
+ lwz rG2,120(r1); \
+ lwz rG3,124(r1);
+
+#define INITIALIZE_CRYPT(tab,nr32bitregs) \
+ mflr r0; \
+ stwu r1,-160(r1); /* create stack frame */ \
+ lis rT0,tab@h; /* en-/decryption table pointer */ \
+ stw r0,8(r1); /* save link register */ \
+ ori rT0,rT0,tab@l; \
+ evstdw r14,16(r1); \
+ mr rKS,rKP; \
+ evstdw r15,24(r1); /* We must save non volatile */ \
+ evstdw r16,32(r1); /* registers. Take the chance */ \
+ evstdw r17,40(r1); /* and save the SPE part too */ \
+ evstdw r18,48(r1); \
+ evstdw r19,56(r1); \
+ evstdw r20,64(r1); \
+ evstdw r21,72(r1); \
+ evstdw r22,80(r1); \
+ evstdw r23,88(r1); \
+ SAVE_##nr32bitregs##_REGS
+
+#define FINALIZE_CRYPT(nr32bitregs) \
+ lwz r0,8(r1); \
+ evldw r14,16(r1); /* restore SPE registers */ \
+ evldw r15,24(r1); \
+ evldw r16,32(r1); \
+ evldw r17,40(r1); \
+ evldw r18,48(r1); \
+ evldw r19,56(r1); \
+ evldw r20,64(r1); \
+ evldw r21,72(r1); \
+ evldw r22,80(r1); \
+ evldw r23,88(r1); \
+ LOAD_##nr32bitregs##_REGS \
+ mtlr r0; /* restore link register */ \
+ xor r0,r0,r0; \
+ stw r0,16(r1); /* delete sensitive data */ \
+ stw r0,24(r1); /* that we might have pushed */ \
+ stw r0,32(r1); /* from other context that runs */ \
+ stw r0,40(r1); /* the same code */ \
+ stw r0,48(r1); \
+ stw r0,56(r1); \
+ stw r0,64(r1); \
+ stw r0,72(r1); \
+ stw r0,80(r1); \
+ stw r0,88(r1); \
+ addi r1,r1,160; /* cleanup stack frame */
+
+#define ENDIAN_SWAP(t0, t1, s0, s1) \
+ rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
+ rotrwi t1,s1,8; \
+ rlwimi t0,s0,8,8,15; \
+ rlwimi t1,s1,8,8,15; \
+ rlwimi t0,s0,8,24,31; \
+ rlwimi t1,s1,8,24,31;
+
+#define GF128_MUL(d0, d1, d2, d3, t0) \
+ li t0,0x87; /* multiplication in GF128 */ \
+ cmpwi d3,-1; \
+ iselgt t0,0,t0; \
+ rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
+ rotlwi d3,d3,1; \
+ rlwimi d2,d1,0,0,0; \
+ rotlwi d2,d2,1; \
+ rlwimi d1,d0,0,0,0; \
+ slwi d0,d0,1; /* shift left 128 bit */ \
+ rotlwi d1,d1,1; \
+ xor d0,d0,t0;
+
+#define START_KEY(d0, d1, d2, d3) \
+ lwz rW0,0(rKP); \
+ mtctr rRR; \
+ lwz rW1,4(rKP); \
+ lwz rW2,8(rKP); \
+ lwz rW3,12(rKP); \
+ xor rD0,d0,rW0; \
+ xor rD1,d1,rW1; \
+ xor rD2,d2,rW2; \
+ xor rD3,d3,rW3;
+
+/*
+ * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds)
+ *
+ * called from glue layer to encrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_aes)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ LOAD_DATA(rD0, 0)
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds)
+ *
+ * called from glue layer to decrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_aes)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
+ LOAD_DATA(rD0, 0)
+ addi rT1,rT0,4096
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes);
+ *
+ * called from glue layer to encrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_ecb)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ppc_encrypt_ecb_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_ecb_loop
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes);
+ *
+ * called from glue layer to decrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_ecb)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
+ addi rT1,rT0,4096
+ppc_decrypt_ecb_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_decrypt_ecb_loop
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
+ * 32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt multiple blocks via CBC
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_cbc)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+ LOAD_IV(rI0, 0)
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ LOAD_IV(rI3, 12)
+ppc_encrypt_cbc_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ SAVE_DATA(rI0, 0)
+ xor rI1,rD1,rW1
+ SAVE_DATA(rI1, 4)
+ xor rI2,rD2,rW2
+ SAVE_DATA(rI2, 8)
+ xor rI3,rD3,rW3
+ SAVE_DATA(rI3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_cbc_loop
+ START_IV
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to decrypt multiple blocks via CBC
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_cbc)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
+ li rT1,15
+ LOAD_IV(rI0, 0)
+ andc rLN,rLN,rT1
+ LOAD_IV(rI1, 4)
+ subi rLN,rLN,16
+ LOAD_IV(rI2, 8)
+ add rSP,rSP,rLN /* reverse processing */
+ LOAD_IV(rI3, 12)
+ add rDP,rDP,rLN
+ LOAD_DATA(rD0, 0)
+ addi rT1,rT0,4096
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_IV
+ SAVE_IV(rD0, 0)
+ SAVE_IV(rD1, 4)
+ SAVE_IV(rD2, 8)
+ cmpwi rLN,16
+ SAVE_IV(rD3, 12)
+ bt lt,ppc_decrypt_cbc_end
+ppc_decrypt_cbc_loop:
+ mr rKP,rKS
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ subi rLN,rLN,16
+ subi rSP,rSP,CBC_DEC
+ xor rW0,rD0,rW0
+ LOAD_DATA(rD0, 0)
+ xor rW1,rD1,rW1
+ LOAD_DATA(rD1, 4)
+ xor rW2,rD2,rW2
+ LOAD_DATA(rD2, 8)
+ xor rW3,rD3,rW3
+ LOAD_DATA(rD3, 12)
+ xor rW0,rW0,rD0
+ SAVE_DATA(rW0, 0)
+ xor rW1,rW1,rD1
+ SAVE_DATA(rW1, 4)
+ xor rW2,rW2,rD2
+ SAVE_DATA(rW2, 8)
+ xor rW3,rW3,rD3
+ SAVE_DATA(rW3, 12)
+ cmpwi rLN,15
+ subi rDP,rDP,CBC_DEC
+ bt gt,ppc_decrypt_cbc_loop
+ppc_decrypt_cbc_end:
+ mr rKP,rKS
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rW0,rW0,rD0
+ xor rW1,rW1,rD1
+ xor rW2,rW2,rD2
+ xor rW3,rW3,rD3
+ xor rW0,rW0,rI0 /* decrypt with initial IV */
+ SAVE_DATA(rW0, 0)
+ xor rW1,rW1,rI1
+ SAVE_DATA(rW1, 4)
+ xor rW2,rW2,rI2
+ SAVE_DATA(rW2, 8)
+ xor rW3,rW3,rI3
+ SAVE_DATA(rW3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt/decrypt multiple blocks
+ * via CTR. Number of bytes does not need to be a multiple of
+ * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_crypt_ctr)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+ LOAD_IV(rI0, 0)
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rLN,16
+ LOAD_IV(rI3, 12)
+ START_IV
+ bt lt,ppc_crypt_ctr_partial
+ppc_crypt_ctr_loop:
+ mr rKP,rKS
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rW0,rD0,rW0
+ xor rW1,rD1,rW1
+ xor rW2,rD2,rW2
+ xor rW3,rD3,rW3
+ LOAD_DATA(rD0, 0)
+ subi rLN,rLN,16
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ addic rI3,rI3,1 /* increase counter */
+ addze rI2,rI2
+ addze rI1,rI1
+ addze rI0,rI0
+ NEXT_BLOCK
+ cmpwi rLN,15
+ bt gt,ppc_crypt_ctr_loop
+ppc_crypt_ctr_partial:
+ cmpwi rLN,0
+ bt eq,ppc_crypt_ctr_end
+ mr rKP,rKS
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rW0,rD0,rW0
+ SAVE_IV(rW0, 0)
+ xor rW1,rD1,rW1
+ SAVE_IV(rW1, 4)
+ xor rW2,rD2,rW2
+ SAVE_IV(rW2, 8)
+ xor rW3,rD3,rW3
+ SAVE_IV(rW3, 12)
+ mtctr rLN
+ subi rIP,rIP,CTR_DEC
+ subi rSP,rSP,1
+ subi rDP,rDP,1
+ppc_crypt_ctr_xorbyte:
+ lbzu rW4,1(rIP) /* bytewise xor for partial block */
+ lbzu rW5,1(rSP)
+ xor rW4,rW4,rW5
+ stbu rW4,1(rDP)
+ bdnz ppc_crypt_ctr_xorbyte
+ subf rIP,rLN,rIP
+ addi rIP,rIP,1
+ addic rI3,rI3,1
+ addze rI2,rI2
+ addze rI1,rI1
+ addze rI0,rI0
+ppc_crypt_ctr_end:
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to encrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_xts)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
+ LOAD_IV(rI0, 0)
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rKT,0
+ LOAD_IV(rI3, 12)
+ bt eq,ppc_encrypt_xts_notweak
+ mr rKP,rKT
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ xor rI1,rD1,rW1
+ xor rI2,rD2,rW2
+ xor rI3,rD3,rW3
+ppc_encrypt_xts_notweak:
+ ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+ ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_encrypt_xts_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ xor rD1,rD1,rW1
+ xor rD2,rD2,rW2
+ xor rD3,rD3,rW3
+ xor rD0,rD0,rI0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rI1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rI2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rI3
+ SAVE_DATA(rD3, 12)
+ GF128_MUL(rG0, rG1, rG2, rG3, rW0)
+ ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+ ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+ cmpwi rLN,0
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_xts_loop
+ START_IV
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(8)
+ blr
+
+/*
+ * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 blocks, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to decrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_xts)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
+ LOAD_IV(rI0, 0)
+ addi rT1,rT0,4096
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rKT,0
+ LOAD_IV(rI3, 12)
+ bt eq,ppc_decrypt_xts_notweak
+ subi rT0,rT0,4096
+ mr rKP,rKT
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ xor rI1,rD1,rW1
+ xor rI2,rD2,rW2
+ xor rI3,rD3,rW3
+ addi rT0,rT0,4096
+ppc_decrypt_xts_notweak:
+ ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+ ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_decrypt_xts_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ xor rD1,rD1,rW1
+ xor rD2,rD2,rW2
+ xor rD3,rD3,rW3
+ xor rD0,rD0,rI0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rI1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rI2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rI3
+ SAVE_DATA(rD3, 12)
+ GF128_MUL(rG0, rG1, rG2, rG3, rW0)
+ ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+ ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+ cmpwi rLN,0
+ NEXT_BLOCK
+ bt gt,ppc_decrypt_xts_loop
+ START_IV
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(8)
+ blr
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common registers for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#define rKS r0 /* copy of en-/decryption key pointer */
+#define rDP r3 /* destination pointer */
+#define rSP r4 /* source pointer */
+#define rKP r5 /* pointer to en-/decryption key pointer */
+#define rRR r6 /* en-/decryption rounds */
+#define rLN r7 /* length of data to be processed */
+#define rIP r8 /* potiner to IV (CBC/CTR/XTS modes) */
+#define rKT r9 /* pointer to tweak key (XTS mode) */
+#define rT0 r11 /* pointers to en-/decryption tables */
+#define rT1 r10
+#define rD0 r9 /* data */
+#define rD1 r14
+#define rD2 r12
+#define rD3 r15
+#define rW0 r16 /* working registers */
+#define rW1 r17
+#define rW2 r18
+#define rW3 r19
+#define rW4 r20
+#define rW5 r21
+#define rW6 r22
+#define rW7 r23
+#define rI0 r24 /* IV */
+#define rI1 r25
+#define rI2 r26
+#define rI3 r27
+#define rG0 r28 /* endian reversed tweak (XTS mode) */
+#define rG1 r29
+#define rG2 r30
+#define rG3 r31
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * 4K AES tables for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+/*
+ * These big endian AES encryption/decryption tables have been taken from
+ * crypto/aes_generic.c and are designed to be simply accessed by a combination
+ * of rlwimi/lwz instructions with a minimum of table registers (usually only
+ * one required). Thus they are aligned to 4K. The locality of rotated values
+ * is derived from the reduced offsets that are available in the SPE load
+ * instructions. E.g. evldw, evlwwsplat, ...
+ *
+ * For the safety-conscious it has to be noted that they might be vulnerable
+ * to cache timing attacks because of their size. Nevertheless in contrast to
+ * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
+ * This is a quite good tradeoff for low power devices (e.g. routers) without
+ * dedicated encryption hardware where we usually have no multiuser
+ * environment.
+ *
+ */
+
+#define R(a, b, c, d) \
+ 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
+
+.data
+.align 12
+.globl PPC_AES_4K_ENCTAB
+PPC_AES_4K_ENCTAB:
+/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
+ .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
+ .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
+ .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
+ .long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
+ .long R(60, 30, 30, 50), R(02, 01, 01, 03)
+ .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
+ .long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
+ .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
+ .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
+ .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
+ .long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
+ .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
+ .long R(41, ad, ad, ec), R(b3, d4, d4, 67)
+ .long R(5f, a2, a2, fd), R(45, af, af, ea)
+ .long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
+ .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
+ .long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
+ .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
+ .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
+ .long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
+ .long R(68, 34, 34, 5c), R(51, a5, a5, f4)
+ .long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
+ .long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
+ .long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
+ .long R(08, 04, 04, 0c), R(95, c7, c7, 52)
+ .long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
+ .long R(30, 18, 18, 28), R(37, 96, 96, a1)
+ .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
+ .long R(0e, 07, 07, 09), R(24, 12, 12, 36)
+ .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
+ .long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
+ .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
+ .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
+ .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
+ .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
+ .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
+ .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
+ .long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
+ .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
+ .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
+ .long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
+ .long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
+ .long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
+ .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
+ .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
+ .long R(67, be, be, d9), R(72, 39, 39, 4b)
+ .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
+ .long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
+ .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
+ .long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
+ .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
+ .long R(66, 33, 33, 55), R(11, 85, 85, 94)
+ .long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
+ .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
+ .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
+ .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
+ .long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
+ .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
+ .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
+ .long R(70, 38, 38, 48), R(f1, f5, f5, 04)
+ .long R(63, bc, bc, df), R(77, b6, b6, c1)
+ .long R(af, da, da, 75), R(42, 21, 21, 63)
+ .long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
+ .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
+ .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
+ .long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
+ .long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
+ .long R(88, 44, 44, cc), R(2e, 17, 17, 39)
+ .long R(93, c4, c4, 57), R(55, a7, a7, f2)
+ .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
+ .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
+ .long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
+ .long R(c0, 60, 60, a0), R(19, 81, 81, 98)
+ .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
+ .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
+ .long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
+ .long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
+ .long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
+ .long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
+ .long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
+ .long R(db, e0, e0, 3b), R(64, 32, 32, 56)
+ .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
+ .long R(92, 49, 49, db), R(0c, 06, 06, 0a)
+ .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
+ .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
+ .long R(43, ac, ac, ef), R(c4, 62, 62, a6)
+ .long R(39, 91, 91, a8), R(31, 95, 95, a4)
+ .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
+ .long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
+ .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
+ .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
+ .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
+ .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
+ .long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
+ .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
+ .long R(47, ae, ae, e9), R(10, 08, 08, 18)
+ .long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
+ .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
+ .long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
+ .long R(73, b4, b4, c7), R(97, c6, c6, 51)
+ .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
+ .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
+ .long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
+ .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
+ .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
+ .long R(71, b5, b5, c4), R(cc, 66, 66, aa)
+ .long R(90, 48, 48, d8), R(06, 03, 03, 05)
+ .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
+ .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
+ .long R(ae, 57, 57, f9), R(69, b9, b9, d0)
+ .long R(17, 86, 86, 91), R(99, c1, c1, 58)
+ .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
+ .long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
+ .long R(2b, 98, 98, b3), R(22, 11, 11, 33)
+ .long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
+ .long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
+ .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
+ .long R(15, 87, 87, 92), R(c9, e9, e9, 20)
+ .long R(87, ce, ce, 49), R(aa, 55, 55, ff)
+ .long R(50, 28, 28, 78), R(a5, df, df, 7a)
+ .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
+ .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
+ .long R(65, bf, bf, da), R(d7, e6, e6, 31)
+ .long R(84, 42, 42, c6), R(d0, 68, 68, b8)
+ .long R(82, 41, 41, c3), R(29, 99, 99, b0)
+ .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
+ .long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
+ .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
+.globl PPC_AES_4K_DECTAB
+PPC_AES_4K_DECTAB:
+/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
+ .long R(51, f4, a7, 50), R(7e, 41, 65, 53)
+ .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
+ .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
+ .long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
+ .long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
+ .long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
+ .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
+ .long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
+ .long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
+ .long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
+ .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
+ .long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
+ .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
+ .long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
+ .long R(d4, be, 83, 2d), R(58, 74, 21, d3)
+ .long R(49, e0, 69, 29), R(8e, c9, c8, 44)
+ .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
+ .long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
+ .long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
+ .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
+ .long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
+ .long R(97, 51, 33, 60), R(62, 53, 7f, 45)
+ .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
+ .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
+ .long R(70, 48, 68, 58), R(8f, 45, fd, 19)
+ .long R(94, de, 6c, 87), R(52, 7b, f8, b7)
+ .long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
+ .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
+ .long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
+ .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
+ .long R(30, 28, 87, f2), R(23, bf, a5, b2)
+ .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
+ .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
+ .long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
+ .long R(65, da, f4, cd), R(06, 05, be, d5)
+ .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
+ .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
+ .long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
+ .long R(0b, 83, ec, 39), R(40, 60, ef, aa)
+ .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
+ .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
+ .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
+ .long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
+ .long R(04, 06, d4, 6f), R(60, 50, 15, ff)
+ .long R(19, 98, fb, 24), R(d6, bd, e9, 97)
+ .long R(89, 40, 43, cc), R(67, d9, 9e, 77)
+ .long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
+ .long R(e7, 19, 5b, 38), R(79, c8, ee, db)
+ .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
+ .long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
+ .long R(09, 80, 86, 83), R(32, 2b, ed, 48)
+ .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
+ .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
+ .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
+ .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
+ .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
+ .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
+ .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
+ .long R(80, c0, c5, 4f), R(61, dc, 20, a2)
+ .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
+ .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
+ .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
+ .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
+ .long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
+ .long R(57, f1, 19, 85), R(af, 75, 07, 4c)
+ .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
+ .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
+ .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
+ .long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
+ .long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
+ .long R(d7, 31, dc, ca), R(42, 63, 85, 10)
+ .long R(13, 97, 22, 40), R(84, c6, 11, 20)
+ .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
+ .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
+ .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
+ .long R(0d, 86, 52, ec), R(77, c1, e3, d0)
+ .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
+ .long R(11, 94, 48, fa), R(47, e9, 64, 22)
+ .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
+ .long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
+ .long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
+ .long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
+ .long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
+ .long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
+ .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
+ .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
+ .long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
+ .long R(2e, 39, f7, 5e), R(82, c3, af, f5)
+ .long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
+ .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
+ .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
+ .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
+ .long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
+ .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
+ .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
+ .long R(21, bc, cf, 08), R(ef, 15, e8, e6)
+ .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
+ .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
+ .long R(31, a4, b2, af), R(2a, 3f, 23, 31)
+ .long R(c6, a5, 94, 30), R(35, a2, 66, c0)
+ .long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
+ .long R(e0, 90, d0, b0), R(33, a7, d8, 15)
+ .long R(f1, 04, 98, 4a), R(41, ec, da, f7)
+ .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
+ .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
+ .long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
+ .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
+ .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
+ .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
+ .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
+ .long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
+ .long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
+ .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
+ .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
+ .long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
+ .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
+ .long R(9c, d2, df, 59), R(55, f2, 73, 3f)
+ .long R(18, 14, ce, 79), R(73, c7, 37, bf)
+ .long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
+ .long R(df, 3d, 6f, 14), R(78, 44, db, 86)
+ .long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
+ .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
+ .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
+ .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
+ .long R(39, a8, 01, 71), R(08, 0c, b3, de)
+ .long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
+ .long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
+ .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
+.globl PPC_AES_4K_DECTAB2
+PPC_AES_4K_DECTAB2:
+/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ * Copyright 2026 Google LLC
+ */
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <linux/cpufeature.h>
+#include <linux/jump_label.h>
+#include <linux/preempt.h>
+#include <linux/uaccess.h>
+
+EXPORT_SYMBOL_GPL(ppc_expand_key_128);
+EXPORT_SYMBOL_GPL(ppc_expand_key_192);
+EXPORT_SYMBOL_GPL(ppc_expand_key_256);
+EXPORT_SYMBOL_GPL(ppc_generate_decrypt_key);
+EXPORT_SYMBOL_GPL(ppc_encrypt_ecb);
+EXPORT_SYMBOL_GPL(ppc_decrypt_ecb);
+EXPORT_SYMBOL_GPL(ppc_encrypt_cbc);
+EXPORT_SYMBOL_GPL(ppc_decrypt_cbc);
+EXPORT_SYMBOL_GPL(ppc_crypt_ctr);
+EXPORT_SYMBOL_GPL(ppc_encrypt_xts);
+EXPORT_SYMBOL_GPL(ppc_decrypt_xts);
+
+void ppc_encrypt_aes(u8 *out, const u8 *in, const u32 *key_enc, u32 rounds);
+void ppc_decrypt_aes(u8 *out, const u8 *in, const u32 *key_dec, u32 rounds);
+
+static void spe_begin(void)
+{
+ /* disable preemption and save users SPE registers if required */
+ preempt_disable();
+ enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+ disable_kernel_spe();
+ /* reenable preemption */
+ preempt_enable();
+}
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+ union aes_invkey_arch *inv_k,
+ const u8 *in_key, int key_len, int nrounds)
+{
+ if (key_len == AES_KEYSIZE_128)
+ ppc_expand_key_128(k->spe_enc_key, in_key);
+ else if (key_len == AES_KEYSIZE_192)
+ ppc_expand_key_192(k->spe_enc_key, in_key);
+ else
+ ppc_expand_key_256(k->spe_enc_key, in_key);
+
+ if (inv_k)
+ ppc_generate_decrypt_key(inv_k->spe_dec_key, k->spe_enc_key,
+ key_len);
+}
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{
+ spe_begin();
+ ppc_encrypt_aes(out, in, key->k.spe_enc_key, key->nrounds / 2 - 1);
+ spe_end();
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{
+ spe_begin();
+ ppc_decrypt_aes(out, in, key->inv_k.spe_dec_key, key->nrounds / 2 - 1);
+ spe_end();
+}