Architecture: arm64 using:
- ARMv8.2 Crypto Extensions
-config CRYPTO_AES_ARM64
- tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS"
- select CRYPTO_AES
- help
- Block ciphers: AES cipher algorithms (FIPS-197)
- Length-preserving ciphers: AES with ECB, CBC, CTR, CTS,
- XCTR, and XTS modes
- AEAD cipher: AES with CBC, ESSIV, and SHA-256
- for fscrypt and dm-crypt
-
- Architecture: arm64
-
-config CRYPTO_AES_ARM64_CE
- tristate "Ciphers: AES (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_ALGAPI
- select CRYPTO_LIB_AES
- help
- Block ciphers: AES cipher algorithms (FIPS-197)
-
- Architecture: arm64 using:
- - ARMv8 Crypto Extensions
-
config CRYPTO_AES_ARM64_CE_BLK
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
- select CRYPTO_AES_ARM64_CE
+ select CRYPTO_LIB_AES
select CRYPTO_LIB_SHA256
help
Length-preserving ciphers: AES cipher algorithms (FIPS-197)
tristate "AEAD cipher: AES in CCM mode (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
- select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64_CE_BLK
select CRYPTO_AEAD
select CRYPTO_LIB_AES
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
-obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
-aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
-obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
-aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
#include <asm/simd.h>
-#include "aes-ce-setkey.h"
-
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
static int num_rounds(struct crypto_aes_ctx *ctx)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .arch armv8-a+crypto
-
-SYM_FUNC_START(__aes_ce_encrypt)
- sub w3, w3, #2
- ld1 {v0.16b}, [x2]
- ld1 {v1.4s}, [x0], #16
- cmp w3, #10
- bmi 0f
- bne 3f
- mov v3.16b, v1.16b
- b 2f
-0: mov v2.16b, v1.16b
- ld1 {v3.4s}, [x0], #16
-1: aese v0.16b, v2.16b
- aesmc v0.16b, v0.16b
-2: ld1 {v1.4s}, [x0], #16
- aese v0.16b, v3.16b
- aesmc v0.16b, v0.16b
-3: ld1 {v2.4s}, [x0], #16
- subs w3, w3, #3
- aese v0.16b, v1.16b
- aesmc v0.16b, v0.16b
- ld1 {v3.4s}, [x0], #16
- bpl 1b
- aese v0.16b, v2.16b
- eor v0.16b, v0.16b, v3.16b
- st1 {v0.16b}, [x1]
- ret
-SYM_FUNC_END(__aes_ce_encrypt)
-
-SYM_FUNC_START(__aes_ce_decrypt)
- sub w3, w3, #2
- ld1 {v0.16b}, [x2]
- ld1 {v1.4s}, [x0], #16
- cmp w3, #10
- bmi 0f
- bne 3f
- mov v3.16b, v1.16b
- b 2f
-0: mov v2.16b, v1.16b
- ld1 {v3.4s}, [x0], #16
-1: aesd v0.16b, v2.16b
- aesimc v0.16b, v0.16b
-2: ld1 {v1.4s}, [x0], #16
- aesd v0.16b, v3.16b
- aesimc v0.16b, v0.16b
-3: ld1 {v2.4s}, [x0], #16
- subs w3, w3, #3
- aesd v0.16b, v1.16b
- aesimc v0.16b, v0.16b
- ld1 {v3.4s}, [x0], #16
- bpl 1b
- aesd v0.16b, v2.16b
- eor v0.16b, v0.16b, v3.16b
- st1 {v0.16b}, [x1]
- ret
-SYM_FUNC_END(__aes_ce_decrypt)
-
-/*
- * __aes_ce_sub() - use the aese instruction to perform the AES sbox
- * substitution on each byte in 'input'
- */
-SYM_FUNC_START(__aes_ce_sub)
- dup v1.4s, w0
- movi v0.16b, #0
- aese v0.16b, v1.16b
- umov w0, v0.s[0]
- ret
-SYM_FUNC_END(__aes_ce_sub)
-
-SYM_FUNC_START(__aes_ce_invert)
- ld1 {v0.4s}, [x1]
- aesimc v1.16b, v0.16b
- st1 {v1.4s}, [x0]
- ret
-SYM_FUNC_END(__aes_ce_invert)
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
- *
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/simd.h>
-#include <linux/cpufeature.h>
-#include <linux/module.h>
-
-#include "aes-ce-setkey.h"
-
-MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-
-struct aes_block {
- u8 b[AES_BLOCK_SIZE];
-};
-
-asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-
-asmlinkage u32 __aes_ce_sub(u32 l);
-asmlinkage void __aes_ce_invert(struct aes_block *out,
- const struct aes_block *in);
-
-static int num_rounds(struct crypto_aes_ctx *ctx)
-{
- /*
- * # of rounds specified by AES:
- * 128 bit key 10 rounds
- * 192 bit key 12 rounds
- * 256 bit key 14 rounds
- * => n byte key => 6 + (n/4) rounds
- */
- return 6 + ctx->key_length / 4;
-}
-
-static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
-{
- struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (!crypto_simd_usable()) {
- aes_encrypt(ctx, dst, src);
- return;
- }
-
- scoped_ksimd()
- __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
-}
-
-static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
-{
- struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (!crypto_simd_usable()) {
- aes_decrypt(ctx, dst, src);
- return;
- }
-
- scoped_ksimd()
- __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
-}
-
-int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
- unsigned int key_len)
-{
- /*
- * The AES key schedule round constants
- */
- static u8 const rcon[] = {
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
- };
-
- u32 kwords = key_len / sizeof(u32);
- struct aes_block *key_enc, *key_dec;
- int i, j;
-
- if (key_len != AES_KEYSIZE_128 &&
- key_len != AES_KEYSIZE_192 &&
- key_len != AES_KEYSIZE_256)
- return -EINVAL;
-
- ctx->key_length = key_len;
- for (i = 0; i < kwords; i++)
- ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
-
- scoped_ksimd() {
- for (i = 0; i < sizeof(rcon); i++) {
- u32 *rki = ctx->key_enc + (i * kwords);
- u32 *rko = rki + kwords;
-
- rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
- rcon[i] ^ rki[0];
- rko[1] = rko[0] ^ rki[1];
- rko[2] = rko[1] ^ rki[2];
- rko[3] = rko[2] ^ rki[3];
-
- if (key_len == AES_KEYSIZE_192) {
- if (i >= 7)
- break;
- rko[4] = rko[3] ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- } else if (key_len == AES_KEYSIZE_256) {
- if (i >= 6)
- break;
- rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- rko[6] = rko[5] ^ rki[6];
- rko[7] = rko[6] ^ rki[7];
- }
- }
-
- /*
- * Generate the decryption keys for the Equivalent Inverse
- * Cipher. This involves reversing the order of the round
- * keys, and applying the Inverse Mix Columns transformation on
- * all but the first and the last one.
- */
- key_enc = (struct aes_block *)ctx->key_enc;
- key_dec = (struct aes_block *)ctx->key_dec;
- j = num_rounds(ctx);
-
- key_dec[0] = key_enc[j];
- for (i = 1, j--; j > 0; i++, j--)
- __aes_ce_invert(key_dec + i, key_enc + j);
- key_dec[i] = key_enc[0];
- }
-
- return 0;
-}
-EXPORT_SYMBOL(ce_aes_expandkey);
-
-int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- return ce_aes_expandkey(ctx, in_key, key_len);
-}
-EXPORT_SYMBOL(ce_aes_setkey);
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-ce",
- .cra_priority = 250,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_module = THIS_MODULE,
- .cra_cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = ce_aes_setkey,
- .cia_encrypt = aes_cipher_encrypt,
- .cia_decrypt = aes_cipher_decrypt
- }
-};
-
-static int __init aes_mod_init(void)
-{
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_mod_exit(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_cpu_feature_match(AES, aes_mod_init);
-module_exit(aes_mod_exit);
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-
-int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len);
-int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
- unsigned int key_len);
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Scalar AES core transform
- *
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
- .text
-
- rk .req x0
- out .req x1
- in .req x2
- rounds .req x3
- tt .req x2
-
- .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
- .ifc \op\shift, b0
- ubfiz \reg0, \in0, #2, #8
- ubfiz \reg1, \in1e, #2, #8
- .else
- ubfx \reg0, \in0, #\shift, #8
- ubfx \reg1, \in1e, #\shift, #8
- .endif
-
- /*
- * AArch64 cannot do byte size indexed loads from a table containing
- * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
- * valid instruction. So perform the shift explicitly first for the
- * high bytes (the low byte is shifted implicitly by using ubfiz rather
- * than ubfx above)
- */
- .ifnc \op, b
- ldr \reg0, [tt, \reg0, uxtw #2]
- ldr \reg1, [tt, \reg1, uxtw #2]
- .else
- .if \shift > 0
- lsl \reg0, \reg0, #2
- lsl \reg1, \reg1, #2
- .endif
- ldrb \reg0, [tt, \reg0, uxtw]
- ldrb \reg1, [tt, \reg1, uxtw]
- .endif
- .endm
-
- .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
- ubfx \reg0, \in0, #\shift, #8
- ubfx \reg1, \in1d, #\shift, #8
- ldr\op \reg0, [tt, \reg0, uxtw #\sz]
- ldr\op \reg1, [tt, \reg1, uxtw #\sz]
- .endm
-
- .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
- ldp \out0, \out1, [rk], #8
-
- __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
- __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
- __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
- __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
-
- eor \out0, \out0, w12
- eor \out1, \out1, w13
- eor \out0, \out0, w14, ror #24
- eor \out1, \out1, w15, ror #24
- eor \out0, \out0, w16, ror #16
- eor \out1, \out1, w17, ror #16
- eor \out0, \out0, \t0, ror #8
- eor \out1, \out1, \t1, ror #8
- .endm
-
- .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
- __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
- __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
- .endm
-
- .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
- __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
- __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
- .endm
-
- .macro do_crypt, round, ttab, ltab, bsz
- ldp w4, w5, [in]
- ldp w6, w7, [in, #8]
- ldp w8, w9, [rk], #16
- ldp w10, w11, [rk, #-8]
-
-CPU_BE( rev w4, w4 )
-CPU_BE( rev w5, w5 )
-CPU_BE( rev w6, w6 )
-CPU_BE( rev w7, w7 )
-
- eor w4, w4, w8
- eor w5, w5, w9
- eor w6, w6, w10
- eor w7, w7, w11
-
- adr_l tt, \ttab
-
- tbnz rounds, #1, 1f
-
-0: \round w8, w9, w10, w11, w4, w5, w6, w7
- \round w4, w5, w6, w7, w8, w9, w10, w11
-
-1: subs rounds, rounds, #4
- \round w8, w9, w10, w11, w4, w5, w6, w7
- b.ls 3f
-2: \round w4, w5, w6, w7, w8, w9, w10, w11
- b 0b
-3: adr_l tt, \ltab
- \round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
-
-CPU_BE( rev w4, w4 )
-CPU_BE( rev w5, w5 )
-CPU_BE( rev w6, w6 )
-CPU_BE( rev w7, w7 )
-
- stp w4, w5, [out]
- stp w6, w7, [out, #8]
- ret
- .endm
-
-SYM_FUNC_START(__aes_arm64_encrypt)
- do_crypt fround, aes_enc_tab, aes_enc_tab + 1, 2
-SYM_FUNC_END(__aes_arm64_encrypt)
-
- .align 5
-SYM_FUNC_START(__aes_arm64_decrypt)
- do_crypt iround, aes_dec_tab, crypto_aes_inv_sbox, 0
-SYM_FUNC_END(__aes_arm64_decrypt)
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Scalar AES core transform
- *
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/module.h>
-
-asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-
-static int aes_arm64_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- return aes_expandkey(ctx, in_key, key_len);
-}
-
-static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- int rounds = 6 + ctx->key_length / 4;
-
- __aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
-}
-
-static void aes_arm64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- int rounds = 6 + ctx->key_length / 4;
-
- __aes_arm64_decrypt(ctx->key_dec, out, in, rounds);
-}
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-arm64",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_module = THIS_MODULE,
-
- .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
- .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
- .cra_cipher.cia_setkey = aes_arm64_setkey,
- .cra_cipher.cia_encrypt = aes_arm64_encrypt,
- .cra_cipher.cia_decrypt = aes_arm64_decrypt
-};
-
-static int __init aes_init(void)
-{
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Scalar AES cipher for arm64");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("aes");
#include <asm/hwcap.h>
#include <asm/simd.h>
-#include "aes-ce-setkey.h"
-
#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"
#define PRIO 300
int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len);
+/*
+ * The following functions are temporarily exported for use by the AES mode
+ * implementations in arch/$(SRCARCH)/crypto/. These exports will go away when
+ * that code is migrated into lib/crypto/.
+ *
+ * ce_aes_expandkey() is only declared when CONFIG_ARM64 is set. It expands
+ * @in_key into @ctx and returns 0, or returns -EINVAL when @key_len fails the
+ * AES key-length check.
+ */
+#ifdef CONFIG_ARM64
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len);
+#endif
+
/**
* aes_preparekey() - Prepare an AES key for encryption and decryption
* @key: (output) The key structure to initialize
bool
depends on CRYPTO_LIB_AES && !UML && !KMSAN
default y if ARM
+ default y if ARM64
config CRYPTO_LIB_AESCFB
tristate
libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
+ifeq ($(CONFIG_ARM64),y)
+libaes-y += arm64/aes-cipher-core.o
+libaes-$(CONFIG_KERNEL_MODE_NEON) += arm64/aes-ce-core.o
+endif
+
endif # CONFIG_CRYPTO_LIB_AES_ARCH
################################################################################
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .arch armv8-a+crypto
+
+SYM_FUNC_START(__aes_ce_encrypt) /* x0 = rk, x1 = out, x2 = in, w3 = rounds */
+ sub w3, w3, #2 /* w3 = rounds - 2 */
+ ld1 {v0.16b}, [x2] /* v0 = 16-byte input block */
+ ld1 {v1.4s}, [x0], #16 /* first round key; x0 advances by 16 on every load */
+ cmp w3, #10
+ bmi 0f /* rounds < 12 */
+ bne 3f /* rounds > 12 */
+ mov v3.16b, v1.16b /* rounds == 12 */
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aese v0.16b, v2.16b
+ aesmc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3 /* three aese steps per loop pass; sets the flags used by bpl */
+ aese v0.16b, v1.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b /* repeat while the count is still non-negative */
+ aese v0.16b, v2.16b /* the last aese is not followed by aesmc */
+ eor v0.16b, v0.16b, v3.16b /* XOR with the last round key loaded */
+ st1 {v0.16b}, [x1] /* write the 16-byte result to out */
+ ret
+SYM_FUNC_END(__aes_ce_encrypt)
+
+SYM_FUNC_START(__aes_ce_decrypt) /* x0 = inverse round keys, x1 = out, x2 = in, w3 = rounds */
+ sub w3, w3, #2 /* w3 = rounds - 2 */
+ ld1 {v0.16b}, [x2] /* v0 = 16-byte input block */
+ ld1 {v1.4s}, [x0], #16 /* first round key; x0 advances by 16 on every load */
+ cmp w3, #10
+ bmi 0f /* rounds < 12 */
+ bne 3f /* rounds > 12 */
+ mov v3.16b, v1.16b /* rounds == 12 */
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aesd v0.16b, v2.16b
+ aesimc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aesd v0.16b, v3.16b
+ aesimc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3 /* three aesd steps per loop pass; sets the flags used by bpl */
+ aesd v0.16b, v1.16b
+ aesimc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b /* repeat while the count is still non-negative */
+ aesd v0.16b, v2.16b /* the last aesd is not followed by aesimc */
+ eor v0.16b, v0.16b, v3.16b /* XOR with the last round key loaded */
+ st1 {v0.16b}, [x1] /* write the 16-byte result to out */
+ ret
+SYM_FUNC_END(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ * substitution on each byte in 'input'
+ *
+ * The 32-bit argument in w0 is replicated into all four lanes of v1, aese is
+ * applied to an all-zero v0 with v1 as its second operand, and lane 0 of the
+ * result is returned in w0.
+ */
+SYM_FUNC_START(__aes_ce_sub)
+ dup v1.4s, w0
+ movi v0.16b, #0
+ aese v0.16b, v1.16b
+ umov w0, v0.s[0]
+ ret
+SYM_FUNC_END(__aes_ce_sub)
+
+SYM_FUNC_START(__aes_ce_invert) /* x0 = out block, x1 = in block */
+ ld1 {v0.4s}, [x1] /* load 16 bytes from in */
+ aesimc v1.16b, v0.16b /* apply the AES Inverse Mix Columns step */
+ st1 {v1.4s}, [x0] /* store the 16-byte result to out */
+ ret
+SYM_FUNC_END(__aes_ce_invert)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+ .text
+
+ rk .req x0
+ out .req x1
+ in .req x2
+ rounds .req x3
+ tt .req x2
+
+ .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+ .ifc \op\shift, b0
+ ubfiz \reg0, \in0, #2, #8
+ ubfiz \reg1, \in1e, #2, #8
+ .else
+ ubfx \reg0, \in0, #\shift, #8
+ ubfx \reg1, \in1e, #\shift, #8
+ .endif
+
+ /*
+ * AArch64 cannot do byte size indexed loads from a table containing
+ * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+ * valid instruction. So perform the shift explicitly first for the
+ * high bytes (the low byte is shifted implicitly by using ubfiz rather
+ * than ubfx above)
+ *
+ * When op is not 'b', a 32-bit word is loaded from tt with the byte
+ * index scaled by 4. This variant is the one selected when __hround is
+ * expanded with enc=1; its in1d argument is not referenced.
+ */
+ .ifnc \op, b
+ ldr \reg0, [tt, \reg0, uxtw #2]
+ ldr \reg1, [tt, \reg1, uxtw #2]
+ .else
+ .if \shift > 0
+ lsl \reg0, \reg0, #2
+ lsl \reg1, \reg1, #2
+ .endif
+ ldrb \reg0, [tt, \reg0, uxtw]
+ ldrb \reg1, [tt, \reg1, uxtw]
+ .endif
+ .endm
+
+ .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+ ubfx \reg0, \in0, #\shift, #8 /* 8-bit field of in0 starting at bit 'shift' */
+ ubfx \reg1, \in1d, #\shift, #8 /* same field of in1d; in1e is not referenced by this variant */
+ ldr\op \reg0, [tt, \reg0, uxtw #\sz] /* table lookup at tt, index shifted left by sz */
+ ldr\op \reg1, [tt, \reg1, uxtw #\sz]
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
+ ldp \out0, \out1, [rk], #8 /* next two round-key words; rk advances by 8 bytes */
+
+ __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
+ __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
+ __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
+ __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+ eor \out0, \out0, w12 /* fold the four lookups into each of the two output */
+ eor \out1, \out1, w13 /* words, rotated right by 0, 24, 16 and 8 bits */
+ eor \out0, \out0, w14, ror #24
+ eor \out1, \out1, w15, ror #24
+ eor \out0, \out0, w16, ror #16
+ eor \out1, \out1, w17, ror #16
+ eor \out0, \out0, \t0, ror #8
+ eor \out1, \out1, \t1, ror #8
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
+ .endm /* fround: two __hround expansions with enc=1 produce the four output words */
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
+ .endm /* iround: two __hround expansions with enc=0 produce the four output words */
+
+ .macro do_crypt, round, ttab, ltab, bsz
+ ldp w4, w5, [in] /* load the 16-byte input block into w4-w7 */
+ ldp w6, w7, [in, #8] /* last use of in; x2 is reused as tt below */
+ ldp w8, w9, [rk], #16 /* first four round-key words; rk advances by 16 bytes */
+ ldp w10, w11, [rk, #-8]
+
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+
+ eor w4, w4, w8 /* XOR the input block with the first round key */
+ eor w5, w5, w9
+ eor w6, w6, w10
+ eor w7, w7, w11
+
+ adr_l tt, \ttab
+
+ tbnz rounds, #1, 1f /* enter the loop at 1 when bit 1 of rounds is set */
+
+0: \round w8, w9, w10, w11, w4, w5, w6, w7
+ \round w4, w5, w6, w7, w8, w9, w10, w11
+
+1: subs rounds, rounds, #4 /* the loop body from 1 back to 1 covers four rounds */
+ \round w8, w9, w10, w11, w4, w5, w6, w7
+ b.ls 3f /* go to the final round when rounds was <= 4 before the subtract */
+2: \round w4, w5, w6, w7, w8, w9, w10, w11
+ b 0b
+3: adr_l tt, \ltab
+ \round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+
+ stp w4, w5, [out] /* store the 16-byte result */
+ stp w6, w7, [out, #8]
+ ret
+ .endm
+
+SYM_FUNC_START(__aes_arm64_encrypt) /* x0 = rk, x1 = out, x2 = in, x3 = rounds; final round does byte loads from aes_enc_tab + 1 */
+ do_crypt fround, aes_enc_tab, aes_enc_tab + 1, 2
+SYM_FUNC_END(__aes_arm64_encrypt)
+
+ .align 5
+SYM_FUNC_START(__aes_arm64_decrypt) /* x0 = rk, x1 = out, x2 = in, x3 = rounds; final round does byte loads from crypto_aes_inv_sbox */
+ do_crypt iround, aes_dec_tab, crypto_aes_inv_sbox, 0
+SYM_FUNC_END(__aes_arm64_decrypt)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AES block cipher, optimized for ARM64
+ *
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright 2026 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/unaligned.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes); /* enabled by aes_mod_init_arch() when the CPU reports the AES feature */
+
+struct aes_block { /* AES_BLOCK_SIZE bytes, so a round key can be copied by struct assignment */
+ u8 b[AES_BLOCK_SIZE];
+};
+
+asmlinkage void __aes_arm64_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds); /* scalar routine; called without entering a ksimd scope */
+asmlinkage void __aes_arm64_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds); /* scalar routine; takes the inverse round keys */
+asmlinkage void __aes_ce_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds); /* AES-instruction routine; callers wrap it in scoped_ksimd() */
+asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds); /* AES-instruction routine; callers wrap it in scoped_ksimd() */
+asmlinkage u32 __aes_ce_sub(u32 l); /* S-box substitution helper used by the key schedule */
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+ const struct aes_block *in); /* Inverse Mix Columns on one round key */
+
+/*
+ * Expand an AES key using the crypto extensions if supported and usable or
+ * generic code otherwise. The expanded key format is compatible between the
+ * two cases. The outputs are @rndkeys (required) and @inv_rndkeys (optional).
+ *
+ * @rndkeys: (output) encryption round keys, written as 32-bit words
+ * @inv_rndkeys: (output) decryption round keys, or NULL to skip them
+ * @in_key: raw key bytes; the crypto-extensions path reads them as unaligned
+ *          little-endian 32-bit words
+ * @key_len: length of @in_key in bytes; not validated by this function
+ * @nrounds: AES round count; used as the index of the last encryption round
+ *           key when building the decryption keys
+ */
+static void aes_expandkey_arm64(u32 rndkeys[], u32 *inv_rndkeys,
+ const u8 *in_key, int key_len, int nrounds)
+{
+ /*
+ * The AES key schedule round constants
+ */
+ static u8 const rcon[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+ };
+
+ u32 kwords = key_len / sizeof(u32); /* key length in 32-bit words */
+ struct aes_block *key_enc, *key_dec;
+ int i, j;
+
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
+ !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
+ aes_expandkey_generic(rndkeys, inv_rndkeys, in_key, key_len); /* fallback: NEON support not built in, have_aes off, or SIMD unusable now */
+ return;
+ }
+
+ for (i = 0; i < kwords; i++) /* the first kwords words are the raw key itself */
+ rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);
+
+ scoped_ksimd() { /* every __aes_ce_*() call below happens inside this scope */
+ for (i = 0; i < sizeof(rcon); i++) {
+ u32 *rki = &rndkeys[i * kwords]; /* previous group of kwords words */
+ u32 *rko = rki + kwords; /* group being generated */
+
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
+ rcon[i] ^ rki[0];
+ rko[1] = rko[0] ^ rki[1];
+ rko[2] = rko[1] ^ rki[2];
+ rko[3] = rko[2] ^ rki[3];
+
+ if (key_len == AES_KEYSIZE_192) {
+ if (i >= 7) /* final 192-bit pass: only rko[0..3] are written */
+ break;
+ rko[4] = rko[3] ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ } else if (key_len == AES_KEYSIZE_256) {
+ if (i >= 6) /* final 256-bit pass: only rko[0..3] are written */
+ break;
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ rko[6] = rko[5] ^ rki[6];
+ rko[7] = rko[6] ^ rki[7];
+ }
+ }
+
+ /*
+ * Generate the decryption keys for the Equivalent Inverse
+ * Cipher. This involves reversing the order of the round
+ * keys, and applying the Inverse Mix Columns transformation on
+ * all but the first and the last one.
+ */
+ if (inv_rndkeys) {
+ key_enc = (struct aes_block *)rndkeys;
+ key_dec = (struct aes_block *)inv_rndkeys;
+ j = nrounds; /* index of the last encryption round key */
+
+ key_dec[0] = key_enc[j]; /* copied unchanged */
+ for (i = 1, j--; j > 0; i++, j--)
+ __aes_ce_invert(key_dec + i, key_enc + j);
+ key_dec[i] = key_enc[0]; /* copied unchanged */
+ }
+ }
+}
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+ union aes_invkey_arch *inv_k,
+ const u8 *in_key, int key_len, int nrounds)
+{ /* Thin wrapper around aes_expandkey_arm64(); a NULL @inv_k is forwarded as a NULL inverse-key output */
+ aes_expandkey_arm64(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
+ in_key, key_len, nrounds);
+}
+
+/*
+ * This is here temporarily until the remaining AES mode implementations are
+ * migrated from arch/arm64/crypto/ to lib/crypto/arm64/.
+ *
+ * Checks @key_len with aes_check_keylen(), records it in @ctx->key_length, and
+ * fills @ctx->key_enc and @ctx->key_dec via aes_expandkey_arm64().
+ *
+ * Return: 0 on success, -EINVAL if aes_check_keylen() rejects @key_len (in
+ * which case @ctx is left untouched).
+ */
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len)
+{
+ if (aes_check_keylen(key_len) != 0)
+ return -EINVAL;
+ ctx->key_length = key_len;
+ aes_expandkey_arm64(ctx->key_enc, ctx->key_dec, in_key, key_len,
+ 6 + key_len / 4); /* round count derived from the key length in bytes */
+ return 0;
+}
+EXPORT_SYMBOL(ce_aes_expandkey);
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{ /* Encrypt one block from @in into @out using the round keys and round count stored in @key */
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_aes) && likely(may_use_simd())) {
+ scoped_ksimd() /* the AES-instruction routine runs inside the ksimd scope */
+ __aes_ce_encrypt(key->k.rndkeys, out, in, key->nrounds);
+ } else { /* NEON support not built in, have_aes not enabled, or SIMD unusable right now */
+ __aes_arm64_encrypt(key->k.rndkeys, out, in, key->nrounds);
+ }
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{ /* Decrypt one block from @in into @out using the inverse round keys and round count stored in @key */
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_aes) && likely(may_use_simd())) {
+ scoped_ksimd() /* the AES-instruction routine runs inside the ksimd scope */
+ __aes_ce_decrypt(key->inv_k.inv_rndkeys, out, in,
+ key->nrounds);
+ } else { /* NEON support not built in, have_aes not enabled, or SIMD unusable right now */
+ __aes_arm64_decrypt(key->inv_k.inv_rndkeys, out, in,
+ key->nrounds);
+ }
+}
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+#define aes_mod_init_arch aes_mod_init_arch /* NOTE(review): presumably lets the including code detect that an arch init hook exists - confirm */
+static void aes_mod_init_arch(void)
+{ /* Turn on the have_aes static key when the CPU reports the AES feature; otherwise it stays false */
+ if (cpu_have_named_feature(AES))
+ static_branch_enable(&have_aes);
+}
+#endif /* CONFIG_KERNEL_MODE_NEON */