lib/crypto: arm/aes: Migrate optimized code into library
author: Eric Biggers <ebiggers@kernel.org>
Mon, 12 Jan 2026 19:20:08 +0000 (11:20 -0800)
committer: Eric Biggers <ebiggers@kernel.org>
Mon, 12 Jan 2026 19:39:58 +0000 (11:39 -0800)
Move the ARM optimized single-block AES en/decryption code into
lib/crypto/, wire it up to the AES library API, and remove the
superseded "aes-arm" crypto_cipher algorithm.

The result is that both the AES library and crypto_cipher APIs are now
optimized for ARM, whereas previously only crypto_cipher was (and the
optimizations weren't enabled by default, which this fixes as well).

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-11-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
13 files changed:
arch/arm/configs/milbeaut_m10v_defconfig
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/configs/pxa_defconfig
arch/arm/crypto/Kconfig
arch/arm/crypto/Makefile
arch/arm/crypto/aes-cipher-core.S [deleted file]
arch/arm/crypto/aes-cipher-glue.c [deleted file]
arch/arm/crypto/aes-cipher.h [deleted file]
lib/crypto/Kconfig
lib/crypto/Makefile
lib/crypto/arm/aes-cipher-core.S [new file with mode: 0644]
lib/crypto/arm/aes.h [new file with mode: 0644]

index a2995eb..77b69d6 100644 (file)
@@ -98,7 +98,6 @@ CONFIG_CRYPTO_SELFTESTS=y
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_SEQIV=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_AES_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
 # CONFIG_CRYPTO_HW is not set
index 7f1fa9d..b6d3e20 100644 (file)
@@ -1286,7 +1286,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_AES_ARM=m
+CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
 CONFIG_CRYPTO_DEV_SUN4I_SS=m
index 4e53c33..0464f65 100644 (file)
@@ -706,7 +706,7 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_MICHAEL_MIC=y
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_AES_ARM=m
+CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_DEV_OMAP=m
 CONFIG_CRYPTO_DEV_OMAP_SHAM=m
index 3ea189f..eacd08f 100644 (file)
@@ -657,7 +657,7 @@ CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_DEFLATE=y
 CONFIG_CRYPTO_LZO=y
-CONFIG_CRYPTO_AES_ARM=m
+CONFIG_CRYPTO_AES=m
 CONFIG_FONTS=y
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
index 167a648..b9c28c8 100644 (file)
@@ -23,24 +23,6 @@ config CRYPTO_GHASH_ARM_CE
          that is part of the ARMv8 Crypto Extensions, or a slower variant that
          uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
-config CRYPTO_AES_ARM
-       tristate "Ciphers: AES"
-       select CRYPTO_ALGAPI
-       select CRYPTO_AES
-       help
-         Block ciphers: AES cipher algorithms (FIPS-197)
-
-         Architecture: arm
-
-         On ARM processors without the Crypto Extensions, this is the
-         fastest AES implementation for single blocks.  For multiple
-         blocks, the NEON bit-sliced implementation is usually faster.
-
-         This implementation may be vulnerable to cache timing attacks,
-         since it uses lookup tables.  However, as countermeasures it
-         disables IRQs and preloads the tables; it is hoped this makes
-         such attacks very difficult.
-
 config CRYPTO_AES_ARM_BS
        tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
        depends on KERNEL_MODE_NEON
index d6683e9..e73099e 100644 (file)
@@ -3,13 +3,11 @@
 # Arch-specific CryptoAPI modules.
 #
 
-obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
 
-aes-arm-y      := aes-cipher-core.o aes-cipher-glue.o
 aes-arm-bs-y   := aes-neonbs-core.o aes-neonbs-glue.o
 aes-arm-ce-y   := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
deleted file mode 100644 (file)
index 87567d6..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Scalar AES core transform
- *
- * Copyright (C) 2017 Linaro Ltd.
- * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
-       .text
-       .align          5
-
-       rk              .req    r0
-       rounds          .req    r1
-       in              .req    r2
-       out             .req    r3
-       ttab            .req    ip
-
-       t0              .req    lr
-       t1              .req    r2
-       t2              .req    r3
-
-       .macro          __select, out, in, idx
-       .if             __LINUX_ARM_ARCH__ < 7
-       and             \out, \in, #0xff << (8 * \idx)
-       .else
-       ubfx            \out, \in, #(8 * \idx), #8
-       .endif
-       .endm
-
-       .macro          __load, out, in, idx, sz, op
-       .if             __LINUX_ARM_ARCH__ < 7 && \idx > 0
-       ldr\op          \out, [ttab, \in, lsr #(8 * \idx) - \sz]
-       .else
-       ldr\op          \out, [ttab, \in, lsl #\sz]
-       .endif
-       .endm
-
-       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
-       __select        \out0, \in0, 0
-       __select        t0, \in1, 1
-       __load          \out0, \out0, 0, \sz, \op
-       __load          t0, t0, 1, \sz, \op
-
-       .if             \enc
-       __select        \out1, \in1, 0
-       __select        t1, \in2, 1
-       .else
-       __select        \out1, \in3, 0
-       __select        t1, \in0, 1
-       .endif
-       __load          \out1, \out1, 0, \sz, \op
-       __select        t2, \in2, 2
-       __load          t1, t1, 1, \sz, \op
-       __load          t2, t2, 2, \sz, \op
-
-       eor             \out0, \out0, t0, ror #24
-
-       __select        t0, \in3, 3
-       .if             \enc
-       __select        \t3, \in3, 2
-       __select        \t4, \in0, 3
-       .else
-       __select        \t3, \in1, 2
-       __select        \t4, \in2, 3
-       .endif
-       __load          \t3, \t3, 2, \sz, \op
-       __load          t0, t0, 3, \sz, \op
-       __load          \t4, \t4, 3, \sz, \op
-
-       .ifnb           \oldcpsr
-       /*
-        * This is the final round and we're done with all data-dependent table
-        * lookups, so we can safely re-enable interrupts.
-        */
-       restore_irqs    \oldcpsr
-       .endif
-
-       eor             \out1, \out1, t1, ror #24
-       eor             \out0, \out0, t2, ror #16
-       ldm             rk!, {t1, t2}
-       eor             \out1, \out1, \t3, ror #16
-       eor             \out0, \out0, t0, ror #8
-       eor             \out1, \out1, \t4, ror #8
-       eor             \out0, \out0, t1
-       eor             \out1, \out1, t2
-       .endm
-
-       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
-       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
-       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
-       .endm
-
-       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
-       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
-       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
-       .endm
-
-       .macro          do_crypt, round, ttab, ltab, bsz
-       push            {r3-r11, lr}
-
-       // Load keys first, to reduce latency in case they're not cached yet.
-       ldm             rk!, {r8-r11}
-
-       ldr             r4, [in]
-       ldr             r5, [in, #4]
-       ldr             r6, [in, #8]
-       ldr             r7, [in, #12]
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       rev_l           r4, t0
-       rev_l           r5, t0
-       rev_l           r6, t0
-       rev_l           r7, t0
-#endif
-
-       eor             r4, r4, r8
-       eor             r5, r5, r9
-       eor             r6, r6, r10
-       eor             r7, r7, r11
-
-       mov_l           ttab, \ttab
-       /*
-        * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
-        * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
-        * intended to make cache-timing attacks more difficult.  They may not
-        * be fully prevented, however; see the paper
-        * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
-        * ("Cache-timing attacks on AES") for a discussion of the many
-        * difficulties involved in writing truly constant-time AES software.
-        */
-        save_and_disable_irqs  t0
-       .set            i, 0
-       .rept           1024 / 128
-       ldr             r8, [ttab, #i + 0]
-       ldr             r9, [ttab, #i + 32]
-       ldr             r10, [ttab, #i + 64]
-       ldr             r11, [ttab, #i + 96]
-       .set            i, i + 128
-       .endr
-       push            {t0}            // oldcpsr
-
-       tst             rounds, #2
-       bne             1f
-
-0:     \round          r8, r9, r10, r11, r4, r5, r6, r7
-       \round          r4, r5, r6, r7, r8, r9, r10, r11
-
-1:     subs            rounds, rounds, #4
-       \round          r8, r9, r10, r11, r4, r5, r6, r7
-       bls             2f
-       \round          r4, r5, r6, r7, r8, r9, r10, r11
-       b               0b
-
-2:     .ifb            \ltab
-       add             ttab, ttab, #1
-       .else
-       mov_l           ttab, \ltab
-       // Prefetch inverse S-box for final round; see explanation above
-       .set            i, 0
-       .rept           256 / 64
-       ldr             t0, [ttab, #i + 0]
-       ldr             t1, [ttab, #i + 32]
-       .set            i, i + 64
-       .endr
-       .endif
-
-       pop             {rounds}        // oldcpsr
-       \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       rev_l           r4, t0
-       rev_l           r5, t0
-       rev_l           r6, t0
-       rev_l           r7, t0
-#endif
-
-       ldr             out, [sp]
-
-       str             r4, [out]
-       str             r5, [out, #4]
-       str             r6, [out, #8]
-       str             r7, [out, #12]
-
-       pop             {r3-r11, pc}
-
-       .align          3
-       .ltorg
-       .endm
-
-ENTRY(__aes_arm_encrypt)
-       do_crypt        fround, aes_enc_tab,, 2
-ENDPROC(__aes_arm_encrypt)
-
-       .align          5
-ENTRY(__aes_arm_decrypt)
-       do_crypt        iround, aes_dec_tab, crypto_aes_inv_sbox, 0
-ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
deleted file mode 100644 (file)
index f302db8..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Scalar AES core transform
- *
- * Copyright (C) 2017 Linaro Ltd.
- * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
- */
-
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/module.h>
-#include "aes-cipher.h"
-
-EXPORT_SYMBOL_GPL(__aes_arm_encrypt);
-EXPORT_SYMBOL_GPL(__aes_arm_decrypt);
-
-static int aes_arm_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-                         unsigned int key_len)
-{
-       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       return aes_expandkey(ctx, in_key, key_len);
-}
-
-static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-       int rounds = 6 + ctx->key_length / 4;
-
-       __aes_arm_encrypt(ctx->key_enc, rounds, in, out);
-}
-
-static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-       int rounds = 6 + ctx->key_length / 4;
-
-       __aes_arm_decrypt(ctx->key_dec, rounds, in, out);
-}
-
-static struct crypto_alg aes_alg = {
-       .cra_name                       = "aes",
-       .cra_driver_name                = "aes-arm",
-       .cra_priority                   = 200,
-       .cra_flags                      = CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize                  = AES_BLOCK_SIZE,
-       .cra_ctxsize                    = sizeof(struct crypto_aes_ctx),
-       .cra_module                     = THIS_MODULE,
-
-       .cra_cipher.cia_min_keysize     = AES_MIN_KEY_SIZE,
-       .cra_cipher.cia_max_keysize     = AES_MAX_KEY_SIZE,
-       .cra_cipher.cia_setkey          = aes_arm_setkey,
-       .cra_cipher.cia_encrypt         = aes_arm_encrypt,
-       .cra_cipher.cia_decrypt         = aes_arm_decrypt,
-
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-       .cra_alignmask                  = 3,
-#endif
-};
-
-static int __init aes_init(void)
-{
-       return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-       crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Scalar AES cipher for ARM");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/arm/crypto/aes-cipher.h b/arch/arm/crypto/aes-cipher.h
deleted file mode 100644 (file)
index d5db2b8..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef ARM_CRYPTO_AES_CIPHER_H
-#define ARM_CRYPTO_AES_CIPHER_H
-
-#include <linux/linkage.h>
-#include <linux/types.h>
-
-asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
-                                 const u8 *in, u8 *out);
-asmlinkage void __aes_arm_decrypt(const u32 rk[], int rounds,
-                                 const u8 *in, u8 *out);
-
-#endif /* ARM_CRYPTO_AES_CIPHER_H */
index 4efad77..60420b4 100644 (file)
@@ -14,6 +14,7 @@ config CRYPTO_LIB_AES
 config CRYPTO_LIB_AES_ARCH
        bool
        depends on CRYPTO_LIB_AES && !UML && !KMSAN
+       default y if ARM
 
 config CRYPTO_LIB_AESCFB
        tristate
index 01193b3..2f6b0f5 100644 (file)
@@ -21,6 +21,9 @@ obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
 libaes-y := aes.o
 ifeq ($(CONFIG_CRYPTO_LIB_AES_ARCH),y)
 CFLAGS_aes.o += -I$(src)/$(SRCARCH)
+
+libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
+
 endif # CONFIG_CRYPTO_LIB_AES_ARCH
 
 ################################################################################
diff --git a/lib/crypto/arm/aes-cipher-core.S b/lib/crypto/arm/aes-cipher-core.S
new file mode 100644 (file)
index 0000000..87567d6
--- /dev/null
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+       .text
+       .align          5
+
+       rk              .req    r0
+       rounds          .req    r1
+       in              .req    r2
+       out             .req    r3
+       ttab            .req    ip
+
+       t0              .req    lr
+       t1              .req    r2
+       t2              .req    r3
+
+       .macro          __select, out, in, idx
+       .if             __LINUX_ARM_ARCH__ < 7
+       and             \out, \in, #0xff << (8 * \idx)
+       .else
+       ubfx            \out, \in, #(8 * \idx), #8
+       .endif
+       .endm
+
+       .macro          __load, out, in, idx, sz, op
+       .if             __LINUX_ARM_ARCH__ < 7 && \idx > 0
+       ldr\op          \out, [ttab, \in, lsr #(8 * \idx) - \sz]
+       .else
+       ldr\op          \out, [ttab, \in, lsl #\sz]
+       .endif
+       .endm
+
+       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
+       __select        \out0, \in0, 0
+       __select        t0, \in1, 1
+       __load          \out0, \out0, 0, \sz, \op
+       __load          t0, t0, 1, \sz, \op
+
+       .if             \enc
+       __select        \out1, \in1, 0
+       __select        t1, \in2, 1
+       .else
+       __select        \out1, \in3, 0
+       __select        t1, \in0, 1
+       .endif
+       __load          \out1, \out1, 0, \sz, \op
+       __select        t2, \in2, 2
+       __load          t1, t1, 1, \sz, \op
+       __load          t2, t2, 2, \sz, \op
+
+       eor             \out0, \out0, t0, ror #24
+
+       __select        t0, \in3, 3
+       .if             \enc
+       __select        \t3, \in3, 2
+       __select        \t4, \in0, 3
+       .else
+       __select        \t3, \in1, 2
+       __select        \t4, \in2, 3
+       .endif
+       __load          \t3, \t3, 2, \sz, \op
+       __load          t0, t0, 3, \sz, \op
+       __load          \t4, \t4, 3, \sz, \op
+
+       .ifnb           \oldcpsr
+       /*
+        * This is the final round and we're done with all data-dependent table
+        * lookups, so we can safely re-enable interrupts.
+        */
+       restore_irqs    \oldcpsr
+       .endif
+
+       eor             \out1, \out1, t1, ror #24
+       eor             \out0, \out0, t2, ror #16
+       ldm             rk!, {t1, t2}
+       eor             \out1, \out1, \t3, ror #16
+       eor             \out0, \out0, t0, ror #8
+       eor             \out1, \out1, \t4, ror #8
+       eor             \out0, \out0, t1
+       eor             \out1, \out1, t2
+       .endm
+
+       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
+       .endm
+
+       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
+       .endm
+
+       .macro          do_crypt, round, ttab, ltab, bsz
+       push            {r3-r11, lr}
+
+       // Load keys first, to reduce latency in case they're not cached yet.
+       ldm             rk!, {r8-r11}
+
+       ldr             r4, [in]
+       ldr             r5, [in, #4]
+       ldr             r6, [in, #8]
+       ldr             r7, [in, #12]
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       rev_l           r4, t0
+       rev_l           r5, t0
+       rev_l           r6, t0
+       rev_l           r7, t0
+#endif
+
+       eor             r4, r4, r8
+       eor             r5, r5, r9
+       eor             r6, r6, r10
+       eor             r7, r7, r11
+
+       mov_l           ttab, \ttab
+       /*
+        * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+        * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
+        * intended to make cache-timing attacks more difficult.  They may not
+        * be fully prevented, however; see the paper
+        * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+        * ("Cache-timing attacks on AES") for a discussion of the many
+        * difficulties involved in writing truly constant-time AES software.
+        */
+        save_and_disable_irqs  t0
+       .set            i, 0
+       .rept           1024 / 128
+       ldr             r8, [ttab, #i + 0]
+       ldr             r9, [ttab, #i + 32]
+       ldr             r10, [ttab, #i + 64]
+       ldr             r11, [ttab, #i + 96]
+       .set            i, i + 128
+       .endr
+       push            {t0}            // oldcpsr
+
+       tst             rounds, #2
+       bne             1f
+
+0:     \round          r8, r9, r10, r11, r4, r5, r6, r7
+       \round          r4, r5, r6, r7, r8, r9, r10, r11
+
+1:     subs            rounds, rounds, #4
+       \round          r8, r9, r10, r11, r4, r5, r6, r7
+       bls             2f
+       \round          r4, r5, r6, r7, r8, r9, r10, r11
+       b               0b
+
+2:     .ifb            \ltab
+       add             ttab, ttab, #1
+       .else
+       mov_l           ttab, \ltab
+       // Prefetch inverse S-box for final round; see explanation above
+       .set            i, 0
+       .rept           256 / 64
+       ldr             t0, [ttab, #i + 0]
+       ldr             t1, [ttab, #i + 32]
+       .set            i, i + 64
+       .endr
+       .endif
+
+       pop             {rounds}        // oldcpsr
+       \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       rev_l           r4, t0
+       rev_l           r5, t0
+       rev_l           r6, t0
+       rev_l           r7, t0
+#endif
+
+       ldr             out, [sp]
+
+       str             r4, [out]
+       str             r5, [out, #4]
+       str             r6, [out, #8]
+       str             r7, [out, #12]
+
+       pop             {r3-r11, pc}
+
+       .align          3
+       .ltorg
+       .endm
+
+ENTRY(__aes_arm_encrypt)
+       do_crypt        fround, aes_enc_tab,, 2
+ENDPROC(__aes_arm_encrypt)
+
+       .align          5
+ENTRY(__aes_arm_decrypt)
+       do_crypt        iround, aes_dec_tab, crypto_aes_inv_sbox, 0
+ENDPROC(__aes_arm_decrypt)
diff --git a/lib/crypto/arm/aes.h b/lib/crypto/arm/aes.h
new file mode 100644 (file)
index 0000000..1dd7dfa
--- /dev/null
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AES block cipher, optimized for ARM
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Copyright 2026 Google LLC
+ */
+
+asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
+                                 const u8 in[AES_BLOCK_SIZE],
+                                 u8 out[AES_BLOCK_SIZE]);
+asmlinkage void __aes_arm_decrypt(const u32 inv_rk[], int rounds,
+                                 const u8 in[AES_BLOCK_SIZE],
+                                 u8 out[AES_BLOCK_SIZE]);
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+                               union aes_invkey_arch *inv_k,
+                               const u8 *in_key, int key_len, int nrounds)
+{
+       aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
+                             in_key, key_len);
+}
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+                            u8 out[AES_BLOCK_SIZE],
+                            const u8 in[AES_BLOCK_SIZE])
+{
+       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+           !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+               u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+               memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+               __aes_arm_encrypt(key->k.rndkeys, key->nrounds, bounce_buf,
+                                 bounce_buf);
+               memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+               return;
+       }
+       __aes_arm_encrypt(key->k.rndkeys, key->nrounds, in, out);
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+                            u8 out[AES_BLOCK_SIZE],
+                            const u8 in[AES_BLOCK_SIZE])
+{
+       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+           !IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
+               u8 bounce_buf[AES_BLOCK_SIZE] __aligned(4);
+
+               memcpy(bounce_buf, in, AES_BLOCK_SIZE);
+               __aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds,
+                                 bounce_buf, bounce_buf);
+               memcpy(out, bounce_buf, AES_BLOCK_SIZE);
+               return;
+       }
+       __aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds, in, out);
+}