// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16
#define GCM_IV_SIZE		12
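
/*
 * Hash key state. The excerpt below only uses these fields, so the layout is
 * reconstructed from that usage and should be read as a sketch: k keeps the
 * raw hash key H for the software fallback (gf128mul), while h, h2, h3 and
 * h4 hold H^1..H^4 in the shifted representation consumed by the PMULL
 * assembler.
 */
struct ghash_key {
	u64			h[2];
	u64			h2[2];
	u64			h3[2];
	u64			h4[2];

	be128			k;
};
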
struct ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
	u8 buf[GHASH_BLOCK_SIZE];
	u32 count;
};

struct gcm_aes_ctx {
	struct crypto_aes_ctx	aes_key;
	struct ghash_key	ghash_key;
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       struct ghash_key const *k,
				       const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      struct ghash_key const *k,
				      const char *head);

asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
				  struct ghash_key const *k, u64 dg[],
				  u8 ctr[], u32 const rk[], int rounds,
				  u8 tag[]);

asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
				  struct ghash_key const *k, u64 dg[],
				  u8 ctr[], u32 const rk[], int rounds,
				  u8 tag[]);
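
/*
 * The _p64 helpers use the 64x64->128 PMULL instruction provided by the
 * Crypto Extensions; the _p8 variant is the slower NEON fallback built from
 * 8-bit polynomial multiplies for cores that lack the Crypto Extensions.
 */
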
static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}
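
/*
 * Process 'blocks' full GHASH blocks (optionally preceded by a buffered
 * 'head' block): use the PMULL/NEON helper when the SIMD unit may be used,
 * otherwise fall back to the generic gf128mul_lle() implementation.
 */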
static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head,
			    void (*simd_update)(int blocks, u64 dg[],
						const char *src,
						struct ghash_key const *k,
						const char *head))
{
	if (likely(crypto_simd_usable() && simd_update)) {
		kernel_neon_begin();
		simd_update(blocks, dg, src, key, head);
		kernel_neon_end();
	} else {
		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

		do {
			const u8 *in = src;

			if (head) {
				in = head;
				blocks++;
				head = NULL;
			} else {
				src += GHASH_BLOCK_SIZE;
			}

			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
			gf128mul_lle(&dst, &key->k);
		} while (--blocks);

		dg[0] = be64_to_cpu(dst.b);
		dg[1] = be64_to_cpu(dst.a);
	}
}

/* avoid hogging the CPU for too long */
#define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)
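/* i.e. at most 64 KiB (4096 16-byte blocks) are hashed per call */
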
static int __ghash_update(struct shash_desc *desc, const u8 *src,
			  unsigned int len,
			  void (*simd_update)(int blocks, u64 dg[],
					      const char *src,
					      struct ghash_key const *k,
					      const char *head))
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	ctx->count += len;

	if ((partial + len) >= GHASH_BLOCK_SIZE) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
		int blocks;

		if (partial) {
			int p = GHASH_BLOCK_SIZE - partial;

			memcpy(ctx->buf + partial, src, p);
			src += p;
			len -= p;
		}

		blocks = len / GHASH_BLOCK_SIZE;
		len %= GHASH_BLOCK_SIZE;

		do {
			int chunk = min(blocks, MAX_BLOCKS);

			ghash_do_update(chunk, ctx->digest, src, key,
					partial ? ctx->buf : NULL,
					simd_update);

			blocks -= chunk;
			src += chunk * GHASH_BLOCK_SIZE;
			partial = 0;
		} while (unlikely(blocks > 0));
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);
	return 0;
}

static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
			   unsigned int len)
{
	return __ghash_update(desc, src, len, pmull_ghash_update_p8);
}

static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
			    unsigned int len)
{
	return __ghash_update(desc, src, len, pmull_ghash_update_p64);
}

static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
				pmull_ghash_update_p8);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}

static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
				pmull_ghash_update_p64);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}
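
/*
 * Put a hash key (or a power of it) into the shifted, bit-reflected form the
 * PMULL assembler expects: shift left by one and, if the top bit carries
 * out, fold in the 0xc2...00 constant, which is how the GHASH reduction
 * polynomial is represented in this form.
 */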
static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}
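
/*
 * Expand the hash key: besides H itself, precompute H^2, H^3 and H^4 so the
 * PMULL code can aggregate four blocks per reduction.
 */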
static int __ghash_setkey(struct ghash_key *key,
			  const u8 *inkey, unsigned int keylen)
{
	be128 h;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

	ghash_reflect(key->h, &key->k);

	h = key->k;
	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h2, &h);

	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h3, &h);

	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h4, &h);

	return 0;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE) {
		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	return __ghash_setkey(key, inkey, keylen);
}

static struct shash_alg ghash_alg[] = {{
	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-neon",
	.base.cra_priority	= 100,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update_p8,
	.final			= ghash_final_p8,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),
}, {
	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-ce",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update_p64,
	.final			= ghash_final_p64,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),
}};

static int num_rounds(struct crypto_aes_ctx *ctx)
{
	/*
	 * # of rounds specified by AES:
	 * 128 bit key		10 rounds
	 * 192 bit key		12 rounds
	 * 256 bit key		14 rounds
	 * => n byte key	=> 6 + (n/4) rounds
	 */
	return 6 + ctx->key_length / 4;
}
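
/*
 * Expand the AES key and derive the hash key H by encrypting an all-zero
 * block, then hand H to __ghash_setkey() to precompute its powers.
 */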
static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
		      unsigned int keylen)
{
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
	u8 key[GHASH_BLOCK_SIZE];
	int ret;

	ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
	if (ret) {
		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});

	return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
}

static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
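{
	/*
	 * The body is not part of this excerpt; a minimal sketch that
	 * accepts the ICV sizes GCM permits (4, 8 and 12..16 bytes).
	 */
	switch (authsize) {
	case 4:
	case 8:
	case 12 ... 16:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
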
static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_aes_ctx *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
				*buf_count ? buf : NULL,
				pmull_ghash_update_p64);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}
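
/*
 * Feed the associated data into the GHASH state by walking the AAD
 * scatterlist, buffering any partial block in 'buf' between chunks.
 */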
static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	u32 len = req->assoclen;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		u32 n = scatterwalk_clamp(&walk, len);
		u8 *p;

		if (!n) {
			scatterwalk_start(&walk, sg_next(walk.sg));
			n = scatterwalk_clamp(&walk, len);
		}
		p = scatterwalk_map(&walk);

		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
		len -= n;

		scatterwalk_unmap(p);
		scatterwalk_advance(&walk, n);
		scatterwalk_done(&walk, 0, len);
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
				pmull_ghash_update_p64);
	}
}
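
/*
 * GCM encryption: hash the AAD, then run CTR encryption and GHASH over the
 * ciphertext. When NEON is usable, the fused pmull_gcm_encrypt() assembler
 * does both and also produces the tag; otherwise the AES library cipher and
 * the gf128mul-based GHASH fallback are used.
 */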
static int gcm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	int nrounds = num_rounds(&ctx->aes_key);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u8 iv[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	be128 lengths;
	u8 *tag;
	int err;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64(req->cryptlen * 8);

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(2, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	if (likely(crypto_simd_usable())) {
		do {
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int nbytes = walk.nbytes;

			tag = (u8 *)&lengths;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
				src = dst = memcpy(buf + sizeof(buf) - nbytes,
						   src, nbytes);
			} else if (nbytes < walk.total) {
				nbytes &= ~(AES_BLOCK_SIZE - 1);
				tag = NULL;
			}

			kernel_neon_begin();
			pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
					  iv, ctx->aes_key.key_enc, nrounds,
					  tag);
			kernel_neon_end();

			if (unlikely(!nbytes))
				break;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
				memcpy(walk.dst.virt.addr,
				       buf + sizeof(buf) - nbytes, nbytes);

			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		} while (walk.nbytes);
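	/*
	 * Non-SIMD fallback below: CTR-encrypt with the AES library cipher
	 * and hash the resulting ciphertext with the gf128mul-based GHASH.
	 */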
	} else {
		while (walk.nbytes >= AES_BLOCK_SIZE) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int remaining = blocks;

			do {
				aes_encrypt(&ctx->aes_key, buf, iv);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--remaining > 0);

			ghash_do_update(blocks, dg, walk.dst.virt.addr,
					&ctx->ghash_key, NULL, NULL);

			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}

		/* handle the tail */
		if (walk.nbytes) {
			aes_encrypt(&ctx->aes_key, buf, iv);

			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
				       buf, walk.nbytes);

			memcpy(buf, walk.dst.virt.addr, walk.nbytes);
			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
		}

		tag = (u8 *)&lengths;
		ghash_do_update(1, dg, tag, &ctx->ghash_key,
				walk.nbytes ? buf : NULL, NULL);

		if (walk.nbytes)
			err = skcipher_walk_done(&walk, 0);

		put_unaligned_be64(dg[1], tag);
		put_unaligned_be64(dg[0], tag + 8);
		put_unaligned_be32(1, iv + GCM_IV_SIZE);
		aes_encrypt(&ctx->aes_key, iv, iv);
		crypto_xor(tag, iv, AES_BLOCK_SIZE);
	}

	if (err)
		return err;

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}
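
/*
 * GCM decryption mirrors gcm_encrypt(), except that GHASH runs over the
 * ciphertext before it is decrypted and the computed tag is compared
 * against the one stored at the end of the source buffer.
 */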
static int gcm_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	unsigned int authsize = crypto_aead_authsize(aead);
	int nrounds = num_rounds(&ctx->aes_key);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u8 iv[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	be128 lengths;
	u8 *tag;
	int err;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(2, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	if (likely(crypto_simd_usable())) {
		do {
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int nbytes = walk.nbytes;

			tag = (u8 *)&lengths;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
				src = dst = memcpy(buf + sizeof(buf) - nbytes,
						   src, nbytes);
			} else if (nbytes < walk.total) {
				nbytes &= ~(AES_BLOCK_SIZE - 1);
				tag = NULL;
			}

			kernel_neon_begin();
			pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
					  iv, ctx->aes_key.key_enc, nrounds,
					  tag);
			kernel_neon_end();

			if (unlikely(!nbytes))
				break;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
				memcpy(walk.dst.virt.addr,
				       buf + sizeof(buf) - nbytes, nbytes);

			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		} while (walk.nbytes);
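	/*
	 * Non-SIMD fallback below: hash each block of ciphertext first, then
	 * CTR-decrypt it with the AES library cipher.
	 */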
	} else {
		while (walk.nbytes >= AES_BLOCK_SIZE) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;

			ghash_do_update(blocks, dg, walk.src.virt.addr,
					&ctx->ghash_key, NULL, NULL);

			do {
				aes_encrypt(&ctx->aes_key, buf, iv);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--blocks > 0);

			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}

		/* handle the tail */
		if (walk.nbytes) {
			memcpy(buf, walk.src.virt.addr, walk.nbytes);
			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
		}

		tag = (u8 *)&lengths;
		ghash_do_update(1, dg, tag, &ctx->ghash_key,
				walk.nbytes ? buf : NULL, NULL);

		if (walk.nbytes) {
			aes_encrypt(&ctx->aes_key, buf, iv);

			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
				       buf, walk.nbytes);

			err = skcipher_walk_done(&walk, 0);
		}

		put_unaligned_be64(dg[1], tag);
		put_unaligned_be64(dg[0], tag + 8);
		put_unaligned_be32(1, iv + GCM_IV_SIZE);
		aes_encrypt(&ctx->aes_key, iv, iv);
		crypto_xor(tag, iv, AES_BLOCK_SIZE);
	}

	if (err)
		return err;

	/* compare calculated auth tag with the stored one */
	scatterwalk_map_and_copy(buf, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	if (crypto_memneq(tag, buf, authsize))
		return -EBADMSG;

	return 0;
}

static struct aead_alg gcm_aes_alg = {
	.ivsize			= GCM_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_setkey,
	.setauthsize		= gcm_setauthsize,
	.encrypt		= gcm_encrypt,
	.decrypt		= gcm_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
	.base.cra_module	= THIS_MODULE,
};
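
/*
 * Registration: the NEON-only "ghash-neon" shash works on any ASIMD-capable
 * CPU; the "ghash-ce" shash and the "gcm-aes-ce" AEAD additionally require
 * the PMULL instruction, so they are only registered when that feature is
 * present.
 */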
static int __init ghash_ce_mod_init(void)
{
	int ret;

	if (!cpu_have_named_feature(ASIMD))
		return -ENODEV;

	if (cpu_have_named_feature(PMULL))
		ret = crypto_register_shashes(ghash_alg,
					      ARRAY_SIZE(ghash_alg));
	else
		/* only register the first array element */
		ret = crypto_register_shash(ghash_alg);

	if (ret)
		return ret;

	if (cpu_have_named_feature(PMULL)) {
		ret = crypto_register_aead(&gcm_aes_alg);
		if (ret)
			crypto_unregister_shashes(ghash_alg,
						  ARRAY_SIZE(ghash_alg));
	}
	return ret;
}

static void __exit ghash_ce_mod_exit(void)
{
	if (cpu_have_named_feature(PMULL))
		crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
	else
		crypto_unregister_shash(ghash_alg);
	crypto_unregister_aead(&gcm_aes_alg);
}

static const struct cpu_feature ghash_cpu_feature[] = {
	{ cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);