2 * AVX2 implementation of MORUS-1280
4 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
5 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published
9 * by the Free Software Foundation.
12 #include <linux/linkage.h>
13 #include <asm/frame.h>
/*
 * SHUFFLE_MASK packs four 2-bit element selectors into one 8-bit immediate:
 * i0 lands in bits 1:0, i1 in bits 3:2, i2 in bits 5:4 and i3 in bits 7:6.
 * The MASKn values built from it are used in this file as the immediate of
 * pshufd/vpermq and as the last argument of morus1280_round.
 */
15 #define SHUFFLE_MASK(i0, i1, i2, i3) \
16 (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
/* MASK1 = 0x93: destination element k selects source element (k + 3) mod 4 */
18 #define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
/* MASK2 = 0x4e: destination element k selects source element (k + 2) mod 4 */
19 #define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
/* MASK3 = 0x39: destination element k selects source element (k + 1) mod 4 */
20 #define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
/*
 * xmm0 name used where a 128-bit instruction targets state word 0 (see the
 * movdqu in crypto_morus1280_avx2_init).
 * NOTE(review): presumably the low half of STATE0; the STATE0 definition is
 * not visible here - confirm.
 */
23 #define STATE0_LOW %xmm0
/*
 * 32-byte constant placed in its own allocatable, mergeable ("aM") read-only
 * section with an entity size of 32. From the third byte on, every byte is
 * the sum of the two preceding bytes modulo 256 (Fibonacci sequence starting
 * 0x00, 0x01). crypto_morus1280_avx2_init loads it into STATE4 through the
 * .Lmorus1280_const symbol (label line not visible here).
 */
35 .section .rodata.cst32.morus1280_const, "aM", @progbits, 32
38 .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
39 .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
40 .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
41 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
/*
 * 32-byte table in which byte i holds the value i (0x00..0x1f), placed in its
 * own mergeable read-only section with an entity size of 32.
 * crypto_morus1280_avx2_dec_tail loads it into T1 in its "mask with byte
 * count" step, right after broadcasting a byte into T0.
 * NOTE(review): the instruction that combines T0 and T1 is not visible here;
 * presumably a per-byte compare that builds a length mask - confirm.
 */
43 .section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
46 .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
47 .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
48 .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
49 .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
/*
 * One MORUS-1280 round, parameterised by five register arguments (s0..s4), a
 * bit count b and an 8-bit permutation immediate w (callers pass MASK1..3).
 * NOTE(review): only one body instruction is visible here; the rest of the
 * macro, including its terminating directive, is outside this view.
 */
53 .macro morus1280_round s0, s1, s2, s3, s4, b, w
/*
 * Shift every 64-bit lane of s0 right by (64 - b) bits.
 * NOTE(review): presumably one half of a rotate-left by b; the matching left
 * shift and the combining step are not visible - confirm.
 */
58 vpsrlq $(64 - \b), \s0, \s0
/*
 * One state update that absorbs the message block in MSG.
 * Five morus1280_round invocations run back to back; the five state words are
 * passed in an order rotated by one position each time, so every word is the
 * first macro argument exactly once. MSG is XORed into STATE1, STATE2, STATE3
 * and STATE4 immediately before the round in which that word comes first;
 * STATE0 receives no message XOR. The last two arguments of each round are
 * its bit count (13, 46, 38, 7, 4) and its permutation mask
 * (MASK1, MASK2, MASK3, MASK2, MASK1).
 * NOTE(review): the entry label and the return instruction are not visible
 * in this view.
 */
64 * __morus1280_update: internal ABI
66 * STATE[0-4] - input state
69 * STATE[0-4] - output state
74 morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
/* absorb the message into the word that leads the next round */
75 vpxor MSG, STATE1, STATE1
76 morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
77 vpxor MSG, STATE2, STATE2
78 morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
79 vpxor MSG, STATE3, STATE3
80 morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
81 vpxor MSG, STATE4, STATE4
82 morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
84 ENDPROC(__morus1280_update)
/*
 * State update without a message block: the same five morus1280_round
 * invocations, with the same argument rotation, bit counts (13, 46, 38, 7, 4)
 * and masks (MASK1, MASK2, MASK3, MASK2, MASK1) as __morus1280_update, but
 * with no XOR of MSG between rounds. The result is what __morus1280_update
 * would produce for an all-zero MSG, without needing MSG to be zeroed.
 * crypto_morus1280_avx2_init calls this routine 16 times.
 * NOTE(review): the return instruction is not visible in this view.
 */
87 * __morus1280_update_zero: internal ABI
89 * STATE[0-4] - input state
91 * STATE[0-4] - output state
95 __morus1280_update_zero:
96 morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
97 morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
98 morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
99 morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
100 morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
102 ENDPROC(__morus1280_update_zero)
/*
 * NOTE(review): only fragments of this helper are visible (no entry label,
 * branches or return); the comments below describe just the instructions
 * shown. The header states that the result is the message block in MSG.
 */
105 * __load_partial: internal ABI
110 * MSG - message block
/*
 * With MASK2, pshufd exchanges the two 64-bit halves of MSG_LOW; pinsrq then
 * replaces the low quadword with the 8 bytes at (%r8).
 */
161 pshufd $MASK2, MSG_LOW, MSG_LOW
162 pinsrq $0, (%r8), MSG_LOW
/*
 * With MASK2, vpermq exchanges the two 128-bit halves of MSG; the legacy
 * movdqu then overwrites only the low 128 bits with the 16 bytes at (%rsi).
 */
169 vpermq $MASK2, MSG, MSG
170 movdqu (%rsi), MSG_LOW
174 ENDPROC(__load_partial)
/*
 * NOTE(review): only fragments of this helper are visible (no entry label,
 * stores, branches or return); the comments below describe just the
 * instructions shown.
 */
177 * __store_partial: internal ABI
/* with MASK2, vpermq exchanges the two 128-bit halves of T0 */
196 vpermq $MASK2, T0, T0
/* copy the upper quadword of T0_LOW into %r10 */
208 pextrq $1, T0_LOW, %r10
241 ENDPROC(__store_partial)
/*
 * Builds the initial cipher state and writes it to the buffer at %rdi
 * (state). Visible steps: STATE0 is zeroed and its low 128 bits are loaded
 * from (%rdx); STATE2 is set to all ones; STATE3 is zeroed; STATE4 is loaded
 * from .Lmorus1280_const; the state is then updated 16 times via
 * __morus1280_update_zero, KEY is XORed into STATE1, and the five 32-byte
 * words are stored at offsets 0, 32, 64, 96 and 128 from %rdi.
 * NOTE(review): the end of the prototype (third parameter), the code that
 * loads KEY and STATE1, and the frame/return instructions are not visible in
 * this view.
 */
244 * void crypto_morus1280_avx2_init(void *state, const void *key,
247 ENTRY(crypto_morus1280_avx2_init)
/*
 * Clear all of STATE0 first: the legacy movdqu below writes only the low 128
 * bits and leaves the upper bits of the register unchanged.
 */
251 vpxor STATE0, STATE0, STATE0
252 movdqu (%rdx), STATE0_LOW
/* comparing a register with itself sets every bit: STATE2 = all ones */
257 vpcmpeqd STATE2, STATE2, STATE2
258 /* load all zeros: */
259 vpxor STATE3, STATE3, STATE3
260 /* load the constant: */
261 vmovdqa .Lmorus1280_const, STATE4
263 /* update 16 times with zero: */
264 call __morus1280_update_zero
265 call __morus1280_update_zero
266 call __morus1280_update_zero
267 call __morus1280_update_zero
268 call __morus1280_update_zero
269 call __morus1280_update_zero
270 call __morus1280_update_zero
271 call __morus1280_update_zero
272 call __morus1280_update_zero
273 call __morus1280_update_zero
274 call __morus1280_update_zero
275 call __morus1280_update_zero
276 call __morus1280_update_zero
277 call __morus1280_update_zero
278 call __morus1280_update_zero
279 call __morus1280_update_zero
281 /* xor-in the key again after updates: */
/*
 * NOTE(review): relies on KEY still holding the key after the 16 calls above;
 * the visible code of __morus1280_update_zero does not name KEY - confirm
 * against the full round macro.
 */
282 vpxor KEY, STATE1, STATE1
284 /* store the state: */
285 vmovdqu STATE0, (0 * 32)(%rdi)
286 vmovdqu STATE1, (1 * 32)(%rdi)
287 vmovdqu STATE2, (2 * 32)(%rdi)
288 vmovdqu STATE3, (3 * 32)(%rdi)
289 vmovdqu STATE4, (4 * 32)(%rdi)
293 ENDPROC(crypto_morus1280_avx2_init)
/*
 * Visible skeleton: load the five 32-byte state words from the buffer at %rdi
 * (state), run __morus1280_update at two call sites, and store the five words
 * back to the same offsets.
 * NOTE(review): the length check, the loads of MSG from the data pointer and
 * the loop/branch structure around the two call sites are not visible in this
 * view.
 */
296 * void crypto_morus1280_avx2_ad(void *state, const void *data,
297 * unsigned int length);
299 ENTRY(crypto_morus1280_avx2_ad)
305 /* load the state: */
306 vmovdqu (0 * 32)(%rdi), STATE0
307 vmovdqu (1 * 32)(%rdi), STATE1
308 vmovdqu (2 * 32)(%rdi), STATE2
309 vmovdqu (3 * 32)(%rdi), STATE3
310 vmovdqu (4 * 32)(%rdi), STATE4
/* first of two update call sites; each absorbs the block currently in MSG */
319 call __morus1280_update
329 call __morus1280_update
336 /* store the state: */
337 vmovdqu STATE0, (0 * 32)(%rdi)
338 vmovdqu STATE1, (1 * 32)(%rdi)
339 vmovdqu STATE2, (2 * 32)(%rdi)
340 vmovdqu STATE3, (3 * 32)(%rdi)
341 vmovdqu STATE4, (4 * 32)(%rdi)
346 ENDPROC(crypto_morus1280_avx2_ad)
/*
 * Visible skeleton: load the five 32-byte state words from the buffer at %rdi
 * (state), then at each of two call sites derive values from STATE1, STATE2
 * and STATE3 in T1 and call __morus1280_update, and finally store the state
 * back.
 * NOTE(review): the loads from src, the XORs that consume T1, the stores to
 * dst and the loop/branch structure are not visible in this view.
 */
349 * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
350 * unsigned int length);
352 ENTRY(crypto_morus1280_avx2_enc)
358 /* load the state: */
359 vmovdqu (0 * 32)(%rdi), STATE0
360 vmovdqu (1 * 32)(%rdi), STATE1
361 vmovdqu (2 * 32)(%rdi), STATE2
362 vmovdqu (3 * 32)(%rdi), STATE3
363 vmovdqu (4 * 32)(%rdi), STATE4
/*
 * T1 is used twice in a row: first as STATE1 with its quadwords permuted by
 * MASK3, then overwritten with STATE2 AND STATE3. The instruction that
 * consumes the first value must sit between these two lines.
 */
375 vpermq $MASK3, STATE1, T1
377 vpand STATE2, STATE3, T1
381 call __morus1280_update
/* second call site: the same T1 sequence as above */
394 vpermq $MASK3, STATE1, T1
396 vpand STATE2, STATE3, T1
400 call __morus1280_update
408 /* store the state: */
409 vmovdqu STATE0, (0 * 32)(%rdi)
410 vmovdqu STATE1, (1 * 32)(%rdi)
411 vmovdqu STATE2, (2 * 32)(%rdi)
412 vmovdqu STATE3, (3 * 32)(%rdi)
413 vmovdqu STATE4, (4 * 32)(%rdi)
418 ENDPROC(crypto_morus1280_avx2_enc)
/*
 * Visible skeleton: load the five 32-byte state words from the buffer at %rdi
 * (state), derive values from STATE1, STATE2 and STATE3 in T1, call
 * __morus1280_update once, and store the state back.
 * NOTE(review): the partial-block load/store and the XORs that consume T1 are
 * not visible in this view.
 */
421 * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
422 * unsigned int length);
424 ENTRY(crypto_morus1280_avx2_enc_tail)
427 /* load the state: */
428 vmovdqu (0 * 32)(%rdi), STATE0
429 vmovdqu (1 * 32)(%rdi), STATE1
430 vmovdqu (2 * 32)(%rdi), STATE2
431 vmovdqu (3 * 32)(%rdi), STATE3
432 vmovdqu (4 * 32)(%rdi), STATE4
434 /* encrypt message: */
/*
 * T1 first receives STATE1 with its quadwords permuted by MASK3 and is then
 * overwritten with STATE2 AND STATE3.
 */
439 vpermq $MASK3, STATE1, T1
441 vpand STATE2, STATE3, T1
446 call __morus1280_update
448 /* store the state: */
449 vmovdqu STATE0, (0 * 32)(%rdi)
450 vmovdqu STATE1, (1 * 32)(%rdi)
451 vmovdqu STATE2, (2 * 32)(%rdi)
452 vmovdqu STATE3, (3 * 32)(%rdi)
453 vmovdqu STATE4, (4 * 32)(%rdi)
456 ENDPROC(crypto_morus1280_avx2_enc_tail)
/*
 * Visible skeleton: load the five 32-byte state words from the buffer at %rdi
 * (state); at each of two call sites XOR STATE0 into MSG in place, derive
 * values from STATE1, STATE2 and STATE3 in T0, and call __morus1280_update;
 * finally store the state back.
 * NOTE(review): the loads from src, the XORs that consume T0, the stores to
 * dst and the loop/branch structure are not visible in this view.
 */
459 * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
460 * unsigned int length);
462 ENTRY(crypto_morus1280_avx2_dec)
468 /* load the state: */
469 vmovdqu (0 * 32)(%rdi), STATE0
470 vmovdqu (1 * 32)(%rdi), STATE1
471 vmovdqu (2 * 32)(%rdi), STATE2
472 vmovdqu (3 * 32)(%rdi), STATE3
473 vmovdqu (4 * 32)(%rdi), STATE4
/*
 * MSG ^= STATE0. T0 then holds STATE1 permuted by MASK3 and is afterwards
 * overwritten with STATE2 AND STATE3.
 */
483 vpxor STATE0, MSG, MSG
484 vpermq $MASK3, STATE1, T0
486 vpand STATE2, STATE3, T0
490 call __morus1280_update
/* second call site: the same sequence as above */
501 vpxor STATE0, MSG, MSG
502 vpermq $MASK3, STATE1, T0
504 vpand STATE2, STATE3, T0
508 call __morus1280_update
516 /* store the state: */
517 vmovdqu STATE0, (0 * 32)(%rdi)
518 vmovdqu STATE1, (1 * 32)(%rdi)
519 vmovdqu STATE2, (2 * 32)(%rdi)
520 vmovdqu STATE3, (3 * 32)(%rdi)
521 vmovdqu STATE4, (4 * 32)(%rdi)
526 ENDPROC(crypto_morus1280_avx2_dec)
/*
 * Visible skeleton: load the five 32-byte state words from the buffer at %rdi
 * (state), XOR STATE0 into MSG in place, derive values from STATE1, STATE2
 * and STATE3 in T0, prepare a byte-count mask, call __morus1280_update once,
 * and store the state back.
 * NOTE(review): the partial-block load/store, the XORs that consume T0 and
 * the instructions that finish and apply the mask are not visible in this
 * view.
 */
529 * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
530 * unsigned int length);
532 ENTRY(crypto_morus1280_avx2_dec_tail)
535 /* load the state: */
536 vmovdqu (0 * 32)(%rdi), STATE0
537 vmovdqu (1 * 32)(%rdi), STATE1
538 vmovdqu (2 * 32)(%rdi), STATE2
539 vmovdqu (3 * 32)(%rdi), STATE3
540 vmovdqu (4 * 32)(%rdi), STATE4
542 /* decrypt message: */
545 vpxor STATE0, MSG, MSG
/* T0: STATE1 permuted by MASK3, then overwritten with STATE2 AND STATE3 */
546 vpermq $MASK3, STATE1, T0
548 vpand STATE2, STATE3, T0
554 /* mask with byte count: */
/*
 * Replicate the lowest byte of T0_LOW into every byte of T0 and load the
 * 0x00..0x1f index table into T1.
 * NOTE(review): presumably T0_LOW holds the byte count and a per-byte compare
 * against T1 follows; neither instruction is visible here - confirm.
 */
556 vpbroadcastb T0_LOW, T0
557 vmovdqa .Lmorus1280_counter, T1
561 call __morus1280_update
563 /* store the state: */
564 vmovdqu STATE0, (0 * 32)(%rdi)
565 vmovdqu STATE1, (1 * 32)(%rdi)
566 vmovdqu STATE2, (2 * 32)(%rdi)
567 vmovdqu STATE3, (3 * 32)(%rdi)
568 vmovdqu STATE4, (4 * 32)(%rdi)
572 ENDPROC(crypto_morus1280_avx2_dec_tail)
/*
 * Finalisation, as far as visible: load the five 32-byte state words from the
 * buffer at %rdi (state), XOR STATE0 into STATE4, build a length block in MSG
 * from assoclen (%rdx, low quadword) and cryptlen (%rcx, next quadword), both
 * multiplied by 8, run __morus1280_update ten times, then XOR STATE0 into MSG
 * and derive values from STATE1, STATE2 and STATE3 in T0.
 * NOTE(review): the clearing of MSG before the inserts, the load and store of
 * the tag_xor buffer, and the XORs that consume T0 are not visible in this
 * view.
 */
575 * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
576 * u64 assoclen, u64 cryptlen);
578 ENTRY(crypto_morus1280_avx2_final)
581 /* load the state: */
582 vmovdqu (0 * 32)(%rdi), STATE0
583 vmovdqu (1 * 32)(%rdi), STATE1
584 vmovdqu (2 * 32)(%rdi), STATE2
585 vmovdqu (3 * 32)(%rdi), STATE3
586 vmovdqu (4 * 32)(%rdi), STATE4
588 /* xor state[0] into state[4]: */
589 vpxor STATE0, STATE4, STATE4
591 /* prepare length block: */
/* quadword 0 = assoclen (%rdx), quadword 1 = cryptlen (%rcx) */
593 vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
594 vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
595 vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
/* ten updates, all taking MSG as the message block */
598 call __morus1280_update
599 call __morus1280_update
600 call __morus1280_update
601 call __morus1280_update
602 call __morus1280_update
603 call __morus1280_update
604 call __morus1280_update
605 call __morus1280_update
606 call __morus1280_update
607 call __morus1280_update
/*
 * MSG ^= STATE0. T0 then holds STATE1 permuted by MASK3 and is afterwards
 * overwritten with STATE2 AND STATE3.
 */
612 vpxor STATE0, MSG, MSG
613 vpermq $MASK3, STATE1, T0
615 vpand STATE2, STATE3, T0
621 ENDPROC(crypto_morus1280_avx2_final)