1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * SSE2 implementation of MORUS-640
5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
9 #include <linux/linkage.h>
10 #include <asm/frame.h>
/*
 * SHUFFLE_MASK packs four 2-bit indices into one 8-bit value: i0 lands in
 * bits 1:0, i1 in bits 3:2, i2 in bits 5:4 and i3 in bits 7:6.
 *
 * MASK1..MASK3 are handed to morus640_round as its last argument, and
 * MASK3 is also used directly as a pshufd immediate. As a pshufd
 * immediate, index k selects the source 32-bit lane copied into
 * destination lane k, so destination lane k receives:
 *   MASK1 (3, 0, 1, 2) - source lane (k + 3) mod 4
 *   MASK2 (2, 3, 0, 1) - source lane (k + 2) mod 4
 *   MASK3 (1, 2, 3, 0) - source lane (k + 1) mod 4
 */
12 #define SHUFFLE_MASK(i0, i1, i2, i3) \
13 (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
15 #define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
16 #define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
17 #define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
29 .section .rodata.cst16.morus640_const, "aM", @progbits, 32
/*
 * 32 bytes of read-only constant data, matching the 32-byte entry size
 * given for this mergeable ("M") section. Read in order, the bytes are the
 * Fibonacci sequence reduced modulo 256: 0x00, 0x01, 0x01, 0x02, 0x03,
 * 0x05, 0x08, 0x0d, 0x15, ...
 * NOTE(review): presumably the first and second 16 bytes are what
 * .Lmorus640_const_0 and .Lmorus640_const_1 refer to - confirm against
 * the label definitions.
 */
32 .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
33 .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
35 .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
36 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
38 .section .rodata.cst16.morus640_counter, "aM", @progbits, 16
/*
 * One 16-byte constant whose bytes are 0x00..0x0f in ascending order, so
 * every byte holds its own offset within the block.
 * NOTE(review): presumably this is the data behind .Lmorus640_counter -
 * confirm against the label definition.
 */
41 .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
42 .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
46 .macro morus640_round s0, s1, s2, s3, s4, b, w
59 * __morus640_update: internal ABI
61 * STATE[0-4] - input state
64 * STATE[0-4] - output state
/*
 * Five invocations of the morus640_round macro. The five state registers
 * are passed rotated by one position on each invocation, so each STATEn
 * occupies the macro's first (s0) parameter exactly once. The last two
 * arguments fill the macro's `b` and `w` parameters:
 *   b = 5, 31, 7, 22, 13
 *   w = MASK1, MASK2, MASK3, MASK2, MASK1
 */
69 morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
71 morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
73 morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
75 morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
77 morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
79 ENDPROC(__morus640_update)
83 * __morus640_update_zero: internal ABI
85 * STATE[0-4] - input state
87 * STATE[0-4] - output state
91 __morus640_update_zero:
/*
 * Applies morus640_round five times in a row, shifting the state register
 * list by one position per invocation (STATE0 first, then STATE1 first,
 * and so on through STATE4). The per-round `b` / `w` macro arguments are
 * 5/MASK1, 31/MASK2, 7/MASK3, 22/MASK2 and 13/MASK1.
 * NOTE(review): presumably this is the state update for an all-zero
 * message block, which is why no message register appears here - confirm.
 */
92 morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
93 morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
94 morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
95 morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
96 morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
98 ENDPROC(__morus640_update_zero)
101 * __load_partial: internal ABI
106 * MSG - message block
164 ENDPROC(__load_partial)
167 * __store_partial: internal ABI
222 ENDPROC(__store_partial)
225 * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
227 ENTRY(crypto_morus640_sse2_init)
/*
 * Builds the initial cipher state in STATE0..STATE4, mixes it with 16
 * calls to __morus640_update_zero, and writes the five 16-byte state words
 * to consecutive slots of the buffer at %rdi.
 * Assuming the System V AMD64 argument order for the prototype above,
 * %rdi is `state` and %rdx is `iv`.
 */
/* STATE0 = 16 bytes read from (%rdx); movdqu accepts an unaligned pointer */
231 movdqu (%rdx), STATE0
/* comparing a register with itself sets every bit of STATE2 */
236 pcmpeqd STATE2, STATE2
237 /* load the constants: */
/*
 * movdqa needs its memory operand 16-byte aligned.
 * NOTE(review): both loads use absolute symbol addressing rather than
 * RIP-relative `sym(%rip)` - confirm this is acceptable for the way this
 * object is linked.
 */
238 movdqa .Lmorus640_const_0, STATE3
239 movdqa .Lmorus640_const_1, STATE4
241 /* update 16 times with zero: */
242 call __morus640_update_zero
243 call __morus640_update_zero
244 call __morus640_update_zero
245 call __morus640_update_zero
246 call __morus640_update_zero
247 call __morus640_update_zero
248 call __morus640_update_zero
249 call __morus640_update_zero
250 call __morus640_update_zero
251 call __morus640_update_zero
252 call __morus640_update_zero
253 call __morus640_update_zero
254 call __morus640_update_zero
255 call __morus640_update_zero
256 call __morus640_update_zero
257 call __morus640_update_zero
259 /* xor-in the key again after updates: */
262 /* store the state: */
/* unaligned stores: slot n of the buffer lives at byte offset n * 16 */
263 movdqu STATE0, (0 * 16)(%rdi)
264 movdqu STATE1, (1 * 16)(%rdi)
265 movdqu STATE2, (2 * 16)(%rdi)
266 movdqu STATE3, (3 * 16)(%rdi)
267 movdqu STATE4, (4 * 16)(%rdi)
271 ENDPROC(crypto_morus640_sse2_init)
274 * void crypto_morus640_sse2_ad(void *state, const void *data,
275 * unsigned int length);
277 ENTRY(crypto_morus640_sse2_ad)
/*
 * Reads the five 16-byte state words from the buffer at %rdi into
 * STATE0..STATE4, runs __morus640_update on them, and writes the updated
 * state back to the same buffer. __morus640_update is reached from two
 * separate call sites in this function.
 */
283 /* load the state: */
/* unaligned loads: slot n of the buffer lives at byte offset n * 16 */
284 movdqu (0 * 16)(%rdi), STATE0
285 movdqu (1 * 16)(%rdi), STATE1
286 movdqu (2 * 16)(%rdi), STATE2
287 movdqu (3 * 16)(%rdi), STATE3
288 movdqu (4 * 16)(%rdi), STATE4
297 call __morus640_update
307 call __morus640_update
314 /* store the state: */
315 movdqu STATE0, (0 * 16)(%rdi)
316 movdqu STATE1, (1 * 16)(%rdi)
317 movdqu STATE2, (2 * 16)(%rdi)
318 movdqu STATE3, (3 * 16)(%rdi)
319 movdqu STATE4, (4 * 16)(%rdi)
324 ENDPROC(crypto_morus640_sse2_ad)
327 * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
328 * unsigned int length);
330 ENTRY(crypto_morus640_sse2_enc)
/*
 * Loads the state from the buffer at %rdi into STATE0..STATE4, and stores
 * it back to that buffer at the end. Between those, each of the two
 * __morus640_update call sites is preceded by a pshufd that derives T1
 * from STATE1.
 */
336 /* load the state: */
337 movdqu (0 * 16)(%rdi), STATE0
338 movdqu (1 * 16)(%rdi), STATE1
339 movdqu (2 * 16)(%rdi), STATE2
340 movdqu (3 * 16)(%rdi), STATE3
341 movdqu (4 * 16)(%rdi), STATE4
/* T1 = STATE1 permuted by MASK3: 32-bit lane k takes source lane (k + 1) mod 4 */
353 pshufd $MASK3, STATE1, T1
360 call __morus640_update
/* same STATE1 permutation into T1 as at the first call site */
373 pshufd $MASK3, STATE1, T1
380 call __morus640_update
388 /* store the state: */
389 movdqu STATE0, (0 * 16)(%rdi)
390 movdqu STATE1, (1 * 16)(%rdi)
391 movdqu STATE2, (2 * 16)(%rdi)
392 movdqu STATE3, (3 * 16)(%rdi)
393 movdqu STATE4, (4 * 16)(%rdi)
398 ENDPROC(crypto_morus640_sse2_enc)
401 * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
402 * unsigned int length);
404 ENTRY(crypto_morus640_sse2_enc_tail)
/*
 * Loads the state from the buffer at %rdi into STATE0..STATE4, performs a
 * single __morus640_update, and stores the state back to the same buffer.
 */
407 /* load the state: */
408 movdqu (0 * 16)(%rdi), STATE0
409 movdqu (1 * 16)(%rdi), STATE1
410 movdqu (2 * 16)(%rdi), STATE2
411 movdqu (3 * 16)(%rdi), STATE3
412 movdqu (4 * 16)(%rdi), STATE4
414 /* encrypt message: */
/* T1 = STATE1 permuted by MASK3: 32-bit lane k takes source lane (k + 1) mod 4 */
419 pshufd $MASK3, STATE1, T1
427 call __morus640_update
429 /* store the state: */
430 movdqu STATE0, (0 * 16)(%rdi)
431 movdqu STATE1, (1 * 16)(%rdi)
432 movdqu STATE2, (2 * 16)(%rdi)
433 movdqu STATE3, (3 * 16)(%rdi)
434 movdqu STATE4, (4 * 16)(%rdi)
438 ENDPROC(crypto_morus640_sse2_enc_tail)
441 * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
442 * unsigned int length);
444 ENTRY(crypto_morus640_sse2_dec)
/*
 * Loads the state from the buffer at %rdi into STATE0..STATE4, and stores
 * it back to that buffer at the end. Between those, each of the two
 * __morus640_update call sites is preceded by a pshufd that derives T0
 * from STATE1.
 */
450 /* load the state: */
451 movdqu (0 * 16)(%rdi), STATE0
452 movdqu (1 * 16)(%rdi), STATE1
453 movdqu (2 * 16)(%rdi), STATE2
454 movdqu (3 * 16)(%rdi), STATE3
455 movdqu (4 * 16)(%rdi), STATE4
/* T0 = STATE1 permuted by MASK3: 32-bit lane k takes source lane (k + 1) mod 4 */
466 pshufd $MASK3, STATE1, T0
473 call __morus640_update
/* same STATE1 permutation into T0 as at the first call site */
485 pshufd $MASK3, STATE1, T0
492 call __morus640_update
500 /* store the state: */
501 movdqu STATE0, (0 * 16)(%rdi)
502 movdqu STATE1, (1 * 16)(%rdi)
503 movdqu STATE2, (2 * 16)(%rdi)
504 movdqu STATE3, (3 * 16)(%rdi)
505 movdqu STATE4, (4 * 16)(%rdi)
510 ENDPROC(crypto_morus640_sse2_dec)
513 * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
514 * unsigned int length);
516 ENTRY(crypto_morus640_sse2_dec_tail)
/*
 * Loads the state from the buffer at %rdi into STATE0..STATE4, performs a
 * single __morus640_update, and stores the state back to the same buffer.
 */
519 /* load the state: */
520 movdqu (0 * 16)(%rdi), STATE0
521 movdqu (1 * 16)(%rdi), STATE1
522 movdqu (2 * 16)(%rdi), STATE2
523 movdqu (3 * 16)(%rdi), STATE3
524 movdqu (4 * 16)(%rdi), STATE4
526 /* decrypt message: */
/* T0 = STATE1 permuted by MASK3: 32-bit lane k takes source lane (k + 1) mod 4 */
530 pshufd $MASK3, STATE1, T0
539 /* mask with byte count: */
/*
 * T1 = the 16-byte counter constant; movdqa needs it 16-byte aligned.
 * NOTE(review): absolute symbol addressing rather than RIP-relative
 * `sym(%rip)` - confirm this is acceptable for the way this object is
 * linked.
 */
545 movdqa .Lmorus640_counter, T1
549 call __morus640_update
551 /* store the state: */
552 movdqu STATE0, (0 * 16)(%rdi)
553 movdqu STATE1, (1 * 16)(%rdi)
554 movdqu STATE2, (2 * 16)(%rdi)
555 movdqu STATE3, (3 * 16)(%rdi)
556 movdqu STATE4, (4 * 16)(%rdi)
560 ENDPROC(crypto_morus640_sse2_dec_tail)
563 * void crypto_morus640_sse2_final(void *state, void *tag_xor,
564 * u64 assoclen, u64 cryptlen);
566 ENTRY(crypto_morus640_sse2_final)
/*
 * Loads the state from the buffer at %rdi into STATE0..STATE4, prepares a
 * length block in MSG, and runs ten consecutive __morus640_update calls.
 */
569 /* load the state: */
570 movdqu (0 * 16)(%rdi), STATE0
571 movdqu (1 * 16)(%rdi), STATE1
572 movdqu (2 * 16)(%rdi), STATE2
573 movdqu (3 * 16)(%rdi), STATE3
574 movdqu (4 * 16)(%rdi), STATE4
576 /* xor state[0] into state[4]: */
579 /* prepare length block: */
/* psllq shifts each 64-bit half of MSG independently */
584 psllq $3, MSG /* multiply by 8 (to get bit count) */
/* ten state updates in a row */
587 call __morus640_update
588 call __morus640_update
589 call __morus640_update
590 call __morus640_update
591 call __morus640_update
592 call __morus640_update
593 call __morus640_update
594 call __morus640_update
595 call __morus640_update
596 call __morus640_update
/* T0 = STATE1 permuted by MASK3: 32-bit lane k takes source lane (k + 1) mod 4 */
602 pshufd $MASK3, STATE1, T0
612 ENDPROC(crypto_morus640_sse2_final)