crypto: arm - use a pattern rule for generating *.S files
[linux-2.6-microblaze.git] / arch / arm64 / crypto / sha512-core.S_shipped
1 // SPDX-License-Identifier: GPL-2.0
2
3 // This code is taken from the OpenSSL project but the author (Andy Polyakov)
4 // has relicensed it under the GPLv2. Therefore this program is free software;
5 // you can redistribute it and/or modify it under the terms of the GNU General
6 // Public License version 2 as published by the Free Software Foundation.
7 //
8 // The original headers, including the original license headers, are
9 // included below for completeness.
10
11 // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
12 //
13 // Licensed under the OpenSSL license (the "License").  You may not use
14 // this file except in compliance with the License.  You can obtain a copy
15 // in the file LICENSE in the source distribution or at
16 // https://www.openssl.org/source/license.html
17
18 // ====================================================================
19 // Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
20 // project. The module is, however, dual licensed under OpenSSL and
21 // CRYPTOGAMS licenses depending on where you obtain it. For further
22 // details see http://www.openssl.org/~appro/cryptogams/.
23 // ====================================================================
24 //
25 // SHA256/512 for ARMv8.
26 //
27 // Performance in cycles per processed byte and improvement coefficient
28 // over code generated with "default" compiler:
29 //
30 //              SHA256-hw       SHA256(*)       SHA512
31 // Apple A7     1.97            10.5 (+33%)     6.73 (-1%(**))
32 // Cortex-A53   2.38            15.5 (+115%)    10.0 (+150%(***))
33 // Cortex-A57   2.31            11.6 (+86%)     7.51 (+260%(***))
34 // Denver       2.01            10.5 (+26%)     6.70 (+8%)
35 // X-Gene                       20.0 (+100%)    12.8 (+300%(***))
36 // Mongoose     2.36            13.0 (+50%)     8.36 (+33%)
37 //
38 // (*)  Software SHA256 results are of lesser relevance, presented
39 //      mostly for informational purposes.
40 // (**) The result is a trade-off: it's possible to improve it by
41 //      10% (or by 1 cycle per round), but at the cost of 20% loss
42 //      on Cortex-A53 (or by 4 cycles per round).
43 // (***)        Super-impressive coefficients over gcc-generated code are
44 //      indication of some compiler "pathology", most notably code
45 //      generated with -mgeneral-regs-only is significantly faster
46 //      and the gap is only 40-90%.
47 //
48 // October 2016.
49 //
50 // Originally it was reckoned that it makes no sense to implement NEON
51 // version of SHA256 for 64-bit processors. This is because performance
52 // improvement on most wide-spread Cortex-A5x processors was observed
53 // to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
54 // observed that 32-bit NEON SHA256 performs significantly better than
55 // 64-bit scalar version on *some* of the more recent processors. As a
56 // result, a 64-bit NEON version of SHA256 was added to provide the best
57 // all-round performance. For example it executes ~30% faster on X-Gene
58 // and Mongoose. [For reference, NEON version of SHA512 is bound to
59 // deliver much less improvement, likely *negative* on Cortex-A5x.
60 // Which is why NEON support is limited to SHA256.]
61
62 #ifndef __KERNEL__
63 # include "arm_arch.h"
64 #endif
65
66 .text
67
68 .extern OPENSSL_armcap_P
69 .globl  sha512_block_data_order
70 .type   sha512_block_data_order,%function
71 .align  6
72 sha512_block_data_order:
73         stp     x29,x30,[sp,#-128]!
74         add     x29,sp,#0
75
76         stp     x19,x20,[sp,#16]
77         stp     x21,x22,[sp,#32]
78         stp     x23,x24,[sp,#48]
79         stp     x25,x26,[sp,#64]
80         stp     x27,x28,[sp,#80]
81         sub     sp,sp,#4*8
82
83         ldp     x20,x21,[x0]                            // load context
84         ldp     x22,x23,[x0,#2*8]
85         ldp     x24,x25,[x0,#4*8]
86         add     x2,x1,x2,lsl#7  // end of input
87         ldp     x26,x27,[x0,#6*8]
88         adr     x30,.LK512
89         stp     x0,x2,[x29,#96]
90
91 .Loop:
92         ldp     x3,x4,[x1],#2*8
93         ldr     x19,[x30],#8                    // *K++
94         eor     x28,x21,x22                             // magic seed
95         str     x1,[x29,#112]
96 #ifndef __AARCH64EB__
97         rev     x3,x3                   // 0
98 #endif
99         ror     x16,x24,#14
100         add     x27,x27,x19                     // h+=K[i]
101         eor     x6,x24,x24,ror#23
102         and     x17,x25,x24
103         bic     x19,x26,x24
104         add     x27,x27,x3                      // h+=X[i]
105         orr     x17,x17,x19                     // Ch(e,f,g)
106         eor     x19,x20,x21                     // a^b, b^c in next round
107         eor     x16,x16,x6,ror#18       // Sigma1(e)
108         ror     x6,x20,#28
109         add     x27,x27,x17                     // h+=Ch(e,f,g)
110         eor     x17,x20,x20,ror#5
111         add     x27,x27,x16                     // h+=Sigma1(e)
112         and     x28,x28,x19                     // (b^c)&=(a^b)
113         add     x23,x23,x27                     // d+=h
114         eor     x28,x28,x21                     // Maj(a,b,c)
115         eor     x17,x6,x17,ror#34       // Sigma0(a)
116         add     x27,x27,x28                     // h+=Maj(a,b,c)
117         ldr     x28,[x30],#8            // *K++, x19 in next round
118         //add   x27,x27,x17                     // h+=Sigma0(a)
119 #ifndef __AARCH64EB__
120         rev     x4,x4                   // 1
121 #endif
122         ldp     x5,x6,[x1],#2*8
123         add     x27,x27,x17                     // h+=Sigma0(a)
124         ror     x16,x23,#14
125         add     x26,x26,x28                     // h+=K[i]
126         eor     x7,x23,x23,ror#23
127         and     x17,x24,x23
128         bic     x28,x25,x23
129         add     x26,x26,x4                      // h+=X[i]
130         orr     x17,x17,x28                     // Ch(e,f,g)
131         eor     x28,x27,x20                     // a^b, b^c in next round
132         eor     x16,x16,x7,ror#18       // Sigma1(e)
133         ror     x7,x27,#28
134         add     x26,x26,x17                     // h+=Ch(e,f,g)
135         eor     x17,x27,x27,ror#5
136         add     x26,x26,x16                     // h+=Sigma1(e)
137         and     x19,x19,x28                     // (b^c)&=(a^b)
138         add     x22,x22,x26                     // d+=h
139         eor     x19,x19,x20                     // Maj(a,b,c)
140         eor     x17,x7,x17,ror#34       // Sigma0(a)
141         add     x26,x26,x19                     // h+=Maj(a,b,c)
142         ldr     x19,[x30],#8            // *K++, x28 in next round
143         //add   x26,x26,x17                     // h+=Sigma0(a)
144 #ifndef __AARCH64EB__
145         rev     x5,x5                   // 2
146 #endif
147         add     x26,x26,x17                     // h+=Sigma0(a)
148         ror     x16,x22,#14
149         add     x25,x25,x19                     // h+=K[i]
150         eor     x8,x22,x22,ror#23
151         and     x17,x23,x22
152         bic     x19,x24,x22
153         add     x25,x25,x5                      // h+=X[i]
154         orr     x17,x17,x19                     // Ch(e,f,g)
155         eor     x19,x26,x27                     // a^b, b^c in next round
156         eor     x16,x16,x8,ror#18       // Sigma1(e)
157         ror     x8,x26,#28
158         add     x25,x25,x17                     // h+=Ch(e,f,g)
159         eor     x17,x26,x26,ror#5
160         add     x25,x25,x16                     // h+=Sigma1(e)
161         and     x28,x28,x19                     // (b^c)&=(a^b)
162         add     x21,x21,x25                     // d+=h
163         eor     x28,x28,x27                     // Maj(a,b,c)
164         eor     x17,x8,x17,ror#34       // Sigma0(a)
165         add     x25,x25,x28                     // h+=Maj(a,b,c)
166         ldr     x28,[x30],#8            // *K++, x19 in next round
167         //add   x25,x25,x17                     // h+=Sigma0(a)
168 #ifndef __AARCH64EB__
169         rev     x6,x6                   // 3
170 #endif
171         ldp     x7,x8,[x1],#2*8
172         add     x25,x25,x17                     // h+=Sigma0(a)
173         ror     x16,x21,#14
174         add     x24,x24,x28                     // h+=K[i]
175         eor     x9,x21,x21,ror#23
176         and     x17,x22,x21
177         bic     x28,x23,x21
178         add     x24,x24,x6                      // h+=X[i]
179         orr     x17,x17,x28                     // Ch(e,f,g)
180         eor     x28,x25,x26                     // a^b, b^c in next round
181         eor     x16,x16,x9,ror#18       // Sigma1(e)
182         ror     x9,x25,#28
183         add     x24,x24,x17                     // h+=Ch(e,f,g)
184         eor     x17,x25,x25,ror#5
185         add     x24,x24,x16                     // h+=Sigma1(e)
186         and     x19,x19,x28                     // (b^c)&=(a^b)
187         add     x20,x20,x24                     // d+=h
188         eor     x19,x19,x26                     // Maj(a,b,c)
189         eor     x17,x9,x17,ror#34       // Sigma0(a)
190         add     x24,x24,x19                     // h+=Maj(a,b,c)
191         ldr     x19,[x30],#8            // *K++, x28 in next round
192         //add   x24,x24,x17                     // h+=Sigma0(a)
193 #ifndef __AARCH64EB__
194         rev     x7,x7                   // 4
195 #endif
196         add     x24,x24,x17                     // h+=Sigma0(a)
197         ror     x16,x20,#14
198         add     x23,x23,x19                     // h+=K[i]
199         eor     x10,x20,x20,ror#23
200         and     x17,x21,x20
201         bic     x19,x22,x20
202         add     x23,x23,x7                      // h+=X[i]
203         orr     x17,x17,x19                     // Ch(e,f,g)
204         eor     x19,x24,x25                     // a^b, b^c in next round
205         eor     x16,x16,x10,ror#18      // Sigma1(e)
206         ror     x10,x24,#28
207         add     x23,x23,x17                     // h+=Ch(e,f,g)
208         eor     x17,x24,x24,ror#5
209         add     x23,x23,x16                     // h+=Sigma1(e)
210         and     x28,x28,x19                     // (b^c)&=(a^b)
211         add     x27,x27,x23                     // d+=h
212         eor     x28,x28,x25                     // Maj(a,b,c)
213         eor     x17,x10,x17,ror#34      // Sigma0(a)
214         add     x23,x23,x28                     // h+=Maj(a,b,c)
215         ldr     x28,[x30],#8            // *K++, x19 in next round
216         //add   x23,x23,x17                     // h+=Sigma0(a)
217 #ifndef __AARCH64EB__
218         rev     x8,x8                   // 5
219 #endif
220         ldp     x9,x10,[x1],#2*8
221         add     x23,x23,x17                     // h+=Sigma0(a)
222         ror     x16,x27,#14
223         add     x22,x22,x28                     // h+=K[i]
224         eor     x11,x27,x27,ror#23
225         and     x17,x20,x27
226         bic     x28,x21,x27
227         add     x22,x22,x8                      // h+=X[i]
228         orr     x17,x17,x28                     // Ch(e,f,g)
229         eor     x28,x23,x24                     // a^b, b^c in next round
230         eor     x16,x16,x11,ror#18      // Sigma1(e)
231         ror     x11,x23,#28
232         add     x22,x22,x17                     // h+=Ch(e,f,g)
233         eor     x17,x23,x23,ror#5
234         add     x22,x22,x16                     // h+=Sigma1(e)
235         and     x19,x19,x28                     // (b^c)&=(a^b)
236         add     x26,x26,x22                     // d+=h
237         eor     x19,x19,x24                     // Maj(a,b,c)
238         eor     x17,x11,x17,ror#34      // Sigma0(a)
239         add     x22,x22,x19                     // h+=Maj(a,b,c)
240         ldr     x19,[x30],#8            // *K++, x28 in next round
241         //add   x22,x22,x17                     // h+=Sigma0(a)
242 #ifndef __AARCH64EB__
243         rev     x9,x9                   // 6
244 #endif
245         add     x22,x22,x17                     // h+=Sigma0(a)
246         ror     x16,x26,#14
247         add     x21,x21,x19                     // h+=K[i]
248         eor     x12,x26,x26,ror#23
249         and     x17,x27,x26
250         bic     x19,x20,x26
251         add     x21,x21,x9                      // h+=X[i]
252         orr     x17,x17,x19                     // Ch(e,f,g)
253         eor     x19,x22,x23                     // a^b, b^c in next round
254         eor     x16,x16,x12,ror#18      // Sigma1(e)
255         ror     x12,x22,#28
256         add     x21,x21,x17                     // h+=Ch(e,f,g)
257         eor     x17,x22,x22,ror#5
258         add     x21,x21,x16                     // h+=Sigma1(e)
259         and     x28,x28,x19                     // (b^c)&=(a^b)
260         add     x25,x25,x21                     // d+=h
261         eor     x28,x28,x23                     // Maj(a,b,c)
262         eor     x17,x12,x17,ror#34      // Sigma0(a)
263         add     x21,x21,x28                     // h+=Maj(a,b,c)
264         ldr     x28,[x30],#8            // *K++, x19 in next round
265         //add   x21,x21,x17                     // h+=Sigma0(a)
266 #ifndef __AARCH64EB__
267         rev     x10,x10                 // 7
268 #endif
269         ldp     x11,x12,[x1],#2*8
270         add     x21,x21,x17                     // h+=Sigma0(a)
271         ror     x16,x25,#14
272         add     x20,x20,x28                     // h+=K[i]
273         eor     x13,x25,x25,ror#23
274         and     x17,x26,x25
275         bic     x28,x27,x25
276         add     x20,x20,x10                     // h+=X[i]
277         orr     x17,x17,x28                     // Ch(e,f,g)
278         eor     x28,x21,x22                     // a^b, b^c in next round
279         eor     x16,x16,x13,ror#18      // Sigma1(e)
280         ror     x13,x21,#28
281         add     x20,x20,x17                     // h+=Ch(e,f,g)
282         eor     x17,x21,x21,ror#5
283         add     x20,x20,x16                     // h+=Sigma1(e)
284         and     x19,x19,x28                     // (b^c)&=(a^b)
285         add     x24,x24,x20                     // d+=h
286         eor     x19,x19,x22                     // Maj(a,b,c)
287         eor     x17,x13,x17,ror#34      // Sigma0(a)
288         add     x20,x20,x19                     // h+=Maj(a,b,c)
289         ldr     x19,[x30],#8            // *K++, x28 in next round
290         //add   x20,x20,x17                     // h+=Sigma0(a)
291 #ifndef __AARCH64EB__
292         rev     x11,x11                 // 8
293 #endif
294         add     x20,x20,x17                     // h+=Sigma0(a)
295         ror     x16,x24,#14
296         add     x27,x27,x19                     // h+=K[i]
297         eor     x14,x24,x24,ror#23
298         and     x17,x25,x24
299         bic     x19,x26,x24
300         add     x27,x27,x11                     // h+=X[i]
301         orr     x17,x17,x19                     // Ch(e,f,g)
302         eor     x19,x20,x21                     // a^b, b^c in next round
303         eor     x16,x16,x14,ror#18      // Sigma1(e)
304         ror     x14,x20,#28
305         add     x27,x27,x17                     // h+=Ch(e,f,g)
306         eor     x17,x20,x20,ror#5
307         add     x27,x27,x16                     // h+=Sigma1(e)
308         and     x28,x28,x19                     // (b^c)&=(a^b)
309         add     x23,x23,x27                     // d+=h
310         eor     x28,x28,x21                     // Maj(a,b,c)
311         eor     x17,x14,x17,ror#34      // Sigma0(a)
312         add     x27,x27,x28                     // h+=Maj(a,b,c)
313         ldr     x28,[x30],#8            // *K++, x19 in next round
314         //add   x27,x27,x17                     // h+=Sigma0(a)
315 #ifndef __AARCH64EB__
316         rev     x12,x12                 // 9
317 #endif
318         ldp     x13,x14,[x1],#2*8
319         add     x27,x27,x17                     // h+=Sigma0(a)
320         ror     x16,x23,#14
321         add     x26,x26,x28                     // h+=K[i]
322         eor     x15,x23,x23,ror#23
323         and     x17,x24,x23
324         bic     x28,x25,x23
325         add     x26,x26,x12                     // h+=X[i]
326         orr     x17,x17,x28                     // Ch(e,f,g)
327         eor     x28,x27,x20                     // a^b, b^c in next round
328         eor     x16,x16,x15,ror#18      // Sigma1(e)
329         ror     x15,x27,#28
330         add     x26,x26,x17                     // h+=Ch(e,f,g)
331         eor     x17,x27,x27,ror#5
332         add     x26,x26,x16                     // h+=Sigma1(e)
333         and     x19,x19,x28                     // (b^c)&=(a^b)
334         add     x22,x22,x26                     // d+=h
335         eor     x19,x19,x20                     // Maj(a,b,c)
336         eor     x17,x15,x17,ror#34      // Sigma0(a)
337         add     x26,x26,x19                     // h+=Maj(a,b,c)
338         ldr     x19,[x30],#8            // *K++, x28 in next round
339         //add   x26,x26,x17                     // h+=Sigma0(a)
340 #ifndef __AARCH64EB__
341         rev     x13,x13                 // 10
342 #endif
343         add     x26,x26,x17                     // h+=Sigma0(a)
344         ror     x16,x22,#14
345         add     x25,x25,x19                     // h+=K[i]
346         eor     x0,x22,x22,ror#23
347         and     x17,x23,x22
348         bic     x19,x24,x22
349         add     x25,x25,x13                     // h+=X[i]
350         orr     x17,x17,x19                     // Ch(e,f,g)
351         eor     x19,x26,x27                     // a^b, b^c in next round
352         eor     x16,x16,x0,ror#18       // Sigma1(e)
353         ror     x0,x26,#28
354         add     x25,x25,x17                     // h+=Ch(e,f,g)
355         eor     x17,x26,x26,ror#5
356         add     x25,x25,x16                     // h+=Sigma1(e)
357         and     x28,x28,x19                     // (b^c)&=(a^b)
358         add     x21,x21,x25                     // d+=h
359         eor     x28,x28,x27                     // Maj(a,b,c)
360         eor     x17,x0,x17,ror#34       // Sigma0(a)
361         add     x25,x25,x28                     // h+=Maj(a,b,c)
362         ldr     x28,[x30],#8            // *K++, x19 in next round
363         //add   x25,x25,x17                     // h+=Sigma0(a)
364 #ifndef __AARCH64EB__
365         rev     x14,x14                 // 11
366 #endif
367         ldp     x15,x0,[x1],#2*8
368         add     x25,x25,x17                     // h+=Sigma0(a)
369         str     x6,[sp,#24]
370         ror     x16,x21,#14
371         add     x24,x24,x28                     // h+=K[i]
372         eor     x6,x21,x21,ror#23
373         and     x17,x22,x21
374         bic     x28,x23,x21
375         add     x24,x24,x14                     // h+=X[i]
376         orr     x17,x17,x28                     // Ch(e,f,g)
377         eor     x28,x25,x26                     // a^b, b^c in next round
378         eor     x16,x16,x6,ror#18       // Sigma1(e)
379         ror     x6,x25,#28
380         add     x24,x24,x17                     // h+=Ch(e,f,g)
381         eor     x17,x25,x25,ror#5
382         add     x24,x24,x16                     // h+=Sigma1(e)
383         and     x19,x19,x28                     // (b^c)&=(a^b)
384         add     x20,x20,x24                     // d+=h
385         eor     x19,x19,x26                     // Maj(a,b,c)
386         eor     x17,x6,x17,ror#34       // Sigma0(a)
387         add     x24,x24,x19                     // h+=Maj(a,b,c)
388         ldr     x19,[x30],#8            // *K++, x28 in next round
389         //add   x24,x24,x17                     // h+=Sigma0(a)
390 #ifndef __AARCH64EB__
391         rev     x15,x15                 // 12
392 #endif
393         add     x24,x24,x17                     // h+=Sigma0(a)
394         str     x7,[sp,#0]
395         ror     x16,x20,#14
396         add     x23,x23,x19                     // h+=K[i]
397         eor     x7,x20,x20,ror#23
398         and     x17,x21,x20
399         bic     x19,x22,x20
400         add     x23,x23,x15                     // h+=X[i]
401         orr     x17,x17,x19                     // Ch(e,f,g)
402         eor     x19,x24,x25                     // a^b, b^c in next round
403         eor     x16,x16,x7,ror#18       // Sigma1(e)
404         ror     x7,x24,#28
405         add     x23,x23,x17                     // h+=Ch(e,f,g)
406         eor     x17,x24,x24,ror#5
407         add     x23,x23,x16                     // h+=Sigma1(e)
408         and     x28,x28,x19                     // (b^c)&=(a^b)
409         add     x27,x27,x23                     // d+=h
410         eor     x28,x28,x25                     // Maj(a,b,c)
411         eor     x17,x7,x17,ror#34       // Sigma0(a)
412         add     x23,x23,x28                     // h+=Maj(a,b,c)
413         ldr     x28,[x30],#8            // *K++, x19 in next round
414         //add   x23,x23,x17                     // h+=Sigma0(a)
415 #ifndef __AARCH64EB__
416         rev     x0,x0                   // 13
417 #endif
418         ldp     x1,x2,[x1]
419         add     x23,x23,x17                     // h+=Sigma0(a)
420         str     x8,[sp,#8]
421         ror     x16,x27,#14
422         add     x22,x22,x28                     // h+=K[i]
423         eor     x8,x27,x27,ror#23
424         and     x17,x20,x27
425         bic     x28,x21,x27
426         add     x22,x22,x0                      // h+=X[i]
427         orr     x17,x17,x28                     // Ch(e,f,g)
428         eor     x28,x23,x24                     // a^b, b^c in next round
429         eor     x16,x16,x8,ror#18       // Sigma1(e)
430         ror     x8,x23,#28
431         add     x22,x22,x17                     // h+=Ch(e,f,g)
432         eor     x17,x23,x23,ror#5
433         add     x22,x22,x16                     // h+=Sigma1(e)
434         and     x19,x19,x28                     // (b^c)&=(a^b)
435         add     x26,x26,x22                     // d+=h
436         eor     x19,x19,x24                     // Maj(a,b,c)
437         eor     x17,x8,x17,ror#34       // Sigma0(a)
438         add     x22,x22,x19                     // h+=Maj(a,b,c)
439         ldr     x19,[x30],#8            // *K++, x28 in next round
440         //add   x22,x22,x17                     // h+=Sigma0(a)
441 #ifndef __AARCH64EB__
442         rev     x1,x1                   // 14
443 #endif
444         ldr     x6,[sp,#24]
445         add     x22,x22,x17                     // h+=Sigma0(a)
446         str     x9,[sp,#16]
447         ror     x16,x26,#14
448         add     x21,x21,x19                     // h+=K[i]
449         eor     x9,x26,x26,ror#23
450         and     x17,x27,x26
451         bic     x19,x20,x26
452         add     x21,x21,x1                      // h+=X[i]
453         orr     x17,x17,x19                     // Ch(e,f,g)
454         eor     x19,x22,x23                     // a^b, b^c in next round
455         eor     x16,x16,x9,ror#18       // Sigma1(e)
456         ror     x9,x22,#28
457         add     x21,x21,x17                     // h+=Ch(e,f,g)
458         eor     x17,x22,x22,ror#5
459         add     x21,x21,x16                     // h+=Sigma1(e)
460         and     x28,x28,x19                     // (b^c)&=(a^b)
461         add     x25,x25,x21                     // d+=h
462         eor     x28,x28,x23                     // Maj(a,b,c)
463         eor     x17,x9,x17,ror#34       // Sigma0(a)
464         add     x21,x21,x28                     // h+=Maj(a,b,c)
465         ldr     x28,[x30],#8            // *K++, x19 in next round
466         //add   x21,x21,x17                     // h+=Sigma0(a)
467 #ifndef __AARCH64EB__
468         rev     x2,x2                   // 15
469 #endif
470         ldr     x7,[sp,#0]
471         add     x21,x21,x17                     // h+=Sigma0(a)
472         str     x10,[sp,#24]
473         ror     x16,x25,#14
474         add     x20,x20,x28                     // h+=K[i]
475         ror     x9,x4,#1
476         and     x17,x26,x25
477         ror     x8,x1,#19
478         bic     x28,x27,x25
479         ror     x10,x21,#28
480         add     x20,x20,x2                      // h+=X[i]
481         eor     x16,x16,x25,ror#18
482         eor     x9,x9,x4,ror#8
483         orr     x17,x17,x28                     // Ch(e,f,g)
484         eor     x28,x21,x22                     // a^b, b^c in next round
485         eor     x16,x16,x25,ror#41      // Sigma1(e)
486         eor     x10,x10,x21,ror#34
487         add     x20,x20,x17                     // h+=Ch(e,f,g)
488         and     x19,x19,x28                     // (b^c)&=(a^b)
489         eor     x8,x8,x1,ror#61
490         eor     x9,x9,x4,lsr#7  // sigma0(X[i+1])
491         add     x20,x20,x16                     // h+=Sigma1(e)
492         eor     x19,x19,x22                     // Maj(a,b,c)
493         eor     x17,x10,x21,ror#39      // Sigma0(a)
494         eor     x8,x8,x1,lsr#6  // sigma1(X[i+14])
495         add     x3,x3,x12
496         add     x24,x24,x20                     // d+=h
497         add     x20,x20,x19                     // h+=Maj(a,b,c)
498         ldr     x19,[x30],#8            // *K++, x28 in next round
499         add     x3,x3,x9
500         add     x20,x20,x17                     // h+=Sigma0(a)
501         add     x3,x3,x8
502 .Loop_16_xx:
503         ldr     x8,[sp,#8]
504         str     x11,[sp,#0]
505         ror     x16,x24,#14
506         add     x27,x27,x19                     // h+=K[i]
507         ror     x10,x5,#1
508         and     x17,x25,x24
509         ror     x9,x2,#19
510         bic     x19,x26,x24
511         ror     x11,x20,#28
512         add     x27,x27,x3                      // h+=X[i]
513         eor     x16,x16,x24,ror#18
514         eor     x10,x10,x5,ror#8
515         orr     x17,x17,x19                     // Ch(e,f,g)
516         eor     x19,x20,x21                     // a^b, b^c in next round
517         eor     x16,x16,x24,ror#41      // Sigma1(e)
518         eor     x11,x11,x20,ror#34
519         add     x27,x27,x17                     // h+=Ch(e,f,g)
520         and     x28,x28,x19                     // (b^c)&=(a^b)
521         eor     x9,x9,x2,ror#61
522         eor     x10,x10,x5,lsr#7        // sigma0(X[i+1])
523         add     x27,x27,x16                     // h+=Sigma1(e)
524         eor     x28,x28,x21                     // Maj(a,b,c)
525         eor     x17,x11,x20,ror#39      // Sigma0(a)
526         eor     x9,x9,x2,lsr#6  // sigma1(X[i+14])
527         add     x4,x4,x13
528         add     x23,x23,x27                     // d+=h
529         add     x27,x27,x28                     // h+=Maj(a,b,c)
530         ldr     x28,[x30],#8            // *K++, x19 in next round
531         add     x4,x4,x10
532         add     x27,x27,x17                     // h+=Sigma0(a)
533         add     x4,x4,x9
534         ldr     x9,[sp,#16]
535         str     x12,[sp,#8]
536         ror     x16,x23,#14
537         add     x26,x26,x28                     // h+=K[i]
538         ror     x11,x6,#1
539         and     x17,x24,x23
540         ror     x10,x3,#19
541         bic     x28,x25,x23
542         ror     x12,x27,#28
543         add     x26,x26,x4                      // h+=X[i]
544         eor     x16,x16,x23,ror#18
545         eor     x11,x11,x6,ror#8
546         orr     x17,x17,x28                     // Ch(e,f,g)
547         eor     x28,x27,x20                     // a^b, b^c in next round
548         eor     x16,x16,x23,ror#41      // Sigma1(e)
549         eor     x12,x12,x27,ror#34
550         add     x26,x26,x17                     // h+=Ch(e,f,g)
551         and     x19,x19,x28                     // (b^c)&=(a^b)
552         eor     x10,x10,x3,ror#61
553         eor     x11,x11,x6,lsr#7        // sigma0(X[i+1])
554         add     x26,x26,x16                     // h+=Sigma1(e)
555         eor     x19,x19,x20                     // Maj(a,b,c)
556         eor     x17,x12,x27,ror#39      // Sigma0(a)
557         eor     x10,x10,x3,lsr#6        // sigma1(X[i+14])
558         add     x5,x5,x14
559         add     x22,x22,x26                     // d+=h
560         add     x26,x26,x19                     // h+=Maj(a,b,c)
561         ldr     x19,[x30],#8            // *K++, x28 in next round
562         add     x5,x5,x11
563         add     x26,x26,x17                     // h+=Sigma0(a)
564         add     x5,x5,x10
565         ldr     x10,[sp,#24]
566         str     x13,[sp,#16]
567         ror     x16,x22,#14
568         add     x25,x25,x19                     // h+=K[i]
569         ror     x12,x7,#1
570         and     x17,x23,x22
571         ror     x11,x4,#19
572         bic     x19,x24,x22
573         ror     x13,x26,#28
574         add     x25,x25,x5                      // h+=X[i]
575         eor     x16,x16,x22,ror#18
576         eor     x12,x12,x7,ror#8
577         orr     x17,x17,x19                     // Ch(e,f,g)
578         eor     x19,x26,x27                     // a^b, b^c in next round
579         eor     x16,x16,x22,ror#41      // Sigma1(e)
580         eor     x13,x13,x26,ror#34
581         add     x25,x25,x17                     // h+=Ch(e,f,g)
582         and     x28,x28,x19                     // (b^c)&=(a^b)
583         eor     x11,x11,x4,ror#61
584         eor     x12,x12,x7,lsr#7        // sigma0(X[i+1])
585         add     x25,x25,x16                     // h+=Sigma1(e)
586         eor     x28,x28,x27                     // Maj(a,b,c)
587         eor     x17,x13,x26,ror#39      // Sigma0(a)
588         eor     x11,x11,x4,lsr#6        // sigma1(X[i+14])
589         add     x6,x6,x15
590         add     x21,x21,x25                     // d+=h
591         add     x25,x25,x28                     // h+=Maj(a,b,c)
592         ldr     x28,[x30],#8            // *K++, x19 in next round
593         add     x6,x6,x12
594         add     x25,x25,x17                     // h+=Sigma0(a)
595         add     x6,x6,x11
596         ldr     x11,[sp,#0]
597         str     x14,[sp,#24]
598         ror     x16,x21,#14
599         add     x24,x24,x28                     // h+=K[i]
600         ror     x13,x8,#1
601         and     x17,x22,x21
602         ror     x12,x5,#19
603         bic     x28,x23,x21
604         ror     x14,x25,#28
605         add     x24,x24,x6                      // h+=X[i]
606         eor     x16,x16,x21,ror#18
607         eor     x13,x13,x8,ror#8
608         orr     x17,x17,x28                     // Ch(e,f,g)
609         eor     x28,x25,x26                     // a^b, b^c in next round
610         eor     x16,x16,x21,ror#41      // Sigma1(e)
611         eor     x14,x14,x25,ror#34
612         add     x24,x24,x17                     // h+=Ch(e,f,g)
613         and     x19,x19,x28                     // (b^c)&=(a^b)
614         eor     x12,x12,x5,ror#61
615         eor     x13,x13,x8,lsr#7        // sigma0(X[i+1])
616         add     x24,x24,x16                     // h+=Sigma1(e)
617         eor     x19,x19,x26                     // Maj(a,b,c)
618         eor     x17,x14,x25,ror#39      // Sigma0(a)
619         eor     x12,x12,x5,lsr#6        // sigma1(X[i+14])
620         add     x7,x7,x0
621         add     x20,x20,x24                     // d+=h
622         add     x24,x24,x19                     // h+=Maj(a,b,c)
623         ldr     x19,[x30],#8            // *K++, x28 in next round
624         add     x7,x7,x13
625         add     x24,x24,x17                     // h+=Sigma0(a)
626         add     x7,x7,x12
627         ldr     x12,[sp,#8]
628         str     x15,[sp,#0]
629         ror     x16,x20,#14
630         add     x23,x23,x19                     // h+=K[i]
631         ror     x14,x9,#1
632         and     x17,x21,x20
633         ror     x13,x6,#19
634         bic     x19,x22,x20
635         ror     x15,x24,#28
636         add     x23,x23,x7                      // h+=X[i]
637         eor     x16,x16,x20,ror#18
638         eor     x14,x14,x9,ror#8
639         orr     x17,x17,x19                     // Ch(e,f,g)
640         eor     x19,x24,x25                     // a^b, b^c in next round
641         eor     x16,x16,x20,ror#41      // Sigma1(e)
642         eor     x15,x15,x24,ror#34
643         add     x23,x23,x17                     // h+=Ch(e,f,g)
644         and     x28,x28,x19                     // (b^c)&=(a^b)
645         eor     x13,x13,x6,ror#61
646         eor     x14,x14,x9,lsr#7        // sigma0(X[i+1])
647         add     x23,x23,x16                     // h+=Sigma1(e)
648         eor     x28,x28,x25                     // Maj(a,b,c)
649         eor     x17,x15,x24,ror#39      // Sigma0(a)
650         eor     x13,x13,x6,lsr#6        // sigma1(X[i+14])
651         add     x8,x8,x1
652         add     x27,x27,x23                     // d+=h
653         add     x23,x23,x28                     // h+=Maj(a,b,c)
654         ldr     x28,[x30],#8            // *K++, x19 in next round
655         add     x8,x8,x14
656         add     x23,x23,x17                     // h+=Sigma0(a)
657         add     x8,x8,x13
658         ldr     x13,[sp,#16]
659         str     x0,[sp,#8]
660         ror     x16,x27,#14
661         add     x22,x22,x28                     // h+=K[i]
662         ror     x15,x10,#1
663         and     x17,x20,x27
664         ror     x14,x7,#19
665         bic     x28,x21,x27
666         ror     x0,x23,#28
667         add     x22,x22,x8                      // h+=X[i]
668         eor     x16,x16,x27,ror#18
669         eor     x15,x15,x10,ror#8
670         orr     x17,x17,x28                     // Ch(e,f,g)
671         eor     x28,x23,x24                     // a^b, b^c in next round
672         eor     x16,x16,x27,ror#41      // Sigma1(e)
673         eor     x0,x0,x23,ror#34
674         add     x22,x22,x17                     // h+=Ch(e,f,g)
675         and     x19,x19,x28                     // (b^c)&=(a^b)
676         eor     x14,x14,x7,ror#61
677         eor     x15,x15,x10,lsr#7       // sigma0(X[i+1])
678         add     x22,x22,x16                     // h+=Sigma1(e)
679         eor     x19,x19,x24                     // Maj(a,b,c)
680         eor     x17,x0,x23,ror#39       // Sigma0(a)
681         eor     x14,x14,x7,lsr#6        // sigma1(X[i+14])
682         add     x9,x9,x2
683         add     x26,x26,x22                     // d+=h
684         add     x22,x22,x19                     // h+=Maj(a,b,c)
685         ldr     x19,[x30],#8            // *K++, x28 in next round
686         add     x9,x9,x15
687         add     x22,x22,x17                     // h+=Sigma0(a)
688         add     x9,x9,x14
689         ldr     x14,[sp,#24]
690         str     x1,[sp,#16]
691         ror     x16,x26,#14
692         add     x21,x21,x19                     // h+=K[i]
693         ror     x0,x11,#1
694         and     x17,x27,x26
695         ror     x15,x8,#19
696         bic     x19,x20,x26
697         ror     x1,x22,#28
698         add     x21,x21,x9                      // h+=X[i]
699         eor     x16,x16,x26,ror#18
700         eor     x0,x0,x11,ror#8
701         orr     x17,x17,x19                     // Ch(e,f,g)
702         eor     x19,x22,x23                     // a^b, b^c in next round
703         eor     x16,x16,x26,ror#41      // Sigma1(e)
704         eor     x1,x1,x22,ror#34
705         add     x21,x21,x17                     // h+=Ch(e,f,g)
706         and     x28,x28,x19                     // (b^c)&=(a^b)
707         eor     x15,x15,x8,ror#61
708         eor     x0,x0,x11,lsr#7 // sigma0(X[i+1])
709         add     x21,x21,x16                     // h+=Sigma1(e)
710         eor     x28,x28,x23                     // Maj(a,b,c)
711         eor     x17,x1,x22,ror#39       // Sigma0(a)
712         eor     x15,x15,x8,lsr#6        // sigma1(X[i+14])
713         add     x10,x10,x3
714         add     x25,x25,x21                     // d+=h
715         add     x21,x21,x28                     // h+=Maj(a,b,c)
716         ldr     x28,[x30],#8            // *K++, x19 in next round
717         add     x10,x10,x0
718         add     x21,x21,x17                     // h+=Sigma0(a)
719         add     x10,x10,x15
720         ldr     x15,[sp,#0]
721         str     x2,[sp,#24]
722         ror     x16,x25,#14
723         add     x20,x20,x28                     // h+=K[i]
724         ror     x1,x12,#1
725         and     x17,x26,x25
726         ror     x0,x9,#19
727         bic     x28,x27,x25
728         ror     x2,x21,#28
729         add     x20,x20,x10                     // h+=X[i]
730         eor     x16,x16,x25,ror#18
731         eor     x1,x1,x12,ror#8
732         orr     x17,x17,x28                     // Ch(e,f,g)
733         eor     x28,x21,x22                     // a^b, b^c in next round
734         eor     x16,x16,x25,ror#41      // Sigma1(e)
735         eor     x2,x2,x21,ror#34
736         add     x20,x20,x17                     // h+=Ch(e,f,g)
737         and     x19,x19,x28                     // (b^c)&=(a^b)
738         eor     x0,x0,x9,ror#61
739         eor     x1,x1,x12,lsr#7 // sigma0(X[i+1])
740         add     x20,x20,x16                     // h+=Sigma1(e)
741         eor     x19,x19,x22                     // Maj(a,b,c)
742         eor     x17,x2,x21,ror#39       // Sigma0(a)
743         eor     x0,x0,x9,lsr#6  // sigma1(X[i+14])
744         add     x11,x11,x4
745         add     x24,x24,x20                     // d+=h
746         add     x20,x20,x19                     // h+=Maj(a,b,c)
747         ldr     x19,[x30],#8            // *K++, x28 in next round
748         add     x11,x11,x1
749         add     x20,x20,x17                     // h+=Sigma0(a)
750         add     x11,x11,x0
751         ldr     x0,[sp,#8]
752         str     x3,[sp,#0]
753         ror     x16,x24,#14
754         add     x27,x27,x19                     // h+=K[i]
755         ror     x2,x13,#1
756         and     x17,x25,x24
757         ror     x1,x10,#19
758         bic     x19,x26,x24
759         ror     x3,x20,#28
760         add     x27,x27,x11                     // h+=X[i]
761         eor     x16,x16,x24,ror#18
762         eor     x2,x2,x13,ror#8
763         orr     x17,x17,x19                     // Ch(e,f,g)
764         eor     x19,x20,x21                     // a^b, b^c in next round
765         eor     x16,x16,x24,ror#41      // Sigma1(e)
766         eor     x3,x3,x20,ror#34
767         add     x27,x27,x17                     // h+=Ch(e,f,g)
768         and     x28,x28,x19                     // (b^c)&=(a^b)
769         eor     x1,x1,x10,ror#61
770         eor     x2,x2,x13,lsr#7 // sigma0(X[i+1])
771         add     x27,x27,x16                     // h+=Sigma1(e)
772         eor     x28,x28,x21                     // Maj(a,b,c)
773         eor     x17,x3,x20,ror#39       // Sigma0(a)
774         eor     x1,x1,x10,lsr#6 // sigma1(X[i+14])
775         add     x12,x12,x5
776         add     x23,x23,x27                     // d+=h
777         add     x27,x27,x28                     // h+=Maj(a,b,c)
778         ldr     x28,[x30],#8            // *K++, x19 in next round
779         add     x12,x12,x2
780         add     x27,x27,x17                     // h+=Sigma0(a)
781         add     x12,x12,x1
782         ldr     x1,[sp,#16]
783         str     x4,[sp,#8]
784         ror     x16,x23,#14
785         add     x26,x26,x28                     // h+=K[i]
786         ror     x3,x14,#1
787         and     x17,x24,x23
788         ror     x2,x11,#19
789         bic     x28,x25,x23
790         ror     x4,x27,#28
791         add     x26,x26,x12                     // h+=X[i]
792         eor     x16,x16,x23,ror#18
793         eor     x3,x3,x14,ror#8
794         orr     x17,x17,x28                     // Ch(e,f,g)
795         eor     x28,x27,x20                     // a^b, b^c in next round
796         eor     x16,x16,x23,ror#41      // Sigma1(e)
797         eor     x4,x4,x27,ror#34
798         add     x26,x26,x17                     // h+=Ch(e,f,g)
799         and     x19,x19,x28                     // (b^c)&=(a^b)
800         eor     x2,x2,x11,ror#61
801         eor     x3,x3,x14,lsr#7 // sigma0(X[i+1])
802         add     x26,x26,x16                     // h+=Sigma1(e)
803         eor     x19,x19,x20                     // Maj(a,b,c)
804         eor     x17,x4,x27,ror#39       // Sigma0(a)
805         eor     x2,x2,x11,lsr#6 // sigma1(X[i+14])
806         add     x13,x13,x6
807         add     x22,x22,x26                     // d+=h
808         add     x26,x26,x19                     // h+=Maj(a,b,c)
809         ldr     x19,[x30],#8            // *K++, x28 in next round
810         add     x13,x13,x3
811         add     x26,x26,x17                     // h+=Sigma0(a)
812         add     x13,x13,x2
813         ldr     x2,[sp,#24]
814         str     x5,[sp,#16]
815         ror     x16,x22,#14
816         add     x25,x25,x19                     // h+=K[i]
817         ror     x4,x15,#1
818         and     x17,x23,x22
819         ror     x3,x12,#19
820         bic     x19,x24,x22
821         ror     x5,x26,#28
822         add     x25,x25,x13                     // h+=X[i]
823         eor     x16,x16,x22,ror#18
824         eor     x4,x4,x15,ror#8
825         orr     x17,x17,x19                     // Ch(e,f,g)
826         eor     x19,x26,x27                     // a^b, b^c in next round
827         eor     x16,x16,x22,ror#41      // Sigma1(e)
828         eor     x5,x5,x26,ror#34
829         add     x25,x25,x17                     // h+=Ch(e,f,g)
830         and     x28,x28,x19                     // (b^c)&=(a^b)
831         eor     x3,x3,x12,ror#61
832         eor     x4,x4,x15,lsr#7 // sigma0(X[i+1])
833         add     x25,x25,x16                     // h+=Sigma1(e)
834         eor     x28,x28,x27                     // Maj(a,b,c)
835         eor     x17,x5,x26,ror#39       // Sigma0(a)
836         eor     x3,x3,x12,lsr#6 // sigma1(X[i+14])
837         add     x14,x14,x7
838         add     x21,x21,x25                     // d+=h
839         add     x25,x25,x28                     // h+=Maj(a,b,c)
840         ldr     x28,[x30],#8            // *K++, x19 in next round
841         add     x14,x14,x4
842         add     x25,x25,x17                     // h+=Sigma0(a)
843         add     x14,x14,x3
844         ldr     x3,[sp,#0]
845         str     x6,[sp,#24]
846         ror     x16,x21,#14
847         add     x24,x24,x28                     // h+=K[i]
848         ror     x5,x0,#1
849         and     x17,x22,x21
850         ror     x4,x13,#19
851         bic     x28,x23,x21
852         ror     x6,x25,#28
853         add     x24,x24,x14                     // h+=X[i]
854         eor     x16,x16,x21,ror#18
855         eor     x5,x5,x0,ror#8
856         orr     x17,x17,x28                     // Ch(e,f,g)
857         eor     x28,x25,x26                     // a^b, b^c in next round
858         eor     x16,x16,x21,ror#41      // Sigma1(e)
859         eor     x6,x6,x25,ror#34
860         add     x24,x24,x17                     // h+=Ch(e,f,g)
861         and     x19,x19,x28                     // (b^c)&=(a^b)
862         eor     x4,x4,x13,ror#61
863         eor     x5,x5,x0,lsr#7  // sigma0(X[i+1])
864         add     x24,x24,x16                     // h+=Sigma1(e)
865         eor     x19,x19,x26                     // Maj(a,b,c)
866         eor     x17,x6,x25,ror#39       // Sigma0(a)
867         eor     x4,x4,x13,lsr#6 // sigma1(X[i+14])
868         add     x15,x15,x8
869         add     x20,x20,x24                     // d+=h
870         add     x24,x24,x19                     // h+=Maj(a,b,c)
871         ldr     x19,[x30],#8            // *K++, x28 in next round
872         add     x15,x15,x5
873         add     x24,x24,x17                     // h+=Sigma0(a)
874         add     x15,x15,x4
875         ldr     x4,[sp,#8]
876         str     x7,[sp,#0]
877         ror     x16,x20,#14
878         add     x23,x23,x19                     // h+=K[i]
879         ror     x6,x1,#1
880         and     x17,x21,x20
881         ror     x5,x14,#19
882         bic     x19,x22,x20
883         ror     x7,x24,#28
884         add     x23,x23,x15                     // h+=X[i]
885         eor     x16,x16,x20,ror#18
886         eor     x6,x6,x1,ror#8
887         orr     x17,x17,x19                     // Ch(e,f,g)
888         eor     x19,x24,x25                     // a^b, b^c in next round
889         eor     x16,x16,x20,ror#41      // Sigma1(e)
890         eor     x7,x7,x24,ror#34
891         add     x23,x23,x17                     // h+=Ch(e,f,g)
892         and     x28,x28,x19                     // (b^c)&=(a^b)
893         eor     x5,x5,x14,ror#61
894         eor     x6,x6,x1,lsr#7  // sigma0(X[i+1])
895         add     x23,x23,x16                     // h+=Sigma1(e)
896         eor     x28,x28,x25                     // Maj(a,b,c)
897         eor     x17,x7,x24,ror#39       // Sigma0(a)
898         eor     x5,x5,x14,lsr#6 // sigma1(X[i+14])
899         add     x0,x0,x9
900         add     x27,x27,x23                     // d+=h
901         add     x23,x23,x28                     // h+=Maj(a,b,c)
902         ldr     x28,[x30],#8            // *K++, x19 in next round
903         add     x0,x0,x6
904         add     x23,x23,x17                     // h+=Sigma0(a)
905         add     x0,x0,x5
906         ldr     x5,[sp,#16]
907         str     x8,[sp,#8]
908         ror     x16,x27,#14
909         add     x22,x22,x28                     // h+=K[i]
910         ror     x7,x2,#1
911         and     x17,x20,x27
912         ror     x6,x15,#19
913         bic     x28,x21,x27
914         ror     x8,x23,#28
915         add     x22,x22,x0                      // h+=X[i]
916         eor     x16,x16,x27,ror#18
917         eor     x7,x7,x2,ror#8
918         orr     x17,x17,x28                     // Ch(e,f,g)
919         eor     x28,x23,x24                     // a^b, b^c in next round
920         eor     x16,x16,x27,ror#41      // Sigma1(e)
921         eor     x8,x8,x23,ror#34
922         add     x22,x22,x17                     // h+=Ch(e,f,g)
923         and     x19,x19,x28                     // (b^c)&=(a^b)
924         eor     x6,x6,x15,ror#61
925         eor     x7,x7,x2,lsr#7  // sigma0(X[i+1])
926         add     x22,x22,x16                     // h+=Sigma1(e)
927         eor     x19,x19,x24                     // Maj(a,b,c)
928         eor     x17,x8,x23,ror#39       // Sigma0(a)
929         eor     x6,x6,x15,lsr#6 // sigma1(X[i+14])
930         add     x1,x1,x10
931         add     x26,x26,x22                     // d+=h
932         add     x22,x22,x19                     // h+=Maj(a,b,c)
933         ldr     x19,[x30],#8            // *K++, x28 in next round
934         add     x1,x1,x7
935         add     x22,x22,x17                     // h+=Sigma0(a)
936         add     x1,x1,x6
937         ldr     x6,[sp,#24]
938         str     x9,[sp,#16]
939         ror     x16,x26,#14
940         add     x21,x21,x19                     // h+=K[i]
941         ror     x8,x3,#1
942         and     x17,x27,x26
943         ror     x7,x0,#19
944         bic     x19,x20,x26
945         ror     x9,x22,#28
946         add     x21,x21,x1                      // h+=X[i]
947         eor     x16,x16,x26,ror#18
948         eor     x8,x8,x3,ror#8
949         orr     x17,x17,x19                     // Ch(e,f,g)
950         eor     x19,x22,x23                     // a^b, b^c in next round
951         eor     x16,x16,x26,ror#41      // Sigma1(e)
952         eor     x9,x9,x22,ror#34
953         add     x21,x21,x17                     // h+=Ch(e,f,g)
954         and     x28,x28,x19                     // (b^c)&=(a^b)
955         eor     x7,x7,x0,ror#61
956         eor     x8,x8,x3,lsr#7  // sigma0(X[i+1])
957         add     x21,x21,x16                     // h+=Sigma1(e)
958         eor     x28,x28,x23                     // Maj(a,b,c)
959         eor     x17,x9,x22,ror#39       // Sigma0(a)
960         eor     x7,x7,x0,lsr#6  // sigma1(X[i+14])
961         add     x2,x2,x11
962         add     x25,x25,x21                     // d+=h
963         add     x21,x21,x28                     // h+=Maj(a,b,c)
964         ldr     x28,[x30],#8            // *K++, x19 in next round
965         add     x2,x2,x8
966         add     x21,x21,x17                     // h+=Sigma0(a)
967         add     x2,x2,x7
968         ldr     x7,[sp,#0]
969         str     x10,[sp,#24]
970         ror     x16,x25,#14
971         add     x20,x20,x28                     // h+=K[i]
972         ror     x9,x4,#1
973         and     x17,x26,x25
974         ror     x8,x1,#19
975         bic     x28,x27,x25
976         ror     x10,x21,#28
977         add     x20,x20,x2                      // h+=X[i]
978         eor     x16,x16,x25,ror#18
979         eor     x9,x9,x4,ror#8
980         orr     x17,x17,x28                     // Ch(e,f,g)
981         eor     x28,x21,x22                     // a^b, b^c in next round
982         eor     x16,x16,x25,ror#41      // Sigma1(e)
983         eor     x10,x10,x21,ror#34
984         add     x20,x20,x17                     // h+=Ch(e,f,g)
985         and     x19,x19,x28                     // (b^c)&=(a^b)
986         eor     x8,x8,x1,ror#61
987         eor     x9,x9,x4,lsr#7  // sigma0(X[i+1])
988         add     x20,x20,x16                     // h+=Sigma1(e)
989         eor     x19,x19,x22                     // Maj(a,b,c)
990         eor     x17,x10,x21,ror#39      // Sigma0(a)
991         eor     x8,x8,x1,lsr#6  // sigma1(X[i+14])
992         add     x3,x3,x12
993         add     x24,x24,x20                     // d+=h
994         add     x20,x20,x19                     // h+=Maj(a,b,c)
995         ldr     x19,[x30],#8            // *K++, x28 in next round
996         add     x3,x3,x9
997         add     x20,x20,x17                     // h+=Sigma0(a)
998         add     x3,x3,x8
999         cbnz    x19,.Loop_16_xx
1000
1001         ldp     x0,x2,[x29,#96]
1002         ldr     x1,[x29,#112]
1003         sub     x30,x30,#648            // rewind
1004
1005         ldp     x3,x4,[x0]
1006         ldp     x5,x6,[x0,#2*8]
1007         add     x1,x1,#14*8                     // advance input pointer
1008         ldp     x7,x8,[x0,#4*8]
1009         add     x20,x20,x3
1010         ldp     x9,x10,[x0,#6*8]
1011         add     x21,x21,x4
1012         add     x22,x22,x5
1013         add     x23,x23,x6
1014         stp     x20,x21,[x0]
1015         add     x24,x24,x7
1016         add     x25,x25,x8
1017         stp     x22,x23,[x0,#2*8]
1018         add     x26,x26,x9
1019         add     x27,x27,x10
1020         cmp     x1,x2
1021         stp     x24,x25,[x0,#4*8]
1022         stp     x26,x27,[x0,#6*8]
1023         b.ne    .Loop
1024
1025         ldp     x19,x20,[x29,#16]
1026         add     sp,sp,#4*8
1027         ldp     x21,x22,[x29,#32]
1028         ldp     x23,x24,[x29,#48]
1029         ldp     x25,x26,[x29,#64]
1030         ldp     x27,x28,[x29,#80]
1031         ldp     x29,x30,[sp],#128
1032         ret
1033 .size   sha512_block_data_order,.-sha512_block_data_order
1034
1035 .align  6                       // 2^6 = 64-byte alignment for the constant table
1036 .type   .LK512,%object
1037 .LK512:                         // SHA-512 round constants K[0..79], two 64-bit words per .quad line
1038         .quad   0x428a2f98d728ae22,0x7137449123ef65cd
1039         .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1040         .quad   0x3956c25bf348b538,0x59f111f1b605d019
1041         .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
1042         .quad   0xd807aa98a3030242,0x12835b0145706fbe
1043         .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1044         .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
1045         .quad   0x9bdc06a725c71235,0xc19bf174cf692694
1046         .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
1047         .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1048         .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
1049         .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1050         .quad   0x983e5152ee66dfab,0xa831c66d2db43210
1051         .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
1052         .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
1053         .quad   0x06ca6351e003826f,0x142929670a0e6e70
1054         .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
1055         .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1056         .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
1057         .quad   0x81c2c92e47edaee6,0x92722c851482353b
1058         .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
1059         .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
1060         .quad   0xd192e819d6ef5218,0xd69906245565a910
1061         .quad   0xf40e35855771202a,0x106aa07032bbd1b8
1062         .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
1063         .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1064         .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1065         .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1066         .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
1067         .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
1068         .quad   0x90befffa23631e28,0xa4506cebde82bde9
1069         .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
1070         .quad   0xca273eceea26619c,0xd186b8c721c0c207
1071         .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1072         .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
1073         .quad   0x113f9804bef90dae,0x1b710b35131c471b
1074         .quad   0x28db77f523047d84,0x32caab7b40c72493
1075         .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1076         .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1077         .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
1078         .quad   0       // terminator: the round loop does "ldr ...,[x30],#8" then cbnz on the loaded word, so this zero ends it
1079 .size   .LK512,.-.LK512         // 80 constants + terminator = 81*8 = 648 bytes, the amount "sub x30,x30,#648" rewinds; NOTE(review): assumes x30 is set to .LK512 at function entry -- confirm
1080 #ifndef __KERNEL__
1081 .align  3                       // 2^3 = 8-byte alignment for the offset word below (non-kernel builds only)
1082 .LOPENSSL_armcap_P:             // stores the distance from this location to the OPENSSL_armcap_P symbol
1083 # ifdef __ILP32__
1084         .long   OPENSSL_armcap_P-.      // 32-bit offset when __ILP32__ is defined
1085 # else
1086         .quad   OPENSSL_armcap_P-.      // 64-bit offset otherwise
1087 # endif
1088 #endif
1089 .asciz  "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"   // NUL-terminated identification string emitted into the object
1090 .align  2                       // realign to 2^2 = 4 bytes after the variable-length string
1091 #ifndef __KERNEL__
1092 .comm   OPENSSL_armcap_P,4,4    // common symbol of size 4 with alignment argument 4; omitted when __KERNEL__ is defined
1093 #endif