Merge tag 'gpio-updates-for-v5.13-v2' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-microblaze.git] / arch / arm / crypto / sha256-core.S_shipped
1 @ SPDX-License-Identifier: GPL-2.0
2
3 @ This code is taken from the OpenSSL project but the author (Andy Polyakov)
4 @ has relicensed it under the GPLv2. Therefore this program is free software;
5 @ you can redistribute it and/or modify it under the terms of the GNU General
6 @ Public License version 2 as published by the Free Software Foundation.
7 @
8 @ The original headers, including the original license headers, are
9 @ included below for completeness.
10
11 @ ====================================================================
12 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13 @ project. The module is, however, dual licensed under OpenSSL and
14 @ CRYPTOGAMS licenses depending on where you obtain it. For further
15 @ details see https://www.openssl.org/~appro/cryptogams/.
16 @ ====================================================================
17
18 @ SHA256 block procedure for ARMv4. May 2007.
19
20 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22 @ byte [on single-issue Xscale PXA250 core].
23
24 @ July 2010.
25 @
26 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27 @ Cortex A8 core and ~20 cycles per processed byte.
28
29 @ February 2011.
30 @
31 @ Profiler-assisted and platform-specific optimization resulted in 16%
32 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34 @ September 2013.
35 @
36 @ Add NEON implementation. On Cortex A8 it was measured to process one
37 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39 @ code (meaning that the latter performs sub-optimally; nothing was done
40 @ about it).
41
42 @ May 2014.
43 @
44 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
@ Build configuration. Outside the kernel the architecture macros are
@ expected to come from arm_arch.h; inside the kernel __ARM_ARCH__ is
@ mapped onto __LINUX_ARM_ARCH__ and __ARM_MAX_ARCH__ is pinned to 7.
@ Code further down that is guarded by
@ __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) is therefore never
@ assembled in kernel builds.
46 #ifndef __KERNEL__
47 # include "arm_arch.h"
48 #else
49 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
50 # define __ARM_MAX_ARCH__ 7
51 #endif
52
@ Instruction set selection. Targets below ARMv7 are always assembled as
@ 32-bit ARM code. From ARMv7 on, unified syntax is enabled and Thumb
@ encoding is selected when __thumb2__ is defined, 32-bit ARM otherwise.
53 .text
54 #if __ARM_ARCH__<7
55 .code   32
56 #else
57 .syntax unified
58 # ifdef __thumb2__
59 .thumb
60 # else
61 .code   32
62 # endif
63 #endif
64
@ K256: table of the 64 32-bit SHA-256 round constants (16 rows of 4
@ words, 256 bytes), placed on a 32-byte boundary by .align 5. The round
@ code walks it with post-incremented loads through r14 (*K256++) and
@ adds each word into the working state (h+=K256[i]).
@
@ Layout constraint: sha256_block_data_order computes the table address
@ as its own entry address minus 256+32 bytes. The table, the terminator
@ word, the optional .LOPENSSL_armcap word and the padding produced by
@ the trailing .align 5 must therefore span exactly 288 bytes. Do not
@ add or remove data between K256 and the function entry.
65 .type   K256,%object
66 .align  5
67 K256:
68 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
69 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
70 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
71 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
72 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
73 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
74 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
75 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
76 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
77 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
78 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
79 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
80 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
81 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
82 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
@ The final word ends in 0xf2. The round code contains a conditionally
@ assembled check that masks the loaded constant with 0xff and compares
@ it with 0xf2 (done?). NOTE(review): presumably this is how the last
@ round is detected, so the low byte of this word must stay unique
@ within the table; confirm against the rounds outside this view.
83 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
84 .size   K256,.-K256
@ Zero word directly after the table; it is emitted after .size, so it
@ is not counted in the size of K256. NOTE(review): its consumer is not
@ in the visible part of the file; confirm before changing it.
85 .word   0                               @ terminator
@ Non-kernel builds with __ARM_MAX_ARCH__>=7 only: offset from the
@ function entry to OPENSSL_armcap_P. The function loads this word and
@ uses it to read the capability flags that select the .LARMv8 or
@ .LNEON code path.
86 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
87 .LOPENSSL_armcap:
88 .word   OPENSSL_armcap_P-sha256_block_data_order
89 #endif
@ Pads to the next 32-byte boundary so that the function entry lands
@ 256+32 bytes after K256 with or without the optional word above.
90 .align  5
91
92 .global sha256_block_data_order
93 .type   sha256_block_data_order,%function
94 sha256_block_data_order:
95 .Lsha256_block_data_order:
96 #if __ARM_ARCH__<7
97         sub     r3,pc,#8                @ sha256_block_data_order
98 #else
99         adr     r3,.Lsha256_block_data_order
100 #endif
101 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
102         ldr     r12,.LOPENSSL_armcap
103         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
104         tst     r12,#ARMV8_SHA256
105         bne     .LARMv8
106         tst     r12,#ARMV7_NEON
107         bne     .LNEON
108 #endif
109         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
110         stmdb   sp!,{r0,r1,r2,r4-r11,lr}
111         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
112         sub     r14,r3,#256+32  @ K256
113         sub     sp,sp,#16*4             @ alloca(X[16])
114 .Loop:
115 # if __ARM_ARCH__>=7
116         ldr     r2,[r1],#4
117 # else
118         ldrb    r2,[r1,#3]
119 # endif
120         eor     r3,r5,r6                @ magic
121         eor     r12,r12,r12
122 #if __ARM_ARCH__>=7
123         @ ldr   r2,[r1],#4                      @ 0
124 # if 0==15
125         str     r1,[sp,#17*4]                   @ make room for r1
126 # endif
127         eor     r0,r8,r8,ror#5
128         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
129         eor     r0,r0,r8,ror#19 @ Sigma1(e)
130 # ifndef __ARMEB__
131         rev     r2,r2
132 # endif
133 #else
134         @ ldrb  r2,[r1,#3]                      @ 0
135         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
136         ldrb    r12,[r1,#2]
137         ldrb    r0,[r1,#1]
138         orr     r2,r2,r12,lsl#8
139         ldrb    r12,[r1],#4
140         orr     r2,r2,r0,lsl#16
141 # if 0==15
142         str     r1,[sp,#17*4]                   @ make room for r1
143 # endif
144         eor     r0,r8,r8,ror#5
145         orr     r2,r2,r12,lsl#24
146         eor     r0,r0,r8,ror#19 @ Sigma1(e)
147 #endif
148         ldr     r12,[r14],#4                    @ *K256++
149         add     r11,r11,r2                      @ h+=X[i]
150         str     r2,[sp,#0*4]
151         eor     r2,r9,r10
152         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
153         and     r2,r2,r8
154         add     r11,r11,r12                     @ h+=K256[i]
155         eor     r2,r2,r10                       @ Ch(e,f,g)
156         eor     r0,r4,r4,ror#11
157         add     r11,r11,r2                      @ h+=Ch(e,f,g)
158 #if 0==31
159         and     r12,r12,#0xff
160         cmp     r12,#0xf2                       @ done?
161 #endif
162 #if 0<15
163 # if __ARM_ARCH__>=7
164         ldr     r2,[r1],#4                      @ prefetch
165 # else
166         ldrb    r2,[r1,#3]
167 # endif
168         eor     r12,r4,r5                       @ a^b, b^c in next round
169 #else
170         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
171         eor     r12,r4,r5                       @ a^b, b^c in next round
172         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
173 #endif
174         eor     r0,r0,r4,ror#20 @ Sigma0(a)
175         and     r3,r3,r12                       @ (b^c)&=(a^b)
176         add     r7,r7,r11                       @ d+=h
177         eor     r3,r3,r5                        @ Maj(a,b,c)
178         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
179         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
180 #if __ARM_ARCH__>=7
181         @ ldr   r2,[r1],#4                      @ 1
182 # if 1==15
183         str     r1,[sp,#17*4]                   @ make room for r1
184 # endif
185         eor     r0,r7,r7,ror#5
186         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
187         eor     r0,r0,r7,ror#19 @ Sigma1(e)
188 # ifndef __ARMEB__
189         rev     r2,r2
190 # endif
191 #else
192         @ ldrb  r2,[r1,#3]                      @ 1
193         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
194         ldrb    r3,[r1,#2]
195         ldrb    r0,[r1,#1]
196         orr     r2,r2,r3,lsl#8
197         ldrb    r3,[r1],#4
198         orr     r2,r2,r0,lsl#16
199 # if 1==15
200         str     r1,[sp,#17*4]                   @ make room for r1
201 # endif
202         eor     r0,r7,r7,ror#5
203         orr     r2,r2,r3,lsl#24
204         eor     r0,r0,r7,ror#19 @ Sigma1(e)
205 #endif
206         ldr     r3,[r14],#4                     @ *K256++
207         add     r10,r10,r2                      @ h+=X[i]
208         str     r2,[sp,#1*4]
209         eor     r2,r8,r9
210         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
211         and     r2,r2,r7
212         add     r10,r10,r3                      @ h+=K256[i]
213         eor     r2,r2,r9                        @ Ch(e,f,g)
214         eor     r0,r11,r11,ror#11
215         add     r10,r10,r2                      @ h+=Ch(e,f,g)
216 #if 1==31
217         and     r3,r3,#0xff
218         cmp     r3,#0xf2                        @ done?
219 #endif
220 #if 1<15
221 # if __ARM_ARCH__>=7
222         ldr     r2,[r1],#4                      @ prefetch
223 # else
224         ldrb    r2,[r1,#3]
225 # endif
226         eor     r3,r11,r4                       @ a^b, b^c in next round
227 #else
228         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
229         eor     r3,r11,r4                       @ a^b, b^c in next round
230         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
231 #endif
232         eor     r0,r0,r11,ror#20        @ Sigma0(a)
233         and     r12,r12,r3                      @ (b^c)&=(a^b)
234         add     r6,r6,r10                       @ d+=h
235         eor     r12,r12,r4                      @ Maj(a,b,c)
236         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
237         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
238 #if __ARM_ARCH__>=7
239         @ ldr   r2,[r1],#4                      @ 2
240 # if 2==15
241         str     r1,[sp,#17*4]                   @ make room for r1
242 # endif
243         eor     r0,r6,r6,ror#5
244         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
245         eor     r0,r0,r6,ror#19 @ Sigma1(e)
246 # ifndef __ARMEB__
247         rev     r2,r2
248 # endif
249 #else
250         @ ldrb  r2,[r1,#3]                      @ 2
251         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
252         ldrb    r12,[r1,#2]
253         ldrb    r0,[r1,#1]
254         orr     r2,r2,r12,lsl#8
255         ldrb    r12,[r1],#4
256         orr     r2,r2,r0,lsl#16
257 # if 2==15
258         str     r1,[sp,#17*4]                   @ make room for r1
259 # endif
260         eor     r0,r6,r6,ror#5
261         orr     r2,r2,r12,lsl#24
262         eor     r0,r0,r6,ror#19 @ Sigma1(e)
263 #endif
264         ldr     r12,[r14],#4                    @ *K256++
265         add     r9,r9,r2                        @ h+=X[i]
266         str     r2,[sp,#2*4]
267         eor     r2,r7,r8
268         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
269         and     r2,r2,r6
270         add     r9,r9,r12                       @ h+=K256[i]
271         eor     r2,r2,r8                        @ Ch(e,f,g)
272         eor     r0,r10,r10,ror#11
273         add     r9,r9,r2                        @ h+=Ch(e,f,g)
274 #if 2==31
275         and     r12,r12,#0xff
276         cmp     r12,#0xf2                       @ done?
277 #endif
278 #if 2<15
279 # if __ARM_ARCH__>=7
280         ldr     r2,[r1],#4                      @ prefetch
281 # else
282         ldrb    r2,[r1,#3]
283 # endif
284         eor     r12,r10,r11                     @ a^b, b^c in next round
285 #else
286         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
287         eor     r12,r10,r11                     @ a^b, b^c in next round
288         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
289 #endif
290         eor     r0,r0,r10,ror#20        @ Sigma0(a)
291         and     r3,r3,r12                       @ (b^c)&=(a^b)
292         add     r5,r5,r9                        @ d+=h
293         eor     r3,r3,r11                       @ Maj(a,b,c)
294         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
295         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
296 #if __ARM_ARCH__>=7
297         @ ldr   r2,[r1],#4                      @ 3
298 # if 3==15
299         str     r1,[sp,#17*4]                   @ make room for r1
300 # endif
301         eor     r0,r5,r5,ror#5
302         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
303         eor     r0,r0,r5,ror#19 @ Sigma1(e)
304 # ifndef __ARMEB__
305         rev     r2,r2
306 # endif
307 #else
308         @ ldrb  r2,[r1,#3]                      @ 3
309         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
310         ldrb    r3,[r1,#2]
311         ldrb    r0,[r1,#1]
312         orr     r2,r2,r3,lsl#8
313         ldrb    r3,[r1],#4
314         orr     r2,r2,r0,lsl#16
315 # if 3==15
316         str     r1,[sp,#17*4]                   @ make room for r1
317 # endif
318         eor     r0,r5,r5,ror#5
319         orr     r2,r2,r3,lsl#24
320         eor     r0,r0,r5,ror#19 @ Sigma1(e)
321 #endif
322         ldr     r3,[r14],#4                     @ *K256++
323         add     r8,r8,r2                        @ h+=X[i]
324         str     r2,[sp,#3*4]
325         eor     r2,r6,r7
326         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
327         and     r2,r2,r5
328         add     r8,r8,r3                        @ h+=K256[i]
329         eor     r2,r2,r7                        @ Ch(e,f,g)
330         eor     r0,r9,r9,ror#11
331         add     r8,r8,r2                        @ h+=Ch(e,f,g)
332 #if 3==31
333         and     r3,r3,#0xff
334         cmp     r3,#0xf2                        @ done?
335 #endif
336 #if 3<15
337 # if __ARM_ARCH__>=7
338         ldr     r2,[r1],#4                      @ prefetch
339 # else
340         ldrb    r2,[r1,#3]
341 # endif
342         eor     r3,r9,r10                       @ a^b, b^c in next round
343 #else
344         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
345         eor     r3,r9,r10                       @ a^b, b^c in next round
346         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
347 #endif
348         eor     r0,r0,r9,ror#20 @ Sigma0(a)
349         and     r12,r12,r3                      @ (b^c)&=(a^b)
350         add     r4,r4,r8                        @ d+=h
351         eor     r12,r12,r10                     @ Maj(a,b,c)
352         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
353         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
354 #if __ARM_ARCH__>=7
355         @ ldr   r2,[r1],#4                      @ 4
356 # if 4==15
357         str     r1,[sp,#17*4]                   @ make room for r1
358 # endif
359         eor     r0,r4,r4,ror#5
360         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
361         eor     r0,r0,r4,ror#19 @ Sigma1(e)
362 # ifndef __ARMEB__
363         rev     r2,r2
364 # endif
365 #else
366         @ ldrb  r2,[r1,#3]                      @ 4
367         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
368         ldrb    r12,[r1,#2]
369         ldrb    r0,[r1,#1]
370         orr     r2,r2,r12,lsl#8
371         ldrb    r12,[r1],#4
372         orr     r2,r2,r0,lsl#16
373 # if 4==15
374         str     r1,[sp,#17*4]                   @ make room for r1
375 # endif
376         eor     r0,r4,r4,ror#5
377         orr     r2,r2,r12,lsl#24
378         eor     r0,r0,r4,ror#19 @ Sigma1(e)
379 #endif
380         ldr     r12,[r14],#4                    @ *K256++
381         add     r7,r7,r2                        @ h+=X[i]
382         str     r2,[sp,#4*4]
383         eor     r2,r5,r6
384         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
385         and     r2,r2,r4
386         add     r7,r7,r12                       @ h+=K256[i]
387         eor     r2,r2,r6                        @ Ch(e,f,g)
388         eor     r0,r8,r8,ror#11
389         add     r7,r7,r2                        @ h+=Ch(e,f,g)
390 #if 4==31
391         and     r12,r12,#0xff
392         cmp     r12,#0xf2                       @ done?
393 #endif
394 #if 4<15
395 # if __ARM_ARCH__>=7
396         ldr     r2,[r1],#4                      @ prefetch
397 # else
398         ldrb    r2,[r1,#3]
399 # endif
400         eor     r12,r8,r9                       @ a^b, b^c in next round
401 #else
402         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
403         eor     r12,r8,r9                       @ a^b, b^c in next round
404         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
405 #endif
406         eor     r0,r0,r8,ror#20 @ Sigma0(a)
407         and     r3,r3,r12                       @ (b^c)&=(a^b)
408         add     r11,r11,r7                      @ d+=h
409         eor     r3,r3,r9                        @ Maj(a,b,c)
410         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
411         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
412 #if __ARM_ARCH__>=7
413         @ ldr   r2,[r1],#4                      @ 5
414 # if 5==15
415         str     r1,[sp,#17*4]                   @ make room for r1
416 # endif
417         eor     r0,r11,r11,ror#5
418         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
419         eor     r0,r0,r11,ror#19        @ Sigma1(e)
420 # ifndef __ARMEB__
421         rev     r2,r2
422 # endif
423 #else
424         @ ldrb  r2,[r1,#3]                      @ 5
425         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
426         ldrb    r3,[r1,#2]
427         ldrb    r0,[r1,#1]
428         orr     r2,r2,r3,lsl#8
429         ldrb    r3,[r1],#4
430         orr     r2,r2,r0,lsl#16
431 # if 5==15
432         str     r1,[sp,#17*4]                   @ make room for r1
433 # endif
434         eor     r0,r11,r11,ror#5
435         orr     r2,r2,r3,lsl#24
436         eor     r0,r0,r11,ror#19        @ Sigma1(e)
437 #endif
438         ldr     r3,[r14],#4                     @ *K256++
439         add     r6,r6,r2                        @ h+=X[i]
440         str     r2,[sp,#5*4]
441         eor     r2,r4,r5
442         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
443         and     r2,r2,r11
444         add     r6,r6,r3                        @ h+=K256[i]
445         eor     r2,r2,r5                        @ Ch(e,f,g)
446         eor     r0,r7,r7,ror#11
447         add     r6,r6,r2                        @ h+=Ch(e,f,g)
448 #if 5==31
449         and     r3,r3,#0xff
450         cmp     r3,#0xf2                        @ done?
451 #endif
452 #if 5<15
453 # if __ARM_ARCH__>=7
454         ldr     r2,[r1],#4                      @ prefetch
455 # else
456         ldrb    r2,[r1,#3]
457 # endif
458         eor     r3,r7,r8                        @ a^b, b^c in next round
459 #else
460         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
461         eor     r3,r7,r8                        @ a^b, b^c in next round
462         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
463 #endif
464         eor     r0,r0,r7,ror#20 @ Sigma0(a)
465         and     r12,r12,r3                      @ (b^c)&=(a^b)
466         add     r10,r10,r6                      @ d+=h
467         eor     r12,r12,r8                      @ Maj(a,b,c)
468         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
469         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
470 #if __ARM_ARCH__>=7
471         @ ldr   r2,[r1],#4                      @ 6
472 # if 6==15
473         str     r1,[sp,#17*4]                   @ make room for r1
474 # endif
475         eor     r0,r10,r10,ror#5
476         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
477         eor     r0,r0,r10,ror#19        @ Sigma1(e)
478 # ifndef __ARMEB__
479         rev     r2,r2
480 # endif
481 #else
482         @ ldrb  r2,[r1,#3]                      @ 6
483         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
484         ldrb    r12,[r1,#2]
485         ldrb    r0,[r1,#1]
486         orr     r2,r2,r12,lsl#8
487         ldrb    r12,[r1],#4
488         orr     r2,r2,r0,lsl#16
489 # if 6==15
490         str     r1,[sp,#17*4]                   @ make room for r1
491 # endif
492         eor     r0,r10,r10,ror#5
493         orr     r2,r2,r12,lsl#24
494         eor     r0,r0,r10,ror#19        @ Sigma1(e)
495 #endif
496         ldr     r12,[r14],#4                    @ *K256++
497         add     r5,r5,r2                        @ h+=X[i]
498         str     r2,[sp,#6*4]
499         eor     r2,r11,r4
500         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
501         and     r2,r2,r10
502         add     r5,r5,r12                       @ h+=K256[i]
503         eor     r2,r2,r4                        @ Ch(e,f,g)
504         eor     r0,r6,r6,ror#11
505         add     r5,r5,r2                        @ h+=Ch(e,f,g)
506 #if 6==31
507         and     r12,r12,#0xff
508         cmp     r12,#0xf2                       @ done?
509 #endif
510 #if 6<15
511 # if __ARM_ARCH__>=7
512         ldr     r2,[r1],#4                      @ prefetch
513 # else
514         ldrb    r2,[r1,#3]
515 # endif
516         eor     r12,r6,r7                       @ a^b, b^c in next round
517 #else
518         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
519         eor     r12,r6,r7                       @ a^b, b^c in next round
520         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
521 #endif
522         eor     r0,r0,r6,ror#20 @ Sigma0(a)
523         and     r3,r3,r12                       @ (b^c)&=(a^b)
524         add     r9,r9,r5                        @ d+=h
525         eor     r3,r3,r7                        @ Maj(a,b,c)
526         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
527         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
528 #if __ARM_ARCH__>=7
529         @ ldr   r2,[r1],#4                      @ 7
530 # if 7==15
531         str     r1,[sp,#17*4]                   @ make room for r1
532 # endif
533         eor     r0,r9,r9,ror#5
534         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
535         eor     r0,r0,r9,ror#19 @ Sigma1(e)
536 # ifndef __ARMEB__
537         rev     r2,r2
538 # endif
539 #else
540         @ ldrb  r2,[r1,#3]                      @ 7
541         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
542         ldrb    r3,[r1,#2]
543         ldrb    r0,[r1,#1]
544         orr     r2,r2,r3,lsl#8
545         ldrb    r3,[r1],#4
546         orr     r2,r2,r0,lsl#16
547 # if 7==15
548         str     r1,[sp,#17*4]                   @ make room for r1
549 # endif
550         eor     r0,r9,r9,ror#5
551         orr     r2,r2,r3,lsl#24
552         eor     r0,r0,r9,ror#19 @ Sigma1(e)
553 #endif
554         ldr     r3,[r14],#4                     @ *K256++
555         add     r4,r4,r2                        @ h+=X[i]
556         str     r2,[sp,#7*4]
557         eor     r2,r10,r11
558         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
559         and     r2,r2,r9
560         add     r4,r4,r3                        @ h+=K256[i]
561         eor     r2,r2,r11                       @ Ch(e,f,g)
562         eor     r0,r5,r5,ror#11
563         add     r4,r4,r2                        @ h+=Ch(e,f,g)
564 #if 7==31
565         and     r3,r3,#0xff
566         cmp     r3,#0xf2                        @ done?
567 #endif
568 #if 7<15
569 # if __ARM_ARCH__>=7
570         ldr     r2,[r1],#4                      @ prefetch
571 # else
572         ldrb    r2,[r1,#3]
573 # endif
574         eor     r3,r5,r6                        @ a^b, b^c in next round
575 #else
576         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
577         eor     r3,r5,r6                        @ a^b, b^c in next round
578         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
579 #endif
580         eor     r0,r0,r5,ror#20 @ Sigma0(a)
581         and     r12,r12,r3                      @ (b^c)&=(a^b)
582         add     r8,r8,r4                        @ d+=h
583         eor     r12,r12,r6                      @ Maj(a,b,c)
584         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
585         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
586 #if __ARM_ARCH__>=7
587         @ ldr   r2,[r1],#4                      @ 8
588 # if 8==15
589         str     r1,[sp,#17*4]                   @ make room for r1
590 # endif
591         eor     r0,r8,r8,ror#5
592         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
593         eor     r0,r0,r8,ror#19 @ Sigma1(e)
594 # ifndef __ARMEB__
595         rev     r2,r2
596 # endif
597 #else
598         @ ldrb  r2,[r1,#3]                      @ 8
599         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
600         ldrb    r12,[r1,#2]
601         ldrb    r0,[r1,#1]
602         orr     r2,r2,r12,lsl#8
603         ldrb    r12,[r1],#4
604         orr     r2,r2,r0,lsl#16
605 # if 8==15
606         str     r1,[sp,#17*4]                   @ make room for r1
607 # endif
608         eor     r0,r8,r8,ror#5
609         orr     r2,r2,r12,lsl#24
610         eor     r0,r0,r8,ror#19 @ Sigma1(e)
611 #endif
612         ldr     r12,[r14],#4                    @ *K256++
613         add     r11,r11,r2                      @ h+=X[i]
614         str     r2,[sp,#8*4]
615         eor     r2,r9,r10
616         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
617         and     r2,r2,r8
618         add     r11,r11,r12                     @ h+=K256[i]
619         eor     r2,r2,r10                       @ Ch(e,f,g)
620         eor     r0,r4,r4,ror#11
621         add     r11,r11,r2                      @ h+=Ch(e,f,g)
622 #if 8==31
623         and     r12,r12,#0xff
624         cmp     r12,#0xf2                       @ done?
625 #endif
626 #if 8<15
627 # if __ARM_ARCH__>=7
628         ldr     r2,[r1],#4                      @ prefetch
629 # else
630         ldrb    r2,[r1,#3]
631 # endif
632         eor     r12,r4,r5                       @ a^b, b^c in next round
633 #else
634         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
635         eor     r12,r4,r5                       @ a^b, b^c in next round
636         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
637 #endif
638         eor     r0,r0,r4,ror#20 @ Sigma0(a)
639         and     r3,r3,r12                       @ (b^c)&=(a^b)
640         add     r7,r7,r11                       @ d+=h
641         eor     r3,r3,r5                        @ Maj(a,b,c)
642         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
643         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
644 #if __ARM_ARCH__>=7
645         @ ldr   r2,[r1],#4                      @ 9
646 # if 9==15
647         str     r1,[sp,#17*4]                   @ make room for r1
648 # endif
649         eor     r0,r7,r7,ror#5
650         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
651         eor     r0,r0,r7,ror#19 @ Sigma1(e)
652 # ifndef __ARMEB__
653         rev     r2,r2
654 # endif
655 #else
656         @ ldrb  r2,[r1,#3]                      @ 9
657         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
658         ldrb    r3,[r1,#2]
659         ldrb    r0,[r1,#1]
660         orr     r2,r2,r3,lsl#8
661         ldrb    r3,[r1],#4
662         orr     r2,r2,r0,lsl#16
663 # if 9==15
664         str     r1,[sp,#17*4]                   @ make room for r1
665 # endif
666         eor     r0,r7,r7,ror#5
667         orr     r2,r2,r3,lsl#24
668         eor     r0,r0,r7,ror#19 @ Sigma1(e)
669 #endif
670         ldr     r3,[r14],#4                     @ *K256++
671         add     r10,r10,r2                      @ h+=X[i]
672         str     r2,[sp,#9*4]
673         eor     r2,r8,r9
674         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
675         and     r2,r2,r7
676         add     r10,r10,r3                      @ h+=K256[i]
677         eor     r2,r2,r9                        @ Ch(e,f,g)
678         eor     r0,r11,r11,ror#11
679         add     r10,r10,r2                      @ h+=Ch(e,f,g)
680 #if 9==31
681         and     r3,r3,#0xff
682         cmp     r3,#0xf2                        @ done?
683 #endif
684 #if 9<15
685 # if __ARM_ARCH__>=7
686         ldr     r2,[r1],#4                      @ prefetch
687 # else
688         ldrb    r2,[r1,#3]
689 # endif
690         eor     r3,r11,r4                       @ a^b, b^c in next round
691 #else
692         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
693         eor     r3,r11,r4                       @ a^b, b^c in next round
694         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
695 #endif
696         eor     r0,r0,r11,ror#20        @ Sigma0(a)
697         and     r12,r12,r3                      @ (b^c)&=(a^b)
698         add     r6,r6,r10                       @ d+=h
699         eor     r12,r12,r4                      @ Maj(a,b,c)
700         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
701         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
702 #if __ARM_ARCH__>=7
703         @ ldr   r2,[r1],#4                      @ 10
704 # if 10==15
705         str     r1,[sp,#17*4]                   @ make room for r1
706 # endif
707         eor     r0,r6,r6,ror#5
708         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
709         eor     r0,r0,r6,ror#19 @ Sigma1(e)
710 # ifndef __ARMEB__
711         rev     r2,r2
712 # endif
713 #else
714         @ ldrb  r2,[r1,#3]                      @ 10
715         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
716         ldrb    r12,[r1,#2]
717         ldrb    r0,[r1,#1]
718         orr     r2,r2,r12,lsl#8
719         ldrb    r12,[r1],#4
720         orr     r2,r2,r0,lsl#16
721 # if 10==15
722         str     r1,[sp,#17*4]                   @ make room for r1
723 # endif
724         eor     r0,r6,r6,ror#5
725         orr     r2,r2,r12,lsl#24
726         eor     r0,r0,r6,ror#19 @ Sigma1(e)
727 #endif
728         ldr     r12,[r14],#4                    @ *K256++
729         add     r9,r9,r2                        @ h+=X[i]
730         str     r2,[sp,#10*4]
731         eor     r2,r7,r8
732         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
733         and     r2,r2,r6
734         add     r9,r9,r12                       @ h+=K256[i]
735         eor     r2,r2,r8                        @ Ch(e,f,g)
736         eor     r0,r10,r10,ror#11
737         add     r9,r9,r2                        @ h+=Ch(e,f,g)
738 #if 10==31
739         and     r12,r12,#0xff
740         cmp     r12,#0xf2                       @ done?
741 #endif
742 #if 10<15
743 # if __ARM_ARCH__>=7
744         ldr     r2,[r1],#4                      @ prefetch
745 # else
746         ldrb    r2,[r1,#3]
747 # endif
748         eor     r12,r10,r11                     @ a^b, b^c in next round
749 #else
750         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
751         eor     r12,r10,r11                     @ a^b, b^c in next round
752         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
753 #endif
754         eor     r0,r0,r10,ror#20        @ Sigma0(a)
755         and     r3,r3,r12                       @ (b^c)&=(a^b)
756         add     r5,r5,r9                        @ d+=h
757         eor     r3,r3,r11                       @ Maj(a,b,c)
758         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
759         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
760 #if __ARM_ARCH__>=7
761         @ ldr   r2,[r1],#4                      @ 11
762 # if 11==15
763         str     r1,[sp,#17*4]                   @ make room for r1
764 # endif
765         eor     r0,r5,r5,ror#5
766         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
767         eor     r0,r0,r5,ror#19 @ Sigma1(e)
768 # ifndef __ARMEB__
769         rev     r2,r2
770 # endif
771 #else
772         @ ldrb  r2,[r1,#3]                      @ 11
773         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
774         ldrb    r3,[r1,#2]
775         ldrb    r0,[r1,#1]
776         orr     r2,r2,r3,lsl#8
777         ldrb    r3,[r1],#4
778         orr     r2,r2,r0,lsl#16
779 # if 11==15
780         str     r1,[sp,#17*4]                   @ make room for r1
781 # endif
782         eor     r0,r5,r5,ror#5
783         orr     r2,r2,r3,lsl#24
784         eor     r0,r0,r5,ror#19 @ Sigma1(e)
785 #endif
786         ldr     r3,[r14],#4                     @ *K256++
787         add     r8,r8,r2                        @ h+=X[i]
788         str     r2,[sp,#11*4]
789         eor     r2,r6,r7
790         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
791         and     r2,r2,r5
792         add     r8,r8,r3                        @ h+=K256[i]
793         eor     r2,r2,r7                        @ Ch(e,f,g)
794         eor     r0,r9,r9,ror#11
795         add     r8,r8,r2                        @ h+=Ch(e,f,g)
796 #if 11==31
797         and     r3,r3,#0xff
798         cmp     r3,#0xf2                        @ done?
799 #endif
800 #if 11<15
801 # if __ARM_ARCH__>=7
802         ldr     r2,[r1],#4                      @ prefetch
803 # else
804         ldrb    r2,[r1,#3]
805 # endif
806         eor     r3,r9,r10                       @ a^b, b^c in next round
807 #else
808         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
809         eor     r3,r9,r10                       @ a^b, b^c in next round
810         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
811 #endif
812         eor     r0,r0,r9,ror#20 @ Sigma0(a)
813         and     r12,r12,r3                      @ (b^c)&=(a^b)
814         add     r4,r4,r8                        @ d+=h
815         eor     r12,r12,r10                     @ Maj(a,b,c)
816         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
817         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
818 #if __ARM_ARCH__>=7
819         @ ldr   r2,[r1],#4                      @ 12
820 # if 12==15
821         str     r1,[sp,#17*4]                   @ make room for r1
822 # endif
823         eor     r0,r4,r4,ror#5
824         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
825         eor     r0,r0,r4,ror#19 @ Sigma1(e)
826 # ifndef __ARMEB__
827         rev     r2,r2
828 # endif
829 #else
830         @ ldrb  r2,[r1,#3]                      @ 12
831         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
832         ldrb    r12,[r1,#2]
833         ldrb    r0,[r1,#1]
834         orr     r2,r2,r12,lsl#8
835         ldrb    r12,[r1],#4
836         orr     r2,r2,r0,lsl#16
837 # if 12==15
838         str     r1,[sp,#17*4]                   @ make room for r1
839 # endif
840         eor     r0,r4,r4,ror#5
841         orr     r2,r2,r12,lsl#24
842         eor     r0,r0,r4,ror#19 @ Sigma1(e)
843 #endif
844         ldr     r12,[r14],#4                    @ *K256++
845         add     r7,r7,r2                        @ h+=X[i]
846         str     r2,[sp,#12*4]
847         eor     r2,r5,r6
848         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
849         and     r2,r2,r4
850         add     r7,r7,r12                       @ h+=K256[i]
851         eor     r2,r2,r6                        @ Ch(e,f,g)
852         eor     r0,r8,r8,ror#11
853         add     r7,r7,r2                        @ h+=Ch(e,f,g)
854 #if 12==31
855         and     r12,r12,#0xff
856         cmp     r12,#0xf2                       @ done?
857 #endif
858 #if 12<15
859 # if __ARM_ARCH__>=7
860         ldr     r2,[r1],#4                      @ prefetch
861 # else
862         ldrb    r2,[r1,#3]
863 # endif
864         eor     r12,r8,r9                       @ a^b, b^c in next round
865 #else
866         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
867         eor     r12,r8,r9                       @ a^b, b^c in next round
868         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
869 #endif
870         eor     r0,r0,r8,ror#20 @ Sigma0(a)
871         and     r3,r3,r12                       @ (b^c)&=(a^b)
872         add     r11,r11,r7                      @ d+=h
873         eor     r3,r3,r9                        @ Maj(a,b,c)
874         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
875         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
876 #if __ARM_ARCH__>=7
877         @ ldr   r2,[r1],#4                      @ 13
878 # if 13==15
879         str     r1,[sp,#17*4]                   @ make room for r1
880 # endif
881         eor     r0,r11,r11,ror#5
882         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
883         eor     r0,r0,r11,ror#19        @ Sigma1(e)
884 # ifndef __ARMEB__
885         rev     r2,r2
886 # endif
887 #else
888         @ ldrb  r2,[r1,#3]                      @ 13
889         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
890         ldrb    r3,[r1,#2]
891         ldrb    r0,[r1,#1]
892         orr     r2,r2,r3,lsl#8
893         ldrb    r3,[r1],#4
894         orr     r2,r2,r0,lsl#16
895 # if 13==15
896         str     r1,[sp,#17*4]                   @ make room for r1
897 # endif
898         eor     r0,r11,r11,ror#5
899         orr     r2,r2,r3,lsl#24
900         eor     r0,r0,r11,ror#19        @ Sigma1(e)
901 #endif
902         ldr     r3,[r14],#4                     @ *K256++
903         add     r6,r6,r2                        @ h+=X[i]
904         str     r2,[sp,#13*4]
905         eor     r2,r4,r5
906         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
907         and     r2,r2,r11
908         add     r6,r6,r3                        @ h+=K256[i]
909         eor     r2,r2,r5                        @ Ch(e,f,g)
910         eor     r0,r7,r7,ror#11
911         add     r6,r6,r2                        @ h+=Ch(e,f,g)
912 #if 13==31
913         and     r3,r3,#0xff
914         cmp     r3,#0xf2                        @ done?
915 #endif
916 #if 13<15
917 # if __ARM_ARCH__>=7
918         ldr     r2,[r1],#4                      @ prefetch
919 # else
920         ldrb    r2,[r1,#3]
921 # endif
922         eor     r3,r7,r8                        @ a^b, b^c in next round
923 #else
924         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
925         eor     r3,r7,r8                        @ a^b, b^c in next round
926         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
927 #endif
928         eor     r0,r0,r7,ror#20 @ Sigma0(a)
929         and     r12,r12,r3                      @ (b^c)&=(a^b)
930         add     r10,r10,r6                      @ d+=h
931         eor     r12,r12,r8                      @ Maj(a,b,c)
932         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
933         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
934 #if __ARM_ARCH__>=7
935         @ ldr   r2,[r1],#4                      @ 14
936 # if 14==15
937         str     r1,[sp,#17*4]                   @ make room for r1
938 # endif
939         eor     r0,r10,r10,ror#5
940         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
941         eor     r0,r0,r10,ror#19        @ Sigma1(e)
942 # ifndef __ARMEB__
943         rev     r2,r2
944 # endif
945 #else
946         @ ldrb  r2,[r1,#3]                      @ 14
947         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
948         ldrb    r12,[r1,#2]
949         ldrb    r0,[r1,#1]
950         orr     r2,r2,r12,lsl#8
951         ldrb    r12,[r1],#4
952         orr     r2,r2,r0,lsl#16
953 # if 14==15
954         str     r1,[sp,#17*4]                   @ make room for r1
955 # endif
956         eor     r0,r10,r10,ror#5
957         orr     r2,r2,r12,lsl#24
958         eor     r0,r0,r10,ror#19        @ Sigma1(e)
959 #endif
960         ldr     r12,[r14],#4                    @ *K256++
961         add     r5,r5,r2                        @ h+=X[i]
962         str     r2,[sp,#14*4]
963         eor     r2,r11,r4
964         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
965         and     r2,r2,r10
966         add     r5,r5,r12                       @ h+=K256[i]
967         eor     r2,r2,r4                        @ Ch(e,f,g)
968         eor     r0,r6,r6,ror#11
969         add     r5,r5,r2                        @ h+=Ch(e,f,g)
970 #if 14==31
971         and     r12,r12,#0xff
972         cmp     r12,#0xf2                       @ done?
973 #endif
974 #if 14<15
975 # if __ARM_ARCH__>=7
976         ldr     r2,[r1],#4                      @ prefetch
977 # else
978         ldrb    r2,[r1,#3]
979 # endif
980         eor     r12,r6,r7                       @ a^b, b^c in next round
981 #else
982         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
983         eor     r12,r6,r7                       @ a^b, b^c in next round
984         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
985 #endif
986         eor     r0,r0,r6,ror#20 @ Sigma0(a)
987         and     r3,r3,r12                       @ (b^c)&=(a^b)
988         add     r9,r9,r5                        @ d+=h
989         eor     r3,r3,r7                        @ Maj(a,b,c)
990         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
991         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
992 #if __ARM_ARCH__>=7
993         @ ldr   r2,[r1],#4                      @ 15
994 # if 15==15
995         str     r1,[sp,#17*4]                   @ make room for r1
996 # endif
997         eor     r0,r9,r9,ror#5
998         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
999         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1000 # ifndef __ARMEB__
1001         rev     r2,r2
1002 # endif
1003 #else
1004         @ ldrb  r2,[r1,#3]                      @ 15
1005         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1006         ldrb    r3,[r1,#2]
1007         ldrb    r0,[r1,#1]
1008         orr     r2,r2,r3,lsl#8
1009         ldrb    r3,[r1],#4
1010         orr     r2,r2,r0,lsl#16
1011 # if 15==15
1012         str     r1,[sp,#17*4]                   @ make room for r1
1013 # endif
1014         eor     r0,r9,r9,ror#5
1015         orr     r2,r2,r3,lsl#24
1016         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1017 #endif
1018         ldr     r3,[r14],#4                     @ *K256++
1019         add     r4,r4,r2                        @ h+=X[i]
1020         str     r2,[sp,#15*4]
1021         eor     r2,r10,r11
1022         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1023         and     r2,r2,r9
1024         add     r4,r4,r3                        @ h+=K256[i]
1025         eor     r2,r2,r11                       @ Ch(e,f,g)
1026         eor     r0,r5,r5,ror#11
1027         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1028 #if 15==31
1029         and     r3,r3,#0xff
1030         cmp     r3,#0xf2                        @ done?
1031 #endif
1032 #if 15<15
1033 # if __ARM_ARCH__>=7
1034         ldr     r2,[r1],#4                      @ prefetch
1035 # else
1036         ldrb    r2,[r1,#3]
1037 # endif
1038         eor     r3,r5,r6                        @ a^b, b^c in next round
1039 #else
1040         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1041         eor     r3,r5,r6                        @ a^b, b^c in next round
1042         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1043 #endif
1044         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1045         and     r12,r12,r3                      @ (b^c)&=(a^b)
1046         add     r8,r8,r4                        @ d+=h
1047         eor     r12,r12,r6                      @ Maj(a,b,c)
1048         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1049         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1050 .Lrounds_16_xx:
1051         @ ldr   r2,[sp,#1*4]            @ 16
1052         @ ldr   r1,[sp,#14*4]
1053         mov     r0,r2,ror#7
1054         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1055         mov     r12,r1,ror#17
1056         eor     r0,r0,r2,ror#18
1057         eor     r12,r12,r1,ror#19
1058         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1059         ldr     r2,[sp,#0*4]
1060         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1061         ldr     r1,[sp,#9*4]
1062
1063         add     r12,r12,r0
1064         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1065         add     r2,r2,r12
1066         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1067         add     r2,r2,r1                        @ X[i]
1068         ldr     r12,[r14],#4                    @ *K256++
1069         add     r11,r11,r2                      @ h+=X[i]
1070         str     r2,[sp,#0*4]
1071         eor     r2,r9,r10
1072         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1073         and     r2,r2,r8
1074         add     r11,r11,r12                     @ h+=K256[i]
1075         eor     r2,r2,r10                       @ Ch(e,f,g)
1076         eor     r0,r4,r4,ror#11
1077         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1078 #if 16==31
1079         and     r12,r12,#0xff
1080         cmp     r12,#0xf2                       @ done?
1081 #endif
1082 #if 16<15
1083 # if __ARM_ARCH__>=7
1084         ldr     r2,[r1],#4                      @ prefetch
1085 # else
1086         ldrb    r2,[r1,#3]
1087 # endif
1088         eor     r12,r4,r5                       @ a^b, b^c in next round
1089 #else
1090         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
1091         eor     r12,r4,r5                       @ a^b, b^c in next round
1092         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
1093 #endif
1094         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1095         and     r3,r3,r12                       @ (b^c)&=(a^b)
1096         add     r7,r7,r11                       @ d+=h
1097         eor     r3,r3,r5                        @ Maj(a,b,c)
1098         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1099         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1100         @ ldr   r2,[sp,#2*4]            @ 17
1101         @ ldr   r1,[sp,#15*4]
1102         mov     r0,r2,ror#7
1103         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1104         mov     r3,r1,ror#17
1105         eor     r0,r0,r2,ror#18
1106         eor     r3,r3,r1,ror#19
1107         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1108         ldr     r2,[sp,#1*4]
1109         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1110         ldr     r1,[sp,#10*4]
1111
1112         add     r3,r3,r0
1113         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1114         add     r2,r2,r3
1115         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1116         add     r2,r2,r1                        @ X[i]
1117         ldr     r3,[r14],#4                     @ *K256++
1118         add     r10,r10,r2                      @ h+=X[i]
1119         str     r2,[sp,#1*4]
1120         eor     r2,r8,r9
1121         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1122         and     r2,r2,r7
1123         add     r10,r10,r3                      @ h+=K256[i]
1124         eor     r2,r2,r9                        @ Ch(e,f,g)
1125         eor     r0,r11,r11,ror#11
1126         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1127 #if 17==31
1128         and     r3,r3,#0xff
1129         cmp     r3,#0xf2                        @ done?
1130 #endif
1131 #if 17<15
1132 # if __ARM_ARCH__>=7
1133         ldr     r2,[r1],#4                      @ prefetch
1134 # else
1135         ldrb    r2,[r1,#3]
1136 # endif
1137         eor     r3,r11,r4                       @ a^b, b^c in next round
1138 #else
1139         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
1140         eor     r3,r11,r4                       @ a^b, b^c in next round
1141         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
1142 #endif
1143         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1144         and     r12,r12,r3                      @ (b^c)&=(a^b)
1145         add     r6,r6,r10                       @ d+=h
1146         eor     r12,r12,r4                      @ Maj(a,b,c)
1147         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1148         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1149         @ ldr   r2,[sp,#3*4]            @ 18
1150         @ ldr   r1,[sp,#0*4]
1151         mov     r0,r2,ror#7
1152         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1153         mov     r12,r1,ror#17
1154         eor     r0,r0,r2,ror#18
1155         eor     r12,r12,r1,ror#19
1156         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1157         ldr     r2,[sp,#2*4]
1158         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1159         ldr     r1,[sp,#11*4]
1160
1161         add     r12,r12,r0
1162         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1163         add     r2,r2,r12
1164         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1165         add     r2,r2,r1                        @ X[i]
1166         ldr     r12,[r14],#4                    @ *K256++
1167         add     r9,r9,r2                        @ h+=X[i]
1168         str     r2,[sp,#2*4]
1169         eor     r2,r7,r8
1170         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1171         and     r2,r2,r6
1172         add     r9,r9,r12                       @ h+=K256[i]
1173         eor     r2,r2,r8                        @ Ch(e,f,g)
1174         eor     r0,r10,r10,ror#11
1175         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1176 #if 18==31
1177         and     r12,r12,#0xff
1178         cmp     r12,#0xf2                       @ done?
1179 #endif
1180 #if 18<15
1181 # if __ARM_ARCH__>=7
1182         ldr     r2,[r1],#4                      @ prefetch
1183 # else
1184         ldrb    r2,[r1,#3]
1185 # endif
1186         eor     r12,r10,r11                     @ a^b, b^c in next round
1187 #else
1188         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
1189         eor     r12,r10,r11                     @ a^b, b^c in next round
1190         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
1191 #endif
1192         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1193         and     r3,r3,r12                       @ (b^c)&=(a^b)
1194         add     r5,r5,r9                        @ d+=h
1195         eor     r3,r3,r11                       @ Maj(a,b,c)
1196         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1197         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1198         @ ldr   r2,[sp,#4*4]            @ 19
1199         @ ldr   r1,[sp,#1*4]
1200         mov     r0,r2,ror#7
1201         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1202         mov     r3,r1,ror#17
1203         eor     r0,r0,r2,ror#18
1204         eor     r3,r3,r1,ror#19
1205         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1206         ldr     r2,[sp,#3*4]
1207         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1208         ldr     r1,[sp,#12*4]
1209
1210         add     r3,r3,r0
1211         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1212         add     r2,r2,r3
1213         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1214         add     r2,r2,r1                        @ X[i]
1215         ldr     r3,[r14],#4                     @ *K256++
1216         add     r8,r8,r2                        @ h+=X[i]
1217         str     r2,[sp,#3*4]
1218         eor     r2,r6,r7
1219         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1220         and     r2,r2,r5
1221         add     r8,r8,r3                        @ h+=K256[i]
1222         eor     r2,r2,r7                        @ Ch(e,f,g)
1223         eor     r0,r9,r9,ror#11
1224         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1225 #if 19==31
1226         and     r3,r3,#0xff
1227         cmp     r3,#0xf2                        @ done?
1228 #endif
1229 #if 19<15
1230 # if __ARM_ARCH__>=7
1231         ldr     r2,[r1],#4                      @ prefetch
1232 # else
1233         ldrb    r2,[r1,#3]
1234 # endif
1235         eor     r3,r9,r10                       @ a^b, b^c in next round
1236 #else
1237         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
1238         eor     r3,r9,r10                       @ a^b, b^c in next round
1239         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
1240 #endif
1241         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1242         and     r12,r12,r3                      @ (b^c)&=(a^b)
1243         add     r4,r4,r8                        @ d+=h
1244         eor     r12,r12,r10                     @ Maj(a,b,c)
1245         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1246         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1247         @ ldr   r2,[sp,#5*4]            @ 20
1248         @ ldr   r1,[sp,#2*4]
1249         mov     r0,r2,ror#7
1250         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1251         mov     r12,r1,ror#17
1252         eor     r0,r0,r2,ror#18
1253         eor     r12,r12,r1,ror#19
1254         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1255         ldr     r2,[sp,#4*4]
1256         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1257         ldr     r1,[sp,#13*4]
1258
1259         add     r12,r12,r0
1260         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1261         add     r2,r2,r12
1262         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1263         add     r2,r2,r1                        @ X[i]
1264         ldr     r12,[r14],#4                    @ *K256++
1265         add     r7,r7,r2                        @ h+=X[i]
1266         str     r2,[sp,#4*4]
1267         eor     r2,r5,r6
1268         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1269         and     r2,r2,r4
1270         add     r7,r7,r12                       @ h+=K256[i]
1271         eor     r2,r2,r6                        @ Ch(e,f,g)
1272         eor     r0,r8,r8,ror#11
1273         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1274 #if 20==31
1275         and     r12,r12,#0xff
1276         cmp     r12,#0xf2                       @ done?
1277 #endif
1278 #if 20<15
1279 # if __ARM_ARCH__>=7
1280         ldr     r2,[r1],#4                      @ prefetch
1281 # else
1282         ldrb    r2,[r1,#3]
1283 # endif
1284         eor     r12,r8,r9                       @ a^b, b^c in next round
1285 #else
1286         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
1287         eor     r12,r8,r9                       @ a^b, b^c in next round
1288         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
1289 #endif
1290         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1291         and     r3,r3,r12                       @ (b^c)&=(a^b)
1292         add     r11,r11,r7                      @ d+=h
1293         eor     r3,r3,r9                        @ Maj(a,b,c)
1294         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1295         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1296         @ ldr   r2,[sp,#6*4]            @ 21
1297         @ ldr   r1,[sp,#3*4]
1298         mov     r0,r2,ror#7
1299         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1300         mov     r3,r1,ror#17
1301         eor     r0,r0,r2,ror#18
1302         eor     r3,r3,r1,ror#19
1303         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1304         ldr     r2,[sp,#5*4]
1305         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1306         ldr     r1,[sp,#14*4]
1307
1308         add     r3,r3,r0
1309         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1310         add     r2,r2,r3
1311         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1312         add     r2,r2,r1                        @ X[i]
1313         ldr     r3,[r14],#4                     @ *K256++
1314         add     r6,r6,r2                        @ h+=X[i]
1315         str     r2,[sp,#5*4]
1316         eor     r2,r4,r5
1317         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1318         and     r2,r2,r11
1319         add     r6,r6,r3                        @ h+=K256[i]
1320         eor     r2,r2,r5                        @ Ch(e,f,g)
1321         eor     r0,r7,r7,ror#11
1322         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1323 #if 21==31
1324         and     r3,r3,#0xff
1325         cmp     r3,#0xf2                        @ done?
1326 #endif
1327 #if 21<15
1328 # if __ARM_ARCH__>=7
1329         ldr     r2,[r1],#4                      @ prefetch
1330 # else
1331         ldrb    r2,[r1,#3]
1332 # endif
1333         eor     r3,r7,r8                        @ a^b, b^c in next round
1334 #else
1335         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
1336         eor     r3,r7,r8                        @ a^b, b^c in next round
1337         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
1338 #endif
1339         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1340         and     r12,r12,r3                      @ (b^c)&=(a^b)
1341         add     r10,r10,r6                      @ d+=h
1342         eor     r12,r12,r8                      @ Maj(a,b,c)
1343         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1344         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1345         @ ldr   r2,[sp,#7*4]            @ 22
1346         @ ldr   r1,[sp,#4*4]
1347         mov     r0,r2,ror#7
1348         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1349         mov     r12,r1,ror#17
1350         eor     r0,r0,r2,ror#18
1351         eor     r12,r12,r1,ror#19
1352         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1353         ldr     r2,[sp,#6*4]
1354         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1355         ldr     r1,[sp,#15*4]
1356
1357         add     r12,r12,r0
1358         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1359         add     r2,r2,r12
1360         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1361         add     r2,r2,r1                        @ X[i]
1362         ldr     r12,[r14],#4                    @ *K256++
1363         add     r5,r5,r2                        @ h+=X[i]
1364         str     r2,[sp,#6*4]
1365         eor     r2,r11,r4
1366         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1367         and     r2,r2,r10
1368         add     r5,r5,r12                       @ h+=K256[i]
1369         eor     r2,r2,r4                        @ Ch(e,f,g)
1370         eor     r0,r6,r6,ror#11
1371         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1372 #if 22==31
1373         and     r12,r12,#0xff
1374         cmp     r12,#0xf2                       @ done?
1375 #endif
1376 #if 22<15
1377 # if __ARM_ARCH__>=7
1378         ldr     r2,[r1],#4                      @ prefetch
1379 # else
1380         ldrb    r2,[r1,#3]
1381 # endif
1382         eor     r12,r6,r7                       @ a^b, b^c in next round
1383 #else
1384         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
1385         eor     r12,r6,r7                       @ a^b, b^c in next round
1386         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
1387 #endif
1388         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1389         and     r3,r3,r12                       @ (b^c)&=(a^b)
1390         add     r9,r9,r5                        @ d+=h
1391         eor     r3,r3,r7                        @ Maj(a,b,c)
1392         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1393         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1394         @ ldr   r2,[sp,#8*4]            @ 23
1395         @ ldr   r1,[sp,#5*4]
1396         mov     r0,r2,ror#7
1397         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1398         mov     r3,r1,ror#17
1399         eor     r0,r0,r2,ror#18
1400         eor     r3,r3,r1,ror#19
1401         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1402         ldr     r2,[sp,#7*4]
1403         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1404         ldr     r1,[sp,#0*4]
1405
1406         add     r3,r3,r0
1407         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1408         add     r2,r2,r3
1409         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1410         add     r2,r2,r1                        @ X[i]
1411         ldr     r3,[r14],#4                     @ *K256++
1412         add     r4,r4,r2                        @ h+=X[i]
1413         str     r2,[sp,#7*4]
1414         eor     r2,r10,r11
1415         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1416         and     r2,r2,r9
1417         add     r4,r4,r3                        @ h+=K256[i]
1418         eor     r2,r2,r11                       @ Ch(e,f,g)
1419         eor     r0,r5,r5,ror#11
1420         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1421 #if 23==31
1422         and     r3,r3,#0xff
1423         cmp     r3,#0xf2                        @ done?
1424 #endif
1425 #if 23<15
1426 # if __ARM_ARCH__>=7
1427         ldr     r2,[r1],#4                      @ prefetch
1428 # else
1429         ldrb    r2,[r1,#3]
1430 # endif
1431         eor     r3,r5,r6                        @ a^b, b^c in next round
1432 #else
1433         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
1434         eor     r3,r5,r6                        @ a^b, b^c in next round
1435         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
1436 #endif
1437         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1438         and     r12,r12,r3                      @ (b^c)&=(a^b)
1439         add     r8,r8,r4                        @ d+=h
1440         eor     r12,r12,r6                      @ Maj(a,b,c)
1441         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1442         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1443         @ ldr   r2,[sp,#9*4]            @ 24
1444         @ ldr   r1,[sp,#6*4]
1445         mov     r0,r2,ror#7
1446         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1447         mov     r12,r1,ror#17
1448         eor     r0,r0,r2,ror#18
1449         eor     r12,r12,r1,ror#19
1450         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1451         ldr     r2,[sp,#8*4]
1452         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1453         ldr     r1,[sp,#1*4]
1454
1455         add     r12,r12,r0
1456         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1457         add     r2,r2,r12
1458         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1459         add     r2,r2,r1                        @ X[i]
1460         ldr     r12,[r14],#4                    @ *K256++
1461         add     r11,r11,r2                      @ h+=X[i]
1462         str     r2,[sp,#8*4]
1463         eor     r2,r9,r10
1464         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1465         and     r2,r2,r8
1466         add     r11,r11,r12                     @ h+=K256[i]
1467         eor     r2,r2,r10                       @ Ch(e,f,g)
1468         eor     r0,r4,r4,ror#11
1469         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1470 #if 24==31
1471         and     r12,r12,#0xff
1472         cmp     r12,#0xf2                       @ done?
1473 #endif
1474 #if 24<15
1475 # if __ARM_ARCH__>=7
1476         ldr     r2,[r1],#4                      @ prefetch
1477 # else
1478         ldrb    r2,[r1,#3]
1479 # endif
1480         eor     r12,r4,r5                       @ a^b, b^c in next round
1481 #else
1482         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1483         eor     r12,r4,r5                       @ a^b, b^c in next round
1484         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1485 #endif
1486         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1487         and     r3,r3,r12                       @ (b^c)&=(a^b)
1488         add     r7,r7,r11                       @ d+=h
1489         eor     r3,r3,r5                        @ Maj(a,b,c)
1490         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1491         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1492         @ ldr   r2,[sp,#10*4]           @ 25
1493         @ ldr   r1,[sp,#7*4]
1494         mov     r0,r2,ror#7
1495         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1496         mov     r3,r1,ror#17
1497         eor     r0,r0,r2,ror#18
1498         eor     r3,r3,r1,ror#19
1499         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1500         ldr     r2,[sp,#9*4]
1501         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1502         ldr     r1,[sp,#2*4]
1503
1504         add     r3,r3,r0
1505         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1506         add     r2,r2,r3
1507         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1508         add     r2,r2,r1                        @ X[i]
1509         ldr     r3,[r14],#4                     @ *K256++
1510         add     r10,r10,r2                      @ h+=X[i]
1511         str     r2,[sp,#9*4]
1512         eor     r2,r8,r9
1513         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1514         and     r2,r2,r7
1515         add     r10,r10,r3                      @ h+=K256[i]
1516         eor     r2,r2,r9                        @ Ch(e,f,g)
1517         eor     r0,r11,r11,ror#11
1518         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1519 #if 25==31
1520         and     r3,r3,#0xff
1521         cmp     r3,#0xf2                        @ done?
1522 #endif
1523 #if 25<15
1524 # if __ARM_ARCH__>=7
1525         ldr     r2,[r1],#4                      @ prefetch
1526 # else
1527         ldrb    r2,[r1,#3]
1528 # endif
1529         eor     r3,r11,r4                       @ a^b, b^c in next round
1530 #else
1531         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1532         eor     r3,r11,r4                       @ a^b, b^c in next round
1533         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1534 #endif
1535         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1536         and     r12,r12,r3                      @ (b^c)&=(a^b)
1537         add     r6,r6,r10                       @ d+=h
1538         eor     r12,r12,r4                      @ Maj(a,b,c)
1539         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1540         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1541         @ ldr   r2,[sp,#11*4]           @ 26
1542         @ ldr   r1,[sp,#8*4]
1543         mov     r0,r2,ror#7
1544         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1545         mov     r12,r1,ror#17
1546         eor     r0,r0,r2,ror#18
1547         eor     r12,r12,r1,ror#19
1548         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1549         ldr     r2,[sp,#10*4]
1550         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1551         ldr     r1,[sp,#3*4]
1552
1553         add     r12,r12,r0
1554         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1555         add     r2,r2,r12
1556         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1557         add     r2,r2,r1                        @ X[i]
1558         ldr     r12,[r14],#4                    @ *K256++
1559         add     r9,r9,r2                        @ h+=X[i]
1560         str     r2,[sp,#10*4]
1561         eor     r2,r7,r8
1562         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1563         and     r2,r2,r6
1564         add     r9,r9,r12                       @ h+=K256[i]
1565         eor     r2,r2,r8                        @ Ch(e,f,g)
1566         eor     r0,r10,r10,ror#11
1567         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1568 #if 26==31
1569         and     r12,r12,#0xff
1570         cmp     r12,#0xf2                       @ done?
1571 #endif
1572 #if 26<15
1573 # if __ARM_ARCH__>=7
1574         ldr     r2,[r1],#4                      @ prefetch
1575 # else
1576         ldrb    r2,[r1,#3]
1577 # endif
1578         eor     r12,r10,r11                     @ a^b, b^c in next round
1579 #else
1580         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1581         eor     r12,r10,r11                     @ a^b, b^c in next round
1582         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1583 #endif
1584         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1585         and     r3,r3,r12                       @ (b^c)&=(a^b)
1586         add     r5,r5,r9                        @ d+=h
1587         eor     r3,r3,r11                       @ Maj(a,b,c)
1588         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1589         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1590         @ ldr   r2,[sp,#12*4]           @ 27
1591         @ ldr   r1,[sp,#9*4]
1592         mov     r0,r2,ror#7
1593         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1594         mov     r3,r1,ror#17
1595         eor     r0,r0,r2,ror#18
1596         eor     r3,r3,r1,ror#19
1597         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1598         ldr     r2,[sp,#11*4]
1599         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1600         ldr     r1,[sp,#4*4]
1601
1602         add     r3,r3,r0
1603         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1604         add     r2,r2,r3
1605         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1606         add     r2,r2,r1                        @ X[i]
1607         ldr     r3,[r14],#4                     @ *K256++
1608         add     r8,r8,r2                        @ h+=X[i]
1609         str     r2,[sp,#11*4]
1610         eor     r2,r6,r7
1611         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1612         and     r2,r2,r5
1613         add     r8,r8,r3                        @ h+=K256[i]
1614         eor     r2,r2,r7                        @ Ch(e,f,g)
1615         eor     r0,r9,r9,ror#11
1616         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1617 #if 27==31
1618         and     r3,r3,#0xff
1619         cmp     r3,#0xf2                        @ done?
1620 #endif
1621 #if 27<15
1622 # if __ARM_ARCH__>=7
1623         ldr     r2,[r1],#4                      @ prefetch
1624 # else
1625         ldrb    r2,[r1,#3]
1626 # endif
1627         eor     r3,r9,r10                       @ a^b, b^c in next round
1628 #else
1629         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1630         eor     r3,r9,r10                       @ a^b, b^c in next round
1631         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1632 #endif
1633         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1634         and     r12,r12,r3                      @ (b^c)&=(a^b)
1635         add     r4,r4,r8                        @ d+=h
1636         eor     r12,r12,r10                     @ Maj(a,b,c)
1637         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1638         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1639         @ ldr   r2,[sp,#13*4]           @ 28
1640         @ ldr   r1,[sp,#10*4]
1641         mov     r0,r2,ror#7
1642         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1643         mov     r12,r1,ror#17
1644         eor     r0,r0,r2,ror#18
1645         eor     r12,r12,r1,ror#19
1646         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1647         ldr     r2,[sp,#12*4]
1648         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1649         ldr     r1,[sp,#5*4]
1650
1651         add     r12,r12,r0
1652         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1653         add     r2,r2,r12
1654         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1655         add     r2,r2,r1                        @ X[i]
1656         ldr     r12,[r14],#4                    @ *K256++
1657         add     r7,r7,r2                        @ h+=X[i]
1658         str     r2,[sp,#12*4]
1659         eor     r2,r5,r6
1660         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1661         and     r2,r2,r4
1662         add     r7,r7,r12                       @ h+=K256[i]
1663         eor     r2,r2,r6                        @ Ch(e,f,g)
1664         eor     r0,r8,r8,ror#11
1665         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1666 #if 28==31
1667         and     r12,r12,#0xff
1668         cmp     r12,#0xf2                       @ done?
1669 #endif
1670 #if 28<15
1671 # if __ARM_ARCH__>=7
1672         ldr     r2,[r1],#4                      @ prefetch
1673 # else
1674         ldrb    r2,[r1,#3]
1675 # endif
1676         eor     r12,r8,r9                       @ a^b, b^c in next round
1677 #else
1678         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1679         eor     r12,r8,r9                       @ a^b, b^c in next round
1680         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1681 #endif
1682         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1683         and     r3,r3,r12                       @ (b^c)&=(a^b)
1684         add     r11,r11,r7                      @ d+=h
1685         eor     r3,r3,r9                        @ Maj(a,b,c)
1686         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1687         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1688         @ ldr   r2,[sp,#14*4]           @ 29
1689         @ ldr   r1,[sp,#11*4]
1690         mov     r0,r2,ror#7
1691         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1692         mov     r3,r1,ror#17
1693         eor     r0,r0,r2,ror#18
1694         eor     r3,r3,r1,ror#19
1695         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1696         ldr     r2,[sp,#13*4]
1697         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1698         ldr     r1,[sp,#6*4]
1699
1700         add     r3,r3,r0
1701         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1702         add     r2,r2,r3
1703         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1704         add     r2,r2,r1                        @ X[i]
1705         ldr     r3,[r14],#4                     @ *K256++
1706         add     r6,r6,r2                        @ h+=X[i]
1707         str     r2,[sp,#13*4]
1708         eor     r2,r4,r5
1709         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1710         and     r2,r2,r11
1711         add     r6,r6,r3                        @ h+=K256[i]
1712         eor     r2,r2,r5                        @ Ch(e,f,g)
1713         eor     r0,r7,r7,ror#11
1714         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1715 #if 29==31
1716         and     r3,r3,#0xff
1717         cmp     r3,#0xf2                        @ done?
1718 #endif
1719 #if 29<15
1720 # if __ARM_ARCH__>=7
1721         ldr     r2,[r1],#4                      @ prefetch
1722 # else
1723         ldrb    r2,[r1,#3]
1724 # endif
1725         eor     r3,r7,r8                        @ a^b, b^c in next round
1726 #else
1727         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1728         eor     r3,r7,r8                        @ a^b, b^c in next round
1729         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1730 #endif
1731         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1732         and     r12,r12,r3                      @ (b^c)&=(a^b)
1733         add     r10,r10,r6                      @ d+=h
1734         eor     r12,r12,r8                      @ Maj(a,b,c)
1735         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1736         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1737         @ ldr   r2,[sp,#15*4]           @ 30
1738         @ ldr   r1,[sp,#12*4]
1739         mov     r0,r2,ror#7
1740         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1741         mov     r12,r1,ror#17
1742         eor     r0,r0,r2,ror#18
1743         eor     r12,r12,r1,ror#19
1744         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1745         ldr     r2,[sp,#14*4]
1746         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1747         ldr     r1,[sp,#7*4]
1748
1749         add     r12,r12,r0
1750         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1751         add     r2,r2,r12
1752         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1753         add     r2,r2,r1                        @ X[i]
1754         ldr     r12,[r14],#4                    @ *K256++
1755         add     r5,r5,r2                        @ h+=X[i]
1756         str     r2,[sp,#14*4]
1757         eor     r2,r11,r4
1758         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1759         and     r2,r2,r10
1760         add     r5,r5,r12                       @ h+=K256[i]
1761         eor     r2,r2,r4                        @ Ch(e,f,g)
1762         eor     r0,r6,r6,ror#11
1763         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1764 #if 30==31
1765         and     r12,r12,#0xff
1766         cmp     r12,#0xf2                       @ done?
1767 #endif
1768 #if 30<15
1769 # if __ARM_ARCH__>=7
1770         ldr     r2,[r1],#4                      @ prefetch
1771 # else
1772         ldrb    r2,[r1,#3]
1773 # endif
1774         eor     r12,r6,r7                       @ a^b, b^c in next round
1775 #else
1776         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1777         eor     r12,r6,r7                       @ a^b, b^c in next round
1778         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1779 #endif
1780         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1781         and     r3,r3,r12                       @ (b^c)&=(a^b)
1782         add     r9,r9,r5                        @ d+=h
1783         eor     r3,r3,r7                        @ Maj(a,b,c)
1784         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1785         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1786         @ ldr   r2,[sp,#0*4]            @ 31
1787         @ ldr   r1,[sp,#13*4]
1788         mov     r0,r2,ror#7
1789         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1790         mov     r3,r1,ror#17
1791         eor     r0,r0,r2,ror#18
1792         eor     r3,r3,r1,ror#19
1793         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1794         ldr     r2,[sp,#15*4]
1795         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1796         ldr     r1,[sp,#8*4]
1797
1798         add     r3,r3,r0
1799         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1800         add     r2,r2,r3
1801         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1802         add     r2,r2,r1                        @ X[i]
1803         ldr     r3,[r14],#4                     @ *K256++
1804         add     r4,r4,r2                        @ h+=X[i]
1805         str     r2,[sp,#15*4]
1806         eor     r2,r10,r11
1807         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1808         and     r2,r2,r9
1809         add     r4,r4,r3                        @ h+=K256[i]
1810         eor     r2,r2,r11                       @ Ch(e,f,g)
1811         eor     r0,r5,r5,ror#11
1812         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1813 #if 31==31
1814         and     r3,r3,#0xff
1815         cmp     r3,#0xf2                        @ done?
1816 #endif
1817 #if 31<15
1818 # if __ARM_ARCH__>=7
1819         ldr     r2,[r1],#4                      @ prefetch
1820 # else
1821         ldrb    r2,[r1,#3]
1822 # endif
1823         eor     r3,r5,r6                        @ a^b, b^c in next round
1824 #else
1825         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1826         eor     r3,r5,r6                        @ a^b, b^c in next round
1827         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1828 #endif
1829         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1830         and     r12,r12,r3                      @ (b^c)&=(a^b)
1831         add     r8,r8,r4                        @ d+=h
1832         eor     r12,r12,r6                      @ Maj(a,b,c)
1833         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1834         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1835 #if __ARM_ARCH__>=7
1836         ite     eq                      @ Thumb2 thing, sanity check in ARM
1837 #endif
1838         ldreq   r3,[sp,#16*4]           @ pull ctx
1839         bne     .Lrounds_16_xx
1840
@ End of one 64-byte block on the integer path: fold the working variables
@ r4-r11 back into the hash state, then either loop for the next block or
@ tear down the frame and return.  r3 holds the ctx pointer at this point
@ (loaded from [sp,#16*4] by the conditional ldreq just before the branch
@ above).
1841         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
@ The eight state words at r3+0..28 are loaded through three rotating
@ temporaries (r0, r2, r12); each load is issued ahead of the add that
@ consumes it.
1842         ldr     r0,[r3,#0]
1843         ldr     r2,[r3,#4]
1844         ldr     r12,[r3,#8]
1845         add     r4,r4,r0
1846         ldr     r0,[r3,#12]
1847         add     r5,r5,r2
1848         ldr     r2,[r3,#16]
1849         add     r6,r6,r12
1850         ldr     r12,[r3,#20]
1851         add     r7,r7,r0
1852         ldr     r0,[r3,#24]
1853         add     r8,r8,r2
1854         ldr     r2,[r3,#28]
1855         add     r9,r9,r12
1856         ldr     r1,[sp,#17*4]           @ pull inp
1857         ldr     r12,[sp,#18*4]          @ pull inp+len
1858         add     r10,r10,r0
1859         add     r11,r11,r2
@ Write the updated state back to ctx in one store-multiple.
1860         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}
@ The flags for the bne below come from this cmp; the sub in between has
@ no S suffix and therefore leaves them untouched.
1861         cmp     r1,r12
1862         sub     r14,r14,#256    @ rewind Ktbl
1863         bne     .Loop
1864
1865         add     sp,sp,#19*4     @ destroy frame
@ Return.  On ARMv5 and later, popping straight into pc is used.  On older
@ cores the return address is popped into lr and tested: bit 0 clear means
@ an ARM-state caller, so a plain mov pc,lr is taken; otherwise execution
@ falls through to the literal word, which is the encoding of "bx lr".
1866 #if __ARM_ARCH__>=5
1867         ldmia   sp!,{r4-r11,pc}
1868 #else
1869         ldmia   sp!,{r4-r11,lr}
1870         tst     lr,#1
1871         moveq   pc,lr                   @ be binary compatible with V4, yet
1872         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
1873 #endif
1874 .size   sha256_block_data_order,.-sha256_block_data_order
1875 #if __ARM_MAX_ARCH__>=7
1876 .arch   armv7-a
1877 .fpu    neon
1878
@ sha256_block_data_order_neon: NEON-assisted SHA-256 block function.
@
@ Register use on entry, as far as this prologue shows:
@   r0 = pointer to the 8-word hash state (saved at [sp,#64], read by ldmia)
@   r1 = input pointer (advanced 64 bytes by the four vld1.8 loads)
@   r2 = number of 64-byte blocks (scaled by 64 and added to r1 to form the
@        end-of-input pointer)
@ NOTE(review): the C-level prototype is not visible in this part of the
@ file; confirm argument types against the caller.
@
@ Stack frame built here (sp is rounded down to a 16-byte boundary):
@   [sp,#0..63]  sixteen X[i]+K[i] words consumed by the scalar rounds
@   [sp,#64]     r0 (state pointer)
@   [sp,#68]     input pointer after the first block has been loaded
@   [sp,#72]     end-of-input pointer
@   [sp,#76]     stack pointer value before the frame was carved out
1879 .global sha256_block_data_order_neon
1880 .type   sha256_block_data_order_neon,%function
1881 .align  4
1882 sha256_block_data_order_neon:
1883 .LNEON:
1884         stmdb   sp!,{r4-r12,lr}
1885
@ r11 = candidate frame base: room for 16 words plus 16 bytes of saved
@ pointers below the current sp.
1886         sub     r11,sp,#16*4+16
@ r14 = address of K256, formed PC-relative from a nearby label.
1887         adr     r14,.Lsha256_block_data_order
1888         sub     r14,r14,#.Lsha256_block_data_order-K256
1889         bic     r11,r11,#15             @ align for 128-bit stores
@ Keep the pre-frame sp in r12 so it can be stored in the frame below.
1890         mov     r12,sp
1891         mov     sp,r11                  @ alloca
1892         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
1893
@ Load the first 64-byte input block into q0-q3 and the first 16 round
@ constants into q8-q11.
1894         vld1.8          {q0},[r1]!
1895         vld1.8          {q1},[r1]!
1896         vld1.8          {q2},[r1]!
1897         vld1.8          {q3},[r1]!
1898         vld1.32         {q8},[r14,:128]!
1899         vld1.32         {q9},[r14,:128]!
1900         vld1.32         {q10},[r14,:128]!
1901         vld1.32         {q11},[r14,:128]!
@ Byte-reverse every 32-bit lane of the input (it was loaded as bytes), and
@ interleave the four frame-slot stores described in the header.
1902         vrev32.8        q0,q0           @ yes, even on
1903         str             r0,[sp,#64]
1904         vrev32.8        q1,q1           @ big-endian
1905         str             r1,[sp,#68]
@ r1 is reused from here on as the store pointer into the X+K area.
1906         mov             r1,sp
1907         vrev32.8        q2,q2
1908         str             r2,[sp,#72]
1909         vrev32.8        q3,q3
1910         str             r12,[sp,#76]            @ save original sp
@ X[0..15]+K[0..15] -> [sp,#0..63]
1911         vadd.i32        q8,q8,q0
1912         vadd.i32        q9,q9,q1
1913         vst1.32         {q8},[r1,:128]!
1914         vadd.i32        q10,q10,q2
1915         vst1.32         {q9},[r1,:128]!
1916         vadd.i32        q11,q11,q3
1917         vst1.32         {q10},[r1,:128]!
1918         vst1.32         {q11},[r1,:128]!
1919
@ Load the eight state words into r4-r11 and prime the loop inputs:
@   r1  back to the start of the X+K area
@   r2  first X+K word
@   r12 zero, so the first "add r4,r4,r12" in the loop adds nothing
@   r3  r5^r6, the starting value for the first round's Maj computation
1920         ldmia           r0,{r4-r11}
1921         sub             r1,r1,#64
1922         ldr             r2,[sp,#0]
1923         eor             r12,r12,r12
1924         eor             r3,r5,r6
1925         b               .L_00_48
1926
@ .L_00_48: one pass through this loop performs 16 SHA-256 rounds in the
@ integer registers while NEON computes the next 16 message-schedule words
@ and their X+K sums.
@
@ Register roles inside the loop:
@   r4-r11  the eight working variables; the register playing each role
@           shifts by one every round (the first round updates r11 and r7)
@   r2      X+K word for the current round, loaded from [sp,#4*i]; also
@           reused as the Ch scratch within a round
@   r0      scratch for the rotate-and-xor Sigma computations
@   r3,r12  take turns holding a^b and the Maj term; the Maj term is added
@           to its destination one round late
@   r1      store pointer for new X+K words (16 bytes per vst1.32, rewound
@           by 64 at the bottom of the loop)
@   r14     K256 read pointer (16 bytes per vld1.32)
@   q0-q3   the sixteen current schedule words
@   q8-q11, d24/d25  NEON temporaries
@
@ Each vst1.32 overwrites four stack slots whose values the scalar code has
@ already loaded into r2, so the X+K area is recycled in place.
1927 .align  4
1928 .L_00_48:
@ ---- rounds 0-3 of the pass; NEON replaces q0 with the next four words ----
1929         vext.8  q8,q0,q1,#4     @ q8 = X[1..4]
1930         add     r11,r11,r2      @ h += X+K
1931         eor     r2,r9,r10       @ f^g
1932         eor     r0,r8,r8,ror#5
1933         vext.8  q9,q2,q3,#4     @ q9 = X[9..12]
1934         add     r4,r4,r12       @ Maj term left over from the previous round
1935         and     r2,r2,r8
1936         eor     r12,r0,r8,ror#19
1937         vshr.u32        q10,q8,#7
1938         eor     r0,r4,r4,ror#11
1939         eor     r2,r2,r10       @ Ch(e,f,g)
1940         vadd.i32        q0,q0,q9        @ X[0..3] += X[9..12]
1941         add     r11,r11,r12,ror#6       @ h += Sigma1(e)
1942         eor     r12,r4,r5       @ a^b
1943         vshr.u32        q9,q8,#3
1944         eor     r0,r0,r4,ror#20
1945         add     r11,r11,r2      @ h += Ch(e,f,g)
1946         vsli.32 q10,q8,#25      @ with the shift above: rotate right by 7
1947         ldr     r2,[sp,#4]      @ X+K for the next round
1948         and     r3,r3,r12       @ (b^c)&(a^b)
1949         vshr.u32        q11,q8,#18
1950         add     r7,r7,r11       @ d += h
1951         add     r11,r11,r0,ror#2        @ h += Sigma0(a)
1952         eor     r3,r3,r5        @ Maj(a,b,c), added at the start of the next round
1953         veor    q9,q9,q10
1954         add     r10,r10,r2
1955         vsli.32 q11,q8,#14      @ with the shift above: rotate right by 18
1956         eor     r2,r8,r9
1957         eor     r0,r7,r7,ror#5
1958         vshr.u32        d24,d7,#17
1959         add     r11,r11,r3
1960         and     r2,r2,r7
1961         veor    q9,q9,q11       @ q9 = sigma0(X[1..4])
1962         eor     r3,r0,r7,ror#19
1963         eor     r0,r11,r11,ror#11
1964         vsli.32 d24,d7,#15      @ d24 = X[14..15] rotated right by 17
1965         eor     r2,r2,r9
1966         add     r10,r10,r3,ror#6
1967         vshr.u32        d25,d7,#10
1968         eor     r3,r11,r4
1969         eor     r0,r0,r11,ror#20
1970         vadd.i32        q0,q0,q9        @ X[0..3] += sigma0(X[1..4])
1971         add     r10,r10,r2
1972         ldr     r2,[sp,#8]
1973         veor    d25,d25,d24
1974         and     r12,r12,r3
1975         add     r6,r6,r10
1976         vshr.u32        d24,d7,#19
1977         add     r10,r10,r0,ror#2
1978         eor     r12,r12,r4
1979         vsli.32 d24,d7,#13      @ d24 = X[14..15] rotated right by 19
1980         add     r9,r9,r2
1981         eor     r2,r7,r8
1982         veor    d25,d25,d24     @ d25 = sigma1(X[14..15])
1983         eor     r0,r6,r6,ror#5
1984         add     r10,r10,r12
1985         vadd.i32        d0,d0,d25       @ low half of q0 is now final
1986         and     r2,r2,r6
1987         eor     r12,r0,r6,ror#19
1988         vshr.u32        d24,d0,#17      @ sigma1 again, now on the two new words
1989         eor     r0,r10,r10,ror#11
1990         eor     r2,r2,r8
1991         vsli.32 d24,d0,#15
1992         add     r9,r9,r12,ror#6
1993         eor     r12,r10,r11
1994         vshr.u32        d25,d0,#10
1995         eor     r0,r0,r10,ror#20
1996         add     r9,r9,r2
1997         veor    d25,d25,d24
1998         ldr     r2,[sp,#12]
1999         and     r3,r3,r12
2000         vshr.u32        d24,d0,#19
2001         add     r5,r5,r9
2002         add     r9,r9,r0,ror#2
2003         eor     r3,r3,r11
2004         vld1.32 {q8},[r14,:128]!        @ next four round constants
2005         add     r8,r8,r2
2006         vsli.32 d24,d0,#13
2007         eor     r2,r6,r7
2008         eor     r0,r5,r5,ror#5
2009         veor    d25,d25,d24
2010         add     r9,r9,r3
2011         and     r2,r2,r5
2012         vadd.i32        d1,d1,d25       @ high half of q0 is now final
2013         eor     r3,r0,r5,ror#19
2014         eor     r0,r9,r9,ror#11
2015         vadd.i32        q8,q8,q0        @ new X + K
2016         eor     r2,r2,r7
2017         add     r8,r8,r3,ror#6
2018         eor     r3,r9,r10
2019         eor     r0,r0,r9,ror#20
2020         add     r8,r8,r2
2021         ldr     r2,[sp,#16]
2022         and     r12,r12,r3
2023         add     r4,r4,r8
2024         vst1.32 {q8},[r1,:128]!         @ recycle stack slots 0-3
2025         add     r8,r8,r0,ror#2
2026         eor     r12,r12,r10
@ ---- rounds 4-7 of the pass; same schedule step applied to q1 ----
2027         vext.8  q8,q1,q2,#4
2028         add     r7,r7,r2
2029         eor     r2,r5,r6
2030         eor     r0,r4,r4,ror#5
2031         vext.8  q9,q3,q0,#4
2032         add     r8,r8,r12
2033         and     r2,r2,r4
2034         eor     r12,r0,r4,ror#19
2035         vshr.u32        q10,q8,#7
2036         eor     r0,r8,r8,ror#11
2037         eor     r2,r2,r6
2038         vadd.i32        q1,q1,q9
2039         add     r7,r7,r12,ror#6
2040         eor     r12,r8,r9
2041         vshr.u32        q9,q8,#3
2042         eor     r0,r0,r8,ror#20
2043         add     r7,r7,r2
2044         vsli.32 q10,q8,#25
2045         ldr     r2,[sp,#20]
2046         and     r3,r3,r12
2047         vshr.u32        q11,q8,#18
2048         add     r11,r11,r7
2049         add     r7,r7,r0,ror#2
2050         eor     r3,r3,r9
2051         veor    q9,q9,q10
2052         add     r6,r6,r2
2053         vsli.32 q11,q8,#14
2054         eor     r2,r4,r5
2055         eor     r0,r11,r11,ror#5
2056         vshr.u32        d24,d1,#17
2057         add     r7,r7,r3
2058         and     r2,r2,r11
2059         veor    q9,q9,q11
2060         eor     r3,r0,r11,ror#19
2061         eor     r0,r7,r7,ror#11
2062         vsli.32 d24,d1,#15
2063         eor     r2,r2,r5
2064         add     r6,r6,r3,ror#6
2065         vshr.u32        d25,d1,#10
2066         eor     r3,r7,r8
2067         eor     r0,r0,r7,ror#20
2068         vadd.i32        q1,q1,q9
2069         add     r6,r6,r2
2070         ldr     r2,[sp,#24]
2071         veor    d25,d25,d24
2072         and     r12,r12,r3
2073         add     r10,r10,r6
2074         vshr.u32        d24,d1,#19
2075         add     r6,r6,r0,ror#2
2076         eor     r12,r12,r8
2077         vsli.32 d24,d1,#13
2078         add     r5,r5,r2
2079         eor     r2,r11,r4
2080         veor    d25,d25,d24
2081         eor     r0,r10,r10,ror#5
2082         add     r6,r6,r12
2083         vadd.i32        d2,d2,d25
2084         and     r2,r2,r10
2085         eor     r12,r0,r10,ror#19
2086         vshr.u32        d24,d2,#17
2087         eor     r0,r6,r6,ror#11
2088         eor     r2,r2,r4
2089         vsli.32 d24,d2,#15
2090         add     r5,r5,r12,ror#6
2091         eor     r12,r6,r7
2092         vshr.u32        d25,d2,#10
2093         eor     r0,r0,r6,ror#20
2094         add     r5,r5,r2
2095         veor    d25,d25,d24
2096         ldr     r2,[sp,#28]
2097         and     r3,r3,r12
2098         vshr.u32        d24,d2,#19
2099         add     r9,r9,r5
2100         add     r5,r5,r0,ror#2
2101         eor     r3,r3,r7
2102         vld1.32 {q8},[r14,:128]!
2103         add     r4,r4,r2
2104         vsli.32 d24,d2,#13
2105         eor     r2,r10,r11
2106         eor     r0,r9,r9,ror#5
2107         veor    d25,d25,d24
2108         add     r5,r5,r3
2109         and     r2,r2,r9
2110         vadd.i32        d3,d3,d25
2111         eor     r3,r0,r9,ror#19
2112         eor     r0,r5,r5,ror#11
2113         vadd.i32        q8,q8,q1
2114         eor     r2,r2,r11
2115         add     r4,r4,r3,ror#6
2116         eor     r3,r5,r6
2117         eor     r0,r0,r5,ror#20
2118         add     r4,r4,r2
2119         ldr     r2,[sp,#32]
2120         and     r12,r12,r3
2121         add     r8,r8,r4
2122         vst1.32 {q8},[r1,:128]!
2123         add     r4,r4,r0,ror#2
2124         eor     r12,r12,r6
@ ---- rounds 8-11 of the pass; same schedule step applied to q2 ----
2125         vext.8  q8,q2,q3,#4
2126         add     r11,r11,r2
2127         eor     r2,r9,r10
2128         eor     r0,r8,r8,ror#5
2129         vext.8  q9,q0,q1,#4
2130         add     r4,r4,r12
2131         and     r2,r2,r8
2132         eor     r12,r0,r8,ror#19
2133         vshr.u32        q10,q8,#7
2134         eor     r0,r4,r4,ror#11
2135         eor     r2,r2,r10
2136         vadd.i32        q2,q2,q9
2137         add     r11,r11,r12,ror#6
2138         eor     r12,r4,r5
2139         vshr.u32        q9,q8,#3
2140         eor     r0,r0,r4,ror#20
2141         add     r11,r11,r2
2142         vsli.32 q10,q8,#25
2143         ldr     r2,[sp,#36]
2144         and     r3,r3,r12
2145         vshr.u32        q11,q8,#18
2146         add     r7,r7,r11
2147         add     r11,r11,r0,ror#2
2148         eor     r3,r3,r5
2149         veor    q9,q9,q10
2150         add     r10,r10,r2
2151         vsli.32 q11,q8,#14
2152         eor     r2,r8,r9
2153         eor     r0,r7,r7,ror#5
2154         vshr.u32        d24,d3,#17
2155         add     r11,r11,r3
2156         and     r2,r2,r7
2157         veor    q9,q9,q11
2158         eor     r3,r0,r7,ror#19
2159         eor     r0,r11,r11,ror#11
2160         vsli.32 d24,d3,#15
2161         eor     r2,r2,r9
2162         add     r10,r10,r3,ror#6
2163         vshr.u32        d25,d3,#10
2164         eor     r3,r11,r4
2165         eor     r0,r0,r11,ror#20
2166         vadd.i32        q2,q2,q9
2167         add     r10,r10,r2
2168         ldr     r2,[sp,#40]
2169         veor    d25,d25,d24
2170         and     r12,r12,r3
2171         add     r6,r6,r10
2172         vshr.u32        d24,d3,#19
2173         add     r10,r10,r0,ror#2
2174         eor     r12,r12,r4
2175         vsli.32 d24,d3,#13
2176         add     r9,r9,r2
2177         eor     r2,r7,r8
2178         veor    d25,d25,d24
2179         eor     r0,r6,r6,ror#5
2180         add     r10,r10,r12
2181         vadd.i32        d4,d4,d25
2182         and     r2,r2,r6
2183         eor     r12,r0,r6,ror#19
2184         vshr.u32        d24,d4,#17
2185         eor     r0,r10,r10,ror#11
2186         eor     r2,r2,r8
2187         vsli.32 d24,d4,#15
2188         add     r9,r9,r12,ror#6
2189         eor     r12,r10,r11
2190         vshr.u32        d25,d4,#10
2191         eor     r0,r0,r10,ror#20
2192         add     r9,r9,r2
2193         veor    d25,d25,d24
2194         ldr     r2,[sp,#44]
2195         and     r3,r3,r12
2196         vshr.u32        d24,d4,#19
2197         add     r5,r5,r9
2198         add     r9,r9,r0,ror#2
2199         eor     r3,r3,r11
2200         vld1.32 {q8},[r14,:128]!
2201         add     r8,r8,r2
2202         vsli.32 d24,d4,#13
2203         eor     r2,r6,r7
2204         eor     r0,r5,r5,ror#5
2205         veor    d25,d25,d24
2206         add     r9,r9,r3
2207         and     r2,r2,r5
2208         vadd.i32        d5,d5,d25
2209         eor     r3,r0,r5,ror#19
2210         eor     r0,r9,r9,ror#11
2211         vadd.i32        q8,q8,q2
2212         eor     r2,r2,r7
2213         add     r8,r8,r3,ror#6
2214         eor     r3,r9,r10
2215         eor     r0,r0,r9,ror#20
2216         add     r8,r8,r2
2217         ldr     r2,[sp,#48]
2218         and     r12,r12,r3
2219         add     r4,r4,r8
2220         vst1.32 {q8},[r1,:128]!
2221         add     r8,r8,r0,ror#2
2222         eor     r12,r12,r10
@ ---- rounds 12-15 of the pass; same schedule step applied to q3 ----
2223         vext.8  q8,q3,q0,#4
2224         add     r7,r7,r2
2225         eor     r2,r5,r6
2226         eor     r0,r4,r4,ror#5
2227         vext.8  q9,q1,q2,#4
2228         add     r8,r8,r12
2229         and     r2,r2,r4
2230         eor     r12,r0,r4,ror#19
2231         vshr.u32        q10,q8,#7
2232         eor     r0,r8,r8,ror#11
2233         eor     r2,r2,r6
2234         vadd.i32        q3,q3,q9
2235         add     r7,r7,r12,ror#6
2236         eor     r12,r8,r9
2237         vshr.u32        q9,q8,#3
2238         eor     r0,r0,r8,ror#20
2239         add     r7,r7,r2
2240         vsli.32 q10,q8,#25
2241         ldr     r2,[sp,#52]
2242         and     r3,r3,r12
2243         vshr.u32        q11,q8,#18
2244         add     r11,r11,r7
2245         add     r7,r7,r0,ror#2
2246         eor     r3,r3,r9
2247         veor    q9,q9,q10
2248         add     r6,r6,r2
2249         vsli.32 q11,q8,#14
2250         eor     r2,r4,r5
2251         eor     r0,r11,r11,ror#5
2252         vshr.u32        d24,d5,#17
2253         add     r7,r7,r3
2254         and     r2,r2,r11
2255         veor    q9,q9,q11
2256         eor     r3,r0,r11,ror#19
2257         eor     r0,r7,r7,ror#11
2258         vsli.32 d24,d5,#15
2259         eor     r2,r2,r5
2260         add     r6,r6,r3,ror#6
2261         vshr.u32        d25,d5,#10
2262         eor     r3,r7,r8
2263         eor     r0,r0,r7,ror#20
2264         vadd.i32        q3,q3,q9
2265         add     r6,r6,r2
2266         ldr     r2,[sp,#56]
2267         veor    d25,d25,d24
2268         and     r12,r12,r3
2269         add     r10,r10,r6
2270         vshr.u32        d24,d5,#19
2271         add     r6,r6,r0,ror#2
2272         eor     r12,r12,r8
2273         vsli.32 d24,d5,#13
2274         add     r5,r5,r2
2275         eor     r2,r11,r4
2276         veor    d25,d25,d24
2277         eor     r0,r10,r10,ror#5
2278         add     r6,r6,r12
2279         vadd.i32        d6,d6,d25
2280         and     r2,r2,r10
2281         eor     r12,r0,r10,ror#19
2282         vshr.u32        d24,d6,#17
2283         eor     r0,r6,r6,ror#11
2284         eor     r2,r2,r4
2285         vsli.32 d24,d6,#15
2286         add     r5,r5,r12,ror#6
2287         eor     r12,r6,r7
2288         vshr.u32        d25,d6,#10
2289         eor     r0,r0,r6,ror#20
2290         add     r5,r5,r2
2291         veor    d25,d25,d24
2292         ldr     r2,[sp,#60]
2293         and     r3,r3,r12
2294         vshr.u32        d24,d6,#19
2295         add     r9,r9,r5
2296         add     r5,r5,r0,ror#2
2297         eor     r3,r3,r7
2298         vld1.32 {q8},[r14,:128]!
2299         add     r4,r4,r2
2300         vsli.32 d24,d6,#13
2301         eor     r2,r10,r11
2302         eor     r0,r9,r9,ror#5
2303         veor    d25,d25,d24
2304         add     r5,r5,r3
2305         and     r2,r2,r9
2306         vadd.i32        d7,d7,d25
2307         eor     r3,r0,r9,ror#19
2308         eor     r0,r5,r5,ror#11
2309         vadd.i32        q8,q8,q3
2310         eor     r2,r2,r11
2311         add     r4,r4,r3,ror#6
2312         eor     r3,r5,r6
2313         eor     r0,r0,r5,ror#20
2314         add     r4,r4,r2
@ ---- loop control ----
@ Peek at the word r14 now points to; the loop repeats while it is non-zero.
@ The ldr and sub that sit between teq and bne do not alter the flags.
2315         ldr     r2,[r14]
2316         and     r12,r12,r3
2317         add     r8,r8,r4
2318         vst1.32 {q8},[r1,:128]!
2319         add     r4,r4,r0,ror#2
2320         eor     r12,r12,r6
2321         teq     r2,#0                           @ check for K256 terminator
@ Reload r2 with the first X+K word and rewind the store pointer, ready for
@ either another pass or the code that follows the loop.
2322         ldr     r2,[sp,#0]
2323         sub     r1,r1,#64
2324         bne     .L_00_48
2325
2326         ldr             r1,[sp,#68]
2327         ldr             r0,[sp,#72]
2328         sub             r14,r14,#256    @ rewind r14
2329         teq             r1,r0
2330         it              eq
2331         subeq           r1,r1,#64               @ avoid SEGV
2332         vld1.8          {q0},[r1]!              @ load next input block
2333         vld1.8          {q1},[r1]!
2334         vld1.8          {q2},[r1]!
2335         vld1.8          {q3},[r1]!
2336         it              ne
2337         strne           r1,[sp,#68]
2338         mov             r1,sp
2339         add     r11,r11,r2
2340         eor     r2,r9,r10
2341         eor     r0,r8,r8,ror#5
2342         add     r4,r4,r12
2343         vld1.32 {q8},[r14,:128]!
2344         and     r2,r2,r8
2345         eor     r12,r0,r8,ror#19
2346         eor     r0,r4,r4,ror#11
2347         eor     r2,r2,r10
2348         vrev32.8        q0,q0
2349         add     r11,r11,r12,ror#6
2350         eor     r12,r4,r5
2351         eor     r0,r0,r4,ror#20
2352         add     r11,r11,r2
2353         vadd.i32        q8,q8,q0
2354         ldr     r2,[sp,#4]
2355         and     r3,r3,r12
2356         add     r7,r7,r11
2357         add     r11,r11,r0,ror#2
2358         eor     r3,r3,r5
2359         add     r10,r10,r2
2360         eor     r2,r8,r9
2361         eor     r0,r7,r7,ror#5
2362         add     r11,r11,r3
2363         and     r2,r2,r7
2364         eor     r3,r0,r7,ror#19
2365         eor     r0,r11,r11,ror#11
2366         eor     r2,r2,r9
2367         add     r10,r10,r3,ror#6
2368         eor     r3,r11,r4
2369         eor     r0,r0,r11,ror#20
2370         add     r10,r10,r2
2371         ldr     r2,[sp,#8]
2372         and     r12,r12,r3
2373         add     r6,r6,r10
2374         add     r10,r10,r0,ror#2
2375         eor     r12,r12,r4
2376         add     r9,r9,r2
2377         eor     r2,r7,r8
2378         eor     r0,r6,r6,ror#5
2379         add     r10,r10,r12
2380         and     r2,r2,r6
2381         eor     r12,r0,r6,ror#19
2382         eor     r0,r10,r10,ror#11
2383         eor     r2,r2,r8
2384         add     r9,r9,r12,ror#6
2385         eor     r12,r10,r11
2386         eor     r0,r0,r10,ror#20
2387         add     r9,r9,r2
2388         ldr     r2,[sp,#12]
2389         and     r3,r3,r12
2390         add     r5,r5,r9
2391         add     r9,r9,r0,ror#2
2392         eor     r3,r3,r11
2393         add     r8,r8,r2
2394         eor     r2,r6,r7
2395         eor     r0,r5,r5,ror#5
2396         add     r9,r9,r3
2397         and     r2,r2,r5
2398         eor     r3,r0,r5,ror#19
2399         eor     r0,r9,r9,ror#11
2400         eor     r2,r2,r7
2401         add     r8,r8,r3,ror#6
2402         eor     r3,r9,r10
2403         eor     r0,r0,r9,ror#20
2404         add     r8,r8,r2
2405         ldr     r2,[sp,#16]
2406         and     r12,r12,r3
2407         add     r4,r4,r8
2408         add     r8,r8,r0,ror#2
2409         eor     r12,r12,r10
2410         vst1.32 {q8},[r1,:128]!
2411         add     r7,r7,r2
2412         eor     r2,r5,r6
2413         eor     r0,r4,r4,ror#5
2414         add     r8,r8,r12
2415         vld1.32 {q8},[r14,:128]!
2416         and     r2,r2,r4
2417         eor     r12,r0,r4,ror#19
2418         eor     r0,r8,r8,ror#11
2419         eor     r2,r2,r6
2420         vrev32.8        q1,q1
2421         add     r7,r7,r12,ror#6
2422         eor     r12,r8,r9
2423         eor     r0,r0,r8,ror#20
2424         add     r7,r7,r2
2425         vadd.i32        q8,q8,q1
2426         ldr     r2,[sp,#20]
2427         and     r3,r3,r12
2428         add     r11,r11,r7
2429         add     r7,r7,r0,ror#2
2430         eor     r3,r3,r9
2431         add     r6,r6,r2
2432         eor     r2,r4,r5
2433         eor     r0,r11,r11,ror#5
2434         add     r7,r7,r3
2435         and     r2,r2,r11
2436         eor     r3,r0,r11,ror#19
2437         eor     r0,r7,r7,ror#11
2438         eor     r2,r2,r5
2439         add     r6,r6,r3,ror#6
2440         eor     r3,r7,r8
2441         eor     r0,r0,r7,ror#20
2442         add     r6,r6,r2
2443         ldr     r2,[sp,#24]
2444         and     r12,r12,r3
2445         add     r10,r10,r6
2446         add     r6,r6,r0,ror#2
2447         eor     r12,r12,r8
2448         add     r5,r5,r2
2449         eor     r2,r11,r4
2450         eor     r0,r10,r10,ror#5
2451         add     r6,r6,r12
2452         and     r2,r2,r10
2453         eor     r12,r0,r10,ror#19
2454         eor     r0,r6,r6,ror#11
2455         eor     r2,r2,r4
2456         add     r5,r5,r12,ror#6
2457         eor     r12,r6,r7
2458         eor     r0,r0,r6,ror#20
2459         add     r5,r5,r2
2460         ldr     r2,[sp,#28]
2461         and     r3,r3,r12
2462         add     r9,r9,r5
2463         add     r5,r5,r0,ror#2
2464         eor     r3,r3,r7
2465         add     r4,r4,r2
2466         eor     r2,r10,r11
2467         eor     r0,r9,r9,ror#5
2468         add     r5,r5,r3
2469         and     r2,r2,r9
2470         eor     r3,r0,r9,ror#19
2471         eor     r0,r5,r5,ror#11
2472         eor     r2,r2,r11
2473         add     r4,r4,r3,ror#6
2474         eor     r3,r5,r6
2475         eor     r0,r0,r5,ror#20
2476         add     r4,r4,r2
2477         ldr     r2,[sp,#32]
2478         and     r12,r12,r3
2479         add     r8,r8,r4
2480         add     r4,r4,r0,ror#2
2481         eor     r12,r12,r6
2482         vst1.32 {q8},[r1,:128]!
2483         add     r11,r11,r2
2484         eor     r2,r9,r10
2485         eor     r0,r8,r8,ror#5
2486         add     r4,r4,r12
2487         vld1.32 {q8},[r14,:128]!
2488         and     r2,r2,r8
2489         eor     r12,r0,r8,ror#19
2490         eor     r0,r4,r4,ror#11
2491         eor     r2,r2,r10
2492         vrev32.8        q2,q2
2493         add     r11,r11,r12,ror#6
2494         eor     r12,r4,r5
2495         eor     r0,r0,r4,ror#20
2496         add     r11,r11,r2
2497         vadd.i32        q8,q8,q2
2498         ldr     r2,[sp,#36]
2499         and     r3,r3,r12
2500         add     r7,r7,r11
2501         add     r11,r11,r0,ror#2
2502         eor     r3,r3,r5
2503         add     r10,r10,r2
2504         eor     r2,r8,r9
2505         eor     r0,r7,r7,ror#5
2506         add     r11,r11,r3
2507         and     r2,r2,r7
2508         eor     r3,r0,r7,ror#19
2509         eor     r0,r11,r11,ror#11
2510         eor     r2,r2,r9
2511         add     r10,r10,r3,ror#6
2512         eor     r3,r11,r4
2513         eor     r0,r0,r11,ror#20
2514         add     r10,r10,r2
2515         ldr     r2,[sp,#40]
2516         and     r12,r12,r3
2517         add     r6,r6,r10
2518         add     r10,r10,r0,ror#2
2519         eor     r12,r12,r4
2520         add     r9,r9,r2
2521         eor     r2,r7,r8
2522         eor     r0,r6,r6,ror#5
2523         add     r10,r10,r12
2524         and     r2,r2,r6
2525         eor     r12,r0,r6,ror#19
2526         eor     r0,r10,r10,ror#11
2527         eor     r2,r2,r8
2528         add     r9,r9,r12,ror#6
2529         eor     r12,r10,r11
2530         eor     r0,r0,r10,ror#20
2531         add     r9,r9,r2
2532         ldr     r2,[sp,#44]
2533         and     r3,r3,r12
2534         add     r5,r5,r9
2535         add     r9,r9,r0,ror#2
2536         eor     r3,r3,r11
2537         add     r8,r8,r2
2538         eor     r2,r6,r7
2539         eor     r0,r5,r5,ror#5
2540         add     r9,r9,r3
2541         and     r2,r2,r5
2542         eor     r3,r0,r5,ror#19
2543         eor     r0,r9,r9,ror#11
2544         eor     r2,r2,r7
2545         add     r8,r8,r3,ror#6
2546         eor     r3,r9,r10
2547         eor     r0,r0,r9,ror#20
2548         add     r8,r8,r2
2549         ldr     r2,[sp,#48]
2550         and     r12,r12,r3
2551         add     r4,r4,r8
2552         add     r8,r8,r0,ror#2
2553         eor     r12,r12,r10
2554         vst1.32 {q8},[r1,:128]!
2555         add     r7,r7,r2
2556         eor     r2,r5,r6
2557         eor     r0,r4,r4,ror#5
2558         add     r8,r8,r12
2559         vld1.32 {q8},[r14,:128]!
2560         and     r2,r2,r4
2561         eor     r12,r0,r4,ror#19
2562         eor     r0,r8,r8,ror#11
2563         eor     r2,r2,r6
2564         vrev32.8        q3,q3
2565         add     r7,r7,r12,ror#6
2566         eor     r12,r8,r9
2567         eor     r0,r0,r8,ror#20
2568         add     r7,r7,r2
2569         vadd.i32        q8,q8,q3
2570         ldr     r2,[sp,#52]
2571         and     r3,r3,r12
2572         add     r11,r11,r7
2573         add     r7,r7,r0,ror#2
2574         eor     r3,r3,r9
2575         add     r6,r6,r2
2576         eor     r2,r4,r5
2577         eor     r0,r11,r11,ror#5
2578         add     r7,r7,r3
2579         and     r2,r2,r11
2580         eor     r3,r0,r11,ror#19
2581         eor     r0,r7,r7,ror#11
2582         eor     r2,r2,r5
2583         add     r6,r6,r3,ror#6
2584         eor     r3,r7,r8
2585         eor     r0,r0,r7,ror#20
2586         add     r6,r6,r2
2587         ldr     r2,[sp,#56]
2588         and     r12,r12,r3
2589         add     r10,r10,r6
2590         add     r6,r6,r0,ror#2
2591         eor     r12,r12,r8
2592         add     r5,r5,r2
2593         eor     r2,r11,r4
2594         eor     r0,r10,r10,ror#5
2595         add     r6,r6,r12
2596         and     r2,r2,r10
2597         eor     r12,r0,r10,ror#19
2598         eor     r0,r6,r6,ror#11
2599         eor     r2,r2,r4
2600         add     r5,r5,r12,ror#6
2601         eor     r12,r6,r7
2602         eor     r0,r0,r6,ror#20
2603         add     r5,r5,r2
2604         ldr     r2,[sp,#60]
2605         and     r3,r3,r12
2606         add     r9,r9,r5
2607         add     r5,r5,r0,ror#2
2608         eor     r3,r3,r7
2609         add     r4,r4,r2
2610         eor     r2,r10,r11
2611         eor     r0,r9,r9,ror#5
2612         add     r5,r5,r3
2613         and     r2,r2,r9
2614         eor     r3,r0,r9,ror#19
2615         eor     r0,r5,r5,ror#11
2616         eor     r2,r2,r11
2617         add     r4,r4,r3,ror#6
2618         eor     r3,r5,r6
2619         eor     r0,r0,r5,ror#20
2620         add     r4,r4,r2
2621         ldr     r2,[sp,#64]
2622         and     r12,r12,r3
2623         add     r8,r8,r4
2624         add     r4,r4,r0,ror#2
2625         eor     r12,r12,r6
2626         vst1.32 {q8},[r1,:128]!
2627         ldr     r0,[r2,#0]
2628         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2629         ldr     r12,[r2,#4]
2630         ldr     r3,[r2,#8]
2631         ldr     r1,[r2,#12]
2632         add     r4,r4,r0                        @ accumulate
2633         ldr     r0,[r2,#16]
2634         add     r5,r5,r12
2635         ldr     r12,[r2,#20]
2636         add     r6,r6,r3
2637         ldr     r3,[r2,#24]
2638         add     r7,r7,r1
2639         ldr     r1,[r2,#28]
2640         add     r8,r8,r0
2641         str     r4,[r2],#4
2642         add     r9,r9,r12
2643         str     r5,[r2],#4
2644         add     r10,r10,r3
2645         str     r6,[r2],#4
2646         add     r11,r11,r1
2647         str     r7,[r2],#4
2648         stmia   r2,{r8-r11}
2649
2650         ittte   ne
2651         movne   r1,sp
2652         ldrne   r2,[sp,#0]
2653         eorne   r12,r12,r12
2654         ldreq   sp,[sp,#76]                     @ restore original sp
2655         itt     ne
2656         eorne   r3,r5,r6
2657         bne     .L_00_48
2658
2659         ldmia   sp!,{r4-r12,pc}
2660 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2661 #endif
2662 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2663
@ ----------------------------------------------------------------------
@ sha256_block_data_order_armv8
@
@ SHA-256 block routine built on the sha256h / sha256h2 / sha256su0 /
@ sha256su1 instructions. It is assembled only when __ARM_MAX_ARCH__>=7
@ and __KERNEL__ is not defined (see the guard above).
@
@ Register usage, as read from the code below:
@   r0       in   pointer to the eight 32-bit state words; they are
@                 loaded into q0,q1 on entry and stored back on exit
@   r1       in   input pointer, advanced by 64 bytes per iteration
@   r2       in   number of 64-byte blocks; converted on entry into the
@                 end-of-input pointer (r1 + r2*64)
@   r3            cursor into the K256 constant table
@   q2            copy of q0 taken before each sha256h, used by sha256h2
@   q8-q11        message-schedule words of the current block
@   q12,q13       alternating (K256 vector + schedule vector) sums
@   q14,q15       state as it was at the top of the iteration
@ Only r1-r3, the flags, q0-q2 and q8-q15 are written; the stack is not
@ used.
@
@ NOTE(review): the loop body executes before the end test, so a block
@ count of zero is not handled here; presumably callers never pass
@ zero - confirm against the call sites.
@ ----------------------------------------------------------------------

@ INST(a,b,c,d) emits one 32-bit instruction as four raw bytes. The ARM
@ variant stores the bytes in the order given. The Thumb-2 variant emits
@ the c,d pair first, ORs 0xc into d (every use below passes d = 0xf3,
@ giving 0xff), and then emits a,b. The trailing comment on each INST
@ line names the instruction that the bytes encode.
@ NOTE(review): presumably raw bytes are used so that assemblers lacking
@ these mnemonics can still build the file - confirm.
2664 # ifdef __thumb2__
2665 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b
2666 # else
2667 #  define INST(a,b,c,d) .byte   a,b,c,d
2668 # endif
2669
2670 .type   sha256_block_data_order_armv8,%function
2671 .align  5
2672 sha256_block_data_order_armv8:
2673 .LARMv8:
2674         vld1.32 {q0,q1},[r0]                    @ q0,q1 = current state (8 words)
@ Both variants leave r3 = address of K256. The Thumb-2 path takes the
@ address of .LARMv8 and subtracts the assemble-time distance to K256.
2675 # ifdef __thumb2__
2676         adr     r3,.LARMv8
2677         sub     r3,r3,#.LARMv8-K256
2678 # else
2679         adrl    r3,K256
2680 # endif
2681         add     r2,r1,r2,lsl#6  @ r2 = r1 + r2*64: block count becomes end-of-input pointer
2682
@ One iteration per 64-byte block. The iteration issues 16
@ sha256h/sha256h2 pairs; each pair consumes one 128-bit vector holding
@ four K256 words added to four schedule words (16 x 4 = 64 words).
2683 .Loop_v8:
2684         vld1.8          {q8-q9},[r1]!           @ load 64 input bytes, r1 += 64
2685         vld1.8          {q10-q11},[r1]!
2686         vld1.32         {q12},[r3]!             @ first K256 vector
2687         vrev32.8        q8,q8                   @ byte-swap every 32-bit input word
2688         vrev32.8        q9,q9
2689         vrev32.8        q10,q10
2690         vrev32.8        q11,q11
2691         vmov            q14,q0  @ offload: keep the incoming state for the final add
2692         vmov            q15,q1
2693         teq             r1,r2                   @ end-of-input test; flags are consumed by the bne at the loop bottom
2694         vld1.32         {q13},[r3]!             @ second K256 vector, fetched one step ahead
@ Steps 0-11 share one pattern: fetch the K256 vector for the following
@ step into whichever of q12/q13 was just consumed, add the schedule
@ vector to the current K256 vector, start the schedule update
@ (sha256su0), snapshot q0 in q2, run sha256h/sha256h2, then finish the
@ schedule update (sha256su1). The schedule registers rotate through
@ q8, q9, q10, q11.
@ -- steps 0-3 --
2695         vadd.i32        q12,q12,q8
2696         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2697         vmov            q2,q0
2698         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2699         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2700         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2701         vld1.32         {q12},[r3]!
2702         vadd.i32        q13,q13,q9
2703         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2704         vmov            q2,q0
2705         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2706         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2707         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2708         vld1.32         {q13},[r3]!
2709         vadd.i32        q12,q12,q10
2710         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2711         vmov            q2,q0
2712         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2713         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2714         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2715         vld1.32         {q12},[r3]!
2716         vadd.i32        q13,q13,q11
2717         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2718         vmov            q2,q0
2719         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2720         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2721         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
@ -- steps 4-7 --
2722         vld1.32         {q13},[r3]!
2723         vadd.i32        q12,q12,q8
2724         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2725         vmov            q2,q0
2726         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2727         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2728         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2729         vld1.32         {q12},[r3]!
2730         vadd.i32        q13,q13,q9
2731         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2732         vmov            q2,q0
2733         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2734         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2735         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2736         vld1.32         {q13},[r3]!
2737         vadd.i32        q12,q12,q10
2738         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2739         vmov            q2,q0
2740         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2741         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2742         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2743         vld1.32         {q12},[r3]!
2744         vadd.i32        q13,q13,q11
2745         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2746         vmov            q2,q0
2747         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2748         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2749         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
@ -- steps 8-11 --
2750         vld1.32         {q13},[r3]!
2751         vadd.i32        q12,q12,q8
2752         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2753         vmov            q2,q0
2754         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2755         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2756         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2757         vld1.32         {q12},[r3]!
2758         vadd.i32        q13,q13,q9
2759         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2760         vmov            q2,q0
2761         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2762         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2763         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2764         vld1.32         {q13},[r3]!
2765         vadd.i32        q12,q12,q10
2766         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2767         vmov            q2,q0
2768         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2769         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2770         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2771         vld1.32         {q12},[r3]!
2772         vadd.i32        q13,q13,q11
2773         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2774         vmov            q2,q0
2775         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2776         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2777         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
@ -- steps 12-15: same as above but with no sha256su0/sha256su1; the
@ schedule registers q8-q11 are only read from here on --
2778         vld1.32         {q13},[r3]!
2779         vadd.i32        q12,q12,q8
2780         vmov            q2,q0
2781         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2782         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2783
2784         vld1.32         {q12},[r3]!
2785         vadd.i32        q13,q13,q9
2786         vmov            q2,q0
2787         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2788         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2789
2790         vld1.32         {q13},[r3]              @ last (16th) K256 vector, no writeback
2791         vadd.i32        q12,q12,q10
2792         sub             r3,r3,#256-16   @ rewind: undo the 15 post-incremented 16-byte loads, r3 = K256 again
2793         vmov            q2,q0
2794         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2795         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2796
2797         vadd.i32        q13,q13,q11
2798         vmov            q2,q0
2799         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2800         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2801
2802         vadd.i32        q0,q0,q14               @ add the state saved at the top of the iteration
2803         vadd.i32        q1,q1,q15
2804         it              ne
2805         bne             .Loop_v8                @ r1 != r2 (from the teq above): more input remains
2806
2807         vst1.32         {q0,q1},[r0]            @ write the updated state back
2808
2809         bx      lr              @ return to caller
2810 .size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2811 #endif
@ NUL-terminated identification string emitted into the object file.
2812 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
@ Restore alignment after the variable-length string (on ARM gas the
@ .align operand is a power of two, so this is a 4-byte boundary).
2813 .align  2
@ Non-kernel builds only: declare OPENSSL_armcap_P as a 4-byte common
@ symbol with 4-byte alignment.
@ NOTE(review): presumably this is the CPU-capability word read by the
@ dispatch code earlier in the file - not visible in this chunk, confirm.
2814 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815 .comm   OPENSSL_armcap_P,4,4
2816 #endif