Merge drm/drm-next into drm-intel-next-queued
[linux-2.6-microblaze.git] / arch / arm / lib / csumpartialcopygeneric.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  *  linux/arch/arm/lib/csumpartialcopygeneric.S
4  *
5  *  Copyright (C) 1995-2001 Russell King
6  */
7 #include <asm/assembler.h>
8
9 /*
10  * unsigned int
11  * csum_partial_copy_xxx(const char *src, char *dst, int len)
12  *  r0 = src, r1 = dst, r2 = len; r3 (sum) is not an input, it is set to -1 on entry
13  *  Returns : r0 = checksum
14  *
15  * Note that 'tst' and 'teq' preserve the carry flag.
16  */
17
/*
 * Symbolic names for the registers used throughout this file.
 * src, dst and len are the incoming argument registers; sum is the
 * running checksum accumulator, which the function entry sets to -1.
 */
18 src     .req    r0
19 dst     .req    r1
20 len     .req    r2
21 sum     .req    r3
22
/*
 * Zero-length exit, reached from .Lless8 when len == 0: the current
 * value of sum is returned in r0 as-is, without the carry fold and
 * conditional rotation performed at .Ldone.  load_regs is defined
 * outside this file; presumably it restores the registers saved by
 * save_regs and returns to the caller -- TODO confirm.
 */
23 .Lzero:         mov     r0, sum
24                 load_regs
25
26                 /*
27                  * Align an unaligned destination pointer.  We know that
28                  * we have >= 8 bytes here, so we don't need to check
29                  * the length.  Note that the source pointer hasn't been
30                  * aligned yet.
31                  */
                   /*
                    * Entered with 'blne' from the function entry when
                    * (dst & 3) != 0, and left through lr.  Copies 1, 2
                    * or 3 bytes:
                    *   dst & 3 == 1: one byte, then the two-byte step
                    *   dst & 3 == 2: the two-byte step only
                    *   dst & 3 == 3: one byte only
                    * Every byte is added to sum with adcs, so the carry
                    * chain runs across the bytes and the last carry is
                    * handed back to the caller in C ('sub', 'strb' and
                    * 'tst' leave C alone).  A byte stored at an odd dst
                    * address is added through put_byte_1, one stored at
                    * an even address through put_byte_0.
                    * NOTE(review): load1b, load2b and put_byte_* are
                    * defined outside this file; presumably the loads
                    * read from src with post-increment and preserve C
                    * -- confirm against the including file.
                    */
32 .Ldst_unaligned:
33                 tst     dst, #1
34                 beq     .Ldst_16bit
35
                   /* dst is odd: copy a single byte to make it even */
36                 load1b  ip
37                 sub     len, len, #1
38                 adcs    sum, sum, ip, put_byte_1        @ update checksum
39                 strb    ip, [dst], #1
                   /* dst is even now; done if bit 1 is clear as well */
40                 tst     dst, #2
41                 reteq   lr                      @ dst is now 32bit aligned
42
                   /* dst is 16-bit but not 32-bit aligned: copy two bytes */
43 .Ldst_16bit:    load2b  r8, ip
44                 sub     len, len, #2
45                 adcs    sum, sum, r8, put_byte_0
46                 strb    r8, [dst], #1
47                 adcs    sum, sum, ip, put_byte_1
48                 strb    ip, [dst], #1
49                 ret     lr                      @ dst is now 32bit aligned
50
51                 /*
52                  * Handle 0 to 7 bytes, with any alignment of source and
53                  * destination pointers.  Note that when we get here, C = 0
54                  */
                   /*
                    * Reached from the function entry through 'blo' after
                    * 'cmp len, #8', which is why C is clear on arrival.
                    * Order of work:
                    *   1. len == 0: leave through .Lzero.
                    *   2. dst odd: copy one byte (added via put_byte_1).
                    *   3. copy byte pairs while (len & 6) != 0.
                    *   4. copy one last byte if (len & 1), then .Ldone.
                    * len is updated with plain 'sub' and examined with
                    * 'tst'/'teq' only, so the carry chain of the adcs
                    * instructions is never disturbed.
                    * NOTE(review): load1b/load2b and put_byte_* come
                    * from outside this file; presumably the loads read
                    * from src with post-increment -- confirm.
                    */
55 .Lless8:        teq     len, #0                 @ check for zero count
56                 beq     .Lzero
57
58                 /* we must have at least one byte. */
59                 tst     dst, #1                 @ dst 16-bit aligned
60                 beq     .Lless8_aligned
61
62                 /* Align dst */
63                 load1b  ip
64                 sub     len, len, #1
65                 adcs    sum, sum, ip, put_byte_1        @ update checksum
66                 strb    ip, [dst], #1
                   /* no complete byte pair left: go to the single byte */
67                 tst     len, #6
68                 beq     .Lless8_byteonly
69
                   /* pair loop: two bytes per pass, dst is even here */
70 1:              load2b  r8, ip
71                 sub     len, len, #2
72                 adcs    sum, sum, r8, put_byte_0
73                 strb    r8, [dst], #1
74                 adcs    sum, sum, ip, put_byte_1
75                 strb    ip, [dst], #1
76 .Lless8_aligned:
77                 tst     len, #6
78                 bne     1b
                   /* at most one byte remains; len is not decremented for it */
79 .Lless8_byteonly:
80                 tst     len, #1
81                 beq     .Ldone
82                 load1b  r8
83                 adcs    sum, sum, r8, put_byte_0        @ update checksum
84                 strb    r8, [dst], #1
85                 b       .Ldone
86
/*
 * Function entry and dispatch.
 *
 * On entry r0 = src, r1 = dst, r2 = len.  sum (r3) is loaded with -1
 * as the starting value of the checksum.  Requests shorter than 8
 * bytes go to .Lless8; otherwise dst is brought to a 32-bit boundary
 * (through .Ldst_unaligned if needed) and the copy continues in the
 * aligned routine below or in .Lsrc_not_aligned, depending on src.
 *
 * NOTE(review): FN_ENTRY and save_regs are not defined in this file;
 * presumably the file that includes this one supplies the function
 * label and the register save sequence -- confirm there.
 */
87 FN_ENTRY
88                 save_regs
89                 mov     sum, #-1
90
                   /* unsigned compare: 'blo' is taken with C clear */
91                 cmp     len, #8                 @ Ensure that we have at least
92                 blo     .Lless8                 @ 8 bytes to copy.
93
                   /*
                    * Adding zero cannot carry out, so this clears C
                    * before the first adcs without changing sum.
                    */
94                 adds    sum, sum, #0            @ C = 0
95                 tst     dst, #3                 @ Test destination alignment
96                 blne    .Ldst_unaligned         @ align destination, return here
97
98                 /*
99                  * Ok, the dst pointer is now 32bit aligned, and we know
100                  * that we must have more than 4 bytes to copy.  Note
101                  * that C contains the carry from the dst alignment above.
102                  */
103
104                 tst     src, #3                 @ Test source alignment
105                 bne     .Lsrc_not_aligned
106
107                 /* Routine for src & dst aligned */
                    /*
                     * Three stages, each feeding the words it copies
                     * into sum through adcs:
                     *   1:  16 bytes per pass, for len & ~15 bytes
                     *   2:/3:  8 and/or 4 further bytes per (len & 12)
                     *   4:  the last len & 3 bytes, stored bytewise
                     * Counters are updated with 'sub' and examined with
                     * 'tst'/'teq' so the carry left by the previous
                     * adcs reaches the next one.
                     * NOTE(review): load1l/load2l/load4l, get_byte_*,
                     * put_byte_* and lspush are defined outside this
                     * file; presumably the loads read 1/2/4 words from
                     * src with post-increment and the others are
                     * endian-dependent shifts -- confirm.
                     */
108
109                 bics    ip, len, #15
110                 beq     2f
111
                    /* ip = bytes still to be handled by this loop */
112 1:              load4l  r4, r5, r6, r7
113                 stmia   dst!, {r4, r5, r6, r7}
114                 adcs    sum, sum, r4
115                 adcs    sum, sum, r5
116                 adcs    sum, sum, r6
117                 adcs    sum, sum, r7
118                 sub     ip, ip, #16
119                 teq     ip, #0
120                 bne     1b
121
                    /* ip = len & 12: bit 3 -> two words, bit 2 -> one word */
122 2:              ands    ip, len, #12
123                 beq     4f
124                 tst     ip, #8
125                 beq     3f
126                 load2l  r4, r5
127                 stmia   dst!, {r4, r5}
128                 adcs    sum, sum, r4
129                 adcs    sum, sum, r5
130                 tst     ip, #4
131                 beq     4f
132
133 3:              load1l  r4
134                 str     r4, [dst], #4
135                 adcs    sum, sum, r4
136
                    /*
                     * Tail of 1-3 bytes.  One more word is fetched into
                     * r4 and only its leading bytes are stored.  With
                     * bit 1 of len set, two bytes are stored and r4
                     * shifted by 16 is added to sum, so only 16 of its
                     * bits contribute (presumably the two bytes being
                     * stored); r5 is then left holding the third byte
                     * for .Lexit.  With bit 1 clear, r5 holds the first
                     * byte on arrival at .Lexit.
                     */
137 4:              ands    len, len, #3
138                 beq     .Ldone
139                 load1l  r4
140                 tst     len, #2
141                 mov     r5, r4, get_byte_0
142                 beq     .Lexit
143                 adcs    sum, sum, r4, lspush #16
144                 strb    r5, [dst], #1
145                 mov     r5, r4, get_byte_1
146                 strb    r5, [dst], #1
147                 mov     r5, r4, get_byte_2
                    /*
                     * Shared by all four source-alignment paths: if bit
                     * 0 of len is set, store the byte in r5, mask it to
                     * its low 8 bits and add it to sum.  Falls through
                     * into .Ldone.
                     */
148 .Lexit:         tst     len, #1
149                 strbne  r5, [dst], #1
150                 andne   r5, r5, #255
151                 adcsne  sum, sum, r5, put_byte_0
152
153                 /*
154                  * If the dst pointer was not 16-bit aligned, we
155                  * need to rotate the checksum here to get around
156                  * the inefficient byte manipulations in the
157                  * architecture independent code.
158                  */
                    /*
                     * Common exit.  'adc' folds the last outstanding
                     * carry into the result in r0.  sum is then reused
                     * as a scratch register: the word at [sp, #0] is
                     * read back and, if its bit 0 is set, the result is
                     * rotated right by 8 bits.
                     * NOTE(review): per the existing '@ dst' remark the
                     * word at [sp, #0] is the original dst, which
                     * presumably relies on save_regs having stored r1
                     * at the lowest stack slot; load_regs presumably
                     * restores the saved registers and returns --
                     * confirm against the including file.
                     */
159 .Ldone:         adc     r0, sum, #0
160                 ldr     sum, [sp, #0]           @ dst
161                 tst     sum, #1
162                 movne   r0, r0, ror #8
163                 load_regs
164
/*
 * dst is 32-bit aligned but src is not, so (src & 3) is 1, 2 or 3.
 *
 * The pending carry is folded into sum first, because the 'cmp' below
 * overwrites C.  src is then rounded down to a word boundary and the
 * word containing the first source bytes is loaded into r5.  Offsets
 * 2 and 3 branch away; the code that follows here handles offset 1.
 * For offset 1 the 'cmp ip, #2' leaves C clear (1 is below 2), which
 * is what the existing "C = 0" remark refers to.
 *
 * Offset-1 scheme: r4 carries the 24 bits left over from the previous
 * source word (that word shifted by 8 with lspull).  Each output word
 * is r4 merged with the next source word shifted by 24 with lspush;
 * the remainder of that next word becomes the new carry-over.
 * Stages 1:/2:/3:/4: mirror the aligned routine (16-byte loop, 8/4
 * byte steps, 1-3 byte tail).  The tail needs no further load since
 * r4 already holds three bytes.
 *
 * NOTE(review): lspull/lspush, get_byte_* and the loadNl macros are
 * defined outside this file; presumably lspull/lspush are the
 * endian-dependent shift directions and the loads read words from src
 * with post-increment -- confirm.
 */
165 .Lsrc_not_aligned:
166                 adc     sum, sum, #0            @ include C from dst alignment
167                 and     ip, src, #3
168                 bic     src, src, #3
169                 load1l  r5
170                 cmp     ip, #2
171                 beq     .Lsrc2_aligned
172                 bhi     .Lsrc3_aligned
173                 mov     r4, r5, lspull #8               @ C = 0
174                 bics    ip, len, #15
175                 beq     2f
                    /* 16 bytes per pass; ip counts the bytes left for this loop */
176 1:              load4l  r5, r6, r7, r8
177                 orr     r4, r4, r5, lspush #24
178                 mov     r5, r5, lspull #8
179                 orr     r5, r5, r6, lspush #24
180                 mov     r6, r6, lspull #8
181                 orr     r6, r6, r7, lspush #24
182                 mov     r7, r7, lspull #8
183                 orr     r7, r7, r8, lspush #24
184                 stmia   dst!, {r4, r5, r6, r7}
185                 adcs    sum, sum, r4
186                 adcs    sum, sum, r5
187                 adcs    sum, sum, r6
188                 adcs    sum, sum, r7
                    /* remainder of the last loaded word is the new carry-over */
189                 mov     r4, r8, lspull #8
                    /* 'sub' + 'teq' keep the carry from the adcs chain */
190                 sub     ip, ip, #16
191                 teq     ip, #0
192                 bne     1b
                    /* ip = len & 12: bit 3 -> two words, bit 2 -> one word */
193 2:              ands    ip, len, #12
194                 beq     4f
195                 tst     ip, #8
196                 beq     3f
197                 load2l  r5, r6
198                 orr     r4, r4, r5, lspush #24
199                 mov     r5, r5, lspull #8
200                 orr     r5, r5, r6, lspush #24
201                 stmia   dst!, {r4, r5}
202                 adcs    sum, sum, r4
203                 adcs    sum, sum, r5
204                 mov     r4, r6, lspull #8
205                 tst     ip, #4
206                 beq     4f
207 3:              load1l  r5
208                 orr     r4, r4, r5, lspush #24
209                 str     r4, [dst], #4
210                 adcs    sum, sum, r4
211                 mov     r4, r5, lspull #8
                    /*
                     * Tail of 1-3 bytes taken from the carry-over in r4.
                     * With bit 1 of len set, two bytes are stored and r4
                     * shifted by 16 is added to sum; r5 is left holding
                     * the third byte for .Lexit, which stores and adds
                     * it if bit 0 of len is set.
                     */
212 4:              ands    len, len, #3
213                 beq     .Ldone
214                 mov     r5, r4, get_byte_0
215                 tst     len, #2
216                 beq     .Lexit
217                 adcs    sum, sum, r4, lspush #16
218                 strb    r5, [dst], #1
219                 mov     r5, r4, get_byte_1
220                 strb    r5, [dst], #1
221                 mov     r5, r4, get_byte_2
222                 b       .Lexit
223
/*
 * Source offset 2 within its word, reached from .Lsrc_not_aligned
 * with the first aligned source word in r5.
 *
 * r4 carries the 16 bits left over from the previous source word
 * (that word shifted by 16 with lspull).  Each output word is r4
 * merged with the next source word shifted by 16 with lspush.
 * 'adds sum, sum, #0' clears C, which the preceding 'cmp ip, #2' left
 * set on this path.  Stages 1:/2:/3:/4: follow the same pattern as
 * the other alignment cases.
 *
 * NOTE(review): lspull/lspush, get_byte_*, load1b and the loadNl
 * macros are defined outside this file; presumably endian-dependent
 * shifts and post-incrementing loads from src -- confirm.
 */
224 .Lsrc2_aligned: mov     r4, r5, lspull #16
225                 adds    sum, sum, #0
226                 bics    ip, len, #15
227                 beq     2f
                    /* 16 bytes per pass; ip counts the bytes left for this loop */
228 1:              load4l  r5, r6, r7, r8
229                 orr     r4, r4, r5, lspush #16
230                 mov     r5, r5, lspull #16
231                 orr     r5, r5, r6, lspush #16
232                 mov     r6, r6, lspull #16
233                 orr     r6, r6, r7, lspush #16
234                 mov     r7, r7, lspull #16
235                 orr     r7, r7, r8, lspush #16
236                 stmia   dst!, {r4, r5, r6, r7}
237                 adcs    sum, sum, r4
238                 adcs    sum, sum, r5
239                 adcs    sum, sum, r6
240                 adcs    sum, sum, r7
                    /* remainder of the last loaded word is the new carry-over */
241                 mov     r4, r8, lspull #16
                    /* 'sub' + 'teq' keep the carry from the adcs chain */
242                 sub     ip, ip, #16
243                 teq     ip, #0
244                 bne     1b
                    /* ip = len & 12: bit 3 -> two words, bit 2 -> one word */
245 2:              ands    ip, len, #12
246                 beq     4f
247                 tst     ip, #8
248                 beq     3f
249                 load2l  r5, r6
250                 orr     r4, r4, r5, lspush #16
251                 mov     r5, r5, lspull #16
252                 orr     r5, r5, r6, lspush #16
253                 stmia   dst!, {r4, r5}
254                 adcs    sum, sum, r4
255                 adcs    sum, sum, r5
256                 mov     r4, r6, lspull #16
257                 tst     ip, #4
258                 beq     4f
259 3:              load1l  r5
260                 orr     r4, r4, r5, lspush #16
261                 str     r4, [dst], #4
262                 adcs    sum, sum, r4
263                 mov     r4, r5, lspull #16
                    /*
                     * Tail of 1-3 bytes.  r4 holds only 16 bits of
                     * carry-over.  With bit 1 of len set, r4 is added
                     * to sum as it stands and its two bytes are stored;
                     * a third byte, if bit 0 of len is set, is fetched
                     * with load1b and handed to .Lexit in r5.  With bit
                     * 1 clear, .Lexit gets the first byte of r4 in r5.
                     */
264 4:              ands    len, len, #3
265                 beq     .Ldone
266                 mov     r5, r4, get_byte_0
267                 tst     len, #2
268                 beq     .Lexit
269                 adcs    sum, sum, r4
270                 strb    r5, [dst], #1
271                 mov     r5, r4, get_byte_1
272                 strb    r5, [dst], #1
273                 tst     len, #1
274                 beq     .Ldone
275                 load1b  r5
276                 b       .Lexit
277
/*
 * Source offset 3 within its word, reached from .Lsrc_not_aligned
 * with the first aligned source word in r5.
 *
 * r4 carries the 8 bits left over from the previous source word (that
 * word shifted by 24 with lspull).  Each output word is r4 merged
 * with the next source word shifted by 8 with lspush.
 * 'adds sum, sum, #0' clears C, which the preceding 'cmp ip, #2' left
 * set on this path.  Stages 1:/2:/3:/4: follow the same pattern as
 * the other alignment cases.
 *
 * NOTE(review): lspull/lspush, get_byte_*, the loadNl macros and
 * FN_EXIT are defined outside this file; presumably endian-dependent
 * shifts, post-incrementing loads from src and the function
 * epilogue marker respectively -- confirm.
 */
278 .Lsrc3_aligned: mov     r4, r5, lspull #24
279                 adds    sum, sum, #0
280                 bics    ip, len, #15
281                 beq     2f
                    /* 16 bytes per pass; ip counts the bytes left for this loop */
282 1:              load4l  r5, r6, r7, r8
283                 orr     r4, r4, r5, lspush #8
284                 mov     r5, r5, lspull #24
285                 orr     r5, r5, r6, lspush #8
286                 mov     r6, r6, lspull #24
287                 orr     r6, r6, r7, lspush #8
288                 mov     r7, r7, lspull #24
289                 orr     r7, r7, r8, lspush #8
290                 stmia   dst!, {r4, r5, r6, r7}
291                 adcs    sum, sum, r4
292                 adcs    sum, sum, r5
293                 adcs    sum, sum, r6
294                 adcs    sum, sum, r7
                    /* remainder of the last loaded word is the new carry-over */
295                 mov     r4, r8, lspull #24
                    /* 'sub' + 'teq' keep the carry from the adcs chain */
296                 sub     ip, ip, #16
297                 teq     ip, #0
298                 bne     1b
                    /* ip = len & 12: bit 3 -> two words, bit 2 -> one word */
299 2:              ands    ip, len, #12
300                 beq     4f
301                 tst     ip, #8
302                 beq     3f
303                 load2l  r5, r6
304                 orr     r4, r4, r5, lspush #8
305                 mov     r5, r5, lspull #24
306                 orr     r5, r5, r6, lspush #8
307                 stmia   dst!, {r4, r5}
308                 adcs    sum, sum, r4
309                 adcs    sum, sum, r5
310                 mov     r4, r6, lspull #24
311                 tst     ip, #4
312                 beq     4f
313 3:              load1l  r5
314                 orr     r4, r4, r5, lspush #8
315                 str     r4, [dst], #4
316                 adcs    sum, sum, r4
317                 mov     r4, r5, lspull #24
                    /*
                     * Tail of 1-3 bytes.  r4 holds only 8 bits of
                     * carry-over.  With bit 1 of len set, that byte is
                     * stored and added to sum, a fresh word is loaded
                     * into r4, its first byte is stored and added to
                     * sum shifted by 24, and its second byte is left in
                     * r5 for .Lexit.  With bit 1 clear, .Lexit gets the
                     * carry-over byte in r5.
                     */
318 4:              ands    len, len, #3
319                 beq     .Ldone
320                 mov     r5, r4, get_byte_0
321                 tst     len, #2
322                 beq     .Lexit
323                 strb    r5, [dst], #1
324                 adcs    sum, sum, r4
325                 load1l  r4
326                 mov     r5, r4, get_byte_0
327                 strb    r5, [dst], #1
328                 adcs    sum, sum, r4, lspush #24
329                 mov     r5, r4, get_byte_1
330                 b       .Lexit
331 FN_EXIT