 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
#include <asm/assembler.h>
#include <asm/export.h>
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
42 adcs sum, sum, ip, put_byte_1 @ update checksum
45 reteq lr @ dst is now 32bit aligned
47 .Ldst_16bit: load2b r8, ip
49 adcs sum, sum, r8, put_byte_0
51 adcs sum, sum, ip, put_byte_1
53 ret lr @ dst is now 32bit aligned
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0.
59 .Lless8: teq len, #0 @ check for zero count
62 /* we must have at least one byte. */
63 tst dst, #1 @ dst 16-bit aligned
69 adcs sum, sum, ip, put_byte_1 @ update checksum
76 adcs sum, sum, r8, put_byte_0
78 adcs sum, sum, ip, put_byte_1
87 adcs sum, sum, r8, put_byte_0 @ update checksum
94 cmp len, #8 @ Ensure that we have at least
95 blo .Lless8 @ 8 bytes to copy.
97 adds sum, sum, #0 @ C = 0
98 tst dst, #3 @ Test destination alignment
99 blne .Ldst_unaligned @ align destination, return here
102 * Ok, the dst pointer is now 32bit aligned, and we know
103 * that we must have more than 4 bytes to copy. Note
104 * that C contains the carry from the dst alignment above.
107 tst src, #3 @ Test source alignment
108 bne .Lsrc_not_aligned
110 /* Routine for src & dst aligned */
115 1: load4l r4, r5, r6, r7
116 stmia dst!, {r4, r5, r6, r7}
144 mov r5, r4, get_byte_0
146 adcs sum, sum, r4, lspush #16
148 mov r5, r4, get_byte_1
150 mov r5, r4, get_byte_2
154 adcnes sum, sum, r5, put_byte_0
157 * If the dst pointer was not 16-bit aligned, we
158 * need to rotate the checksum here to get around
159 * the inefficient byte manipulations in the
160 * architecture independent code.
162 .Ldone: adc r0, sum, #0
163 ldr sum, [sp, #0] @ dst
169 adc sum, sum, #0 @ include C from dst alignment
176 mov r4, r5, lspull #8 @ C = 0
179 1: load4l r5, r6, r7, r8
180 orr r4, r4, r5, lspush #24
181 mov r5, r5, lspull #8
182 orr r5, r5, r6, lspush #24
183 mov r6, r6, lspull #8
184 orr r6, r6, r7, lspush #24
185 mov r7, r7, lspull #8
186 orr r7, r7, r8, lspush #24
187 stmia dst!, {r4, r5, r6, r7}
192 mov r4, r8, lspull #8
201 orr r4, r4, r5, lspush #24
202 mov r5, r5, lspull #8
203 orr r5, r5, r6, lspush #24
207 mov r4, r6, lspull #8
211 orr r4, r4, r5, lspush #24
214 mov r4, r5, lspull #8
217 mov r5, r4, get_byte_0
220 adcs sum, sum, r4, lspush #16
222 mov r5, r4, get_byte_1
224 mov r5, r4, get_byte_2
227 .Lsrc2_aligned: mov r4, r5, lspull #16
231 1: load4l r5, r6, r7, r8
232 orr r4, r4, r5, lspush #16
233 mov r5, r5, lspull #16
234 orr r5, r5, r6, lspush #16
235 mov r6, r6, lspull #16
236 orr r6, r6, r7, lspush #16
237 mov r7, r7, lspull #16
238 orr r7, r7, r8, lspush #16
239 stmia dst!, {r4, r5, r6, r7}
244 mov r4, r8, lspull #16
253 orr r4, r4, r5, lspush #16
254 mov r5, r5, lspull #16
255 orr r5, r5, r6, lspush #16
259 mov r4, r6, lspull #16
263 orr r4, r4, r5, lspush #16
266 mov r4, r5, lspull #16
269 mov r5, r4, get_byte_0
274 mov r5, r4, get_byte_1
281 .Lsrc3_aligned: mov r4, r5, lspull #24
285 1: load4l r5, r6, r7, r8
286 orr r4, r4, r5, lspush #8
287 mov r5, r5, lspull #24
288 orr r5, r5, r6, lspush #8
289 mov r6, r6, lspull #24
290 orr r6, r6, r7, lspush #8
291 mov r7, r7, lspull #24
292 orr r7, r7, r8, lspush #8
293 stmia dst!, {r4, r5, r6, r7}
298 mov r4, r8, lspull #24
307 orr r4, r4, r5, lspush #8
308 mov r5, r5, lspull #24
309 orr r5, r5, r6, lspush #8
313 mov r4, r6, lspull #24
317 orr r4, r4, r5, lspush #8
320 mov r4, r5, lspull #24
323 mov r5, r4, get_byte_0
329 mov r5, r4, get_byte_0
331 adcs sum, sum, r4, lspush #24
332 mov r5, r4, get_byte_1