Merge tag 'devicetree-fixes-for-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-microblaze.git] / arch / powerpc / lib / copy_mc_64.S
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) IBM Corporation, 2011
4  * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
5  * Author - Balbir Singh <bsingharora@gmail.com>
6  */
7 #include <asm/ppc_asm.h>
8 #include <asm/errno.h>
9 #include <asm/export.h>
10
11         .macro err1
           /*
            * Written as "err1; <insn>" at a use site. Defines numeric label
            * 100 at that point and hands it, with the handler .Ldo_err1, to
            * EX_TABLE. EX_TABLE is defined outside this file; presumably it
            * emits an exception-table entry so that a fault on the tagged
            * instruction resumes at the handler -- confirm in asm/ppc_asm.h.
            * Used at sites where the temporary stack frame is not live.
            */
12 100:
13         EX_TABLE(100b,.Ldo_err1)
14         .endm
15
16         .macro err2
           /*
            * Same shape as err1 but the handler is .Ldo_err2, which reloads
            * r14-r22 and pops the stack frame before continuing. Used at
            * sites between the frame push and the frame pop.
            */
17 200:
18         EX_TABLE(200b,.Ldo_err2)
19         .endm
20
21         .macro err3
           /*
            * Used inside the byte-by-byte retry loop of .Ldo_err1; the
            * handler is .Ldone, which returns the current CTR value.
            */
22 300:    EX_TABLE(300b,.Ldone)
23         .endm
24
25 .Ldo_err2:
           /*
            * Handler named by the err2 macro. Those sites run while the
            * stack frame pushed in copy_mc_generic is live, so reload the
            * saved r14-r22, pop the frame, then fall through to .Ldo_err1.
            */
26         ld      r22,STK_REG(R22)(r1)
27         ld      r21,STK_REG(R21)(r1)
28         ld      r20,STK_REG(R20)(r1)
29         ld      r19,STK_REG(R19)(r1)
30         ld      r18,STK_REG(R18)(r1)
31         ld      r17,STK_REG(R17)(r1)
32         ld      r16,STK_REG(R16)(r1)
33         ld      r15,STK_REG(R15)(r1)
34         ld      r14,STK_REG(R14)(r1)
35         addi    r1,r1,STACKFRAMESIZE
36 .Ldo_err1:
           /*
            * Handler named by the err1 macro (and fall-through from
            * .Ldo_err2). On entry r7 is the byte count copy_mc_generic had
            * not yet accounted as copied, and r4/r3 are its source and
            * destination cursors as left by the interrupted code.
            *
            * NOTE(review): at the multi-byte store sites in copy_mc_generic
            * the "addi r4" precedes the stores, while r3 and r7 are updated
            * after them. If one of those stores were the faulting
            * instruction, this loop would start with r4 already advanced
            * past the chunk but r3/r7 still at its start -- confirm that
            * only the loads are expected to fault.
            */
37         /* Do a byte by byte copy to get the exact remaining size */
38         mtctr   r7                   /* CTR = bytes to retry */
39 46:
40 err3;   lbz     r0,0(r4)
41         addi    r4,r4,1
42 err3;   stb     r0,0(r3)
43         addi    r3,r3,1
44         bdnz    46b
45         li      r3,0                 /* retry copied everything: return 0 */
46         blr
47
48 .Ldone:
           /*
            * Handler named by the err3 macro: the retry loop itself was
            * interrupted. CTR has been decremented once per completed byte,
            * so it still counts the byte being attempted; return it in r3.
            */
49         mfctr   r3
50         blr
51
52
53 _GLOBAL(copy_mc_generic)
           /*
            * Copy r5 bytes from the address in r4 to the address in r3.
            *
            * In:   r3 = destination cursor, r4 = source cursor,
            *       r5 = byte count
            * Out:  r3 = 0 when the straight-line path finishes. If a tagged
            *       access is interrupted, the handlers .Ldo_err1/.Ldo_err2
            *       retry byte by byte and return 0 on success, or .Ldone
            *       returns the number of bytes left uncopied.
            * Uses: r0, r6, r8-r12 as scratch; r7 as the remaining-byte
            *       counter; CTR; CR0 and CR7. r14-r22 are used as extra copy
            *       registers and are saved to / restored from a temporary
            *       stack frame. LR is read and stored but not modified here.
            *
            * Every load and store of the copy is prefixed with err1 or err2
            * so that the matching handler is named for it; err2 is used
            * exactly where the temporary stack frame is live.
            */
54         mr      r7,r5                /* r7 = bytes not yet copied (read by .Ldo_err1) */
55         cmpldi  r5,16
56         blt     .Lshort_copy         /* fewer than 16 bytes: skip alignment and bulk copy */
57
58 .Lcopy:
59         /* Get the source 8B aligned */
           /*
            * r6 = -source; mtocrf 0x01 puts its low four bits in CR7, so
            * CR7 bits 3, 2 and 1 select a 1-, 2- and 4-byte copy below.
            * r6 is then reduced to the alignment byte count (0..7).
            */
60         neg     r6,r4
61         mtocrf  0x01,r6
62         clrldi  r6,r6,(64-3)
63
64         bf      cr7*4+3,1f
65 err1;   lbz     r0,0(r4)
66         addi    r4,r4,1
67 err1;   stb     r0,0(r3)
68         addi    r3,r3,1
69         subi    r7,r7,1
70
71 1:      bf      cr7*4+2,2f
72 err1;   lhz     r0,0(r4)
73         addi    r4,r4,2
74 err1;   sth     r0,0(r3)
75         addi    r3,r3,2
76         subi    r7,r7,2
77
78 2:      bf      cr7*4+1,3f
79 err1;   lwz     r0,0(r4)
80         addi    r4,r4,4
81 err1;   stw     r0,0(r3)
82         addi    r3,r3,4
83         subi    r7,r7,4
84
85 3:      sub     r5,r5,r6             /* r5 = bytes left after the alignment copies */
86         cmpldi  r5,128               /* CR0 result is consumed by "blt 5f" below */
87
           /*
            * Push a frame and save r14-r22 so they can carry data in the
            * wide copies. LR is stored at offset 16 above the old stack
            * pointer; nothing in this routine reloads or changes it.
            */
88         mflr    r0
89         stdu    r1,-STACKFRAMESIZE(r1)
90         std     r14,STK_REG(R14)(r1)
91         std     r15,STK_REG(R15)(r1)
92         std     r16,STK_REG(R16)(r1)
93         std     r17,STK_REG(R17)(r1)
94         std     r18,STK_REG(R18)(r1)
95         std     r19,STK_REG(R19)(r1)
96         std     r20,STK_REG(R20)(r1)
97         std     r21,STK_REG(R21)(r1)
98         std     r22,STK_REG(R22)(r1)
99         std     r0,STACKFRAMESIZE+16(r1)
100
101         blt     5f                  /* r5 < 128: no full 128B block */
102         srdi    r6,r5,7
103         mtctr   r6                  /* CTR = number of 128B blocks */
104
105         /* Now do cacheline (128B) sized loads and stores. */
            /*
             * All sixteen doublewords are loaded before any is stored, and
             * r3/r7 are only updated once the whole block has been stored.
             */
106         .align  5
107 4:
108 err2;   ld      r0,0(r4)
109 err2;   ld      r6,8(r4)
110 err2;   ld      r8,16(r4)
111 err2;   ld      r9,24(r4)
112 err2;   ld      r10,32(r4)
113 err2;   ld      r11,40(r4)
114 err2;   ld      r12,48(r4)
115 err2;   ld      r14,56(r4)
116 err2;   ld      r15,64(r4)
117 err2;   ld      r16,72(r4)
118 err2;   ld      r17,80(r4)
119 err2;   ld      r18,88(r4)
120 err2;   ld      r19,96(r4)
121 err2;   ld      r20,104(r4)
122 err2;   ld      r21,112(r4)
123 err2;   ld      r22,120(r4)
124         addi    r4,r4,128
125 err2;   std     r0,0(r3)
126 err2;   std     r6,8(r3)
127 err2;   std     r8,16(r3)
128 err2;   std     r9,24(r3)
129 err2;   std     r10,32(r3)
130 err2;   std     r11,40(r3)
131 err2;   std     r12,48(r3)
132 err2;   std     r14,56(r3)
133 err2;   std     r15,64(r3)
134 err2;   std     r16,72(r3)
135 err2;   std     r17,80(r3)
136 err2;   std     r18,88(r3)
137 err2;   std     r19,96(r3)
138 err2;   std     r20,104(r3)
139 err2;   std     r21,112(r3)
140 err2;   std     r22,120(r3)
141         addi    r3,r3,128
142         subi    r7,r7,128
143         bdnz    4b
144
145         clrldi  r5,r5,(64-7)        /* r5 = leftover below 128 */
146
147         /* Up to 127B to go */
            /*
             * CR7 = low four bits of (r5 >> 4): bit 1 selects the 64B
             * block, bit 2 the 32B block and bit 3 the 16B block below.
             */
148 5:      srdi    r6,r5,4
149         mtocrf  0x01,r6
150
151 6:      bf      cr7*4+1,7f
152 err2;   ld      r0,0(r4)
153 err2;   ld      r6,8(r4)
154 err2;   ld      r8,16(r4)
155 err2;   ld      r9,24(r4)
156 err2;   ld      r10,32(r4)
157 err2;   ld      r11,40(r4)
158 err2;   ld      r12,48(r4)
159 err2;   ld      r14,56(r4)
160         addi    r4,r4,64
161 err2;   std     r0,0(r3)
162 err2;   std     r6,8(r3)
163 err2;   std     r8,16(r3)
164 err2;   std     r9,24(r3)
165 err2;   std     r10,32(r3)
166 err2;   std     r11,40(r3)
167 err2;   std     r12,48(r3)
168 err2;   std     r14,56(r3)
169         addi    r3,r3,64
170         subi    r7,r7,64
171
            /*
             * r14-r22 are no longer needed: restore them and pop the frame.
             * From here on the accesses are tagged err1 instead of err2.
             */
172 7:      ld      r14,STK_REG(R14)(r1)
173         ld      r15,STK_REG(R15)(r1)
174         ld      r16,STK_REG(R16)(r1)
175         ld      r17,STK_REG(R17)(r1)
176         ld      r18,STK_REG(R18)(r1)
177         ld      r19,STK_REG(R19)(r1)
178         ld      r20,STK_REG(R20)(r1)
179         ld      r21,STK_REG(R21)(r1)
180         ld      r22,STK_REG(R22)(r1)
181         addi    r1,r1,STACKFRAMESIZE
182
183         /* Up to 63B to go */
184         bf      cr7*4+2,8f
185 err1;   ld      r0,0(r4)
186 err1;   ld      r6,8(r4)
187 err1;   ld      r8,16(r4)
188 err1;   ld      r9,24(r4)
189         addi    r4,r4,32
190 err1;   std     r0,0(r3)
191 err1;   std     r6,8(r3)
192 err1;   std     r8,16(r3)
193 err1;   std     r9,24(r3)
194         addi    r3,r3,32
195         subi    r7,r7,32
196
197         /* Up to 31B to go */
198 8:      bf      cr7*4+3,9f
199 err1;   ld      r0,0(r4)
200 err1;   ld      r6,8(r4)
201         addi    r4,r4,16
202 err1;   std     r0,0(r3)
203 err1;   std     r6,8(r3)
204         addi    r3,r3,16
205         subi    r7,r7,16
206
207 9:      clrldi  r5,r5,(64-4)        /* r5 = leftover below 16 */
208
209         /* Up to 15B to go */
            /*
             * Also entered directly from the top when the whole request is
             * under 16 bytes. CR7 = low four bits of r5: bits 0, 1, 2 and 3
             * select the 8-, 4-, 2- and 1-byte copies below.
             */
210 .Lshort_copy:
211         mtocrf  0x01,r5
212         bf      cr7*4+0,12f
213 err1;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
214 err1;   lwz     r6,4(r4)
215         addi    r4,r4,8
216 err1;   stw     r0,0(r3)
217 err1;   stw     r6,4(r3)
218         addi    r3,r3,8
219         subi    r7,r7,8
220
221 12:     bf      cr7*4+1,13f
222 err1;   lwz     r0,0(r4)
223         addi    r4,r4,4
224 err1;   stw     r0,0(r3)
225         addi    r3,r3,4
226         subi    r7,r7,4
227
228 13:     bf      cr7*4+2,14f
229 err1;   lhz     r0,0(r4)
230         addi    r4,r4,2
231 err1;   sth     r0,0(r3)
232         addi    r3,r3,2
233         subi    r7,r7,2
234
            /* Last byte: nothing follows, so r3/r4/r7 are not advanced. */
235 14:     bf      cr7*4+3,15f
236 err1;   lbz     r0,0(r4)
237 err1;   stb     r0,0(r3)
238
239 15:     li      r3,0                /* straight-line path completed: return 0 */
240         blr
241
242 EXPORT_SYMBOL_GPL(copy_mc_generic);