Merge tag 'docs-4.18' of git://git.lwn.net/linux
[linux-2.6-microblaze.git] / arch / powerpc / lib / copyuser_64.S
1 /*
2  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
11 #include <asm/export.h>
12
/*
 * Endian-neutral doubleword shift mnemonics.  They are used by the
 * .Lsrc_unaligned path below to splice pieces of two adjacent source
 * doublewords into one destination doubleword.
 */
13 #ifdef __BIG_ENDIAN__
14 #define sLd sld         /* Shift towards low-numbered address. */
15 #define sHd srd         /* Shift towards high-numbered address. */
16 #else
17 #define sLd srd         /* Shift towards low-numbered address. */
18 #define sHd sld         /* Shift towards high-numbered address. */
19 #endif
20
/*
 * __copy_tofrom_user: r3 = destination, r4 = source, r5 = byte count.
 * Every normal exit loads 0 into r3; the fault fixups later in this file
 * return the number of bytes not copied in r3 instead.
 */
21         .align  7
22 _GLOBAL_TOC(__copy_tofrom_user)
23 #ifdef CONFIG_PPC_BOOK3S_64
/*
 * Feature section keyed on CPU_FTR_VMX_COPY: either the nop (fall through
 * to the base routine) or the branch to the POWER7 variant is used.
 */
24 BEGIN_FTR_SECTION
25         nop
26 FTR_SECTION_ELSE
27         b       __copy_tofrom_user_power7
28 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
29 #endif
30 _GLOBAL(__copy_tofrom_user_base)
31         /* first check for a whole page copy on a page boundary */
32         cmpldi  cr1,r5,16       /* cr1: len vs 16 (unsigned), for the blt cr1 below */
33         cmpdi   cr6,r5,4096     /* cr6.eq: len == 4096 */
34         or      r0,r3,r4
35         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
36         andi.   r0,r0,4095      /* cr0.eq: low 12 bits of both dest and src are 0 */
37         std     r3,-24(r1)      /* dest/src/len are stashed below r1; the fixups reload them */
38         crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq &= cr6.eq */
39         std     r4,-16(r1)
40         std     r5,-8(r1)
41         dcbt    0,r4
42         beq     .Lcopy_page_4K  /* aligned, exactly 4096 bytes */
43         andi.   r6,r6,7         /* r6 = bytes to 8-byte dest boundary; cr0.ne if any */
44         PPC_MTOCRF(0x01,r5)     /* low bits of len -> cr7, tested by the bf/bt cr7 branches */
45         blt     cr1,.Lshort_copy        /* len < 16 */
46 /* Below we want to nop out the bne if we're on a CPU that has the
47  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
48  * cleared.
49  * At the time of writing the only CPU that has this combination of bits
50  * set is Power6.
51  */
52 BEGIN_FTR_SECTION
53         nop
54 FTR_SECTION_ELSE
55         bne     .Ldst_unaligned
56 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
57                     CPU_FTR_UNALIGNED_LD_STD)
58 .Ldst_aligned:
59         addi    r3,r3,-16       /* dest is biased by -16; each path below compensates */
60 BEGIN_FTR_SECTION
61         andi.   r0,r4,7         /* r0 = src & 7, consumed by .Lsrc_unaligned */
62         bne     .Lsrc_unaligned
63 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
64         blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
65         srdi    r0,r5,5         /* r0 = number of 32-byte blocks */
66         cmpdi   cr1,r0,0
67 20:     ld      r7,0(r4)
68 220:    ld      r6,8(r4)
69         addi    r4,r4,16
70         mtctr   r0
71         andi.   r0,r5,0x10      /* is there an odd 16-byte chunk? */
72         beq     22f             /* no: enter the loop at its midpoint */
73         addi    r3,r3,16        /* yes: undo the pointer adjustments ... */
74         addi    r4,r4,-16
75         mr      r9,r7           /* ... and make the loaded pair the pending store */
76         mr      r8,r6
77         beq     cr1,72f         /* no 32-byte blocks: just store that pair */
/* Main loop: 32 bytes per iteration, loads kept ahead of the stores. */
78 21:     ld      r7,16(r4)
79 221:    ld      r6,24(r4)
80         addi    r4,r4,32
81 70:     std     r9,0(r3)
82 270:    std     r8,8(r3)
83 22:     ld      r9,0(r4)
84 222:    ld      r8,8(r4)
85 71:     std     r7,16(r3)
86 271:    std     r6,24(r3)
87         addi    r3,r3,32
88         bdnz    21b
89 72:     std     r9,0(r3)
90 272:    std     r8,8(r3)
91         andi.   r5,r5,0xf       /* r5 = leftover bytes (< 16) */
92         beq+    3f
93         addi    r4,r4,16
94 .Ldo_tail:
95         addi    r3,r3,16
/* Tail: cr7 bits 0..3 select an 8-, 4-, 2- and 1-byte copy respectively. */
96         bf      cr7*4+0,246f
97 244:    ld      r9,0(r4)
98         addi    r4,r4,8
99 245:    std     r9,0(r3)
100         addi    r3,r3,8
101 246:    bf      cr7*4+1,1f
102 23:     lwz     r9,0(r4)
103         addi    r4,r4,4
104 73:     stw     r9,0(r3)
105         addi    r3,r3,4
106 1:      bf      cr7*4+2,2f
107 44:     lhz     r9,0(r4)
108         addi    r4,r4,2
109 74:     sth     r9,0(r3)
110         addi    r3,r3,2
111 2:      bf      cr7*4+3,3f
112 45:     lbz     r9,0(r4)
113 75:     stb     r9,0(r3)
114 3:      li      r3,0            /* success: 0 bytes not copied */
115         blr
116
/*
 * Destination is doubleword aligned but the source is not.
 * On entry r0 = src & 7 (set by the andi. in .Ldst_aligned) and r3 is
 * biased by -16.  Aligned source doublewords are loaded and each output
 * doubleword is assembled from two of them with sLd/sHd.
 */
117 .Lsrc_unaligned:
118         srdi    r6,r5,3         /* r6 = len / 8 */
119         addi    r5,r5,-16
120         subf    r4,r0,r4        /* round src down to a doubleword boundary */
121         srdi    r7,r5,4         /* loop count = (len - 16) / 16 */
122         sldi    r10,r0,3        /* r10 = source misalignment in bits */
123         cmpldi  cr6,r6,3        /* cr6 steers the short cases (blt/beq cr6 below) */
124         andi.   r5,r5,7         /* r5 = len & 7; cr0 stays live until the bne after 80: */
125         mtctr   r7
126         subfic  r11,r10,64      /* r11 = 64 - r10, the complementary shift */
127         add     r5,r5,r0        /* r5 = trailing bytes + misalignment */
128         bt      cr7*4+0,28f     /* 8's bit of len set: odd number of doublewords */
129
130 24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
131 25:     ld      r0,8(r4)
132         sLd     r6,r9,r10
133 26:     ldu     r9,16(r4)
134         sHd     r7,r0,r11
135         sLd     r8,r0,r10
136         or      r7,r7,r6
137         blt     cr6,79f
138 27:     ld      r0,8(r4)
139         b       2f
140
141 28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
142 29:     ldu     r9,8(r4)
143         sLd     r8,r0,r10
144         addi    r3,r3,-8
145         blt     cr6,5f
146 30:     ld      r0,8(r4)
147         sHd     r12,r9,r11
148         sLd     r6,r9,r10
149 31:     ldu     r9,16(r4)
150         or      r12,r8,r12
151         sHd     r7,r0,r11
152         sLd     r8,r0,r10
153         addi    r3,r3,16
154         beq     cr6,78f
155
/* Steady state: two source doublewords in, two merged doublewords out. */
156 1:      or      r7,r7,r6
157 32:     ld      r0,8(r4)
158 76:     std     r12,8(r3)
159 2:      sHd     r12,r9,r11
160         sLd     r6,r9,r10
161 33:     ldu     r9,16(r4)
162         or      r12,r8,r12
163 77:     stdu    r7,16(r3)
164         sHd     r7,r0,r11
165         sLd     r8,r0,r10
166         bdnz    1b
167
168 78:     std     r12,8(r3)
169         or      r7,r7,r6
170 79:     std     r7,16(r3)
171 5:      sHd     r12,r9,r11
172         or      r12,r8,r12
173 80:     std     r12,24(r3)
174         bne     6f              /* cr0 from "andi. r5,r5,7": trailing bytes remain */
175         li      r3,0
176         blr
177 6:      cmpwi   cr1,r5,8
178         addi    r3,r3,32
179         sLd     r9,r9,r10
180         ble     cr1,7f          /* trailing + misalignment <= 8: r9 already holds them */
181 34:     ld      r0,8(r4)        /* otherwise the tail spills into the next doubleword */
182         sHd     r7,r0,r11
183         or      r9,r7,r9
184 7:
/*
 * Store the last 4/2/1 bytes out of r9 as selected by cr7 bits 1..3.
 * The rotate is done before the store on big-endian and after it on
 * little-endian.
 */
185         bf      cr7*4+1,1f
186 #ifdef __BIG_ENDIAN__
187         rotldi  r9,r9,32
188 #endif
189 94:     stw     r9,0(r3)
190 #ifdef __LITTLE_ENDIAN__
191         rotrdi  r9,r9,32
192 #endif
193         addi    r3,r3,4
194 1:      bf      cr7*4+2,2f
195 #ifdef __BIG_ENDIAN__
196         rotldi  r9,r9,16
197 #endif
198 95:     sth     r9,0(r3)
199 #ifdef __LITTLE_ENDIAN__
200         rotrdi  r9,r9,16
201 #endif
202         addi    r3,r3,2
203 2:      bf      cr7*4+3,3f
204 #ifdef __BIG_ENDIAN__
205         rotldi  r9,r9,8
206 #endif
207 96:     stb     r9,0(r3)
208 #ifdef __LITTLE_ENDIAN__
209         rotrdi  r9,r9,8
210 #endif
211 3:      li      r3,0
212         blr
213
/*
 * Destination is not doubleword aligned: copy the r6 (1..7) head bytes
 * that bring it to an 8-byte boundary, then rejoin .Ldst_aligned.
 * r3/r4 are left untouched during the head copy and r7 holds the offset,
 * which is what the 136/137 and 182/183 fixups add back to r3.
 */
214 .Ldst_unaligned:
215         PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
216         subf    r5,r6,r5        /* len -= head bytes */
217         li      r7,0            /* r7 = offset already copied within the head */
218         cmpldi  cr1,r5,16       /* refresh cr1 for the "blt cr1" in .Ldst_aligned */
219         bf      cr7*4+3,1f
220 35:     lbz     r0,0(r4)
221 81:     stb     r0,0(r3)
222         addi    r7,r7,1
223 1:      bf      cr7*4+2,2f
224 36:     lhzx    r0,r7,r4
225 82:     sthx    r0,r7,r3
226         addi    r7,r7,2
227 2:      bf      cr7*4+1,3f
228 37:     lwzx    r0,r7,r4
229 83:     stwx    r0,r7,r3
230 3:      PPC_MTOCRF(0x01,r5)     /* cr7 = low bits of the remaining length */
231         add     r4,r6,r4        /* step src and dest past the head */
232         add     r3,r6,r3
233         b       .Ldst_aligned
234
/*
 * Fewer than 16 bytes: cr7 holds the low bits of the length, and bits
 * 0..3 select an 8-byte (done as two words), 4-, 2- and 1-byte copy.
 */
235 .Lshort_copy:
236         bf      cr7*4+0,1f
237 38:     lwz     r0,0(r4)
238 39:     lwz     r9,4(r4)
239         addi    r4,r4,8
240 84:     stw     r0,0(r3)
241 85:     stw     r9,4(r3)
242         addi    r3,r3,8
243 1:      bf      cr7*4+1,2f
244 40:     lwz     r0,0(r4)
245         addi    r4,r4,4
246 86:     stw     r0,0(r3)
247         addi    r3,r3,4
248 2:      bf      cr7*4+2,3f
249 41:     lhz     r0,0(r4)
250         addi    r4,r4,2
251 87:     sth     r0,0(r3)
252         addi    r3,r3,2
253 3:      bf      cr7*4+3,4f
254 42:     lbz     r0,0(r4)
255 88:     stb     r0,0(r3)
256 4:      li      r3,0            /* success: 0 bytes not copied */
257         blr
258
259 /*
260  * exception handlers follow
261  * we have to return the number of bytes not copied
262  * for an exception on a load, we retry byte by byte; the destination is not zeroed here
263  */
264
/*
 * Load-fault fixups.  Each label is the landing pad for the load whose
 * label is 100 lower (see the EX_TABLE list).  The labels are stacked so
 * that falling through the addi instructions advances r3 to the first
 * destination byte that has not been written yet.
 */
265 136:
266 137:
267         add     r3,r3,r7        /* head copy in .Ldst_unaligned: progress is the offset in r7 */
268         b       1f
269 130:
270 131:
271         addi    r3,r3,8         /* these two entries end up adding 24 in total */
272 120:
273 320:
274 122:
275 322:
276 124:
277 125:
278 126:
279 127:
280 128:
281 129:
282 133:
283         addi    r3,r3,8         /* entries from 120: down to here add 16 in total */
284 132:
285         addi    r3,r3,8         /* 132: adds 8 */
286 121:
287 321:
288 344:
289 134:
290 135:
291 138:
292 139:
293 140:
294 141:
295 142:
296 123:
297 144:
298 145:
299
300 /*
301  * here we have had a fault on a load and r3 points to the first
302  * unmodified byte of the destination
303  */
304 1:      ld      r6,-24(r1)      /* original dest */
305         ld      r4,-16(r1)      /* original src */
306         ld      r5,-8(r1)       /* original len */
307         subf    r6,r6,r3        /* r6 = bytes already copied */
308         add     r4,r4,r6        /* src advanced by the same amount */
309         subf    r5,r6,r5        /* #bytes left to go */
310
311 /*
312  * first see if we can copy any more bytes before hitting another exception
313  */
314         mtctr   r5
315 43:     lbz     r0,0(r4)
316         addi    r4,r4,1
317 89:     stb     r0,0(r3)
318         addi    r3,r3,1
319         bdnz    43b
320         li      r3,0            /* huh? all copied successfully this time? */
321         blr
322
323 /*
324  * here we have trapped again, amount remaining is in ctr.
325  */
326 143:    mfctr   r3
327         blr
328
329 /*
330  * exception handlers for stores: we just need to work
331  * out how many bytes weren't copied
332  */
/*
 * As with the load fixups, the labels are stacked so that the fall-through
 * addi chain adds the right offset to r3 for each faulting store.
 */
333 182:
334 183:
335         add     r3,r3,r7        /* head copy in .Ldst_unaligned: add the offset in r7 */
336         b       1f
337 371:
338 180:
339         addi    r3,r3,8         /* these two entries end up adding 24 in total */
340 171:
341 177:
342 179:
343         addi    r3,r3,8         /* these three end up adding 16 in total */
344 370:
345 372:
346 176:
347 178:
348         addi    r3,r3,4         /* these four end up adding 8 in total */
349 185:
350         addi    r3,r3,4         /* 185: adds 4 */
351 170:
352 172:
353 345:
354 173:
355 174:
356 175:
357 181:
358 184:
359 186:
360 187:
361 188:
362 189:    
363 194:
364 195:
365 196:
366 1:
367         ld      r6,-24(r1)      /* original dest */
368         ld      r5,-8(r1)       /* original len */
369         add     r6,r6,r5        /* r6 = end of the destination buffer */
370         subf    r3,r3,r6        /* #bytes not copied */
371         blr
372
/*
 * Exception table for the base routine: EX_TABLE(N, N+100) pairs each
 * access that may fault with its fixup.  The loads land in the load-fault
 * fixups and the stores in the store-fault fixups above.  All labels are
 * backward references ("b"), so they name the definitions that precede
 * this point in the file.
 */
373         EX_TABLE(20b,120b)
374         EX_TABLE(220b,320b)
375         EX_TABLE(21b,121b)
376         EX_TABLE(221b,321b)
377         EX_TABLE(70b,170b)
378         EX_TABLE(270b,370b)
379         EX_TABLE(22b,122b)
380         EX_TABLE(222b,322b)
381         EX_TABLE(71b,171b)
382         EX_TABLE(271b,371b)
383         EX_TABLE(72b,172b)
384         EX_TABLE(272b,372b)
385         EX_TABLE(244b,344b)
386         EX_TABLE(245b,345b)
387         EX_TABLE(23b,123b)
388         EX_TABLE(73b,173b)
389         EX_TABLE(44b,144b)
390         EX_TABLE(74b,174b)
391         EX_TABLE(45b,145b)
392         EX_TABLE(75b,175b)
393         EX_TABLE(24b,124b)
394         EX_TABLE(25b,125b)
395         EX_TABLE(26b,126b)
396         EX_TABLE(27b,127b)
397         EX_TABLE(28b,128b)
398         EX_TABLE(29b,129b)
399         EX_TABLE(30b,130b)
400         EX_TABLE(31b,131b)
401         EX_TABLE(32b,132b)
402         EX_TABLE(76b,176b)
403         EX_TABLE(33b,133b)
404         EX_TABLE(77b,177b)
405         EX_TABLE(78b,178b)
406         EX_TABLE(79b,179b)
407         EX_TABLE(80b,180b)
408         EX_TABLE(34b,134b)
409         EX_TABLE(94b,194b)
410         EX_TABLE(95b,195b)
411         EX_TABLE(96b,196b)
412         EX_TABLE(35b,135b)
413         EX_TABLE(81b,181b)
414         EX_TABLE(36b,136b)
415         EX_TABLE(82b,182b)
416         EX_TABLE(37b,137b)
417         EX_TABLE(83b,183b)
418         EX_TABLE(38b,138b)
419         EX_TABLE(39b,139b)
420         EX_TABLE(84b,184b)
421         EX_TABLE(85b,185b)
422         EX_TABLE(40b,140b)
423         EX_TABLE(86b,186b)
424         EX_TABLE(41b,141b)
425         EX_TABLE(87b,187b)
426         EX_TABLE(42b,142b)
427         EX_TABLE(88b,188b)
428         EX_TABLE(43b,143b)
429         EX_TABLE(89b,189b)
430
431 /*
432  * Routine to copy a whole page of data, optimized for POWER4.
433  * On POWER4 it is more than 50% faster than the simple loop
434  * above (following the .Ldst_aligned label).
435  */
/*
 * Reached only when dest and src are both 4096-byte aligned and len is
 * exactly 4096.  r20-r31 are saved below r1 (under the dest/src/len slots
 * written at entry) and restored at 9: on success or at 100: on a fault.
 * The numeric labels 20..91 are redefined in this section.
 */
436 .Lcopy_page_4K:
437         std     r31,-32(1)
438         std     r30,-40(1)
439         std     r29,-48(1)
440         std     r28,-56(1)
441         std     r27,-64(1)
442         std     r26,-72(1)
443         std     r25,-80(1)
444         std     r24,-88(1)
445         std     r23,-96(1)
446         std     r22,-104(1)
447         std     r21,-112(1)
448         std     r20,-120(1)
449         li      r5,4096/32 - 1  /* 127; each outer pass takes 24, the rest feeds the loop at 3: */
450         addi    r3,r3,-8        /* bias dest by -8: stores start at 8(r3) */
451         li      r0,5            /* inner loop count */
/*
 * Outer loop: each pass copies six 128-byte-spaced streams in parallel
 * (offsets 0, 128, 256, 384, 512 and 640 from r4).
 */
452 0:      addi    r5,r5,-24
453         mtctr   r0
454 20:     ld      r22,640(4)
455 21:     ld      r21,512(4)
456 22:     ld      r20,384(4)
457 23:     ld      r11,256(4)
458 24:     ld      r9,128(4)
459 25:     ld      r7,0(4)
460 26:     ld      r25,648(4)
461 27:     ld      r24,520(4)
462 28:     ld      r23,392(4)
463 29:     ld      r10,264(4)
464 30:     ld      r8,136(4)
465 31:     ldu     r6,8(4)
466         cmpwi   r5,24           /* cr0 is consumed by the "bge 0b" after the inner loop */
467 1:
468 32:     std     r22,648(3)
469 33:     std     r21,520(3)
470 34:     std     r20,392(3)
471 35:     std     r11,264(3)
472 36:     std     r9,136(3)
473 37:     std     r7,8(3)
474 38:     ld      r28,648(4)
475 39:     ld      r27,520(4)
476 40:     ld      r26,392(4)
477 41:     ld      r31,264(4)
478 42:     ld      r30,136(4)
479 43:     ld      r29,8(4)
480 44:     std     r25,656(3)
481 45:     std     r24,528(3)
482 46:     std     r23,400(3)
483 47:     std     r10,272(3)
484 48:     std     r8,144(3)
485 49:     std     r6,16(3)
486 50:     ld      r22,656(4)
487 51:     ld      r21,528(4)
488 52:     ld      r20,400(4)
489 53:     ld      r11,272(4)
490 54:     ld      r9,144(4)
491 55:     ld      r7,16(4)
492 56:     std     r28,664(3)
493 57:     std     r27,536(3)
494 58:     std     r26,408(3)
495 59:     std     r31,280(3)
496 60:     std     r30,152(3)
497 61:     stdu    r29,24(3)
498 62:     ld      r25,664(4)
499 63:     ld      r24,536(4)
500 64:     ld      r23,408(4)
501 65:     ld      r10,280(4)
502 66:     ld      r8,152(4)
503 67:     ldu     r6,24(4)
504         bdnz    1b
505 68:     std     r22,648(3)
506 69:     std     r21,520(3)
507 70:     std     r20,392(3)
508 71:     std     r11,264(3)
509 72:     std     r9,136(3)
510 73:     std     r7,8(3)
511 74:     addi    r4,r4,640       /* step src past the other five streams */
512 75:     addi    r3,r3,648       /* likewise for dest */
513         bge     0b              /* repeat while r5 was still >= 24 at the cmpwi */
514         mtctr   r5              /* remaining count for the 32-byte loop at 3: */
515 76:     ld      r7,0(4)
516 77:     ld      r8,8(4)
517 78:     ldu     r9,16(4)
518 3:
519 79:     ld      r10,8(4)
520 80:     std     r7,8(3)
521 81:     ld      r7,16(4)
522 82:     std     r8,16(3)
523 83:     ld      r8,24(4)
524 84:     std     r9,24(3)
525 85:     ldu     r9,32(4)
526 86:     stdu    r10,32(3)
527         bdnz    3b
528 4:
529 87:     ld      r10,8(4)
530 88:     std     r7,8(3)
531 89:     std     r8,16(3)
532 90:     std     r9,24(3)
533 91:     std     r10,32(3)
534 9:      ld      r20,-120(1)     /* restore the saved registers */
535         ld      r21,-112(1)
536         ld      r22,-104(1)
537         ld      r23,-96(1)
538         ld      r24,-88(1)
539         ld      r25,-80(1)
540         ld      r26,-72(1)
541         ld      r27,-64(1)
542         ld      r28,-56(1)
543         ld      r29,-48(1)
544         ld      r30,-40(1)
545         ld      r31,-32(1)
546         li      r3,0            /* success: 0 bytes not copied */
547         blr
548
549 /*
550  * on an exception, reset to the beginning and jump back into the
551  * standard __copy_tofrom_user
552  */
/*
 * Shared fixup for every access in .Lcopy_page_4K: restore r20-r31,
 * reload the dest and src saved at entry, set len back to 4096 and redo
 * the copy through the ordinary .Ldst_aligned path, whose own fixups then
 * work out how much could not be copied.
 */
553 100:    ld      r20,-120(1)
554         ld      r21,-112(1)
555         ld      r22,-104(1)
556         ld      r23,-96(1)
557         ld      r24,-88(1)
558         ld      r25,-80(1)
559         ld      r26,-72(1)
560         ld      r27,-64(1)
561         ld      r28,-56(1)
562         ld      r29,-48(1)
563         ld      r30,-40(1)
564         ld      r31,-32(1)
565         ld      r3,-24(r1)      /* dest as saved at entry */
566         ld      r4,-16(r1)      /* src as saved at entry */
567         li      r5,4096
568         b       .Ldst_aligned
569
/*
 * Exception table for .Lcopy_page_4K: labels 20..91 (backward references,
 * so the definitions inside the page-copy section) all share the single
 * fixup at 100:.
 */
570         EX_TABLE(20b,100b)
571         EX_TABLE(21b,100b)
572         EX_TABLE(22b,100b)
573         EX_TABLE(23b,100b)
574         EX_TABLE(24b,100b)
575         EX_TABLE(25b,100b)
576         EX_TABLE(26b,100b)
577         EX_TABLE(27b,100b)
578         EX_TABLE(28b,100b)
579         EX_TABLE(29b,100b)
580         EX_TABLE(30b,100b)
581         EX_TABLE(31b,100b)
582         EX_TABLE(32b,100b)
583         EX_TABLE(33b,100b)
584         EX_TABLE(34b,100b)
585         EX_TABLE(35b,100b)
586         EX_TABLE(36b,100b)
587         EX_TABLE(37b,100b)
588         EX_TABLE(38b,100b)
589         EX_TABLE(39b,100b)
590         EX_TABLE(40b,100b)
591         EX_TABLE(41b,100b)
592         EX_TABLE(42b,100b)
593         EX_TABLE(43b,100b)
594         EX_TABLE(44b,100b)
595         EX_TABLE(45b,100b)
596         EX_TABLE(46b,100b)
597         EX_TABLE(47b,100b)
598         EX_TABLE(48b,100b)
599         EX_TABLE(49b,100b)
600         EX_TABLE(50b,100b)
601         EX_TABLE(51b,100b)
602         EX_TABLE(52b,100b)
603         EX_TABLE(53b,100b)
604         EX_TABLE(54b,100b)
605         EX_TABLE(55b,100b)
606         EX_TABLE(56b,100b)
607         EX_TABLE(57b,100b)
608         EX_TABLE(58b,100b)
609         EX_TABLE(59b,100b)
610         EX_TABLE(60b,100b)
611         EX_TABLE(61b,100b)
612         EX_TABLE(62b,100b)
613         EX_TABLE(63b,100b)
614         EX_TABLE(64b,100b)
615         EX_TABLE(65b,100b)
616         EX_TABLE(66b,100b)
617         EX_TABLE(67b,100b)
618         EX_TABLE(68b,100b)
619         EX_TABLE(69b,100b)
620         EX_TABLE(70b,100b)
621         EX_TABLE(71b,100b)
622         EX_TABLE(72b,100b)
623         EX_TABLE(73b,100b)
624         EX_TABLE(74b,100b)
625         EX_TABLE(75b,100b)
626         EX_TABLE(76b,100b)
627         EX_TABLE(77b,100b)
628         EX_TABLE(78b,100b)
629         EX_TABLE(79b,100b)
630         EX_TABLE(80b,100b)
631         EX_TABLE(81b,100b)
632         EX_TABLE(82b,100b)
633         EX_TABLE(83b,100b)
634         EX_TABLE(84b,100b)
635         EX_TABLE(85b,100b)
636         EX_TABLE(86b,100b)
637         EX_TABLE(87b,100b)
638         EX_TABLE(88b,100b)
639         EX_TABLE(89b,100b)
640         EX_TABLE(90b,100b)
641         EX_TABLE(91b,100b)
642
643 EXPORT_SYMBOL(__copy_tofrom_user)