powerpc: clean inclusions of asm/feature-fixups.h
[linux-2.6-microblaze.git] / arch / powerpc / lib / copyuser_64.S
1 /*
2  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
11 #include <asm/export.h>
12 #include <asm/asm-compat.h>
13 #include <asm/feature-fixups.h>
14
/*
 * Endian-neutral shift mnemonics.  They are used by the unaligned-source
 * path (.Lsrc_unaligned), which builds each destination doubleword out of
 * two neighbouring source doublewords.  The same instruction sequence then
 * works for both byte orders: only the direction of the shift changes.
 */
15 #ifdef __BIG_ENDIAN__
16 #define sLd sld         /* Shift towards low-numbered address. */
17 #define sHd srd         /* Shift towards high-numbered address. */
18 #else
19 #define sLd srd         /* Shift towards low-numbered address. */
20 #define sHd sld         /* Shift towards high-numbered address. */
21 #endif
22
23         .align  7
/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * Register usage, as established by the code below:
 *   r3 = destination address (all stores go through r3)
 *   r4 = source address (all loads go through r4)
 *   r5 = number of bytes to copy
 * Result in r3: 0 when everything was copied, otherwise the number of
 * bytes not copied (worked out by the exception handlers further down).
 *
 * The incoming r3/r4/r5 are spilled to -24/-16/-8(r1) so that the
 * exception handlers can tell how far the copy got.
 *
 * cr7 carries the low four bits of the (remaining) length; the tail code
 * tests cr7 bit 0/1/2/3 to decide whether to move 8/4/2/1 bytes.
 *
 * NOTE(review): the first feature section presumably leaves the nop in
 * place when CPU_FTR_VMX_COPY is clear and otherwise patches in the branch
 * to __copy_tofrom_user_power7 -- confirm against asm/feature-fixups.h.
 */
24 _GLOBAL_TOC(__copy_tofrom_user)
25 #ifdef CONFIG_PPC_BOOK3S_64
26 BEGIN_FTR_SECTION
27         nop
28 FTR_SECTION_ELSE
29         b       __copy_tofrom_user_power7
30 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
31 #endif
32 _GLOBAL(__copy_tofrom_user_base)
33         /* first check for a whole page copy on a page boundary */
34         cmpldi  cr1,r5,16       /* cr1: len compared with 16 (unsigned) */
35         cmpdi   cr6,r5,4096     /* cr6.eq: len == 4096 */
36         or      r0,r3,r4
37         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
38         andi.   r0,r0,4095      /* cr0.eq: dest and src both 4096-aligned */
39         std     r3,-24(r1)      /* save original dest for the handlers */
40         crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq &= cr6.eq */
41         std     r4,-16(r1)      /* save original src */
42         std     r5,-8(r1)       /* save original len */
43         dcbt    0,r4            /* touch the first source cache block */
44         beq     .Lcopy_page_4K  /* aligned, exactly 4096 bytes */
45         andi.   r6,r6,7         /* r6 = bytes to 8-byte dest bdry; cr0.eq if 0 */
46         PPC_MTOCRF(0x01,r5)     /* low 4 bits of len into cr7 */
47         blt     cr1,.Lshort_copy        /* if < 16 bytes to copy */
48 /* Below we want to nop out the bne if we're on a CPU that has the
49  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
50  * cleared.
51  * At the time of writing the only CPU that has this combination of bits
52  * set is Power6.
53  */
54 BEGIN_FTR_SECTION
55         nop
56 FTR_SECTION_ELSE
57         bne     .Ldst_unaligned
58 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
59                     CPU_FTR_UNALIGNED_LD_STD)
60 .Ldst_aligned:
61         addi    r3,r3,-16       /* r3 = dest - 16; the paths below compensate */
62 BEGIN_FTR_SECTION
63         andi.   r0,r4,7         /* r0 = src & 7 */
64         bne     .Lsrc_unaligned
65 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
66         blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
67         srdi    r0,r5,5         /* r0 = len / 32 = loop count */
68         cmpdi   cr1,r0,0        /* cr1.eq: no complete 32-byte chunk */
69 20:     ld      r7,0(r4)        /* preload the first 16 bytes */
70 220:    ld      r6,8(r4)
71         addi    r4,r4,16
72         mtctr   r0
73         andi.   r0,r5,0x10      /* odd 16-byte chunk in len? */
74         beq     22f             /* no: enter the loop with r7/r6 pending */
75         addi    r3,r3,16        /* yes: undo the bias and the src advance, */
76         addi    r4,r4,-16       /* and treat the 16 bytes as pending in r9/r8 */
77         mr      r9,r7
78         mr      r8,r6
79         beq     cr1,72f         /* fewer than 32 bytes: just store r9/r8 */
   /*
    * Software-pipelined loop, 32 bytes per iteration: each pass stores the
    * 16 bytes loaded earlier while loading the next 16.
    */
80 21:     ld      r7,16(r4)
81 221:    ld      r6,24(r4)
82         addi    r4,r4,32
83 70:     std     r9,0(r3)
84 270:    std     r8,8(r3)
85 22:     ld      r9,0(r4)
86 222:    ld      r8,8(r4)
87 71:     std     r7,16(r3)
88 271:    std     r6,24(r3)
89         addi    r3,r3,32
90         bdnz    21b
91 72:     std     r9,0(r3)        /* drain the last pending 16 bytes */
92 272:    std     r8,8(r3)
93         andi.   r5,r5,0xf       /* anything left (1..15 bytes)? */
94         beq+    3f              /* no: done */
95         addi    r4,r4,16
96 .Ldo_tail:
97         addi    r3,r3,16
98         bf      cr7*4+0,246f    /* skip unless len & 8 */
99 244:    ld      r9,0(r4)
100         addi    r4,r4,8
101 245:    std     r9,0(r3)
102         addi    r3,r3,8
103 246:    bf      cr7*4+1,1f      /* skip unless len & 4 */
104 23:     lwz     r9,0(r4)
105         addi    r4,r4,4
106 73:     stw     r9,0(r3)
107         addi    r3,r3,4
108 1:      bf      cr7*4+2,2f      /* skip unless len & 2 */
109 44:     lhz     r9,0(r4)
110         addi    r4,r4,2
111 74:     sth     r9,0(r3)
112         addi    r3,r3,2
113 2:      bf      cr7*4+3,3f      /* skip unless len & 1 */
114 45:     lbz     r9,0(r4)
115 75:     stb     r9,0(r3)
116 3:      li      r3,0            /* success: 0 bytes not copied */
117         blr
118
/*
 * Reached from .Ldst_aligned when src & 7 != 0.  On entry r0 = src & 7 and
 * r3 = dest - 16.  r4 is rounded down to an 8-byte boundary and every
 * destination doubleword is assembled from two neighbouring source
 * doublewords: one shifted with sLd by r10 bits, the other shifted with
 * sHd by r11 = 64 - r10 bits, then or'ed together.
 * cr0 set by the "andi. r5,r5,7" below stays live until the "bne 6f"
 * (no instruction in between writes cr0).
 */
119 .Lsrc_unaligned:
120         srdi    r6,r5,3         /* r6 = len / 8 (doublewords) */
121         addi    r5,r5,-16
122         subf    r4,r0,r4        /* round src down to an 8-byte bdry */
123         srdi    r7,r5,4         /* r7 = (len - 16) / 16 = loop count */
124         sldi    r10,r0,3        /* r10 = 8 * (src & 7): shift in bits */
125         cmpldi  cr6,r6,3        /* cr6: doubleword count compared with 3 */
126         andi.   r5,r5,7         /* r5 = (len - 16) & 7; cr0 used by "bne 6f" */
127         mtctr   r7
128         subfic  r11,r10,64      /* r11 = 64 - r10 */
129         add     r5,r5,r0        /* r5 = leftover bytes + src offset */
130         bt      cr7*4+0,28f     /* len & 8 set: use the 28: entry */
131
132 24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
133 25:     ld      r0,8(r4)
134         sLd     r6,r9,r10
135 26:     ldu     r9,16(r4)
136         sHd     r7,r0,r11
137         sLd     r8,r0,r10
138         or      r7,r7,r6
139         blt     cr6,79f
140 27:     ld      r0,8(r4)
141         b       2f
142
143 28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
144 29:     ldu     r9,8(r4)
145         sLd     r8,r0,r10
146         addi    r3,r3,-8
147         blt     cr6,5f
148 30:     ld      r0,8(r4)
149         sHd     r12,r9,r11
150         sLd     r6,r9,r10
151 31:     ldu     r9,16(r4)
152         or      r12,r8,r12
153         sHd     r7,r0,r11
154         sLd     r8,r0,r10
155         addi    r3,r3,16
156         beq     cr6,78f
157
   /* main loop: two loads and two stores (16 bytes) per iteration */
158 1:      or      r7,r7,r6
159 32:     ld      r0,8(r4)
160 76:     std     r12,8(r3)
161 2:      sHd     r12,r9,r11
162         sLd     r6,r9,r10
163 33:     ldu     r9,16(r4)
164         or      r12,r8,r12
165 77:     stdu    r7,16(r3)
166         sHd     r7,r0,r11
167         sLd     r8,r0,r10
168         bdnz    1b
169
170 78:     std     r12,8(r3)
171         or      r7,r7,r6
172 79:     std     r7,16(r3)
173 5:      sHd     r12,r9,r11
174         or      r12,r8,r12
175 80:     std     r12,24(r3)
176         bne     6f              /* cr0 from the andi. above: leftover bytes? */
177         li      r3,0            /* no: success, 0 bytes not copied */
178         blr
179 6:      cmpwi   cr1,r5,8
180         addi    r3,r3,32
181         sLd     r9,r9,r10
182         ble     cr1,7f          /* r5 <= 8: no further source load needed */
183 34:     ld      r0,8(r4)
184         sHd     r7,r0,r11
185         or      r9,r7,r9
   /*
    * r9 now holds the leftover bytes.  Store 4/2/1 of them according to
    * cr7 bits 1/2/3, rotating r9 so the next piece lands in the low-order
    * end: before each store on big-endian, after it on little-endian.
    */
186 7:
187         bf      cr7*4+1,1f      /* skip unless len & 4 */
188 #ifdef __BIG_ENDIAN__
189         rotldi  r9,r9,32
190 #endif
191 94:     stw     r9,0(r3)
192 #ifdef __LITTLE_ENDIAN__
193         rotrdi  r9,r9,32
194 #endif
195         addi    r3,r3,4
196 1:      bf      cr7*4+2,2f      /* skip unless len & 2 */
197 #ifdef __BIG_ENDIAN__
198         rotldi  r9,r9,16
199 #endif
200 95:     sth     r9,0(r3)
201 #ifdef __LITTLE_ENDIAN__
202         rotrdi  r9,r9,16
203 #endif
204         addi    r3,r3,2
205 2:      bf      cr7*4+3,3f      /* skip unless len & 1 */
206 #ifdef __BIG_ENDIAN__
207         rotldi  r9,r9,8
208 #endif
209 96:     stb     r9,0(r3)
210 #ifdef __LITTLE_ENDIAN__
211         rotrdi  r9,r9,8
212 #endif
213 3:      li      r3,0            /* success: 0 bytes not copied */
214         blr
215
/*
 * Destination is not 8-byte aligned.  r6 holds the number of bytes (1..7)
 * needed to reach the next 8-byte destination boundary.  Copy 1, 2 and/or
 * 4 bytes according to the bits of r6, using r7 as the running offset from
 * r3/r4, then rejoin the aligned path with the reduced length.
 */
216 .Ldst_unaligned:
217         PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
218         subf    r5,r6,r5        /* len -= bytes used to align dest */
219         li      r7,0            /* r7 = offset of the next byte to copy */
220         cmpldi  cr1,r5,16       /* redo the "< 16" test for the new len */
221         bf      cr7*4+3,1f      /* skip unless r6 & 1 */
222 35:     lbz     r0,0(r4)
223 81:     stb     r0,0(r3)
224         addi    r7,r7,1
225 1:      bf      cr7*4+2,2f      /* skip unless r6 & 2 */
226 36:     lhzx    r0,r7,r4
227 82:     sthx    r0,r7,r3
228         addi    r7,r7,2
229 2:      bf      cr7*4+1,3f      /* skip unless r6 & 4 */
230 37:     lwzx    r0,r7,r4
231 83:     stwx    r0,r7,r3
232 3:      PPC_MTOCRF(0x01,r5)     /* low 4 bits of the remaining len into cr7 */
233         add     r4,r6,r4        /* advance src past the copied bytes */
234         add     r3,r6,r3        /* advance dest; now 8-byte aligned */
235         b       .Ldst_aligned
236
/*
 * Fewer than 16 bytes to copy.  cr7 holds the low four bits of the length
 * (loaded by the PPC_MTOCRF in the entry code); move 8 bytes (as two
 * words), then 4, 2 and 1 bytes according to those bits.
 */
237 .Lshort_copy:
238         bf      cr7*4+0,1f      /* skip unless len & 8 */
239 38:     lwz     r0,0(r4)
240 39:     lwz     r9,4(r4)
241         addi    r4,r4,8
242 84:     stw     r0,0(r3)
243 85:     stw     r9,4(r3)
244         addi    r3,r3,8
245 1:      bf      cr7*4+1,2f      /* skip unless len & 4 */
246 40:     lwz     r0,0(r4)
247         addi    r4,r4,4
248 86:     stw     r0,0(r3)
249         addi    r3,r3,4
250 2:      bf      cr7*4+2,3f      /* skip unless len & 2 */
251 41:     lhz     r0,0(r4)
252         addi    r4,r4,2
253 87:     sth     r0,0(r3)
254         addi    r3,r3,2
255 3:      bf      cr7*4+3,4f      /* skip unless len & 1 */
256 42:     lbz     r0,0(r4)
257 88:     stb     r0,0(r3)
258 4:      li      r3,0            /* success: 0 bytes not copied */
259         blr
260
261 /*
262  * exception handlers follow
263  * we have to return the number of bytes not copied
264  * for an exception on a load, the remainder is retried one byte at a
 * time and whatever still cannot be copied is reported as not copied
 * (the code visible here does not zero the destination)
 *
 * The labels below are fall-through entry points: each one first
 * advances r3 by the amount needed to make it point at the first
 * destination byte that has not been written.
265  */
266
267 136:
268 137:
269         add     r3,r3,r7        /* r7 = offset used by the lhzx/lwzx at 36/37 */
270         b       1f
271 130:
272 131:
273         addi    r3,r3,8         /* entered at 130/131: +24 in total */
274 120:
275 320:
276 122:
277 322:
278 124:
279 125:
280 126:
281 127:
282 128:
283 129:
284 133:
285         addi    r3,r3,8         /* entered at 120..133: +16 in total */
286 132:
287         addi    r3,r3,8         /* entered at 132: +8 */
288 121:
289 321:
290 344:
291 134:
292 135:
293 138:
294 139:
295 140:
296 141:
297 142:
298 123:
299 144:
300 145:
301
302 /*
303  * here we have had a fault on a load and r3 points to the first
304  * unmodified byte of the destination
305  */
306 1:      ld      r6,-24(r1)      /* original dest */
307         ld      r4,-16(r1)      /* original src */
308         ld      r5,-8(r1)       /* original len */
309         subf    r6,r6,r3        /* r6 = bytes copied so far */
310         add     r4,r4,r6        /* r4 = src of the first uncopied byte */
311         subf    r5,r6,r5        /* #bytes left to go */
312
313 /*
314  * first see if we can copy any more bytes before hitting another exception
315  */
316         mtctr   r5
317 43:     lbz     r0,0(r4)
318         addi    r4,r4,1
319 89:     stb     r0,0(r3)
320         addi    r3,r3,1
321         bdnz    43b
322         li      r3,0            /* huh? all copied successfully this time? */
323         blr
324
325 /*
326  * here we have trapped again, amount remaining is in ctr.
327  */
328 143:    mfctr   r3              /* return the number of bytes not copied */
329         blr
330
331 /*
332  * exception handlers for stores: we just need to work
333  * out how many bytes weren't copied
 *
 * The labels are fall-through entry points.  Each adds to r3 the
 * displacement (or index register) the faulting store was using, so that
 * r3 ends up at the first destination byte that was not stored.
334  */
335 182:
336 183:
337         add     r3,r3,r7        /* sthx/stwx at 82/83 index with r7 */
338         b       1f
339 371:
340 180:
341         addi    r3,r3,8         /* 371/180: store was at 24(r3) */
342 171:
343 177:
344 179:
345         addi    r3,r3,8         /* 171/177/179: store was at 16(r3) */
346 370:
347 372:
348 176:
349 178:
350         addi    r3,r3,4         /* 370/372/176/178: store was at 8(r3) */
351 185:
352         addi    r3,r3,4         /* 185: store was at 4(r3) */
353 170:
354 172:
355 345:
356 173:
357 174:
358 175:
359 181:
360 184:
361 186:
362 187:
363 188:
364 189:    
365 194:
366 195:
367 196:
368 1:
369         ld      r6,-24(r1)      /* original dest */
370         ld      r5,-8(r1)       /* original len */
371         add     r6,r6,r5        /* r6 = one past the end of the destination */
372         subf    r3,r3,r6        /* #bytes not copied */
373         blr
374
/*
 * Exception table for the copy code above.  Each entry pairs the numeric
 * label of a load or store that touches user memory with the label of its
 * handler; throughout this list the handler label is the access label
 * plus 100.  The "b" suffix picks the nearest preceding definition of the
 * numeric label, so these entries must stay ahead of the page-copy
 * routine, which reuses several of the same numbers.
 */
375         EX_TABLE(20b,120b)
376         EX_TABLE(220b,320b)
377         EX_TABLE(21b,121b)
378         EX_TABLE(221b,321b)
379         EX_TABLE(70b,170b)
380         EX_TABLE(270b,370b)
381         EX_TABLE(22b,122b)
382         EX_TABLE(222b,322b)
383         EX_TABLE(71b,171b)
384         EX_TABLE(271b,371b)
385         EX_TABLE(72b,172b)
386         EX_TABLE(272b,372b)
387         EX_TABLE(244b,344b)
388         EX_TABLE(245b,345b)
389         EX_TABLE(23b,123b)
390         EX_TABLE(73b,173b)
391         EX_TABLE(44b,144b)
392         EX_TABLE(74b,174b)
393         EX_TABLE(45b,145b)
394         EX_TABLE(75b,175b)
395         EX_TABLE(24b,124b)
396         EX_TABLE(25b,125b)
397         EX_TABLE(26b,126b)
398         EX_TABLE(27b,127b)
399         EX_TABLE(28b,128b)
400         EX_TABLE(29b,129b)
401         EX_TABLE(30b,130b)
402         EX_TABLE(31b,131b)
403         EX_TABLE(32b,132b)
404         EX_TABLE(76b,176b)
405         EX_TABLE(33b,133b)
406         EX_TABLE(77b,177b)
407         EX_TABLE(78b,178b)
408         EX_TABLE(79b,179b)
409         EX_TABLE(80b,180b)
410         EX_TABLE(34b,134b)
411         EX_TABLE(94b,194b)
412         EX_TABLE(95b,195b)
413         EX_TABLE(96b,196b)
414         EX_TABLE(35b,135b)
415         EX_TABLE(81b,181b)
416         EX_TABLE(36b,136b)
417         EX_TABLE(82b,182b)
418         EX_TABLE(37b,137b)
419         EX_TABLE(83b,183b)
420         EX_TABLE(38b,138b)
421         EX_TABLE(39b,139b)
422         EX_TABLE(84b,184b)
423         EX_TABLE(85b,185b)
424         EX_TABLE(40b,140b)
425         EX_TABLE(86b,186b)
426         EX_TABLE(41b,141b)
427         EX_TABLE(87b,187b)
428         EX_TABLE(42b,142b)
429         EX_TABLE(88b,188b)
430         EX_TABLE(43b,143b)
431         EX_TABLE(89b,189b)
432
433 /*
434  * Routine to copy a whole page of data, optimized for POWER4.
435  * On POWER4 it is more than 50% faster than the simple loop
436  * above (following the .Ldst_aligned label).
 *
 * Entered only when dest and src are both 4096-byte aligned and the
 * length is exactly 4096.  r20-r31 are saved below r1 (at -32..-120,
 * next to the r3/r4/r5 spill slots) and restored before returning.
 * The first phase walks six streams 128 bytes apart (displacements
 * 0, 128, ..., 640); the remainder is copied 32 bytes per iteration.
 * cr0 from "cmpwi r5,24" is still live at "bge 0b": nothing in between
 * writes cr0.
437  */
438 .Lcopy_page_4K:
439         std     r31,-32(1)
440         std     r30,-40(1)
441         std     r29,-48(1)
442         std     r28,-56(1)
443         std     r27,-64(1)
444         std     r26,-72(1)
445         std     r25,-80(1)
446         std     r24,-88(1)
447         std     r23,-96(1)
448         std     r22,-104(1)
449         std     r21,-112(1)
450         std     r20,-120(1)
451         li      r5,4096/32 - 1  /* r5 = count of 32-byte units, less one */
452         addi    r3,r3,-8        /* bias dest: stores below start at 8(r3) */
453         li      r0,5            /* inner loop count */
454 0:      addi    r5,r5,-24
455         mtctr   r0
456 20:     ld      r22,640(4)
457 21:     ld      r21,512(4)
458 22:     ld      r20,384(4)
459 23:     ld      r11,256(4)
460 24:     ld      r9,128(4)
461 25:     ld      r7,0(4)
462 26:     ld      r25,648(4)
463 27:     ld      r24,520(4)
464 28:     ld      r23,392(4)
465 29:     ld      r10,264(4)
466 30:     ld      r8,136(4)
467 31:     ldu     r6,8(4)
468         cmpwi   r5,24           /* cr0 decides the "bge 0b" after the loop */
469 1:
470 32:     std     r22,648(3)
471 33:     std     r21,520(3)
472 34:     std     r20,392(3)
473 35:     std     r11,264(3)
474 36:     std     r9,136(3)
475 37:     std     r7,8(3)
476 38:     ld      r28,648(4)
477 39:     ld      r27,520(4)
478 40:     ld      r26,392(4)
479 41:     ld      r31,264(4)
480 42:     ld      r30,136(4)
481 43:     ld      r29,8(4)
482 44:     std     r25,656(3)
483 45:     std     r24,528(3)
484 46:     std     r23,400(3)
485 47:     std     r10,272(3)
486 48:     std     r8,144(3)
487 49:     std     r6,16(3)
488 50:     ld      r22,656(4)
489 51:     ld      r21,528(4)
490 52:     ld      r20,400(4)
491 53:     ld      r11,272(4)
492 54:     ld      r9,144(4)
493 55:     ld      r7,16(4)
494 56:     std     r28,664(3)
495 57:     std     r27,536(3)
496 58:     std     r26,408(3)
497 59:     std     r31,280(3)
498 60:     std     r30,152(3)
499 61:     stdu    r29,24(3)
500 62:     ld      r25,664(4)
501 63:     ld      r24,536(4)
502 64:     ld      r23,408(4)
503 65:     ld      r10,280(4)
504 66:     ld      r8,152(4)
505 67:     ldu     r6,24(4)
506         bdnz    1b
507 68:     std     r22,648(3)
508 69:     std     r21,520(3)
509 70:     std     r20,392(3)
510 71:     std     r11,264(3)
511 72:     std     r9,136(3)
512 73:     std     r7,8(3)
513 74:     addi    r4,r4,640       /* step src past the six streams */
514 75:     addi    r3,r3,648       /* step dest likewise */
515         bge     0b              /* cr0 from the cmpwi above */
516         mtctr   r5              /* remaining 32-byte iterations */
517 76:     ld      r7,0(4)
518 77:     ld      r8,8(4)
519 78:     ldu     r9,16(4)
520 3:
521 79:     ld      r10,8(4)
522 80:     std     r7,8(3)
523 81:     ld      r7,16(4)
524 82:     std     r8,16(3)
525 83:     ld      r8,24(4)
526 84:     std     r9,24(3)
527 85:     ldu     r9,32(4)
528 86:     stdu    r10,32(3)
529         bdnz    3b
530 4:
531 87:     ld      r10,8(4)
532 88:     std     r7,8(3)
533 89:     std     r8,16(3)
534 90:     std     r9,24(3)
535 91:     std     r10,32(3)
536 9:      ld      r20,-120(1)     /* restore r20-r31 */
537         ld      r21,-112(1)
538         ld      r22,-104(1)
539         ld      r23,-96(1)
540         ld      r24,-88(1)
541         ld      r25,-80(1)
542         ld      r26,-72(1)
543         ld      r27,-64(1)
544         ld      r28,-56(1)
545         ld      r29,-48(1)
546         ld      r30,-40(1)
547         ld      r31,-32(1)
548         li      r3,0            /* success: 0 bytes not copied */
549         blr
550
551 /*
552  * on an exception, reset to the beginning and jump back into the
553  * standard __copy_tofrom_user
 *
 * r20-r31 are restored first, then the original dest/src are reloaded
 * from their spill slots and the length is set back to 4096.  cr1 still
 * holds the length-versus-16 compare made at function entry (the page
 * copy never writes cr1), so .Ldst_aligned takes its ">= 16 bytes" path
 * and redoes the copy with the ordinary loop and its own handlers.
554  */
555 100:    ld      r20,-120(1)
556         ld      r21,-112(1)
557         ld      r22,-104(1)
558         ld      r23,-96(1)
559         ld      r24,-88(1)
560         ld      r25,-80(1)
561         ld      r26,-72(1)
562         ld      r27,-64(1)
563         ld      r28,-56(1)
564         ld      r29,-48(1)
565         ld      r30,-40(1)
566         ld      r31,-32(1)
567         ld      r3,-24(r1)      /* original dest */
568         ld      r4,-16(r1)      /* original src */
569         li      r5,4096         /* this path is only used for len == 4096 */
570         b       .Ldst_aligned
571
/*
 * Exception table for the page-copy routine: every numbered instruction
 * from 20 to 91 in .Lcopy_page_4K shares the single handler at 100.
 * The "b" suffix picks the nearest preceding definition, so these refer
 * to the page-copy labels rather than the same numbers used earlier in
 * the file.  Labels 74 and 75 mark addi instructions, which make no
 * memory access.
 */
572         EX_TABLE(20b,100b)
573         EX_TABLE(21b,100b)
574         EX_TABLE(22b,100b)
575         EX_TABLE(23b,100b)
576         EX_TABLE(24b,100b)
577         EX_TABLE(25b,100b)
578         EX_TABLE(26b,100b)
579         EX_TABLE(27b,100b)
580         EX_TABLE(28b,100b)
581         EX_TABLE(29b,100b)
582         EX_TABLE(30b,100b)
583         EX_TABLE(31b,100b)
584         EX_TABLE(32b,100b)
585         EX_TABLE(33b,100b)
586         EX_TABLE(34b,100b)
587         EX_TABLE(35b,100b)
588         EX_TABLE(36b,100b)
589         EX_TABLE(37b,100b)
590         EX_TABLE(38b,100b)
591         EX_TABLE(39b,100b)
592         EX_TABLE(40b,100b)
593         EX_TABLE(41b,100b)
594         EX_TABLE(42b,100b)
595         EX_TABLE(43b,100b)
596         EX_TABLE(44b,100b)
597         EX_TABLE(45b,100b)
598         EX_TABLE(46b,100b)
599         EX_TABLE(47b,100b)
600         EX_TABLE(48b,100b)
601         EX_TABLE(49b,100b)
602         EX_TABLE(50b,100b)
603         EX_TABLE(51b,100b)
604         EX_TABLE(52b,100b)
605         EX_TABLE(53b,100b)
606         EX_TABLE(54b,100b)
607         EX_TABLE(55b,100b)
608         EX_TABLE(56b,100b)
609         EX_TABLE(57b,100b)
610         EX_TABLE(58b,100b)
611         EX_TABLE(59b,100b)
612         EX_TABLE(60b,100b)
613         EX_TABLE(61b,100b)
614         EX_TABLE(62b,100b)
615         EX_TABLE(63b,100b)
616         EX_TABLE(64b,100b)
617         EX_TABLE(65b,100b)
618         EX_TABLE(66b,100b)
619         EX_TABLE(67b,100b)
620         EX_TABLE(68b,100b)
621         EX_TABLE(69b,100b)
622         EX_TABLE(70b,100b)
623         EX_TABLE(71b,100b)
624         EX_TABLE(72b,100b)
625         EX_TABLE(73b,100b)
626         EX_TABLE(74b,100b)
627         EX_TABLE(75b,100b)
628         EX_TABLE(76b,100b)
629         EX_TABLE(77b,100b)
630         EX_TABLE(78b,100b)
631         EX_TABLE(79b,100b)
632         EX_TABLE(80b,100b)
633         EX_TABLE(81b,100b)
634         EX_TABLE(82b,100b)
635         EX_TABLE(83b,100b)
636         EX_TABLE(84b,100b)
637         EX_TABLE(85b,100b)
638         EX_TABLE(86b,100b)
639         EX_TABLE(87b,100b)
640         EX_TABLE(88b,100b)
641         EX_TABLE(89b,100b)
642         EX_TABLE(90b,100b)
643         EX_TABLE(91b,100b)
644
645 EXPORT_SYMBOL(__copy_tofrom_user)