arch/powerpc/lib/copypage_power7.S (Linux 6.9-rc1)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

_GLOBAL(copypage_power7)
        /*
         * We prefetch both the source and destination using enhanced touch
         * instructions. We use a stream ID of 0 for the load side and
         * 1 for the store side. Since source and destination are page
         * aligned we don't need to clear the bottom 7 bits of either
         * address.
         */
        ori     r9,r3,1         /* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
        lis     r7,0x0E01       /* depth=7
                                 * units/cachelines=512 */
#else
        lis     r7,0x0E00       /* depth=7 */
        ori     r7,r7,0x1000    /* units/cachelines=32 */
#endif
        ori     r10,r7,1        /* stream=1 */

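        /*
         * Program and start the prefetch streams described above; r8 is
         * used as a scratch register by the macro.
         */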
        DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)

#ifdef CONFIG_ALTIVEC
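        /*
         * Save LR and the source/destination pointers across the call to
         * enter_vmx_ops(). A return value of 0 means the vector unit
         * cannot be used here, so fall back to the GPR copy at
         * .Lnonvmx_copy below.
         */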
        mflr    r0
        std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
        std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
        std     r0,16(r1)
        stdu    r1,-STACKFRAMESIZE(r1)
        bl      CFUNC(enter_vmx_ops)
        cmpwi   r3,0
        ld      r0,STACKFRAMESIZE+16(r1)
        ld      r3,STK_REG(R31)(r1)
        ld      r4,STK_REG(R30)(r1)
        mtlr    r0

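        /* Number of 128 byte chunks per page; the ctr drives both copy loops */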
        li      r0,(PAGE_SIZE/128)
        mtctr   r0

        beq     .Lnonvmx_copy

        addi    r1,r1,STACKFRAMESIZE

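        /* Offsets of the 2nd to 8th quadwords within each 128 byte chunk */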
        li      r6,16
        li      r7,32
        li      r8,48
        li      r9,64
        li      r10,80
        li      r11,96
        li      r12,112

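        /* Copy the page 128 bytes at a time using eight VMX registers */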
        .align  5
1:      lvx     v7,0,r4
        lvx     v6,r4,r6
        lvx     v5,r4,r7
        lvx     v4,r4,r8
        lvx     v3,r4,r9
        lvx     v2,r4,r10
        lvx     v1,r4,r11
        lvx     v0,r4,r12
        addi    r4,r4,128
        stvx    v7,0,r3
        stvx    v6,r3,r6
        stvx    v5,r3,r7
        stvx    v4,r3,r8
        stvx    v3,r3,r9
        stvx    v2,r3,r10
        stvx    v1,r3,r11
        stvx    v0,r3,r12
        addi    r3,r3,128
        bdnz    1b

        b       CFUNC(exit_vmx_ops)             /* tail call optimise */

#else
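        /*
         * No Altivec support built in: set up the loop count and a stack
         * frame, then fall through to the GPR copy.
         */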
        li      r0,(PAGE_SIZE/128)
        mtctr   r0

        stdu    r1,-STACKFRAMESIZE(r1)
#endif

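        /* GPR-only copy: r14-r20 are non-volatile, so save them first */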
.Lnonvmx_copy:
        std     r14,STK_REG(R14)(r1)
        std     r15,STK_REG(R15)(r1)
        std     r16,STK_REG(R16)(r1)
        std     r17,STK_REG(R17)(r1)
        std     r18,STK_REG(R18)(r1)
        std     r19,STK_REG(R19)(r1)
        std     r20,STK_REG(R20)(r1)

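        /* Copy the page 128 bytes at a time using sixteen GPRs */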
1:      ld      r0,0(r4)
        ld      r5,8(r4)
        ld      r6,16(r4)
        ld      r7,24(r4)
        ld      r8,32(r4)
        ld      r9,40(r4)
        ld      r10,48(r4)
        ld      r11,56(r4)
        ld      r12,64(r4)
        ld      r14,72(r4)
        ld      r15,80(r4)
        ld      r16,88(r4)
        ld      r17,96(r4)
        ld      r18,104(r4)
        ld      r19,112(r4)
        ld      r20,120(r4)
        addi    r4,r4,128
        std     r0,0(r3)
        std     r5,8(r3)
        std     r6,16(r3)
        std     r7,24(r3)
        std     r8,32(r3)
        std     r9,40(r3)
        std     r10,48(r3)
        std     r11,56(r3)
        std     r12,64(r3)
        std     r14,72(r3)
        std     r15,80(r3)
        std     r16,88(r3)
        std     r17,96(r3)
        std     r18,104(r3)
        std     r19,112(r3)
        std     r20,120(r3)
        addi    r3,r3,128
        bdnz    1b

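        /* Restore the saved non-volatile registers, pop the frame and return */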
        ld      r14,STK_REG(R14)(r1)
        ld      r15,STK_REG(R15)(r1)
        ld      r16,STK_REG(R16)(r1)
        ld      r17,STK_REG(R17)(r1)
        ld      r18,STK_REG(R18)(r1)
        ld      r19,STK_REG(R19)(r1)
        ld      r20,STK_REG(R20)(r1)
        addi    r1,r1,STACKFRAMESIZE
        blr