perf stat aggregation: Add separate node member
[linux-2.6-microblaze.git] / arch / hexagon / mm / copy_user_template.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
4  */
5
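6 /* Numerology of the four-digit numeric labels (e.g. 8080:) used below:
7  * WXYZ
8  * W: width of the access in bytes
9  * X: Load=0, Store=1
10  * Y: where the access sits: 0=preamble,8=loop,9=epilog
11  * Z: 0=location of the access,9=its handler
12  */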
13         .text
14         .global FUNCNAME
15         .type FUNCNAME, @function
16         .p2align 5
/*
 * FUNCNAME: copy "bytes" bytes from "src" to "dst", using the widest access
 * (8, 4, 2 or 1 bytes) that the alignment of both pointers allows.
 *
 * FUNCNAME, bytes, dst, src, loopcount, d_dbuf, w_dbuf and src_dst_sav are
 * not defined in this file; presumably the file that includes this template
 * defines them as register aliases - TODO confirm against the includer.
 *
 * Every path visible here returns through r31 with r0 = 0. The numeric
 * labels (8080, 1190, ...) each sit on a packet containing the load or store
 * they are named after. Nothing in this file refers to them; presumably the
 * includer attaches fault handlers to them - TODO confirm.
 */
17 FUNCNAME:
18         {
19                 p0 = cmp.gtu(bytes,#0)
20                 if (!p0.new) jump:nt .Ldone  /* nothing to copy */
21                 r3 = or(dst,src)  /* a low bit is clear here only if it is clear in both pointers */
22                 r4 = xor(dst,src)  /* a low bit is clear here if both pointers agree on it */
23         }
24         {
25                 p1 = cmp.gtu(bytes,#15)  /* p1: more than 15 bytes to copy */
26                 p0 = bitsclr(r3,#7)  /* p0: dst and src are both 8-byte aligned */
27                 if (!p0.new) jump:nt .Loop_not_aligned_8
28                 src_dst_sav = combine(src,dst)  /* copy of the current src and dst; not read again in this file */
29         }
30
31         {
32                 loopcount = lsr(bytes,#3)  /* number of whole doublewords */
33                 if (!p1) jump .Lsmall  /* 15 bytes or fewer: let the byte loop handle it */
34         }
        /*
         * Doubleword loop. Within the loop packet the store writes the value
         * that was loaded on the previous pass while the load fetches the
         * next one. The store is predicated on p3 so that it can be skipped
         * on the first pass, when nothing has been loaded yet
         * (NOTE(review): this relies on sp1loop0 holding p3 false until the
         * first pass has completed - confirm against the Hexagon ISA manual).
         * The packet at 8190 stores the value loaded on the last pass.
         */
35         p3=sp1loop0(.Loop8,loopcount)
36 .Loop8:
37 8080:
38 8180:
39         {
40                 if (p3) memd(dst++#8) = d_dbuf
41                 d_dbuf = memd(src++#8)
42         }:endloop0
43 8190:
44         {
45                 memd(dst++#8) = d_dbuf
46                 bytes -= asl(loopcount,#3)  /* bytes left over after the doubleword copies */
47                 jump .Lsmall  /* copy any remainder bytewise */
48         }
49
50 .Loop_not_aligned_8:
51         {
52                 p0 = bitsclr(r4,#7)  /* p0: dst and src have the same low three address bits */
53                 if (p0.new) jump:nt .Lalign  /* they can be brought to 8-byte alignment together */
54         }
55         {
56                 p0 = bitsclr(r3,#3)  /* p0: dst and src are both 4-byte aligned */
57                 if (!p0.new) jump:nt .Loop_not_aligned_4
58                 p1 = cmp.gtu(bytes,#7)  /* p1: more than 7 bytes to copy */
59         }
60
61         {
62                 if (!p1) jump .Lsmall  /* 7 bytes or fewer: let the byte loop handle it */
63                 loopcount = lsr(bytes,#2)  /* number of whole words */
64         }
        /* Word loop; same pipelined load/store shape as .Loop8. */
65         p3=sp1loop0(.Loop4,loopcount)
66 .Loop4:
67 4080:
68 4180:
69         {
70                 if (p3) memw(dst++#4) = w_dbuf
71                 w_dbuf = memw(src++#4)
72         }:endloop0
73 4190:
74         {
75                 memw(dst++#4) = w_dbuf
76                 bytes -= asl(loopcount,#2)  /* bytes left over after the word copies */
77                 jump .Lsmall
78         }
79
80 .Loop_not_aligned_4:
81         {
82                 p0 = bitsclr(r3,#1)  /* p0: dst and src are both 2-byte aligned */
83                 if (!p0.new) jump:nt .Loop_not_aligned  /* at least one odd address: bytewise copy */
84                 p1 = cmp.gtu(bytes,#3)  /* p1: more than 3 bytes to copy */
85         }
86
87         {
88                 if (!p1) jump .Lsmall  /* 3 bytes or fewer: let the byte loop handle it */
89                 loopcount = lsr(bytes,#1)  /* number of whole halfwords */
90         }
        /* Halfword loop; same pipelined load/store shape as .Loop8. */
91         p3=sp1loop0(.Loop2,loopcount)
92 .Loop2:
93 2080:
94 2180:
95         {
96                 if (p3) memh(dst++#2) = w_dbuf
97                 w_dbuf = memuh(src++#2)
98         }:endloop0
99 2190:
100         {
101                 memh(dst++#2) = w_dbuf
102                 bytes -= asl(loopcount,#1)  /* bytes left over after the halfword copies */
103                 jump .Lsmall
104         }
105
106 .Loop_not_aligned: /* Works for as small as one byte */
        /*
         * Byte loop over all remaining bytes; same pipelined load/store shape
         * as .Loop8. Every caller in this file arrives with bytes > 0.
         */
107         p3=sp1loop0(.Loop1,bytes)
108 .Loop1:
109 1080:
110 1180:
111         {
112                 if (p3) memb(dst++#1) = w_dbuf
113                 w_dbuf = memub(src++#1)
114         }:endloop0
115         /* Done */
116 1190:
117         {
118                 memb(dst) = w_dbuf  /* store the byte loaded on the last pass */
119                 jumpr r31  /* return to the address held in r31 */
120                 r0 = #0  /* return value 0 */
121         }
122
123 .Lsmall:
        /* Copy whatever is left bytewise, or fall through to .Ldone if nothing is left. */
124         {
125                 p0 = cmp.gtu(bytes,#0)
126                 if (p0.new) jump:nt .Loop_not_aligned
127         }
128 .Ldone:
129         {
130                 r0 = #0  /* return value 0 */
131                 jumpr r31
132         }
133         .falign
        /*
         * .Lalign is reached from .Loop_not_aligned_8 when dst and src have
         * the same low three address bits but are not both 8-byte aligned.
         * p1 still holds "bytes > 15" from the second packet of FUNCNAME, so
         * 15 bytes or fewer go straight to .Lsmall. Otherwise, for each of
         * src bits 0, 1 and 2 that is set, 1, 2 or 4 bytes are copied and
         * dst, src and bytes are adjusted to match; the routine then restarts
         * at FUNCNAME with the advanced pointers.
         */
134 .Lalign:
135 1000:
136         {
137                 if (p0.new) w_dbuf = memub(src)
138                 p0 = tstbit(src,#0)  /* p0: src is at an odd address */
139                 if (!p1) jump .Lsmall
140         }
141 1100:
142         {
143                 if (p0) memb(dst++#1) = w_dbuf
144                 if (p0) bytes = add(bytes,#-1)
145                 if (p0) src = add(src,#1)
146         }
147 2000:
148         {
149                 if (p0.new) w_dbuf = memuh(src)
150                 p0 = tstbit(src,#1)  /* p0: src is not yet 4-byte aligned */
151                 if (!p1) jump .Lsmall
152         }
153 2100:
154         {
155                 if (p0) memh(dst++#2) = w_dbuf
156                 if (p0) bytes = add(bytes,#-2)
157                 if (p0) src = add(src,#2)
158         }
159 4000:
160         {
161                 if (p0.new) w_dbuf = memw(src)
162                 p0 = tstbit(src,#2)  /* p0: src is not yet 8-byte aligned */
163                 if (!p1) jump .Lsmall
164         }
165 4100:
166         {
167                 if (p0) memw(dst++#4) = w_dbuf
168                 if (p0) bytes = add(bytes,#-4)
169                 if (p0) src = add(src,#4)
170                 jump FUNCNAME  /* start over with the adjusted dst, src and bytes */
171         }
172         .size FUNCNAME,.-FUNCNAME