/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 *
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */
#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>
/*
 * get_sr gets the appropriate space value into
 * sr1 for kernel/user space access, depending
 * on the flag stored in the task structure.
 */
	ldw	TI_SEGMENT(%r1),%r22
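
/*
 * A hedged C-level sketch of what get_sr does (field and helper
 * naming here is approximate, not the exact kernel definitions):
 *
 *	// TI_SEGMENT is the asm-offset of the segment field in
 *	// struct thread_info; it already holds the space id to use:
 *	// the kernel space id for kernel accesses, or the space id
 *	// of the current user's address space.
 *	sr1 = current_thread_info()->segment;
 */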
/*
 * unsigned long lclear_user(void *to, unsigned long n)
 *
 * Returns 0 for success.
 * Otherwise, returns the number of bytes not transferred.
 */
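
/*
 * A rough C model of the routine below (illustrative only: the real
 * code reaches user space through %sr1 and recovers from faults via
 * the exception table, and clear_user_byte() is a made-up name):
 *
 *	unsigned long lclear_user(void *to, unsigned long n)
 *	{
 *		char *p = to;
 *
 *		while (n) {
 *			if (clear_user_byte(p++))	// zero one byte, may fault
 *				break;			// fault: stop early
 *			n--;
 *		}
 *		return n;	// 0 on success, else bytes left unzeroed
 *	}
 */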
ENTRY_CFI(lclear_user)
	comib,=,n 0,%r25,$lclu_done

	addib,<> -1,%r25,$lclu_loop
1:	stbs,ma %r0,1(%sr1,%r26)

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)
/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Outputs:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
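 *
 * A hedged sketch of how a caller consumes that return value (the
 * names below are hypothetical, for illustration only):
 *
 *	unsigned long not_copied = pa_memcpy(dst, src, len);
 *	unsigned long done = len - not_copied;	// bytes actually written
 *
 *	if (not_copied)
 *		handle_partial_copy(done);	// hypothetical fixup path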
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance for various
 * conditions. In the optimal case, we copy in unrolled loops that move
 * 32 or 16 bytes at a time using general registers. Unaligned copies
 * are handled either by aligning the destination and then using the
 * shift-and-write method, or in a few cases by falling back to a
 * byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code
 * is often more than 10x faster than a simple byte-at-a-time copy, even
 * for strangely aligned operands. It is interesting to note that the
 * glibc version of memcpy (written in C) is actually quite fast already.
 * This routine beats it by 30-40% for aligned copies because of the loop
 * unrolling, but in some cases the glibc version is still slightly
 * faster. This lends credibility to the idea that gcc can generate very
 * good code as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that these were only
 *   efficient on older machines (pre-PA8000 processors).
 */
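
/*
 * A rough C sketch of the strategy selection implemented below
 * (illustrative only; the real code also threads the sr1/sr2 space
 * registers and a fault-fixup entry through every access):
 *
 *	if (len < 16)
 *		goto byte_loop;			// too short to bother
 *	if (((unsigned long)src ^ (unsigned long)dst) & 3)
 *		goto unaligned_copy;		// shift-and-write method
 *	// src and dst share alignment: align dst byte by byte, then
 *	// run the unrolled 16-byte (or, on 64-bit, 32-byte) loops and
 *	// mop up the tail in the word and byte loops.
 */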
	/* Last destination address */

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	cmpib,<>,n 0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	cmpib,<>,n 0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
	cmpib,=,n 0,t1,.Lcopy_loop_16_start
20:	ldb,ma 1(srcspc,src),t1
21:	stb,ma t1,1(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
.Lcopy_loop_16_start:

	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd 0(srcspc,src),t1
11:	ldd 8(srcspc,src),t2

12:	std,ma t1,8(dstspc,dst)
13:	std,ma t2,8(dstspc,dst)
14:	ldd 0(srcspc,src),t1
15:	ldd 8(srcspc,src),t2

16:	std,ma t1,8(dstspc,dst)
17:	std,ma t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma 4(srcspc,src),t1
21:	stw,ma t1,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */
	/* loop until we are 32-bit aligned */
	cmpib,=,n 0,t1,.Lcopy_loop_8
20:	ldb,ma 1(srcspc,src),t1
21:	stb,ma t1,1(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw 0(srcspc,src),t1
11:	ldw 4(srcspc,src),t2
12:	stw,ma t1,4(dstspc,dst)
13:	stw,ma t2,4(dstspc,dst)
14:	ldw 8(srcspc,src),t1
15:	ldw 12(srcspc,src),t2

16:	stw,ma t1,4(dstspc,dst)
17:	stw,ma t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
	cmpclr,COND(<>) len,%r0,%r0

20:	ldb 0(srcspc,src),t1
21:	stb,ma t1,1(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
	/* src and dst are not aligned the same way. */
	/* need to go the hard way */

	/* align until dst is 32-bit word aligned */
	cmpib,=,n 0,t1,.Lcopy_dstaligned
20:	ldb 0(srcspc,src),t1
21:	stb,ma t1,1(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

	/* store src, dst and len in safe place */

	/* len now needs to hold the number of words to copy */
	/*
	 * Copy from an unaligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */
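
	/*
	 * A hedged C model of this shift-merge technique (the names and
	 * the explicit %sar computation below are illustrative; shrpw is
	 * modeled with a 64-bit shift, and PA-RISC is big-endian):
	 *
	 *	uint32_t *asrc = (uint32_t *)(src & ~3UL);	// src rounded down
	 *	unsigned int sar = 32 - 8 * (src & 3);		// shift amount
	 *	uint32_t prev = *asrc++;			// preloaded word
	 *
	 *	while (words--) {
	 *		uint32_t next = *asrc++;
	 *		// shrpw prev,next,%sar,t0: shift the 64-bit pair
	 *		// prev:next right by %sar, keep the low 32 bits
	 *		*dst32++ = (uint32_t)((((uint64_t)prev << 32) | next) >> sar);
	 *		prev = next;
	 *	}
	 *
	 * The loop below is the same idea unrolled four words deep,
	 * rotating the merged words through a0-a3.
	 */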
	/* Make src aligned by rounding it down. */
	cmpb,COND(=) %r0,len,.Lcda_finish

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)

1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)

	cmpb,COND(=),n %r0,len,.Ldo0

1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

	cmpb,COND(<>) %r0,len,.Ldo4

	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	/* calculate new src, dst and len and jump to byte-copy loop */
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)

1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	/* fault exception fixup handlers: */
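	/*
	 * If the second load of a pair faults, the word already fetched
	 * into t1 must still be stored before falling through to
	 * .Lcopy_done, so that the "bytes not copied" return value
	 * stays exact.
	 */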
10:	std,ma t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)

10:	stw,ma t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)