/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

.pushsection .noinstr.text, "ax"

/*
 * We build a jump to memcpy_orig by default which gets NOPped out on
 * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
 * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
 * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
 */
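
/*
 * Editor's sketch (added; not in the original file): after boot-time
 * patching, the ALTERNATIVE_2 site below behaves roughly like this C
 * pseudo-code, where cpu_has() stands in for the feature check that
 * apply_alternatives() performs once at boot:
 *
 *	if (cpu_has(X86_FEATURE_ERMS))
 *		goto memcpy_erms;	// rep movsb
 *	else if (cpu_has(X86_FEATURE_REP_GOOD))
 *		;			// NOPs: fall through to rep movsq
 *	else
 *		goto memcpy_orig;	// unrolled 64-bit moves
 */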

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 */
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_LOCAL(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
SYM_FUNC_START_LOCAL(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
SYM_FUNC_END(memcpy_erms)
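
/*
 * Editor's note (added; not in the original file): "rep movsb" copies
 * %rcx bytes from (%rsi) to (%rdi), advancing both pointers. On CPUs
 * that advertise ERMS the microcode moves the data in large internal
 * chunks, which is why this short sequence can beat the unrolled
 * memcpy_orig below for most sizes.
 */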

SYM_FUNC_START_LOCAL(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether memory false dependence could occur,
	 * then jump to corresponding copy mode.
	 */
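	/*
	 * Editor's note (interpretation; not in the original file): the
	 * cmp below compares only the low bytes of the two pointers, a
	 * cheap heuristic for store-to-load aliasing. When the source's
	 * low byte is below the destination's, a forward copy would keep
	 * loading from addresses whose low bits match just-written
	 * destination lines, so we copy from the tail backward instead.
	 */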
	cmp  %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae  .Lcopy_forward_loop
	addl $0x20, %edx
	jmp  .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle,
	 * so append NOPs in the same 16-byte chunk.
	 */
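	/*
	 * Editor's note (interpretation; not in the original file): the
	 * .p2align below pads with NOPs so that .Lcopy_backward_loop
	 * starts on a 16-byte boundary; the comment above argues those
	 * NOPs share a 16-byte fetch window with the three ALU ops that
	 * precede the loop, so the padding costs nothing.
	 */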
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae  .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb   .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
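	/*
	 * Editor's worked example (added; not in the original file): the
	 * two 16-byte windows above may overlap. For %rdx = 20 the first
	 * pair of movqs covers bytes 0-15 and the second pair covers
	 * bytes 4-19; bytes 4-15 are simply written twice, which is
	 * cheaper than branching on the exact length.
	 */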
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb   .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb   .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Ldone
	/*
	 * Move data from 1 byte to 3 bytes.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Ldone:
	retq
SYM_FUNC_END(memcpy_orig)
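
/*
 * Editor's summary (sketch; not in the original file): the tail
 * handling above is roughly
 *
 *	if (len >= 16)     copy two (possibly overlapping) 16-byte windows
 *	else if (len >= 8) copy two (possibly overlapping) 8-byte windows
 *	else if (len >= 4) copy two (possibly overlapping) 4-byte windows
 *	else               copy the first, second and last bytes singly
 */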

.popsection

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 */
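/*
 * Editor's note (assumed prototype; not in this file): C callers see
 * this as roughly
 *
 *	unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
 *
 * returning 0 on success, or the number of bytes not copied when a
 * machine check on a read (or a fault on a write) cuts the copy short.
 */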
SYM_FUNC_START(__memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes
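	/*
	 * Editor's note (added; not in the original file): the arithmetic
	 * above computes %ecx = 8 - (%rsi & 7), the number of leading
	 * bytes needed to reach 8-byte source alignment, and pre-deducts
	 * that count from %edx before the byte loop runs.
	 */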

.L_8byte_aligned:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words
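	/*
	 * Editor's note (assumption about the test harness; not in the
	 * original file): the MCSAFE_TEST_SRC/MCSAFE_TEST_DST macros from
	 * <asm/mcsafe_test.h> normally expand to nothing; with the mcsafe
	 * test facility enabled they can force a jump to the named .E_*
	 * fixup label to exercise the error paths without a real machine
	 * check.
	 */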
	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes
	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
.L_done:
	ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

	.section .fixup, "ax"
	/*
	 * Return number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 */
.E_read_words:
	shll	$3, %ecx
.E_leading_bytes:
	addl	%edx, %ecx
.E_trailing_bytes:
	mov	%ecx, %eax
	jmp	.L_done
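	/*
	 * Editor's worked example (added; not in the original file): on a
	 * machine check in the word loop, %ecx holds the words not yet
	 * read; shll $3 rescales that to bytes and addl %edx folds in the
	 * trailing-byte count, so the jump to .L_done returns the total
	 * bytes not copied in %eax.
	 */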
	/*
	 * For write fault handling, given the destination is unaligned,
	 * we handle faults on multi-byte writes with a byte-by-byte
	 * copy up to the write-protected page.
	 */
.E_write_words:
	shll	$3, %ecx
	addl	%edx, %ecx
	movl	%ecx, %edx
	jmp	mcsafe_handle_tail

	.previous

	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
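
/*
 * Editor's note (added; not in the original file): the _ASM_EXTABLE_FAULT
 * entries cover the reads, where a machine check is recoverable, while
 * the plain _ASM_EXTABLE entries cover ordinary page faults on the
 * writes; both redirect execution to the matching .E_* fixup above,
 * consistent with the function's header comment.
 */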