arch/x86/lib/copy_user_64.S

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   4  * Copyright 2002 Andi Kleen, SuSE Labs.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17 #include <asm/export.h>
  18 #include <asm/trapnr.h>
  19
  20 .macro ALIGN_DESTINATION
  21         /* check for bad alignment of destination */
  22         movl %edi,%ecx
  23         andl $7,%ecx
  24         jz 102f                         /* already aligned */
  25         subl $8,%ecx
  26         negl %ecx
  27         subl %ecx,%edx
  28 100:    movb (%rsi),%al
  29 101:    movb %al,(%rdi)
  30         incq %rsi
  31         incq %rdi
  32         decl %ecx
  33         jnz 100b
  34 102:
  35
  36         _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
  37         _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
  38 .endm
  39
  40 /*
  41  * copy_user_generic_unrolled - memory copy with exception handling.
  42  * This version is for CPUs like P4 that don't have efficient micro
  43  * code for rep movsq
  44  *
  45  * Input:
  46  * rdi destination
  47  * rsi source
  48  * rdx count
  49  *
  50  * Output:
  51  * eax uncopied bytes or 0 if successful.
  52  */
  53 SYM_FUNC_START(copy_user_generic_unrolled)
  54         ASM_STAC
  55         cmpl $8,%edx
  56         jb 20f          /* less then 8 bytes, go to byte copy loop */
  57         ALIGN_DESTINATION
  58         movl %edx,%ecx
  59         andl $63,%edx
  60         shrl $6,%ecx
  61         jz .L_copy_short_string
  62 1:      movq (%rsi),%r8
  63 2:      movq 1*8(%rsi),%r9
  64 3:      movq 2*8(%rsi),%r10
  65 4:      movq 3*8(%rsi),%r11
  66 5:      movq %r8,(%rdi)
  67 6:      movq %r9,1*8(%rdi)
  68 7:      movq %r10,2*8(%rdi)
  69 8:      movq %r11,3*8(%rdi)
  70 9:      movq 4*8(%rsi),%r8
  71 10:     movq 5*8(%rsi),%r9
  72 11:     movq 6*8(%rsi),%r10
  73 12:     movq 7*8(%rsi),%r11
  74 13:     movq %r8,4*8(%rdi)
  75 14:     movq %r9,5*8(%rdi)
  76 15:     movq %r10,6*8(%rdi)
  77 16:     movq %r11,7*8(%rdi)
  78         leaq 64(%rsi),%rsi
  79         leaq 64(%rdi),%rdi
  80         decl %ecx
  81         jnz 1b
  82 .L_copy_short_string:
  83         movl %edx,%ecx
  84         andl $7,%edx
  85         shrl $3,%ecx
  86         jz 20f
  87 18:     movq (%rsi),%r8
  88 19:     movq %r8,(%rdi)
  89         leaq 8(%rsi),%rsi
  90         leaq 8(%rdi),%rdi
  91         decl %ecx
  92         jnz 18b
  93 20:     andl %edx,%edx
  94         jz 23f
  95         movl %edx,%ecx
  96 21:     movb (%rsi),%al
  97 22:     movb %al,(%rdi)
  98         incq %rsi
  99         incq %rdi
 100         decl %ecx
 101         jnz 21b
 102 23:     xor %eax,%eax
 103         ASM_CLAC
 104         RET
 105
 106 30:     shll $6,%ecx
 107         addl %ecx,%edx
 108         jmp 60f
 109 40:     leal (%rdx,%rcx,8),%edx
 110         jmp 60f
 111 50:     movl %ecx,%edx
 112 60:     jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
 113
 114         _ASM_EXTABLE_CPY(1b, 30b)
 115         _ASM_EXTABLE_CPY(2b, 30b)
 116         _ASM_EXTABLE_CPY(3b, 30b)
 117         _ASM_EXTABLE_CPY(4b, 30b)
 118         _ASM_EXTABLE_CPY(5b, 30b)
 119         _ASM_EXTABLE_CPY(6b, 30b)
 120         _ASM_EXTABLE_CPY(7b, 30b)
 121         _ASM_EXTABLE_CPY(8b, 30b)
 122         _ASM_EXTABLE_CPY(9b, 30b)
 123         _ASM_EXTABLE_CPY(10b, 30b)
 124         _ASM_EXTABLE_CPY(11b, 30b)
 125         _ASM_EXTABLE_CPY(12b, 30b)
 126         _ASM_EXTABLE_CPY(13b, 30b)
 127         _ASM_EXTABLE_CPY(14b, 30b)
 128         _ASM_EXTABLE_CPY(15b, 30b)
 129         _ASM_EXTABLE_CPY(16b, 30b)
 130         _ASM_EXTABLE_CPY(18b, 40b)
 131         _ASM_EXTABLE_CPY(19b, 40b)
 132         _ASM_EXTABLE_CPY(21b, 50b)
 133         _ASM_EXTABLE_CPY(22b, 50b)
 134 SYM_FUNC_END(copy_user_generic_unrolled)
 135 EXPORT_SYMBOL(copy_user_generic_unrolled)
 136
 137 /* Some CPUs run faster using the string copy instructions.
 138  * This is also a lot simpler. Use them when possible.
 139  *
 140  * Only 4GB of copy is supported. This shouldn't be a problem
 141  * because the kernel normally only writes from/to page sized chunks
 142  * even if user space passed a longer buffer.
 143  * And more would be dangerous because both Intel and AMD have
 144  * errata with rep movsq > 4GB. If someone feels the need to fix
 145  * this please consider this.
 146  *
 147  * Input:
 148  * rdi destination
 149  * rsi source
 150  * rdx count
 151  *
 152  * Output:
 153  * eax uncopied bytes or 0 if successful.
 154  */
 155 SYM_FUNC_START(copy_user_generic_string)
 156         ASM_STAC
 157         cmpl $8,%edx
 158         jb 2f           /* less than 8 bytes, go to byte copy loop */
 159         ALIGN_DESTINATION
 160         movl %edx,%ecx
 161         shrl $3,%ecx
 162         andl $7,%edx
 163 1:      rep movsq
 164 2:      movl %edx,%ecx
 165 3:      rep movsb
 166         xorl %eax,%eax
 167         ASM_CLAC
 168         RET
 169
 170 11:     leal (%rdx,%rcx,8),%ecx
 171 12:     movl %ecx,%edx          /* ecx is zerorest also */
 172         jmp .Lcopy_user_handle_tail
 173
 174         _ASM_EXTABLE_CPY(1b, 11b)
 175         _ASM_EXTABLE_CPY(3b, 12b)
 176 SYM_FUNC_END(copy_user_generic_string)
 177 EXPORT_SYMBOL(copy_user_generic_string)
 178
 179 /*
 180  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 181  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 182  *
 183  * Input:
 184  * rdi destination
 185  * rsi source
 186  * rdx count
 187  *
 188  * Output:
 189  * eax uncopied bytes or 0 if successful.
 190  */
 191 SYM_FUNC_START(copy_user_enhanced_fast_string)
 192         ASM_STAC
 193         cmpl $64,%edx
 194         jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
 195         movl %edx,%ecx
 196 1:      rep movsb
 197         xorl %eax,%eax
 198         ASM_CLAC
 199         RET
 200
 201 12:     movl %ecx,%edx          /* ecx is zerorest also */
 202         jmp .Lcopy_user_handle_tail
 203
 204         _ASM_EXTABLE_CPY(1b, 12b)
 205 SYM_FUNC_END(copy_user_enhanced_fast_string)
 206 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 207
 208 /*
 209  * Try to copy last bytes and clear the rest if needed.
 210  * Since protection fault in copy_from/to_user is not a normal situation,
 211  * it is not necessary to optimize tail handling.
 212  * Don't try to copy the tail if machine check happened
 213  *
 214  * Input:
 215  * rdi destination
 216  * rsi source
 217  * rdx count
 218  *
 219  * Output:
 220  * eax uncopied bytes or 0 if successful.
 221  */
 222 SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 223         movl %edx,%ecx
 224 1:      rep movsb
 225 2:      mov %ecx,%eax
 226         ASM_CLAC
 227         RET
 228
 229         _ASM_EXTABLE_CPY(1b, 2b)
 230
 231 .Lcopy_user_handle_align:
 232         addl %ecx,%edx                  /* ecx is zerorest also */
 233         jmp .Lcopy_user_handle_tail
 234
 235 SYM_CODE_END(.Lcopy_user_handle_tail)
 236
 237 /*
 238  * copy_user_nocache - Uncached memory copy with exception handling
 239  * This will force destination out of cache for more performance.
 240  *
 241  * Note: Cached memory copy is used when destination or size is not
 242  * naturally aligned. That is:
 243  *  - Require 8-byte alignment when size is 8 bytes or larger.
 244  *  - Require 4-byte alignment when size is 4 bytes.
 245  */
 246 SYM_FUNC_START(__copy_user_nocache)
 247         ASM_STAC
 248
 249         /* If size is less than 8 bytes, go to 4-byte copy */
 250         cmpl $8,%edx
 251         jb .L_4b_nocache_copy_entry
 252
 253         /* If destination is not 8-byte aligned, "cache" copy to align it */
 254         ALIGN_DESTINATION
 255
 256         /* Set 4x8-byte copy count and remainder */
 257         movl %edx,%ecx
 258         andl $63,%edx
 259         shrl $6,%ecx
 260         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 261
 262         /* Perform 4x8-byte nocache loop-copy */
 263 .L_4x8b_nocache_copy_loop:
 264 1:      movq (%rsi),%r8
 265 2:      movq 1*8(%rsi),%r9
 266 3:      movq 2*8(%rsi),%r10
 267 4:      movq 3*8(%rsi),%r11
 268 5:      movnti %r8,(%rdi)
 269 6:      movnti %r9,1*8(%rdi)
 270 7:      movnti %r10,2*8(%rdi)
 271 8:      movnti %r11,3*8(%rdi)
 272 9:      movq 4*8(%rsi),%r8
 273 10:     movq 5*8(%rsi),%r9
 274 11:     movq 6*8(%rsi),%r10
 275 12:     movq 7*8(%rsi),%r11
 276 13:     movnti %r8,4*8(%rdi)
 277 14:     movnti %r9,5*8(%rdi)
 278 15:     movnti %r10,6*8(%rdi)
 279 16:     movnti %r11,7*8(%rdi)
 280         leaq 64(%rsi),%rsi
 281         leaq 64(%rdi),%rdi
 282         decl %ecx
 283         jnz .L_4x8b_nocache_copy_loop
 284
 285         /* Set 8-byte copy count and remainder */
 286 .L_8b_nocache_copy_entry:
 287         movl %edx,%ecx
 288         andl $7,%edx
 289         shrl $3,%ecx
 290         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 291
 292         /* Perform 8-byte nocache loop-copy */
 293 .L_8b_nocache_copy_loop:
 294 20:     movq (%rsi),%r8
 295 21:     movnti %r8,(%rdi)
 296         leaq 8(%rsi),%rsi
 297         leaq 8(%rdi),%rdi
 298         decl %ecx
 299         jnz .L_8b_nocache_copy_loop
 300
 301         /* If no byte left, we're done */
 302 .L_4b_nocache_copy_entry:
 303         andl %edx,%edx
 304         jz .L_finish_copy
 305
 306         /* If destination is not 4-byte aligned, go to byte copy: */
 307         movl %edi,%ecx
 308         andl $3,%ecx
 309         jnz .L_1b_cache_copy_entry
 310
 311         /* Set 4-byte copy count (1 or 0) and remainder */
 312         movl %edx,%ecx
 313         andl $3,%edx
 314         shrl $2,%ecx
 315         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 316
 317         /* Perform 4-byte nocache copy: */
 318 30:     movl (%rsi),%r8d
 319 31:     movnti %r8d,(%rdi)
 320         leaq 4(%rsi),%rsi
 321         leaq 4(%rdi),%rdi
 322
 323         /* If no bytes left, we're done: */
 324         andl %edx,%edx
 325         jz .L_finish_copy
 326
 327         /* Perform byte "cache" loop-copy for the remainder */
 328 .L_1b_cache_copy_entry:
 329         movl %edx,%ecx
 330 .L_1b_cache_copy_loop:
 331 40:     movb (%rsi),%al
 332 41:     movb %al,(%rdi)
 333         incq %rsi
 334         incq %rdi
 335         decl %ecx
 336         jnz .L_1b_cache_copy_loop
 337
 338         /* Finished copying; fence the prior stores */
 339 .L_finish_copy:
 340         xorl %eax,%eax
 341         ASM_CLAC
 342         sfence
 343         RET
 344
 345 .L_fixup_4x8b_copy:
 346         shll $6,%ecx
 347         addl %ecx,%edx
 348         jmp .L_fixup_handle_tail
 349 .L_fixup_8b_copy:
 350         lea (%rdx,%rcx,8),%rdx
 351         jmp .L_fixup_handle_tail
 352 .L_fixup_4b_copy:
 353         lea (%rdx,%rcx,4),%rdx
 354         jmp .L_fixup_handle_tail
 355 .L_fixup_1b_copy:
 356         movl %ecx,%edx
 357 .L_fixup_handle_tail:
 358         sfence
 359         jmp .Lcopy_user_handle_tail
 360
 361         _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
 362         _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
 363         _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
 364         _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
 365         _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
 366         _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
 367         _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
 368         _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
 369         _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
 370         _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
 371         _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
 372         _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
 373         _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
 374         _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
 375         _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
 376         _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
 377         _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
 378         _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
 379         _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
 380         _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
 381         _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
 382         _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
 383 SYM_FUNC_END(__copy_user_nocache)
 384 EXPORT_SYMBOL(__copy_user_nocache)