arch/x86/lib/copy_user_64.S

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   4  * Copyright 2002 Andi Kleen, SuSE Labs.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative-asm.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17 #include <asm/export.h>
  18
  19 .macro ALIGN_DESTINATION
     /*
      * ALIGN_DESTINATION - byte-copy until the destination is 8-byte aligned.
      *
      * In:   rdi destination, rsi source, edx byte count.
      *       Every expansion in this file sits right after a
      *       "cmpl $8,%edx; jb ..." check, so edx >= 8 here and removing
      *       the at most 7 alignment bytes cannot make it wrap.
      * Out:  rdi/rsi advanced past the bytes copied, edx reduced by the
      *       same amount, ecx = 0.
      * Uses: ecx, al, flags.
      *
      * Labels 100-103 are numeric local labels, so the macro can be expanded more than once in this file.
      */
  20         /* check for bad alignment of destination */
  21         movl %edi,%ecx
  22         andl $7,%ecx                    /* ecx = destination offset within its 8-byte word */
  23         jz 102f                         /* already aligned */
  24         subl $8,%ecx
  25         negl %ecx                       /* ecx = 8 - offset = bytes needed to reach alignment */
  26         subl %ecx,%edx                  /* edx = bytes left once the alignment bytes are done */
  27 100:    movb (%rsi),%al
  28 101:    movb %al,(%rdi)
  29         incq %rsi
  30         incq %rdi
  31         decl %ecx
  32         jnz 100b
  33 102:
  34         .section .fixup,"ax"
             /*
              * Fault while aligning: ecx still counts the alignment bytes
              * not copied (including the faulting one), so adding it back
              * makes edx the total number of bytes still to copy.
              */
  35 103:    addl %ecx,%edx                  /* edx = total bytes not yet copied */
  36         jmp .Lcopy_user_handle_tail
  37         .previous
  38
             /*
              * A fault on the load (100) or the store (101) is meant to resume
              * at 103.  _ASM_EXTABLE_UA is defined outside this file.
              */
  39         _ASM_EXTABLE_UA(100b, 103b)
  40         _ASM_EXTABLE_UA(101b, 103b)
  41         .endm
  42
  43 /*
  44  * copy_user_generic_unrolled - memory copy with exception handling.
  45  * This version is for CPUs like P4 that don't have efficient micro
  46  * code for rep movsq
  47  *
  48  * Input:
  49  * rdi destination
  50  * rsi source
  51  * rdx count
  52  *
  53  * Output:
  54  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * All count arithmetic is done on 32-bit registers, so only the low
      * 32 bits of the count (edx) take part in the copy.
      * Registers written: rax, rcx, rdx, rsi, rdi, r8-r11 and the flags.
      *
      * The copy runs in three phases after the destination is aligned:
      * 64-byte unrolled blocks, then 8-byte words, then single bytes.
      * Each phase has its own fixup entry (30/40/50) that rebuilds the
      * number of uncopied bytes in edx before jumping to
      * .Lcopy_user_handle_tail.
  55  */
  56 SYM_FUNC_START(copy_user_generic_unrolled)
  57         ASM_STAC                /* macro from outside this file; presumably opens user access (SMAP) - confirm */
  58         cmpl $8,%edx
  59         jb 20f          /* less than 8 bytes, go to byte copy loop */
  60         ALIGN_DESTINATION
  61         movl %edx,%ecx
  62         andl $63,%edx           /* edx = bytes left over after the 64-byte blocks */
  63         shrl $6,%ecx            /* ecx = number of 64-byte blocks */
  64         jz .L_copy_short_string
             /*
              * 64-byte block: two groups of four qword loads followed by four
              * qword stores.  rsi/rdi are advanced only after the whole block,
              * so a fault anywhere in it leaves them at the block start.
              */
  65 1:      movq (%rsi),%r8
  66 2:      movq 1*8(%rsi),%r9
  67 3:      movq 2*8(%rsi),%r10
  68 4:      movq 3*8(%rsi),%r11
  69 5:      movq %r8,(%rdi)
  70 6:      movq %r9,1*8(%rdi)
  71 7:      movq %r10,2*8(%rdi)
  72 8:      movq %r11,3*8(%rdi)
  73 9:      movq 4*8(%rsi),%r8
  74 10:     movq 5*8(%rsi),%r9
  75 11:     movq 6*8(%rsi),%r10
  76 12:     movq 7*8(%rsi),%r11
  77 13:     movq %r8,4*8(%rdi)
  78 14:     movq %r9,5*8(%rdi)
  79 15:     movq %r10,6*8(%rdi)
  80 16:     movq %r11,7*8(%rdi)
  81         leaq 64(%rsi),%rsi
  82         leaq 64(%rdi),%rdi
  83         decl %ecx
  84         jnz 1b
     /*
      * 8-byte phase.  Besides the fall-through from above, this label is
      * the target of a jump from copy_user_enhanced_fast_string for
      * counts below 64.
      */
  85 .L_copy_short_string:
  86         movl %edx,%ecx
  87         andl $7,%edx            /* edx = trailing bytes after the qwords */
  88         shrl $3,%ecx            /* ecx = number of qwords */
  89         jz 20f
  90 18:     movq (%rsi),%r8
  91 19:     movq %r8,(%rdi)
  92         leaq 8(%rsi),%rsi
  93         leaq 8(%rdi),%rdi
  94         decl %ecx
  95         jnz 18b
  96 20:     andl %edx,%edx          /* sets ZF when no trailing bytes remain */
  97         jz 23f
  98         movl %edx,%ecx
  99 21:     movb (%rsi),%al
 100 22:     movb %al,(%rdi)
 101         incq %rsi
 102         incq %rdi
 103         decl %ecx
 104         jnz 21b
 105 23:     xor %eax,%eax           /* success: 0 bytes uncopied */
 106         ASM_CLAC
 107         ret
 108
 109         .section .fixup,"ax"
             /*
              * Fixup entries: rebuild "bytes not yet copied" in edx.
              *  30: fault in a 64-byte block; ecx = blocks left including the
              *      faulting one, which is counted as wholly uncopied.
              *  40: fault in the qword loop; ecx = qwords left.
              *  50: fault in the byte loop;  ecx = bytes left.
              */
 110 30:     shll $6,%ecx
 111         addl %ecx,%edx          /* edx = blocks*64 + leftover */
 112         jmp 60f
 113 40:     leal (%rdx,%rcx,8),%edx /* edx = qwords*8 + trailing bytes */
 114         jmp 60f
 115 50:     movl %ecx,%edx
 116 60:     jmp .Lcopy_user_handle_tail /* edx = bytes still to copy */
 117         .previous
 118
             /* faulting instruction label -> fixup entry for its phase */
 119         _ASM_EXTABLE_UA(1b, 30b)
 120         _ASM_EXTABLE_UA(2b, 30b)
 121         _ASM_EXTABLE_UA(3b, 30b)
 122         _ASM_EXTABLE_UA(4b, 30b)
 123         _ASM_EXTABLE_UA(5b, 30b)
 124         _ASM_EXTABLE_UA(6b, 30b)
 125         _ASM_EXTABLE_UA(7b, 30b)
 126         _ASM_EXTABLE_UA(8b, 30b)
 127         _ASM_EXTABLE_UA(9b, 30b)
 128         _ASM_EXTABLE_UA(10b, 30b)
 129         _ASM_EXTABLE_UA(11b, 30b)
 130         _ASM_EXTABLE_UA(12b, 30b)
 131         _ASM_EXTABLE_UA(13b, 30b)
 132         _ASM_EXTABLE_UA(14b, 30b)
 133         _ASM_EXTABLE_UA(15b, 30b)
 134         _ASM_EXTABLE_UA(16b, 30b)
 135         _ASM_EXTABLE_UA(18b, 40b)
 136         _ASM_EXTABLE_UA(19b, 40b)
 137         _ASM_EXTABLE_UA(21b, 50b)
 138         _ASM_EXTABLE_UA(22b, 50b)
 139 SYM_FUNC_END(copy_user_generic_unrolled)
 140 EXPORT_SYMBOL(copy_user_generic_unrolled)
 141
 142 /* Some CPUs run faster using the string copy instructions.
 143  * This is also a lot simpler. Use them when possible.
 144  *
 145  * Only 4GB of copy is supported. This shouldn't be a problem
 146  * because the kernel normally only writes from/to page sized chunks
 147  * even if user space passed a longer buffer.
 148  * And more would be dangerous because both Intel and AMD have
 149  * errata with rep movsq > 4GB. If someone feels the need to fix
 150  * this please consider this.
 151  *
 152  * Input:
 153  * rdi destination
 154  * rsi source
 155  * rdx count
 156  *
 157  * Output:
 158  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * The count is handled as a 32-bit value (edx/ecx) throughout.
      * After aligning the destination, the bulk is moved with rep movsq
      * and the remaining 0-7 bytes with rep movsb.
      * Registers written: rax, rcx, rdx, rsi, rdi and the flags.
 159  */
 160 SYM_FUNC_START(copy_user_generic_string)
 161         ASM_STAC                /* macro from outside this file; presumably opens user access (SMAP) - confirm */
 162         cmpl $8,%edx
 163         jb 2f           /* less than 8 bytes, go straight to the rep movsb below */
 164         ALIGN_DESTINATION
 165         movl %edx,%ecx
 166         shrl $3,%ecx            /* ecx = number of qwords */
 167         andl $7,%edx            /* edx = trailing bytes */
 168 1:      rep
 169         movsq
 170 2:      movl %edx,%ecx
 171 3:      rep
 172         movsb
 173         xorl %eax,%eax          /* success: 0 bytes uncopied */
 174         ASM_CLAC
 175         ret
 176
 177         .section .fixup,"ax"
             /*
              * 11: fault in rep movsq; ecx = qwords left, edx = trailing bytes,
              *     so ecx becomes qwords*8 + trailing bytes.
              * 12: fault in rep movsb enters here with ecx = bytes left.
              */
 178 11:     leal (%rdx,%rcx,8),%ecx
 179 12:     movl %ecx,%edx          /* edx = bytes still to copy */
 180         jmp .Lcopy_user_handle_tail
 181         .previous
 182
 183         _ASM_EXTABLE_UA(1b, 11b)
 184         _ASM_EXTABLE_UA(3b, 12b)
 185 SYM_FUNC_END(copy_user_generic_string)
 186 EXPORT_SYMBOL(copy_user_generic_string)
 187
 188 /*
 189  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 190  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 191  *
 192  * Input:
 193  * rdi destination
 194  * rsi source
 195  * rdx count
 196  *
 197  * Output:
 198  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * The count is handled as a 32-bit value (edx/ecx).
      * Counts below 64 are not copied here: control jumps to
      * .L_copy_short_string inside copy_user_generic_unrolled, which
      * finishes the copy and returns from there (faults on that path are
      * covered by that function's exception-table entries).
 199  */
 200 SYM_FUNC_START(copy_user_enhanced_fast_string)
 201         ASM_STAC                /* macro from outside this file; presumably opens user access (SMAP) - confirm */
 202         cmpl $64,%edx
 203         jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
 204         movl %edx,%ecx
 205 1:      rep
 206         movsb
 207         xorl %eax,%eax          /* success: 0 bytes uncopied */
 208         ASM_CLAC
 209         ret
 210
 211         .section .fixup,"ax"
             /* fault in rep movsb: ecx = bytes left */
 212 12:     movl %ecx,%edx          /* edx = bytes still to copy */
 213         jmp .Lcopy_user_handle_tail
 214         .previous
 215
 216         _ASM_EXTABLE_UA(1b, 12b)
 217 SYM_FUNC_END(copy_user_enhanced_fast_string)
 218 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 219
 220 /*
 221  * Try to copy last bytes and clear the rest if needed.
 222  * Since protection fault in copy_from/to_user is not a normal situation,
 223  * it is not necessary to optimize tail handling.
 224  *
 225  * Input:
 226  * rdi destination
 227  * rsi source
 228  * rdx count
 229  *
 230  * Output:
 231  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * Reached by "jmp" from the .fixup code of the copy routines in this
      * file, not by "call"; the "ret" below therefore returns to the
      * caller of the routine that faulted.
      * Only the low 32 bits of the count (edx) are used, and ecx is
      * overwritten on entry.
      * NOTE(review): no instruction here stores zeroes, so "clear the
      * rest" in the description above is not done by this code; confirm
      * whether callers handle it.
 232  */
 233 SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 234         movl %edx,%ecx
 235 1:      rep movsb               /* byte-wise retry; stops at the first byte that faults */
 236 2:      mov %ecx,%eax           /* eax = bytes left uncopied (0 if the retry completed) */
 237         ASM_CLAC
 238         ret
 239
             /* a fault during the retry resumes at 2 with ecx = bytes left */
 240         _ASM_EXTABLE_UA(1b, 2b)
 241 SYM_CODE_END(.Lcopy_user_handle_tail)
 242
 243 /*
 244  * copy_user_nocache - Uncached memory copy with exception handling
 245  * This will force destination out of cache for more performance.
 246  *
 247  * Note: Cached memory copy is used when destination or size is not
 248  * naturally aligned. That is:
 249  *  - Require 8-byte alignment when size is 8 bytes or larger.
 250  *  - Require 4-byte alignment when size is 4 bytes.
      *
      * Input (as used by the code below):
      * rdi destination
      * rsi source
      * rdx count (only the low 32 bits, edx, are used)
      *
      * Output:
      * eax is 0 on the normal exit path; on a fault the fixup code hands
      * the number of bytes still to copy to .Lcopy_user_handle_tail,
      * which produces the return value.
      *
      * Registers written: rax, rcx, rdx, rsi, rdi, r8-r11 and the flags.
      * Non-temporal stores are done with movnti and are followed by an
      * sfence on both the normal and the fixup exit.
 251  */
 252 SYM_FUNC_START(__copy_user_nocache)
 253         ASM_STAC                /* macro from outside this file; presumably opens user access (SMAP) - confirm */
 254
 255         /* If size is less than 8 bytes, go to 4-byte copy */
 256         cmpl $8,%edx
 257         jb .L_4b_nocache_copy_entry
 258
 259         /* If destination is not 8-byte aligned, "cache" copy to align it */
 260         ALIGN_DESTINATION
 261
 262         /* Set 64-byte block count (ecx) and remainder (edx); each loop pass moves two groups of 4x8 bytes */
 263         movl %edx,%ecx
 264         andl $63,%edx
 265         shrl $6,%ecx
 266         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 267
 268         /* Perform 4x8-byte nocache loop-copy (twice per pass, 64 bytes in total) */
 269 .L_4x8b_nocache_copy_loop:
 270 1:      movq (%rsi),%r8
 271 2:      movq 1*8(%rsi),%r9
 272 3:      movq 2*8(%rsi),%r10
 273 4:      movq 3*8(%rsi),%r11
 274 5:      movnti %r8,(%rdi)
 275 6:      movnti %r9,1*8(%rdi)
 276 7:      movnti %r10,2*8(%rdi)
 277 8:      movnti %r11,3*8(%rdi)
 278 9:      movq 4*8(%rsi),%r8
 279 10:     movq 5*8(%rsi),%r9
 280 11:     movq 6*8(%rsi),%r10
 281 12:     movq 7*8(%rsi),%r11
 282 13:     movnti %r8,4*8(%rdi)
 283 14:     movnti %r9,5*8(%rdi)
 284 15:     movnti %r10,6*8(%rdi)
 285 16:     movnti %r11,7*8(%rdi)
 286         leaq 64(%rsi),%rsi              /* pointers move only after the whole block */
 287         leaq 64(%rdi),%rdi
 288         decl %ecx
 289         jnz .L_4x8b_nocache_copy_loop
 290
 291         /* Set 8-byte copy count and remainder */
 292 .L_8b_nocache_copy_entry:
 293         movl %edx,%ecx
 294         andl $7,%edx
 295         shrl $3,%ecx
 296         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 297
 298         /* Perform 8-byte nocache loop-copy */
 299 .L_8b_nocache_copy_loop:
 300 20:     movq (%rsi),%r8
 301 21:     movnti %r8,(%rdi)
 302         leaq 8(%rsi),%rsi
 303         leaq 8(%rdi),%rdi
 304         decl %ecx
 305         jnz .L_8b_nocache_copy_loop
 306
 307         /* If no byte left, we're done */
 308 .L_4b_nocache_copy_entry:
 309         andl %edx,%edx                  /* sets ZF when edx == 0 */
 310         jz .L_finish_copy
 311
 312         /* If destination is not 4-byte aligned, go to byte copy: */
 313         movl %edi,%ecx
 314         andl $3,%ecx
 315         jnz .L_1b_cache_copy_entry
 316
 317         /* Set 4-byte copy count (1 or 0) and remainder */
 318         movl %edx,%ecx
 319         andl $3,%edx
 320         shrl $2,%ecx
 321         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 322
 323         /* Perform 4-byte nocache copy: */
 324 30:     movl (%rsi),%r8d
 325 31:     movnti %r8d,(%rdi)
 326         leaq 4(%rsi),%rsi
 327         leaq 4(%rdi),%rdi
 328
 329         /* If no bytes left, we're done: */
 330         andl %edx,%edx
 331         jz .L_finish_copy
 332
 333         /* Perform byte "cache" loop-copy for the remainder */
 334 .L_1b_cache_copy_entry:
 335         movl %edx,%ecx                  /* edx is non-zero on every path that reaches here */
 336 .L_1b_cache_copy_loop:
 337 40:     movb (%rsi),%al
 338 41:     movb %al,(%rdi)
 339         incq %rsi
 340         incq %rdi
 341         decl %ecx
 342         jnz .L_1b_cache_copy_loop
 343
 344         /* Finished copying; fence the prior stores */
 345 .L_finish_copy:
 346         xorl %eax,%eax                  /* success: 0 bytes uncopied */
 347         ASM_CLAC
 348         sfence
 349         ret
 350
 351         .section .fixup,"ax"
             /*
              * Fixup entries: rebuild "bytes not yet copied" in edx from the
              * loop counter (ecx) and the remainder (edx) of the phase that
              * faulted, then fence and retry byte-wise in the common tail
              * handler.  rdx and rcx were last written through their 32-bit
              * halves, so their upper halves are zero when the 64-bit lea
              * forms below run.
              */
 352 .L_fixup_4x8b_copy:
 353         shll $6,%ecx
 354         addl %ecx,%edx                  /* blocks*64 + remainder */
 355         jmp .L_fixup_handle_tail
 356 .L_fixup_8b_copy:
 357         lea (%rdx,%rcx,8),%rdx          /* qwords*8 + remainder */
 358         jmp .L_fixup_handle_tail
 359 .L_fixup_4b_copy:
 360         lea (%rdx,%rcx,4),%rdx          /* dwords*4 + remainder */
 361         jmp .L_fixup_handle_tail
 362 .L_fixup_1b_copy:
 363         movl %ecx,%edx                  /* bytes left in the byte loop */
 364 .L_fixup_handle_tail:
 365         sfence                          /* fence the movnti stores done before the fault */
 366         jmp .Lcopy_user_handle_tail
 367         .previous
 368
             /* faulting instruction label -> fixup entry for its phase */
 369         _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
 370         _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
 371         _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
 372         _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
 373         _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
 374         _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
 375         _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
 376         _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
 377         _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
 378         _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
 379         _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
 380         _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
 381         _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
 382         _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
 383         _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
 384         _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
 385         _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
 386         _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
 387         _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
 388         _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
 389         _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
 390         _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
 391 SYM_FUNC_END(__copy_user_nocache)
 392 EXPORT_SYMBOL(__copy_user_nocache)