// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *  arch/x86/kernel/kprobes/opt.c
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>
#include <linux/pgtable.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

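/*
 * Recover the original instruction bytes at @addr into @buf when @addr lies
 * inside the JMP32 written for an optimized kprobe: scan back up to
 * JMP32_INSN_SIZE bytes for an optimized kprobe covering @addr and rebuild
 * the bytes from kp->opcode and op->optinsn.copied_insn.
 */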
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < JMP32_INSN_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimizing */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe is optimized, the original bytes have been
	 * overwritten by the jump destination address. In this case, the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
	}

	return (unsigned long)buf;
}

static void synthesize_clac(kprobe_opcode_t *addr)
{
	/*
	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
	 * This isn't a fast path anyway.
	 */
	if (!boot_cpu_has(X86_FEATURE_SMAP))
		return;

	/* Replace the NOP3 with CLAC */
	addr[0] = 0x0f;
	addr[1] = 0x01;
	addr[2] = 0xca;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	/* movabs $val, %rdi */
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	/* movl $val, %eax */
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}

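/*
 * Detour buffer template. The TMPL_*_IDX offsets below index into this
 * template: arch_prepare_optimized_kprobe() patches the NOPs at
 * optprobe_template_clac/val/call with CLAC, the op pointer load and the
 * call to optimized_callback(), and appends the copied instructions after
 * optprobe_template_end.
 */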
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %esp\n"
			"	pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags into esp */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addl $4, %esp\n"
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

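/*
 * The template above lives in .rodata and is never called directly; this
 * dummy declaration only exists so STACK_FRAME_NON_STANDARD() can exempt
 * it from objtool's stack validation.
 */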
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Optimized kprobe callback function: called from the optinsn detour buffer */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->cs |= get_kernel_rpl();
		regs->gs = 0;
#endif
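		/*
		 * Make the saved state look as if the INT3 at kp->addr had
		 * just trapped: ip points past the breakpoint byte and
		 * orig_ax is set to -1, as on an exception entry.
		 */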
		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

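/*
 * Copy whole instructions from @src until at least JMP32_INSN_SIZE bytes
 * are covered, relocating them for execution at @real. Fails if any
 * instruction is not boostable or if the range is already owned by ftrace,
 * alternatives or jump labels.
 */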
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < JMP32_INSN_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved by another subsystem */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* ModRM reg 4/5: jmp near/far indirect */
		insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
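	/* Relative target = address of the next instruction + signed immediate */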
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * A jump to an __x86_indirect_thunk_* is treated as an indirect jump.
	 * Note that even with CONFIG_RETPOLINE=y, a kernel compiled with an
	 * older gcc may still emit indirect jumps, so this check is in
	 * addition to, not a replacement for, the indirect-jump check above.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}

/* Decode the whole function to ensure no instructions jump into the target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and register setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < JMP32_INSN_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code jumps into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem has put a breakpoint here */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that no instructions jump into the target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

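	/* Another enabled kprobe inside the region would be bypassed by the JMP32. */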
	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free the optimized instruction slot */
static void
__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	u8 *slot = op->optinsn.insn;

	if (slot) {
		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;

		/* Record the perf event before freeing the slot */
		if (dirty)
			perf_event_text_poke(slot, slot, len, NULL, 0);

		free_optinsn_slot(slot, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy replacing target instructions.
 * Target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
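/*
 * Resulting detour buffer layout:
 *
 *	[template code]			TMPL_END_IDX bytes
 *	[copied instructions]		op->optinsn.size bytes
 *	[JMP32 back to kp->addr
 *	 + op->optinsn.size]		JMP32_INSN_SIZE bytes
 */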
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify that the address gap is within the +/-2GB range reachable
	 * by a relative jump.
	 */
	rel = (long)slot - ((long)op->kp.addr + JMP32_INSN_SIZE);
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy the arch-dependent instance from the template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	synthesize_clac(buf + TMPL_CLAC_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set the probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set the returning jmp instruction at the tail of the out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += JMP32_INSN_SIZE;

	/*
	 * Note that len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE
	 * is also used in __arch_remove_optimized_kprobe().
	 */

	/* We have to use text_poke() for the instruction buffer because it is RO */
	perf_event_text_poke(slot, NULL, 0, buf, len);
	text_poke(slot, buf, len);

	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}

/*
 * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * Caller must hold kprobe_mutex and text_mutex.
 *
 * The caller will have installed a regular kprobe and after that issued
 * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
 * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[JMP32_INSN_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + JMP32_INSN_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Back up the instructions that will be replaced by the jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
		       DISP32_SIZE);

		insn_buff[0] = JMP32_INSN_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

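		/*
		 * text_poke_bp() handles the cross-CPU ordering: it installs
		 * an INT3 first, syncs, writes the displacement bytes, syncs
		 * and only then writes the JMP32 opcode.
		 */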
		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

		list_del_init(&op->list);
	}
}

/*
 * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
 *
 * After that, we can restore the 4 bytes after the INT3 to undo what
 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
 * unused once the INT3 lands.
 */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
	u8 old[JMP32_INSN_SIZE];
	u8 *addr = op->kp.addr;

	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
	memcpy(new + INT3_INSN_SIZE,
	       op->optinsn.copied_insn,
	       JMP32_INSN_SIZE - INT3_INSN_SIZE);

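	/*
	 * Poke in the same order text_poke_bp() would: land the INT3 first
	 * so no CPU can see a half-written JMP32, sync, then restore the
	 * original displacement bytes behind it.
	 */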
	text_poke(addr, new, INT3_INSN_SIZE);
	text_poke_sync();
	text_poke(addr + INT3_INSN_SIZE,
		  new + INT3_INSN_SIZE,
		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
	text_poke_sync();

	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

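/*
 * If an INT3 hit lands on a kprobe that owns a prepared detour buffer
 * (e.g. while the optimization transition is still in flight), divert
 * execution to the copied instructions instead of single-stepping.
 */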
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through the copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);