gro: Fix inconsistent indenting
[linux-2.6-microblaze.git] / kernel / kprobes.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Kernel Probes (KProbes)
4  *
5  * Copyright (C) IBM Corporation, 2002, 2004
6  *
7  * 2002-Oct     Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
8  *              Probes initial implementation (includes suggestions from
9  *              Rusty Russell).
10  * 2004-Aug     Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
11  *              hlists and exceptions notifier as suggested by Andi Kleen.
12  * 2004-July    Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
13  *              interface to access function arguments.
14  * 2004-Sep     Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
15  *              exceptions notifier to be first on the priority list.
16  * 2005-May     Hien Nguyen <hien@us.ibm.com>, Jim Keniston
17  *              <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
18  *              <prasanna@in.ibm.com> added function-return probes.
19  */
20
21 #define pr_fmt(fmt) "kprobes: " fmt
22
23 #include <linux/kprobes.h>
24 #include <linux/hash.h>
25 #include <linux/init.h>
26 #include <linux/slab.h>
27 #include <linux/stddef.h>
28 #include <linux/export.h>
29 #include <linux/moduleloader.h>
30 #include <linux/kallsyms.h>
31 #include <linux/freezer.h>
32 #include <linux/seq_file.h>
33 #include <linux/debugfs.h>
34 #include <linux/sysctl.h>
35 #include <linux/kdebug.h>
36 #include <linux/memory.h>
37 #include <linux/ftrace.h>
38 #include <linux/cpu.h>
39 #include <linux/jump_label.h>
40 #include <linux/static_call.h>
41 #include <linux/perf_event.h>
42
43 #include <asm/sections.h>
44 #include <asm/cacheflush.h>
45 #include <asm/errno.h>
46 #include <linux/uaccess.h>
47
48 #define KPROBE_HASH_BITS 6
49 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
50
51
52 static int kprobes_initialized;
/*
 * 'kprobe_table' can be accessed in either of two ways:
 * - normal hlist traversal and RCU add/del while 'kprobe_mutex' is held, or
 * - RCU hlist traversal with preemption disabled (breakpoint handlers).
 */
58 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
59
60 /* NOTE: change this value only with 'kprobe_mutex' held */
61 static bool kprobes_all_disarmed;
62
63 /* This protects 'kprobe_table' and 'optimizing_list' */
64 static DEFINE_MUTEX(kprobe_mutex);
65 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
66
67 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
68                                         unsigned int __unused)
69 {
70         return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
71 }
72
73 /*
74  * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where
75  * kprobes can not probe.
76  */
77 static LIST_HEAD(kprobe_blacklist);
78
79 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
80 /*
81  * 'kprobe::ainsn.insn' points to the copy of the instruction to be
82  * single-stepped. x86_64, POWER4 and above have no-exec support and
83  * stepping on the instruction on a vmalloced/kmalloced/data page
84  * is a recipe for disaster
85  */
86 struct kprobe_insn_page {
87         struct list_head list;
88         kprobe_opcode_t *insns;         /* Page of instruction slots */
89         struct kprobe_insn_cache *cache;
90         int nused;
91         int ngarbage;
92         char slot_used[];
93 };
94
/*
 * Bytes needed for a 'struct kprobe_insn_page' whose trailing 'slot_used'
 * array holds one state byte for each of 'slots' slots.
 */
#define KPROBE_INSN_PAGE_SIZE(slots)				\
	(offsetof(struct kprobe_insn_page, slot_used) +		\
	 ((slots) * sizeof(char)))
98
99 static int slots_per_page(struct kprobe_insn_cache *c)
100 {
101         return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
102 }
103
/* State of one instruction slot, stored in 'kprobe_insn_page::slot_used[]'. */
enum kprobe_slot_state {
	SLOT_CLEAN = 0,		/* Free, may be handed out */
	SLOT_DIRTY = 1,		/* Released as garbage, awaiting collection */
	SLOT_USED = 2,		/* Handed out and in use */
};
109
110 void __weak *alloc_insn_page(void)
111 {
112         /*
113          * Use module_alloc() so this page is within +/- 2GB of where the
114          * kernel image and loaded module images reside. This is required
115          * for most of the architectures.
116          * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
117          */
118         return module_alloc(PAGE_SIZE);
119 }
120
/* Release a page of instruction slots through module_memfree(). */
static void free_insn_page(void *page)
{
	module_memfree(page);
}
125
126 struct kprobe_insn_cache kprobe_insn_slots = {
127         .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
128         .alloc = alloc_insn_page,
129         .free = free_insn_page,
130         .sym = KPROBE_INSN_PAGE_SYM,
131         .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
132         .insn_size = MAX_INSN_SIZE,
133         .nr_garbage = 0,
134 };
135 static int collect_garbage_slots(struct kprobe_insn_cache *c);
136
137 /**
138  * __get_insn_slot() - Find a slot on an executable page for an instruction.
139  * We allocate an executable page if there's no room on existing ones.
140  */
141 kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
142 {
143         struct kprobe_insn_page *kip;
144         kprobe_opcode_t *slot = NULL;
145
146         /* Since the slot array is not protected by rcu, we need a mutex */
147         mutex_lock(&c->mutex);
148  retry:
149         rcu_read_lock();
150         list_for_each_entry_rcu(kip, &c->pages, list) {
151                 if (kip->nused < slots_per_page(c)) {
152                         int i;
153
154                         for (i = 0; i < slots_per_page(c); i++) {
155                                 if (kip->slot_used[i] == SLOT_CLEAN) {
156                                         kip->slot_used[i] = SLOT_USED;
157                                         kip->nused++;
158                                         slot = kip->insns + (i * c->insn_size);
159                                         rcu_read_unlock();
160                                         goto out;
161                                 }
162                         }
163                         /* kip->nused is broken. Fix it. */
164                         kip->nused = slots_per_page(c);
165                         WARN_ON(1);
166                 }
167         }
168         rcu_read_unlock();
169
170         /* If there are any garbage slots, collect it and try again. */
171         if (c->nr_garbage && collect_garbage_slots(c) == 0)
172                 goto retry;
173
174         /* All out of space.  Need to allocate a new page. */
175         kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
176         if (!kip)
177                 goto out;
178
179         kip->insns = c->alloc();
180         if (!kip->insns) {
181                 kfree(kip);
182                 goto out;
183         }
184         INIT_LIST_HEAD(&kip->list);
185         memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
186         kip->slot_used[0] = SLOT_USED;
187         kip->nused = 1;
188         kip->ngarbage = 0;
189         kip->cache = c;
190         list_add_rcu(&kip->list, &c->pages);
191         slot = kip->insns;
192
193         /* Record the perf ksymbol register event after adding the page */
194         perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
195                            PAGE_SIZE, false, c->sym);
196 out:
197         mutex_unlock(&c->mutex);
198         return slot;
199 }
200
201 /* Return true if all garbages are collected, otherwise false. */
202 static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
203 {
204         kip->slot_used[idx] = SLOT_CLEAN;
205         kip->nused--;
206         if (kip->nused == 0) {
207                 /*
208                  * Page is no longer in use.  Free it unless
209                  * it's the last one.  We keep the last one
210                  * so as not to have to set it up again the
211                  * next time somebody inserts a probe.
212                  */
213                 if (!list_is_singular(&kip->list)) {
214                         /*
215                          * Record perf ksymbol unregister event before removing
216                          * the page.
217                          */
218                         perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
219                                            (unsigned long)kip->insns, PAGE_SIZE, true,
220                                            kip->cache->sym);
221                         list_del_rcu(&kip->list);
222                         synchronize_rcu();
223                         kip->cache->free(kip->insns);
224                         kfree(kip);
225                 }
226                 return true;
227         }
228         return false;
229 }
230
231 static int collect_garbage_slots(struct kprobe_insn_cache *c)
232 {
233         struct kprobe_insn_page *kip, *next;
234
235         /* Ensure no-one is interrupted on the garbages */
236         synchronize_rcu();
237
238         list_for_each_entry_safe(kip, next, &c->pages, list) {
239                 int i;
240
241                 if (kip->ngarbage == 0)
242                         continue;
243                 kip->ngarbage = 0;      /* we will collect all garbages */
244                 for (i = 0; i < slots_per_page(c); i++) {
245                         if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
246                                 break;
247                 }
248         }
249         c->nr_garbage = 0;
250         return 0;
251 }
252
253 void __free_insn_slot(struct kprobe_insn_cache *c,
254                       kprobe_opcode_t *slot, int dirty)
255 {
256         struct kprobe_insn_page *kip;
257         long idx;
258
259         mutex_lock(&c->mutex);
260         rcu_read_lock();
261         list_for_each_entry_rcu(kip, &c->pages, list) {
262                 idx = ((long)slot - (long)kip->insns) /
263                         (c->insn_size * sizeof(kprobe_opcode_t));
264                 if (idx >= 0 && idx < slots_per_page(c))
265                         goto out;
266         }
267         /* Could not find this slot. */
268         WARN_ON(1);
269         kip = NULL;
270 out:
271         rcu_read_unlock();
272         /* Mark and sweep: this may sleep */
273         if (kip) {
274                 /* Check double free */
275                 WARN_ON(kip->slot_used[idx] != SLOT_USED);
276                 if (dirty) {
277                         kip->slot_used[idx] = SLOT_DIRTY;
278                         kip->ngarbage++;
279                         if (++c->nr_garbage > slots_per_page(c))
280                                 collect_garbage_slots(c);
281                 } else {
282                         collect_one_slot(kip, idx);
283                 }
284         }
285         mutex_unlock(&c->mutex);
286 }
287
288 /*
289  * Check given address is on the page of kprobe instruction slots.
290  * This will be used for checking whether the address on a stack
291  * is on a text area or not.
292  */
293 bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
294 {
295         struct kprobe_insn_page *kip;
296         bool ret = false;
297
298         rcu_read_lock();
299         list_for_each_entry_rcu(kip, &c->pages, list) {
300                 if (addr >= (unsigned long)kip->insns &&
301                     addr < (unsigned long)kip->insns + PAGE_SIZE) {
302                         ret = true;
303                         break;
304                 }
305         }
306         rcu_read_unlock();
307
308         return ret;
309 }
310
311 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
312                              unsigned long *value, char *type, char *sym)
313 {
314         struct kprobe_insn_page *kip;
315         int ret = -ERANGE;
316
317         rcu_read_lock();
318         list_for_each_entry_rcu(kip, &c->pages, list) {
319                 if ((*symnum)--)
320                         continue;
321                 strscpy(sym, c->sym, KSYM_NAME_LEN);
322                 *type = 't';
323                 *value = (unsigned long)kip->insns;
324                 ret = 0;
325                 break;
326         }
327         rcu_read_unlock();
328
329         return ret;
330 }
331
332 #ifdef CONFIG_OPTPROBES
333 void __weak *alloc_optinsn_page(void)
334 {
335         return alloc_insn_page();
336 }
337
338 void __weak free_optinsn_page(void *page)
339 {
340         free_insn_page(page);
341 }
342
343 /* For optimized_kprobe buffer */
344 struct kprobe_insn_cache kprobe_optinsn_slots = {
345         .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
346         .alloc = alloc_optinsn_page,
347         .free = free_optinsn_page,
348         .sym = KPROBE_OPTINSN_PAGE_SYM,
349         .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
350         /* .insn_size is initialized later */
351         .nr_garbage = 0,
352 };
353 #endif
354 #endif
355
356 /* We have preemption disabled.. so it is safe to use __ versions */
357 static inline void set_kprobe_instance(struct kprobe *kp)
358 {
359         __this_cpu_write(kprobe_instance, kp);
360 }
361
362 static inline void reset_kprobe_instance(void)
363 {
364         __this_cpu_write(kprobe_instance, NULL);
365 }
366
367 /*
368  * This routine is called either:
369  *      - under the 'kprobe_mutex' - during kprobe_[un]register().
370  *                              OR
371  *      - with preemption disabled - from architecture specific code.
372  */
373 struct kprobe *get_kprobe(void *addr)
374 {
375         struct hlist_head *head;
376         struct kprobe *p;
377
378         head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
379         hlist_for_each_entry_rcu(p, head, hlist,
380                                  lockdep_is_held(&kprobe_mutex)) {
381                 if (p->addr == addr)
382                         return p;
383         }
384
385         return NULL;
386 }
387 NOKPROBE_SYMBOL(get_kprobe);
388
389 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
390
391 /* Return true if 'p' is an aggregator */
392 static inline bool kprobe_aggrprobe(struct kprobe *p)
393 {
394         return p->pre_handler == aggr_pre_handler;
395 }
396
397 /* Return true if 'p' is unused */
398 static inline bool kprobe_unused(struct kprobe *p)
399 {
400         return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
401                list_empty(&p->list);
402 }
403
404 /* Keep all fields in the kprobe consistent. */
405 static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
406 {
407         memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
408         memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
409 }
410
411 #ifdef CONFIG_OPTPROBES
412 /* NOTE: This is protected by 'kprobe_mutex'. */
413 static bool kprobes_allow_optimization;
414
415 /*
416  * Call all 'kprobe::pre_handler' on the list, but ignores its return value.
417  * This must be called from arch-dep optimized caller.
418  */
419 void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
420 {
421         struct kprobe *kp;
422
423         list_for_each_entry_rcu(kp, &p->list, list) {
424                 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
425                         set_kprobe_instance(kp);
426                         kp->pre_handler(kp, regs);
427                 }
428                 reset_kprobe_instance();
429         }
430 }
431 NOKPROBE_SYMBOL(opt_pre_handler);
432
433 /* Free optimized instructions and optimized_kprobe */
434 static void free_aggr_kprobe(struct kprobe *p)
435 {
436         struct optimized_kprobe *op;
437
438         op = container_of(p, struct optimized_kprobe, kp);
439         arch_remove_optimized_kprobe(op);
440         arch_remove_kprobe(p);
441         kfree(op);
442 }
443
444 /* Return true if the kprobe is ready for optimization. */
445 static inline int kprobe_optready(struct kprobe *p)
446 {
447         struct optimized_kprobe *op;
448
449         if (kprobe_aggrprobe(p)) {
450                 op = container_of(p, struct optimized_kprobe, kp);
451                 return arch_prepared_optinsn(&op->optinsn);
452         }
453
454         return 0;
455 }
456
457 /* Return true if the kprobe is disarmed. Note: p must be on hash list */
458 static inline bool kprobe_disarmed(struct kprobe *p)
459 {
460         struct optimized_kprobe *op;
461
462         /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
463         if (!kprobe_aggrprobe(p))
464                 return kprobe_disabled(p);
465
466         op = container_of(p, struct optimized_kprobe, kp);
467
468         return kprobe_disabled(p) && list_empty(&op->list);
469 }
470
471 /* Return true if the probe is queued on (un)optimizing lists */
472 static bool kprobe_queued(struct kprobe *p)
473 {
474         struct optimized_kprobe *op;
475
476         if (kprobe_aggrprobe(p)) {
477                 op = container_of(p, struct optimized_kprobe, kp);
478                 if (!list_empty(&op->list))
479                         return true;
480         }
481         return false;
482 }
483
484 /*
485  * Return an optimized kprobe whose optimizing code replaces
486  * instructions including 'addr' (exclude breakpoint).
487  */
488 static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
489 {
490         int i;
491         struct kprobe *p = NULL;
492         struct optimized_kprobe *op;
493
494         /* Don't check i == 0, since that is a breakpoint case. */
495         for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
496                 p = get_kprobe(addr - i);
497
498         if (p && kprobe_optready(p)) {
499                 op = container_of(p, struct optimized_kprobe, kp);
500                 if (arch_within_optimized_kprobe(op, addr))
501                         return p;
502         }
503
504         return NULL;
505 }
506
507 /* Optimization staging list, protected by 'kprobe_mutex' */
508 static LIST_HEAD(optimizing_list);
509 static LIST_HEAD(unoptimizing_list);
510 static LIST_HEAD(freeing_list);
511
512 static void kprobe_optimizer(struct work_struct *work);
513 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
514 #define OPTIMIZE_DELAY 5
515
516 /*
517  * Optimize (replace a breakpoint with a jump) kprobes listed on
518  * 'optimizing_list'.
519  */
520 static void do_optimize_kprobes(void)
521 {
522         lockdep_assert_held(&text_mutex);
523         /*
524          * The optimization/unoptimization refers 'online_cpus' via
525          * stop_machine() and cpu-hotplug modifies the 'online_cpus'.
526          * And same time, 'text_mutex' will be held in cpu-hotplug and here.
527          * This combination can cause a deadlock (cpu-hotplug tries to lock
528          * 'text_mutex' but stop_machine() can not be done because
529          * the 'online_cpus' has been changed)
530          * To avoid this deadlock, caller must have locked cpu-hotplug
531          * for preventing cpu-hotplug outside of 'text_mutex' locking.
532          */
533         lockdep_assert_cpus_held();
534
535         /* Optimization never be done when disarmed */
536         if (kprobes_all_disarmed || !kprobes_allow_optimization ||
537             list_empty(&optimizing_list))
538                 return;
539
540         arch_optimize_kprobes(&optimizing_list);
541 }
542
543 /*
544  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
545  * if need) kprobes listed on 'unoptimizing_list'.
546  */
547 static void do_unoptimize_kprobes(void)
548 {
549         struct optimized_kprobe *op, *tmp;
550
551         lockdep_assert_held(&text_mutex);
552         /* See comment in do_optimize_kprobes() */
553         lockdep_assert_cpus_held();
554
555         /* Unoptimization must be done anytime */
556         if (list_empty(&unoptimizing_list))
557                 return;
558
559         arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
560         /* Loop on 'freeing_list' for disarming */
561         list_for_each_entry_safe(op, tmp, &freeing_list, list) {
562                 /* Switching from detour code to origin */
563                 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
564                 /* Disarm probes if marked disabled */
565                 if (kprobe_disabled(&op->kp))
566                         arch_disarm_kprobe(&op->kp);
567                 if (kprobe_unused(&op->kp)) {
568                         /*
569                          * Remove unused probes from hash list. After waiting
570                          * for synchronization, these probes are reclaimed.
571                          * (reclaiming is done by do_free_cleaned_kprobes().)
572                          */
573                         hlist_del_rcu(&op->kp.hlist);
574                 } else
575                         list_del_init(&op->list);
576         }
577 }
578
579 /* Reclaim all kprobes on the 'freeing_list' */
580 static void do_free_cleaned_kprobes(void)
581 {
582         struct optimized_kprobe *op, *tmp;
583
584         list_for_each_entry_safe(op, tmp, &freeing_list, list) {
585                 list_del_init(&op->list);
586                 if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
587                         /*
588                          * This must not happen, but if there is a kprobe
589                          * still in use, keep it on kprobes hash list.
590                          */
591                         continue;
592                 }
593                 free_aggr_kprobe(&op->kp);
594         }
595 }
596
597 /* Start optimizer after OPTIMIZE_DELAY passed */
598 static void kick_kprobe_optimizer(void)
599 {
600         schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
601 }
602
603 /* Kprobe jump optimizer */
604 static void kprobe_optimizer(struct work_struct *work)
605 {
606         mutex_lock(&kprobe_mutex);
607         cpus_read_lock();
608         mutex_lock(&text_mutex);
609
610         /*
611          * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
612          * kprobes before waiting for quiesence period.
613          */
614         do_unoptimize_kprobes();
615
616         /*
617          * Step 2: Wait for quiesence period to ensure all potentially
618          * preempted tasks to have normally scheduled. Because optprobe
619          * may modify multiple instructions, there is a chance that Nth
620          * instruction is preempted. In that case, such tasks can return
621          * to 2nd-Nth byte of jump instruction. This wait is for avoiding it.
622          * Note that on non-preemptive kernel, this is transparently converted
623          * to synchronoze_sched() to wait for all interrupts to have completed.
624          */
625         synchronize_rcu_tasks();
626
627         /* Step 3: Optimize kprobes after quiesence period */
628         do_optimize_kprobes();
629
630         /* Step 4: Free cleaned kprobes after quiesence period */
631         do_free_cleaned_kprobes();
632
633         mutex_unlock(&text_mutex);
634         cpus_read_unlock();
635
636         /* Step 5: Kick optimizer again if needed */
637         if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
638                 kick_kprobe_optimizer();
639
640         mutex_unlock(&kprobe_mutex);
641 }
642
643 /* Wait for completing optimization and unoptimization */
644 void wait_for_kprobe_optimizer(void)
645 {
646         mutex_lock(&kprobe_mutex);
647
648         while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
649                 mutex_unlock(&kprobe_mutex);
650
651                 /* This will also make 'optimizing_work' execute immmediately */
652                 flush_delayed_work(&optimizing_work);
653                 /* 'optimizing_work' might not have been queued yet, relax */
654                 cpu_relax();
655
656                 mutex_lock(&kprobe_mutex);
657         }
658
659         mutex_unlock(&kprobe_mutex);
660 }
661
662 static bool optprobe_queued_unopt(struct optimized_kprobe *op)
663 {
664         struct optimized_kprobe *_op;
665
666         list_for_each_entry(_op, &unoptimizing_list, list) {
667                 if (op == _op)
668                         return true;
669         }
670
671         return false;
672 }
673
674 /* Optimize kprobe if p is ready to be optimized */
675 static void optimize_kprobe(struct kprobe *p)
676 {
677         struct optimized_kprobe *op;
678
679         /* Check if the kprobe is disabled or not ready for optimization. */
680         if (!kprobe_optready(p) || !kprobes_allow_optimization ||
681             (kprobe_disabled(p) || kprobes_all_disarmed))
682                 return;
683
684         /* kprobes with 'post_handler' can not be optimized */
685         if (p->post_handler)
686                 return;
687
688         op = container_of(p, struct optimized_kprobe, kp);
689
690         /* Check there is no other kprobes at the optimized instructions */
691         if (arch_check_optimized_kprobe(op) < 0)
692                 return;
693
694         /* Check if it is already optimized. */
695         if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
696                 if (optprobe_queued_unopt(op)) {
697                         /* This is under unoptimizing. Just dequeue the probe */
698                         list_del_init(&op->list);
699                 }
700                 return;
701         }
702         op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
703
704         /*
705          * On the 'unoptimizing_list' and 'optimizing_list',
706          * 'op' must have OPTIMIZED flag
707          */
708         if (WARN_ON_ONCE(!list_empty(&op->list)))
709                 return;
710
711         list_add(&op->list, &optimizing_list);
712         kick_kprobe_optimizer();
713 }
714
715 /* Short cut to direct unoptimizing */
716 static void force_unoptimize_kprobe(struct optimized_kprobe *op)
717 {
718         lockdep_assert_cpus_held();
719         arch_unoptimize_kprobe(op);
720         op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
721 }
722
723 /* Unoptimize a kprobe if p is optimized */
724 static void unoptimize_kprobe(struct kprobe *p, bool force)
725 {
726         struct optimized_kprobe *op;
727
728         if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
729                 return; /* This is not an optprobe nor optimized */
730
731         op = container_of(p, struct optimized_kprobe, kp);
732         if (!kprobe_optimized(p))
733                 return;
734
735         if (!list_empty(&op->list)) {
736                 if (optprobe_queued_unopt(op)) {
737                         /* Queued in unoptimizing queue */
738                         if (force) {
739                                 /*
740                                  * Forcibly unoptimize the kprobe here, and queue it
741                                  * in the freeing list for release afterwards.
742                                  */
743                                 force_unoptimize_kprobe(op);
744                                 list_move(&op->list, &freeing_list);
745                         }
746                 } else {
747                         /* Dequeue from the optimizing queue */
748                         list_del_init(&op->list);
749                         op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
750                 }
751                 return;
752         }
753
754         /* Optimized kprobe case */
755         if (force) {
756                 /* Forcibly update the code: this is a special case */
757                 force_unoptimize_kprobe(op);
758         } else {
759                 list_add(&op->list, &unoptimizing_list);
760                 kick_kprobe_optimizer();
761         }
762 }
763
764 /* Cancel unoptimizing for reusing */
765 static int reuse_unused_kprobe(struct kprobe *ap)
766 {
767         struct optimized_kprobe *op;
768
769         /*
770          * Unused kprobe MUST be on the way of delayed unoptimizing (means
771          * there is still a relative jump) and disabled.
772          */
773         op = container_of(ap, struct optimized_kprobe, kp);
774         WARN_ON_ONCE(list_empty(&op->list));
775         /* Enable the probe again */
776         ap->flags &= ~KPROBE_FLAG_DISABLED;
777         /* Optimize it again. (remove from 'op->list') */
778         if (!kprobe_optready(ap))
779                 return -EINVAL;
780
781         optimize_kprobe(ap);
782         return 0;
783 }
784
785 /* Remove optimized instructions */
786 static void kill_optimized_kprobe(struct kprobe *p)
787 {
788         struct optimized_kprobe *op;
789
790         op = container_of(p, struct optimized_kprobe, kp);
791         if (!list_empty(&op->list))
792                 /* Dequeue from the (un)optimization queue */
793                 list_del_init(&op->list);
794         op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
795
796         if (kprobe_unused(p)) {
797                 /* Enqueue if it is unused */
798                 list_add(&op->list, &freeing_list);
799                 /*
800                  * Remove unused probes from the hash list. After waiting
801                  * for synchronization, this probe is reclaimed.
802                  * (reclaiming is done by do_free_cleaned_kprobes().)
803                  */
804                 hlist_del_rcu(&op->kp.hlist);
805         }
806
807         /* Don't touch the code, because it is already freed. */
808         arch_remove_optimized_kprobe(op);
809 }
810
/* Prepare the optimized instructions of 'op' unless 'p' is ftrace-based. */
static inline
void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	if (kprobe_ftrace(p))
		return;

	arch_prepare_optimized_kprobe(op, p);
}
817
818 /* Try to prepare optimized instructions */
819 static void prepare_optimized_kprobe(struct kprobe *p)
820 {
821         struct optimized_kprobe *op;
822
823         op = container_of(p, struct optimized_kprobe, kp);
824         __prepare_optimized_kprobe(op, p);
825 }
826
827 /* Allocate new optimized_kprobe and try to prepare optimized instructions. */
828 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
829 {
830         struct optimized_kprobe *op;
831
832         op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
833         if (!op)
834                 return NULL;
835
836         INIT_LIST_HEAD(&op->list);
837         op->kp.addr = p->addr;
838         __prepare_optimized_kprobe(op, p);
839
840         return &op->kp;
841 }
842
843 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
844
845 /*
846  * Prepare an optimized_kprobe and optimize it.
847  * NOTE: 'p' must be a normal registered kprobe.
848  */
849 static void try_to_optimize_kprobe(struct kprobe *p)
850 {
851         struct kprobe *ap;
852         struct optimized_kprobe *op;
853
854         /* Impossible to optimize ftrace-based kprobe. */
855         if (kprobe_ftrace(p))
856                 return;
857
858         /* For preparing optimization, jump_label_text_reserved() is called. */
859         cpus_read_lock();
860         jump_label_lock();
861         mutex_lock(&text_mutex);
862
863         ap = alloc_aggr_kprobe(p);
864         if (!ap)
865                 goto out;
866
867         op = container_of(ap, struct optimized_kprobe, kp);
868         if (!arch_prepared_optinsn(&op->optinsn)) {
869                 /* If failed to setup optimizing, fallback to kprobe. */
870                 arch_remove_optimized_kprobe(op);
871                 kfree(op);
872                 goto out;
873         }
874
875         init_aggr_kprobe(ap, p);
876         optimize_kprobe(ap);    /* This just kicks optimizer thread. */
877
878 out:
879         mutex_unlock(&text_mutex);
880         jump_label_unlock();
881         cpus_read_unlock();
882 }
883
884 static void optimize_all_kprobes(void)
885 {
886         struct hlist_head *head;
887         struct kprobe *p;
888         unsigned int i;
889
890         mutex_lock(&kprobe_mutex);
891         /* If optimization is already allowed, just return. */
892         if (kprobes_allow_optimization)
893                 goto out;
894
895         cpus_read_lock();
896         kprobes_allow_optimization = true;
897         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
898                 head = &kprobe_table[i];
899                 hlist_for_each_entry(p, head, hlist)
900                         if (!kprobe_disabled(p))
901                                 optimize_kprobe(p);
902         }
903         cpus_read_unlock();
904         pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
905 out:
906         mutex_unlock(&kprobe_mutex);
907 }
908
909 #ifdef CONFIG_SYSCTL
910 static void unoptimize_all_kprobes(void)
911 {
912         struct hlist_head *head;
913         struct kprobe *p;
914         unsigned int i;
915
916         mutex_lock(&kprobe_mutex);
917         /* If optimization is already prohibited, just return. */
918         if (!kprobes_allow_optimization) {
919                 mutex_unlock(&kprobe_mutex);
920                 return;
921         }
922
923         cpus_read_lock();
924         kprobes_allow_optimization = false;
925         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
926                 head = &kprobe_table[i];
927                 hlist_for_each_entry(p, head, hlist) {
928                         if (!kprobe_disabled(p))
929                                 unoptimize_kprobe(p, false);
930                 }
931         }
932         cpus_read_unlock();
933         mutex_unlock(&kprobe_mutex);
934
935         /* Wait for unoptimizing completion. */
936         wait_for_kprobe_optimizer();
937         pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
938 }
939
940 static DEFINE_MUTEX(kprobe_sysctl_mutex);
941 int sysctl_kprobes_optimization;
942 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
943                                       void *buffer, size_t *length,
944                                       loff_t *ppos)
945 {
946         int ret;
947
948         mutex_lock(&kprobe_sysctl_mutex);
949         sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
950         ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
951
952         if (sysctl_kprobes_optimization)
953                 optimize_all_kprobes();
954         else
955                 unoptimize_all_kprobes();
956         mutex_unlock(&kprobe_sysctl_mutex);
957
958         return ret;
959 }
960 #endif /* CONFIG_SYSCTL */
961
962 /* Put a breakpoint for a probe. */
963 static void __arm_kprobe(struct kprobe *p)
964 {
965         struct kprobe *_p;
966
967         lockdep_assert_held(&text_mutex);
968
969         /* Find the overlapping optimized kprobes. */
970         _p = get_optimized_kprobe(p->addr);
971         if (unlikely(_p))
972                 /* Fallback to unoptimized kprobe */
973                 unoptimize_kprobe(_p, true);
974
975         arch_arm_kprobe(p);
976         optimize_kprobe(p);     /* Try to optimize (add kprobe to a list) */
977 }
978
979 /* Remove the breakpoint of a probe. */
980 static void __disarm_kprobe(struct kprobe *p, bool reopt)
981 {
982         struct kprobe *_p;
983
984         lockdep_assert_held(&text_mutex);
985
986         /* Try to unoptimize */
987         unoptimize_kprobe(p, kprobes_all_disarmed);
988
989         if (!kprobe_queued(p)) {
990                 arch_disarm_kprobe(p);
991                 /* If another kprobe was blocked, re-optimize it. */
992                 _p = get_optimized_kprobe(p->addr);
993                 if (unlikely(_p) && reopt)
994                         optimize_kprobe(_p);
995         }
996         /*
997          * TODO: Since unoptimization and real disarming will be done by
998          * the worker thread, we can not check whether another probe are
999          * unoptimized because of this probe here. It should be re-optimized
1000          * by the worker thread.
1001          */
1002 }
1003
1004 #else /* !CONFIG_OPTPROBES */
1005
/*
 * !CONFIG_OPTPROBES fallbacks: the optimization hooks expand to empty
 * statements, arming/disarming maps directly onto the arch primitives, and
 * "disarmed" is equivalent to "disabled".
 */
#define optimize_kprobe(p)			do {} while (0)
#define unoptimize_kprobe(p, f)			do {} while (0)
#define kill_optimized_kprobe(p)		do {} while (0)
#define prepare_optimized_kprobe(p)		do {} while (0)
#define try_to_optimize_kprobe(p)		do {} while (0)
#define __arm_kprobe(p)				arch_arm_kprobe(p)
#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
#define kprobe_disarmed(p)			kprobe_disabled(p)
#define wait_for_kprobe_optimizer()		do {} while (0)
1015
/*
 * !CONFIG_OPTPROBES variant: never expected to be called. Warns once and
 * returns -EINVAL.
 */
static int reuse_unused_kprobe(struct kprobe *ap)
{
	/*
	 * If the optimized kprobe is NOT supported, the aggr kprobe is
	 * released at the same time that the last aggregated kprobe is
	 * unregistered.
	 * Thus there should be no chance to reuse unused kprobe.
	 */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
1027
/*
 * !CONFIG_OPTPROBES variant: release the arch-specific state of the
 * aggregator 'p' and free the kprobe itself.
 */
static void free_aggr_kprobe(struct kprobe *p)
{
	arch_remove_kprobe(p);
	kfree(p);
}
1033
/*
 * !CONFIG_OPTPROBES variant: allocate a zeroed plain kprobe to act as the
 * aggregator. 'p' is not used here. Returns NULL on allocation failure.
 */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
}
1038 #endif /* CONFIG_OPTPROBES */
1039
1040 #ifdef CONFIG_KPROBES_ON_FTRACE
/* ftrace_ops used for ftrace-based kprobes without a post_handler. */
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS,
};

/*
 * ftrace_ops used for ftrace-based kprobes with a post_handler; it
 * additionally sets FTRACE_OPS_FL_IPMODIFY.
 */
static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

/*
 * Per-ops counters passed to __arm_kprobe_ftrace() and
 * __disarm_kprobe_ftrace(): incremented on arm, decremented on disarm.
 */
static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
1053
1054 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1055                                int *cnt)
1056 {
1057         int ret = 0;
1058
1059         lockdep_assert_held(&kprobe_mutex);
1060
1061         ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
1062         if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
1063                 return ret;
1064
1065         if (*cnt == 0) {
1066                 ret = register_ftrace_function(ops);
1067                 if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret))
1068                         goto err_ftrace;
1069         }
1070
1071         (*cnt)++;
1072         return ret;
1073
1074 err_ftrace:
1075         /*
1076          * At this point, sinec ops is not registered, we should be sefe from
1077          * registering empty filter.
1078          */
1079         ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1080         return ret;
1081 }
1082
1083 static int arm_kprobe_ftrace(struct kprobe *p)
1084 {
1085         bool ipmodify = (p->post_handler != NULL);
1086
1087         return __arm_kprobe_ftrace(p,
1088                 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1089                 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1090 }
1091
1092 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1093                                   int *cnt)
1094 {
1095         int ret = 0;
1096
1097         lockdep_assert_held(&kprobe_mutex);
1098
1099         if (*cnt == 1) {
1100                 ret = unregister_ftrace_function(ops);
1101                 if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
1102                         return ret;
1103         }
1104
1105         (*cnt)--;
1106
1107         ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1108         WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
1109                   p->addr, ret);
1110         return ret;
1111 }
1112
1113 static int disarm_kprobe_ftrace(struct kprobe *p)
1114 {
1115         bool ipmodify = (p->post_handler != NULL);
1116
1117         return __disarm_kprobe_ftrace(p,
1118                 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1119                 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1120 }
1121 #else   /* !CONFIG_KPROBES_ON_FTRACE */
/* !CONFIG_KPROBES_ON_FTRACE: ftrace-based arming is unavailable. */
static inline int arm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}

/* !CONFIG_KPROBES_ON_FTRACE: ftrace-based disarming is unavailable. */
static inline int disarm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}
1131 #endif
1132
/*
 * Run the arch-specific preparation for 'p', using the ftrace variant when
 * 'p' is an ftrace-based kprobe. Returns the arch helper's result.
 */
static int prepare_kprobe(struct kprobe *p)
{
	/* Must ensure p->addr is really on ftrace */
	return kprobe_ftrace(p) ? arch_prepare_kprobe_ftrace(p)
				: arch_prepare_kprobe(p);
}
1141
1142 static int arm_kprobe(struct kprobe *kp)
1143 {
1144         if (unlikely(kprobe_ftrace(kp)))
1145                 return arm_kprobe_ftrace(kp);
1146
1147         cpus_read_lock();
1148         mutex_lock(&text_mutex);
1149         __arm_kprobe(kp);
1150         mutex_unlock(&text_mutex);
1151         cpus_read_unlock();
1152
1153         return 0;
1154 }
1155
1156 static int disarm_kprobe(struct kprobe *kp, bool reopt)
1157 {
1158         if (unlikely(kprobe_ftrace(kp)))
1159                 return disarm_kprobe_ftrace(kp);
1160
1161         cpus_read_lock();
1162         mutex_lock(&text_mutex);
1163         __disarm_kprobe(kp, reopt);
1164         mutex_unlock(&text_mutex);
1165         cpus_read_unlock();
1166
1167         return 0;
1168 }
1169
1170 /*
1171  * Aggregate handlers for multiple kprobes support - these handlers
1172  * take care of invoking the individual kprobe handlers on p->list
1173  */
1174 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1175 {
1176         struct kprobe *kp;
1177
1178         list_for_each_entry_rcu(kp, &p->list, list) {
1179                 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
1180                         set_kprobe_instance(kp);
1181                         if (kp->pre_handler(kp, regs))
1182                                 return 1;
1183                 }
1184                 reset_kprobe_instance();
1185         }
1186         return 0;
1187 }
1188 NOKPROBE_SYMBOL(aggr_pre_handler);
1189
1190 static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1191                               unsigned long flags)
1192 {
1193         struct kprobe *kp;
1194
1195         list_for_each_entry_rcu(kp, &p->list, list) {
1196                 if (kp->post_handler && likely(!kprobe_disabled(kp))) {
1197                         set_kprobe_instance(kp);
1198                         kp->post_handler(kp, regs, flags);
1199                         reset_kprobe_instance();
1200                 }
1201         }
1202 }
1203 NOKPROBE_SYMBOL(aggr_post_handler);
1204
1205 /* Walks the list and increments 'nmissed' if 'p' has child probes. */
1206 void kprobes_inc_nmissed_count(struct kprobe *p)
1207 {
1208         struct kprobe *kp;
1209
1210         if (!kprobe_aggrprobe(p)) {
1211                 p->nmissed++;
1212         } else {
1213                 list_for_each_entry_rcu(kp, &p->list, list)
1214                         kp->nmissed++;
1215         }
1216 }
1217 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1218
1219 static void free_rp_inst_rcu(struct rcu_head *head)
1220 {
1221         struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
1222
1223         if (refcount_dec_and_test(&ri->rph->ref))
1224                 kfree(ri->rph);
1225         kfree(ri);
1226 }
1227 NOKPROBE_SYMBOL(free_rp_inst_rcu);
1228
1229 static void recycle_rp_inst(struct kretprobe_instance *ri)
1230 {
1231         struct kretprobe *rp = get_kretprobe(ri);
1232
1233         if (likely(rp))
1234                 freelist_add(&ri->freelist, &rp->freelist);
1235         else
1236                 call_rcu(&ri->rcu, free_rp_inst_rcu);
1237 }
1238 NOKPROBE_SYMBOL(recycle_rp_inst);
1239
/*
 * Placeholder kprobe installed as 'current_kprobe' by kprobe_busy_begin().
 * Its address is set to get_kprobe().
 */
static struct kprobe kprobe_busy = {
	.addr = (void *) get_kprobe,
};
1243
1244 void kprobe_busy_begin(void)
1245 {
1246         struct kprobe_ctlblk *kcb;
1247
1248         preempt_disable();
1249         __this_cpu_write(current_kprobe, &kprobe_busy);
1250         kcb = get_kprobe_ctlblk();
1251         kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1252 }
1253
/*
 * Counterpart of kprobe_busy_begin(): clear this CPU's 'current_kprobe'
 * and re-enable preemption.
 */
void kprobe_busy_end(void)
{
	__this_cpu_write(current_kprobe, NULL);
	preempt_enable();
}
1259
1260 /*
1261  * This function is called from delayed_put_task_struct() when a task is
1262  * dead and cleaned up to recycle any kretprobe instances associated with
1263  * this task. These left over instances represent probed functions that
1264  * have been called but will never return.
1265  */
1266 void kprobe_flush_task(struct task_struct *tk)
1267 {
1268         struct kretprobe_instance *ri;
1269         struct llist_node *node;
1270
1271         /* Early boot, not yet initialized. */
1272         if (unlikely(!kprobes_initialized))
1273                 return;
1274
1275         kprobe_busy_begin();
1276
1277         node = __llist_del_all(&tk->kretprobe_instances);
1278         while (node) {
1279                 ri = container_of(node, struct kretprobe_instance, llist);
1280                 node = node->next;
1281
1282                 recycle_rp_inst(ri);
1283         }
1284
1285         kprobe_busy_end();
1286 }
1287 NOKPROBE_SYMBOL(kprobe_flush_task);
1288
1289 static inline void free_rp_inst(struct kretprobe *rp)
1290 {
1291         struct kretprobe_instance *ri;
1292         struct freelist_node *node;
1293         int count = 0;
1294
1295         node = rp->freelist.head;
1296         while (node) {
1297                 ri = container_of(node, struct kretprobe_instance, freelist);
1298                 node = node->next;
1299
1300                 kfree(ri);
1301                 count++;
1302         }
1303
1304         if (refcount_sub_and_test(count, &rp->rph->ref)) {
1305                 kfree(rp->rph);
1306                 rp->rph = NULL;
1307         }
1308 }
1309
1310 /* Add the new probe to 'ap->list'. */
1311 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1312 {
1313         if (p->post_handler)
1314                 unoptimize_kprobe(ap, true);    /* Fall back to normal kprobe */
1315
1316         list_add_rcu(&p->list, &ap->list);
1317         if (p->post_handler && !ap->post_handler)
1318                 ap->post_handler = aggr_post_handler;
1319
1320         return 0;
1321 }
1322
/*
 * Fill in the required fields of the aggregator kprobe. Replace the
 * earlier kprobe in the hlist with the aggregator kprobe.
 *
 * On return 'p' is the only entry on 'ap->list' and 'ap' occupies the hash
 * list position previously held by 'p'.
 */
static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
	/* Copy the insn slot of 'p' to 'ap'. */
	copy_kprobe(p, ap);
	flush_insn_slot(ap);
	ap->addr = p->addr;
	/* Inherit the flags of 'p' except KPROBE_FLAG_OPTIMIZED. */
	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
	ap->pre_handler = aggr_pre_handler;
	/* We don't care about a kprobe which has gone. */
	if (p->post_handler && !kprobe_gone(p))
		ap->post_handler = aggr_post_handler;

	INIT_LIST_HEAD(&ap->list);
	INIT_HLIST_NODE(&ap->hlist);

	list_add_rcu(&p->list, &ap->list);
	hlist_replace_rcu(&p->hlist, &ap->hlist);
}
1345
/*
 * This registers the second or subsequent kprobe at the same address.
 *
 * 'orig_p' is the kprobe already registered at that address; if it is not
 * yet an aggregator, a new one is allocated and takes its place. 'p' is
 * then added to the aggregator's list, and the aggregator is re-armed if it
 * was disabled while 'p' is not.
 *
 * Returns 0 on success, -ENOMEM if the aggregator cannot be allocated, or
 * the error reported by reuse_unused_kprobe(), arch_prepare_kprobe() or
 * arm_kprobe().
 */
static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
{
	int ret = 0;
	struct kprobe *ap = orig_p;

	cpus_read_lock();

	/* For preparing optimization, jump_label_text_reserved() is called */
	jump_label_lock();
	mutex_lock(&text_mutex);

	if (!kprobe_aggrprobe(orig_p)) {
		/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
		ap = alloc_aggr_kprobe(orig_p);
		if (!ap) {
			ret = -ENOMEM;
			goto out;
		}
		init_aggr_kprobe(ap, orig_p);
	} else if (kprobe_unused(ap)) {
		/* This probe is going to die. Rescue it */
		ret = reuse_unused_kprobe(ap);
		if (ret)
			goto out;
	}

	if (kprobe_gone(ap)) {
		/*
		 * Attempting to insert new probe at the same location that
		 * had a probe in the module vaddr area which already
		 * freed. So, the instruction slot has already been
		 * released. We need a new slot for the new probe.
		 */
		ret = arch_prepare_kprobe(ap);
		if (ret)
			/*
			 * Even if fail to allocate new slot, don't need to
			 * free the 'ap'. It will be used next time, or
			 * freed by unregister_kprobe().
			 */
			goto out;

		/* Prepare optimized instructions if possible. */
		prepare_optimized_kprobe(ap);

		/*
		 * Clear gone flag to prevent allocating new slot again, and
		 * set disabled flag because it is not armed yet.
		 */
		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
			    | KPROBE_FLAG_DISABLED;
	}

	/*
	 * Copy the insn slot of 'p' to 'ap'.
	 * NOTE(review): the arguments are in the opposite order to the
	 * copy_kprobe() call in init_aggr_kprobe(), which carries the same
	 * comment; presumably this copies the slot of 'ap' into 'p' -
	 * confirm against copy_kprobe().
	 */
	copy_kprobe(ap, p);
	ret = add_new_kprobe(ap, p);

out:
	mutex_unlock(&text_mutex);
	jump_label_unlock();
	cpus_read_unlock();

	/*
	 * 'ap' is only dereferenced when ret == 0, so the allocation-failure
	 * path (ap == NULL, ret == -ENOMEM) is safe here.
	 */
	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
		ap->flags &= ~KPROBE_FLAG_DISABLED;
		if (!kprobes_all_disarmed) {
			/* Arm the breakpoint again. */
			ret = arm_kprobe(ap);
			if (ret) {
				/* Arming failed: undo the enable and drop 'p'. */
				ap->flags |= KPROBE_FLAG_DISABLED;
				list_del_rcu(&p->list);
				synchronize_rcu();
			}
		}
	}
	return ret;
}
1425
1426 bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1427 {
1428         /* The '__kprobes' functions and entry code must not be probed. */
1429         return addr >= (unsigned long)__kprobes_text_start &&
1430                addr < (unsigned long)__kprobes_text_end;
1431 }
1432
1433 static bool __within_kprobe_blacklist(unsigned long addr)
1434 {
1435         struct kprobe_blacklist_entry *ent;
1436
1437         if (arch_within_kprobe_blacklist(addr))
1438                 return true;
1439         /*
1440          * If 'kprobe_blacklist' is defined, check the address and
1441          * reject any probe registration in the prohibited area.
1442          */
1443         list_for_each_entry(ent, &kprobe_blacklist, list) {
1444                 if (addr >= ent->start_addr && addr < ent->end_addr)
1445                         return true;
1446         }
1447         return false;
1448 }
1449
1450 bool within_kprobe_blacklist(unsigned long addr)
1451 {
1452         char symname[KSYM_NAME_LEN], *p;
1453
1454         if (__within_kprobe_blacklist(addr))
1455                 return true;
1456
1457         /* Check if the address is on a suffixed-symbol */
1458         if (!lookup_symbol_name(addr, symname)) {
1459                 p = strchr(symname, '.');
1460                 if (!p)
1461                         return false;
1462                 *p = '\0';
1463                 addr = (unsigned long)kprobe_lookup_name(symname, 0);
1464                 if (addr)
1465                         return __within_kprobe_blacklist(addr);
1466         }
1467         return false;
1468 }
1469
1470 /*
1471  * If 'symbol_name' is specified, look it up and add the 'offset'
1472  * to it. This way, we can specify a relative address to a symbol.
1473  * This returns encoded errors if it fails to look up symbol or invalid
1474  * combination of parameters.
1475  */
1476 static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
1477                         const char *symbol_name, unsigned int offset)
1478 {
1479         if ((symbol_name && addr) || (!symbol_name && !addr))
1480                 goto invalid;
1481
1482         if (symbol_name) {
1483                 addr = kprobe_lookup_name(symbol_name, offset);
1484                 if (!addr)
1485                         return ERR_PTR(-ENOENT);
1486         }
1487
1488         addr = (kprobe_opcode_t *)(((char *)addr) + offset);
1489         if (addr)
1490                 return addr;
1491
1492 invalid:
1493         return ERR_PTR(-EINVAL);
1494 }
1495
/*
 * Resolve the probe address of 'p' from its 'addr', 'symbol_name' and
 * 'offset' fields. Returns an ERR_PTR() value on failure, as
 * _kprobe_addr() does.
 */
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
{
	return _kprobe_addr(p->addr, p->symbol_name, p->offset);
}
1500
1501 /*
1502  * Check the 'p' is valid and return the aggregator kprobe
1503  * at the same address.
1504  */
1505 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
1506 {
1507         struct kprobe *ap, *list_p;
1508
1509         lockdep_assert_held(&kprobe_mutex);
1510
1511         ap = get_kprobe(p->addr);
1512         if (unlikely(!ap))
1513                 return NULL;
1514
1515         if (p != ap) {
1516                 list_for_each_entry(list_p, &ap->list, list)
1517                         if (list_p == p)
1518                         /* kprobe p is a valid probe */
1519                                 goto valid;
1520                 return NULL;
1521         }
1522 valid:
1523         return ap;
1524 }
1525
1526 /*
1527  * Warn and return error if the kprobe is being re-registered since
1528  * there must be a software bug.
1529  */
1530 static inline int warn_kprobe_rereg(struct kprobe *p)
1531 {
1532         int ret = 0;
1533
1534         mutex_lock(&kprobe_mutex);
1535         if (WARN_ON_ONCE(__get_valid_kprobe(p)))
1536                 ret = -EINVAL;
1537         mutex_unlock(&kprobe_mutex);
1538
1539         return ret;
1540 }
1541
1542 static int check_ftrace_location(struct kprobe *p)
1543 {
1544         unsigned long ftrace_addr;
1545
1546         ftrace_addr = ftrace_location((unsigned long)p->addr);
1547         if (ftrace_addr) {
1548 #ifdef CONFIG_KPROBES_ON_FTRACE
1549                 /* Given address is not on the instruction boundary */
1550                 if ((unsigned long)p->addr != ftrace_addr)
1551                         return -EILSEQ;
1552                 p->flags |= KPROBE_FLAG_FTRACE;
1553 #else   /* !CONFIG_KPROBES_ON_FTRACE */
1554                 return -EINVAL;
1555 #endif
1556         }
1557         return 0;
1558 }
1559
/*
 * Check that p->addr may be probed.
 *
 * On success returns 0 and stores in '*probed_mod' the module containing
 * p->addr (with a reference taken via try_module_get()), or NULL if
 * __module_text_address() finds no module for it.
 *
 * Returns the error from check_ftrace_location(), -EINVAL if the address is
 * not kernel text or is in a reserved/blacklisted area, or -ENOENT if the
 * module reference cannot be taken or the address is in module init text
 * while the module is not in MODULE_STATE_COMING.
 *
 * NOTE(review): on the early-return and -EINVAL paths '*probed_mod' is not
 * written here; register_kprobe() returns without reading it in that case.
 */
static int check_kprobe_address_safe(struct kprobe *p,
				     struct module **probed_mod)
{
	int ret;

	ret = check_ftrace_location(p);
	if (ret)
		return ret;
	jump_label_lock();
	preempt_disable();

	/* Ensure it is not in reserved area nor out of text */
	if (!kernel_text_address((unsigned long) p->addr) ||
	    within_kprobe_blacklist((unsigned long) p->addr) ||
	    jump_label_text_reserved(p->addr, p->addr) ||
	    static_call_text_reserved(p->addr, p->addr) ||
	    find_bug((unsigned long)p->addr)) {
		ret = -EINVAL;
		goto out;
	}

	/* Check if 'p' is probing a module. */
	*probed_mod = __module_text_address((unsigned long) p->addr);
	if (*probed_mod) {
		/*
		 * We must hold a refcount of the probed module while updating
		 * its code to prohibit unexpected unloading.
		 */
		if (unlikely(!try_module_get(*probed_mod))) {
			ret = -ENOENT;
			goto out;
		}

		/*
		 * If the module freed '.init.text', we couldn't insert
		 * kprobes in there.
		 */
		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
		    (*probed_mod)->state != MODULE_STATE_COMING) {
			/* Drop the reference taken just above. */
			module_put(*probed_mod);
			*probed_mod = NULL;
			ret = -ENOENT;
		}
	}
out:
	preempt_enable();
	jump_label_unlock();

	return ret;
}
1610
/*
 * Register the kprobe 'p'.
 *
 * Resolves the probe address, rejects re-registration and unsafe addresses,
 * then either adds 'p' to the kprobe already present at that address (via
 * register_aggr_kprobe()) or prepares, hashes and arms it as a new probe.
 *
 * Returns 0 on success or a negative error code from the failing step.
 */
int register_kprobe(struct kprobe *p)
{
	int ret;
	struct kprobe *old_p;
	struct module *probed_mod;
	kprobe_opcode_t *addr;

	/* Adjust probe address from symbol */
	addr = kprobe_addr(p);
	if (IS_ERR(addr))
		return PTR_ERR(addr);
	p->addr = addr;

	ret = warn_kprobe_rereg(p);
	if (ret)
		return ret;

	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
	p->flags &= KPROBE_FLAG_DISABLED;
	p->nmissed = 0;
	INIT_LIST_HEAD(&p->list);

	ret = check_kprobe_address_safe(p, &probed_mod);
	if (ret)
		return ret;

	mutex_lock(&kprobe_mutex);

	old_p = get_kprobe(p->addr);
	if (old_p) {
		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
		ret = register_aggr_kprobe(old_p, p);
		goto out;
	}

	cpus_read_lock();
	/* Prevent text modification */
	mutex_lock(&text_mutex);
	ret = prepare_kprobe(p);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();
	if (ret)
		goto out;

	INIT_HLIST_NODE(&p->hlist);
	hlist_add_head_rcu(&p->hlist,
		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
		ret = arm_kprobe(p);
		if (ret) {
			/* Arming failed: take 'p' out of the hash table again. */
			hlist_del_rcu(&p->hlist);
			synchronize_rcu();
			goto out;
		}
	}

	/* Try to optimize kprobe */
	try_to_optimize_kprobe(p);
out:
	mutex_unlock(&kprobe_mutex);

	/* Release the reference taken by check_kprobe_address_safe(). */
	if (probed_mod)
		module_put(probed_mod);

	return ret;
}
EXPORT_SYMBOL_GPL(register_kprobe);
1679
1680 /* Check if all probes on the 'ap' are disabled. */
1681 static bool aggr_kprobe_disabled(struct kprobe *ap)
1682 {
1683         struct kprobe *kp;
1684
1685         lockdep_assert_held(&kprobe_mutex);
1686
1687         list_for_each_entry(kp, &ap->list, list)
1688                 if (!kprobe_disabled(kp))
1689                         /*
1690                          * Since there is an active probe on the list,
1691                          * we can't disable this 'ap'.
1692                          */
1693                         return false;
1694
1695         return true;
1696 }
1697
1698 static struct kprobe *__disable_kprobe(struct kprobe *p)
1699 {
1700         struct kprobe *orig_p;
1701         int ret;
1702
1703         lockdep_assert_held(&kprobe_mutex);
1704
1705         /* Get an original kprobe for return */
1706         orig_p = __get_valid_kprobe(p);
1707         if (unlikely(orig_p == NULL))
1708                 return ERR_PTR(-EINVAL);
1709
1710         if (!kprobe_disabled(p)) {
1711                 /* Disable probe if it is a child probe */
1712                 if (p != orig_p)
1713                         p->flags |= KPROBE_FLAG_DISABLED;
1714
1715                 /* Try to disarm and disable this/parent probe */
1716                 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1717                         /*
1718                          * If 'kprobes_all_disarmed' is set, 'orig_p'
1719                          * should have already been disarmed, so
1720                          * skip unneed disarming process.
1721                          */
1722                         if (!kprobes_all_disarmed) {
1723                                 ret = disarm_kprobe(orig_p, true);
1724                                 if (ret) {
1725                                         p->flags &= ~KPROBE_FLAG_DISABLED;
1726                                         return ERR_PTR(ret);
1727                                 }
1728                         }
1729                         orig_p->flags |= KPROBE_FLAG_DISABLED;
1730                 }
1731         }
1732
1733         return orig_p;
1734 }
1735
1736 /*
1737  * Unregister a kprobe without a scheduler synchronization.
1738  */
1739 static int __unregister_kprobe_top(struct kprobe *p)
1740 {
1741         struct kprobe *ap, *list_p;
1742
1743         /* Disable kprobe. This will disarm it if needed. */
1744         ap = __disable_kprobe(p);
1745         if (IS_ERR(ap))
1746                 return PTR_ERR(ap);
1747
1748         if (ap == p)
1749                 /*
1750                  * This probe is an independent(and non-optimized) kprobe
1751                  * (not an aggrprobe). Remove from the hash list.
1752                  */
1753                 goto disarmed;
1754
1755         /* Following process expects this probe is an aggrprobe */
1756         WARN_ON(!kprobe_aggrprobe(ap));
1757
1758         if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
1759                 /*
1760                  * !disarmed could be happen if the probe is under delayed
1761                  * unoptimizing.
1762                  */
1763                 goto disarmed;
1764         else {
1765                 /* If disabling probe has special handlers, update aggrprobe */
1766                 if (p->post_handler && !kprobe_gone(p)) {
1767                         list_for_each_entry(list_p, &ap->list, list) {
1768                                 if ((list_p != p) && (list_p->post_handler))
1769                                         goto noclean;
1770                         }
1771                         ap->post_handler = NULL;
1772                 }
1773 noclean:
1774                 /*
1775                  * Remove from the aggrprobe: this path will do nothing in
1776                  * __unregister_kprobe_bottom().
1777                  */
1778                 list_del_rcu(&p->list);
1779                 if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1780                         /*
1781                          * Try to optimize this probe again, because post
1782                          * handler may have been changed.
1783                          */
1784                         optimize_kprobe(ap);
1785         }
1786         return 0;
1787
1788 disarmed:
1789         hlist_del_rcu(&ap->hlist);
1790         return 0;
1791 }
1792
1793 static void __unregister_kprobe_bottom(struct kprobe *p)
1794 {
1795         struct kprobe *ap;
1796
1797         if (list_empty(&p->list))
1798                 /* This is an independent kprobe */
1799                 arch_remove_kprobe(p);
1800         else if (list_is_singular(&p->list)) {
1801                 /* This is the last child of an aggrprobe */
1802                 ap = list_entry(p->list.next, struct kprobe, list);
1803                 list_del(&p->list);
1804                 free_aggr_kprobe(ap);
1805         }
1806         /* Otherwise, do nothing. */
1807 }
1808
1809 int register_kprobes(struct kprobe **kps, int num)
1810 {
1811         int i, ret = 0;
1812
1813         if (num <= 0)
1814                 return -EINVAL;
1815         for (i = 0; i < num; i++) {
1816                 ret = register_kprobe(kps[i]);
1817                 if (ret < 0) {
1818                         if (i > 0)
1819                                 unregister_kprobes(kps, i);
1820                         break;
1821                 }
1822         }
1823         return ret;
1824 }
1825 EXPORT_SYMBOL_GPL(register_kprobes);
1826
/* Unregister a single kprobe by handing it to unregister_kprobes(). */
void unregister_kprobe(struct kprobe *p)
{
	struct kprobe *single[1] = { p };

	unregister_kprobes(single, 1);
}
EXPORT_SYMBOL_GPL(unregister_kprobe);
1832
1833 void unregister_kprobes(struct kprobe **kps, int num)
1834 {
1835         int i;
1836
1837         if (num <= 0)
1838                 return;
1839         mutex_lock(&kprobe_mutex);
1840         for (i = 0; i < num; i++)
1841                 if (__unregister_kprobe_top(kps[i]) < 0)
1842                         kps[i]->addr = NULL;
1843         mutex_unlock(&kprobe_mutex);
1844
1845         synchronize_rcu();
1846         for (i = 0; i < num; i++)
1847                 if (kps[i]->addr)
1848                         __unregister_kprobe_bottom(kps[i]);
1849 }
1850 EXPORT_SYMBOL_GPL(unregister_kprobes);
1851
1852 int __weak kprobe_exceptions_notify(struct notifier_block *self,
1853                                         unsigned long val, void *data)
1854 {
1855         return NOTIFY_DONE;
1856 }
1857 NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1858
1859 static struct notifier_block kprobe_exceptions_nb = {
1860         .notifier_call = kprobe_exceptions_notify,
1861         .priority = 0x7fffffff /* we need to be notified first */
1862 };
1863
1864 #ifdef CONFIG_KRETPROBES
1865
1866 /* This assumes the 'tsk' is the current task or the is not running. */
1867 static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
1868                                                   struct llist_node **cur)
1869 {
1870         struct kretprobe_instance *ri = NULL;
1871         struct llist_node *node = *cur;
1872
1873         if (!node)
1874                 node = tsk->kretprobe_instances.first;
1875         else
1876                 node = node->next;
1877
1878         while (node) {
1879                 ri = container_of(node, struct kretprobe_instance, llist);
1880                 if (ri->ret_addr != kretprobe_trampoline_addr()) {
1881                         *cur = node;
1882                         return ri->ret_addr;
1883                 }
1884                 node = node->next;
1885         }
1886         return NULL;
1887 }
1888 NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
1889
1890 /**
1891  * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
1892  * @tsk: Target task
1893  * @fp: A frame pointer
1894  * @cur: a storage of the loop cursor llist_node pointer for next call
1895  *
1896  * Find the correct return address modified by a kretprobe on @tsk in unsigned
1897  * long type. If it finds the return address, this returns that address value,
1898  * or this returns 0.
1899  * The @tsk must be 'current' or a task which is not running. @fp is a hint
1900  * to get the currect return address - which is compared with the
1901  * kretprobe_instance::fp field. The @cur is a loop cursor for searching the
1902  * kretprobe return addresses on the @tsk. The '*@cur' should be NULL at the
1903  * first call, but '@cur' itself must NOT NULL.
1904  */
1905 unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
1906                                       struct llist_node **cur)
1907 {
1908         struct kretprobe_instance *ri = NULL;
1909         kprobe_opcode_t *ret;
1910
1911         if (WARN_ON_ONCE(!cur))
1912                 return 0;
1913
1914         do {
1915                 ret = __kretprobe_find_ret_addr(tsk, cur);
1916                 if (!ret)
1917                         break;
1918                 ri = container_of(*cur, struct kretprobe_instance, llist);
1919         } while (ri->fp != fp);
1920
1921         return (unsigned long)ret;
1922 }
1923 NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
1924
1925 void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
1926                                         kprobe_opcode_t *correct_ret_addr)
1927 {
1928         /*
1929          * Do nothing by default. Please fill this to update the fake return
1930          * address on the stack with the correct one on each arch if possible.
1931          */
1932 }
1933
1934 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
1935                                              void *frame_pointer)
1936 {
1937         kprobe_opcode_t *correct_ret_addr = NULL;
1938         struct kretprobe_instance *ri = NULL;
1939         struct llist_node *first, *node = NULL;
1940         struct kretprobe *rp;
1941
1942         /* Find correct address and all nodes for this frame. */
1943         correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
1944         if (!correct_ret_addr) {
1945                 pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
1946                 BUG_ON(1);
1947         }
1948
1949         /*
1950          * Set the return address as the instruction pointer, because if the
1951          * user handler calls stack_trace_save_regs() with this 'regs',
1952          * the stack trace will start from the instruction pointer.
1953          */
1954         instruction_pointer_set(regs, (unsigned long)correct_ret_addr);
1955
1956         /* Run the user handler of the nodes. */
1957         first = current->kretprobe_instances.first;
1958         while (first) {
1959                 ri = container_of(first, struct kretprobe_instance, llist);
1960
1961                 if (WARN_ON_ONCE(ri->fp != frame_pointer))
1962                         break;
1963
1964                 rp = get_kretprobe(ri);
1965                 if (rp && rp->handler) {
1966                         struct kprobe *prev = kprobe_running();
1967
1968                         __this_cpu_write(current_kprobe, &rp->kp);
1969                         ri->ret_addr = correct_ret_addr;
1970                         rp->handler(ri, regs);
1971                         __this_cpu_write(current_kprobe, prev);
1972                 }
1973                 if (first == node)
1974                         break;
1975
1976                 first = first->next;
1977         }
1978
1979         arch_kretprobe_fixup_return(regs, correct_ret_addr);
1980
1981         /* Unlink all nodes for this frame. */
1982         first = current->kretprobe_instances.first;
1983         current->kretprobe_instances.first = node->next;
1984         node->next = NULL;
1985
1986         /* Recycle free instances. */
1987         while (first) {
1988                 ri = container_of(first, struct kretprobe_instance, llist);
1989                 first = first->next;
1990
1991                 recycle_rp_inst(ri);
1992         }
1993
1994         return (unsigned long)correct_ret_addr;
1995 }
1996 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
1997
1998 /*
1999  * This kprobe pre_handler is registered with every kretprobe. When probe
2000  * hits it will set up the return probe.
2001  */
2002 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2003 {
2004         struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2005         struct kretprobe_instance *ri;
2006         struct freelist_node *fn;
2007
2008         fn = freelist_try_get(&rp->freelist);
2009         if (!fn) {
2010                 rp->nmissed++;
2011                 return 0;
2012         }
2013
2014         ri = container_of(fn, struct kretprobe_instance, freelist);
2015
2016         if (rp->entry_handler && rp->entry_handler(ri, regs)) {
2017                 freelist_add(&ri->freelist, &rp->freelist);
2018                 return 0;
2019         }
2020
2021         arch_prepare_kretprobe(ri, regs);
2022
2023         __llist_add(&ri->llist, &current->kretprobe_instances);
2024
2025         return 0;
2026 }
2027 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2028
2029 bool __weak arch_kprobe_on_func_entry(unsigned long offset)
2030 {
2031         return !offset;
2032 }
2033
2034 /**
2035  * kprobe_on_func_entry() -- check whether given address is function entry
2036  * @addr: Target address
2037  * @sym:  Target symbol name
2038  * @offset: The offset from the symbol or the address
2039  *
2040  * This checks whether the given @addr+@offset or @sym+@offset is on the
2041  * function entry address or not.
2042  * This returns 0 if it is the function entry, or -EINVAL if it is not.
2043  * And also it returns -ENOENT if it fails the symbol or address lookup.
2044  * Caller must pass @addr or @sym (either one must be NULL), or this
2045  * returns -EINVAL.
2046  */
2047 int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
2048 {
2049         kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
2050
2051         if (IS_ERR(kp_addr))
2052                 return PTR_ERR(kp_addr);
2053
2054         if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset))
2055                 return -ENOENT;
2056
2057         if (!arch_kprobe_on_func_entry(offset))
2058                 return -EINVAL;
2059
2060         return 0;
2061 }
2062
2063 int register_kretprobe(struct kretprobe *rp)
2064 {
2065         int ret;
2066         struct kretprobe_instance *inst;
2067         int i;
2068         void *addr;
2069
2070         ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
2071         if (ret)
2072                 return ret;
2073
2074         /* If only 'rp->kp.addr' is specified, check reregistering kprobes */
2075         if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
2076                 return -EINVAL;
2077
2078         if (kretprobe_blacklist_size) {
2079                 addr = kprobe_addr(&rp->kp);
2080                 if (IS_ERR(addr))
2081                         return PTR_ERR(addr);
2082
2083                 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2084                         if (kretprobe_blacklist[i].addr == addr)
2085                                 return -EINVAL;
2086                 }
2087         }
2088
2089         rp->kp.pre_handler = pre_handler_kretprobe;
2090         rp->kp.post_handler = NULL;
2091
2092         /* Pre-allocate memory for max kretprobe instances */
2093         if (rp->maxactive <= 0) {
2094 #ifdef CONFIG_PREEMPTION
2095                 rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
2096 #else
2097                 rp->maxactive = num_possible_cpus();
2098 #endif
2099         }
2100         rp->freelist.head = NULL;
2101         rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
2102         if (!rp->rph)
2103                 return -ENOMEM;
2104
2105         rp->rph->rp = rp;
2106         for (i = 0; i < rp->maxactive; i++) {
2107                 inst = kzalloc(sizeof(struct kretprobe_instance) +
2108                                rp->data_size, GFP_KERNEL);
2109                 if (inst == NULL) {
2110                         refcount_set(&rp->rph->ref, i);
2111                         free_rp_inst(rp);
2112                         return -ENOMEM;
2113                 }
2114                 inst->rph = rp->rph;
2115                 freelist_add(&inst->freelist, &rp->freelist);
2116         }
2117         refcount_set(&rp->rph->ref, i);
2118
2119         rp->nmissed = 0;
2120         /* Establish function entry probe point */
2121         ret = register_kprobe(&rp->kp);
2122         if (ret != 0)
2123                 free_rp_inst(rp);
2124         return ret;
2125 }
2126 EXPORT_SYMBOL_GPL(register_kretprobe);
2127
2128 int register_kretprobes(struct kretprobe **rps, int num)
2129 {
2130         int ret = 0, i;
2131
2132         if (num <= 0)
2133                 return -EINVAL;
2134         for (i = 0; i < num; i++) {
2135                 ret = register_kretprobe(rps[i]);
2136                 if (ret < 0) {
2137                         if (i > 0)
2138                                 unregister_kretprobes(rps, i);
2139                         break;
2140                 }
2141         }
2142         return ret;
2143 }
2144 EXPORT_SYMBOL_GPL(register_kretprobes);
2145
/* Unregister a single kretprobe by handing it to unregister_kretprobes(). */
void unregister_kretprobe(struct kretprobe *rp)
{
	struct kretprobe *single[1] = { rp };

	unregister_kretprobes(single, 1);
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2151
2152 void unregister_kretprobes(struct kretprobe **rps, int num)
2153 {
2154         int i;
2155
2156         if (num <= 0)
2157                 return;
2158         mutex_lock(&kprobe_mutex);
2159         for (i = 0; i < num; i++) {
2160                 if (__unregister_kprobe_top(&rps[i]->kp) < 0)
2161                         rps[i]->kp.addr = NULL;
2162                 rps[i]->rph->rp = NULL;
2163         }
2164         mutex_unlock(&kprobe_mutex);
2165
2166         synchronize_rcu();
2167         for (i = 0; i < num; i++) {
2168                 if (rps[i]->kp.addr) {
2169                         __unregister_kprobe_bottom(&rps[i]->kp);
2170                         free_rp_inst(rps[i]);
2171                 }
2172         }
2173 }
2174 EXPORT_SYMBOL_GPL(unregister_kretprobes);
2175
2176 #else /* CONFIG_KRETPROBES */
2177 int register_kretprobe(struct kretprobe *rp)
2178 {
2179         return -EOPNOTSUPP;
2180 }
2181 EXPORT_SYMBOL_GPL(register_kretprobe);
2182
2183 int register_kretprobes(struct kretprobe **rps, int num)
2184 {
2185         return -EOPNOTSUPP;
2186 }
2187 EXPORT_SYMBOL_GPL(register_kretprobes);
2188
/* Stub for kernels built without CONFIG_KRETPROBES: nothing to undo. */
void unregister_kretprobe(struct kretprobe *rp)
{
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2193
/* Stub for kernels built without CONFIG_KRETPROBES: nothing to undo. */
void unregister_kretprobes(struct kretprobe **rps, int num)
{
}
EXPORT_SYMBOL_GPL(unregister_kretprobes);
2198
/*
 * Stub for kernels built without CONFIG_KRETPROBES. The symbol is still
 * referenced elsewhere in this file, e.g. by report_probe().
 */
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
	return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
2204
2205 #endif /* CONFIG_KRETPROBES */
2206
2207 /* Set the kprobe gone and remove its instruction buffer. */
2208 static void kill_kprobe(struct kprobe *p)
2209 {
2210         struct kprobe *kp;
2211
2212         lockdep_assert_held(&kprobe_mutex);
2213
2214         p->flags |= KPROBE_FLAG_GONE;
2215         if (kprobe_aggrprobe(p)) {
2216                 /*
2217                  * If this is an aggr_kprobe, we have to list all the
2218                  * chained probes and mark them GONE.
2219                  */
2220                 list_for_each_entry(kp, &p->list, list)
2221                         kp->flags |= KPROBE_FLAG_GONE;
2222                 p->post_handler = NULL;
2223                 kill_optimized_kprobe(p);
2224         }
2225         /*
2226          * Here, we can remove insn_slot safely, because no thread calls
2227          * the original probed function (which will be freed soon) any more.
2228          */
2229         arch_remove_kprobe(p);
2230
2231         /*
2232          * The module is going away. We should disarm the kprobe which
2233          * is using ftrace, because ftrace framework is still available at
2234          * 'MODULE_STATE_GOING' notification.
2235          */
2236         if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
2237                 disarm_kprobe_ftrace(p);
2238 }
2239
2240 /* Disable one kprobe */
2241 int disable_kprobe(struct kprobe *kp)
2242 {
2243         int ret = 0;
2244         struct kprobe *p;
2245
2246         mutex_lock(&kprobe_mutex);
2247
2248         /* Disable this kprobe */
2249         p = __disable_kprobe(kp);
2250         if (IS_ERR(p))
2251                 ret = PTR_ERR(p);
2252
2253         mutex_unlock(&kprobe_mutex);
2254         return ret;
2255 }
2256 EXPORT_SYMBOL_GPL(disable_kprobe);
2257
2258 /* Enable one kprobe */
2259 int enable_kprobe(struct kprobe *kp)
2260 {
2261         int ret = 0;
2262         struct kprobe *p;
2263
2264         mutex_lock(&kprobe_mutex);
2265
2266         /* Check whether specified probe is valid. */
2267         p = __get_valid_kprobe(kp);
2268         if (unlikely(p == NULL)) {
2269                 ret = -EINVAL;
2270                 goto out;
2271         }
2272
2273         if (kprobe_gone(kp)) {
2274                 /* This kprobe has gone, we couldn't enable it. */
2275                 ret = -EINVAL;
2276                 goto out;
2277         }
2278
2279         if (p != kp)
2280                 kp->flags &= ~KPROBE_FLAG_DISABLED;
2281
2282         if (!kprobes_all_disarmed && kprobe_disabled(p)) {
2283                 p->flags &= ~KPROBE_FLAG_DISABLED;
2284                 ret = arm_kprobe(p);
2285                 if (ret)
2286                         p->flags |= KPROBE_FLAG_DISABLED;
2287         }
2288 out:
2289         mutex_unlock(&kprobe_mutex);
2290         return ret;
2291 }
2292 EXPORT_SYMBOL_GPL(enable_kprobe);
2293
2294 /* Caller must NOT call this in usual path. This is only for critical case */
2295 void dump_kprobe(struct kprobe *kp)
2296 {
2297         pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
2298                kp->symbol_name, kp->offset, kp->addr);
2299 }
2300 NOKPROBE_SYMBOL(dump_kprobe);
2301
2302 int kprobe_add_ksym_blacklist(unsigned long entry)
2303 {
2304         struct kprobe_blacklist_entry *ent;
2305         unsigned long offset = 0, size = 0;
2306
2307         if (!kernel_text_address(entry) ||
2308             !kallsyms_lookup_size_offset(entry, &size, &offset))
2309                 return -EINVAL;
2310
2311         ent = kmalloc(sizeof(*ent), GFP_KERNEL);
2312         if (!ent)
2313                 return -ENOMEM;
2314         ent->start_addr = entry;
2315         ent->end_addr = entry + size;
2316         INIT_LIST_HEAD(&ent->list);
2317         list_add_tail(&ent->list, &kprobe_blacklist);
2318
2319         return (int)size;
2320 }
2321
/*
 * Add all symbols in [@start, @end) to the kprobe blacklist.
 *
 * The walk advances by the size that kprobe_add_ksym_blacklist() reports
 * for each symbol. Returns 0 on success or the first negative error.
 */
int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
{
	unsigned long entry = start;
	int step;

	while (entry < end) {
		step = kprobe_add_ksym_blacklist(entry);
		if (step < 0)
			return step;

		/* A zero size means an alias symbol: still move forward. */
		if (step == 0)
			step = 1;
		entry += step;
	}
	return 0;
}
2337
2338 /* Remove all symbols in given area from kprobe blacklist */
2339 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
2340 {
2341         struct kprobe_blacklist_entry *ent, *n;
2342
2343         list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
2344                 if (ent->start_addr < start || ent->start_addr >= end)
2345                         continue;
2346                 list_del(&ent->list);
2347                 kfree(ent);
2348         }
2349 }
2350
/* Remove the blacklist entry that starts exactly at @entry. */
static void kprobe_remove_ksym_blacklist(unsigned long entry)
{
	unsigned long next = entry + 1;

	kprobe_remove_area_blacklist(entry, next);
}
2355
2356 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
2357                                    char *type, char *sym)
2358 {
2359         return -ERANGE;
2360 }
2361
2362 int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2363                        char *sym)
2364 {
2365 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
2366         if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
2367                 return 0;
2368 #ifdef CONFIG_OPTPROBES
2369         if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
2370                 return 0;
2371 #endif
2372 #endif
2373         if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
2374                 return 0;
2375         return -ERANGE;
2376 }
2377
2378 int __init __weak arch_populate_kprobe_blacklist(void)
2379 {
2380         return 0;
2381 }
2382
2383 /*
2384  * Lookup and populate the kprobe_blacklist.
2385  *
2386  * Unlike the kretprobe blacklist, we'll need to determine
2387  * the range of addresses that belong to the said functions,
2388  * since a kprobe need not necessarily be at the beginning
2389  * of a function.
2390  */
2391 static int __init populate_kprobe_blacklist(unsigned long *start,
2392                                              unsigned long *end)
2393 {
2394         unsigned long entry;
2395         unsigned long *iter;
2396         int ret;
2397
2398         for (iter = start; iter < end; iter++) {
2399                 entry = (unsigned long)dereference_symbol_descriptor((void *)*iter);
2400                 ret = kprobe_add_ksym_blacklist(entry);
2401                 if (ret == -EINVAL)
2402                         continue;
2403                 if (ret < 0)
2404                         return ret;
2405         }
2406
2407         /* Symbols in '__kprobes_text' are blacklisted */
2408         ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
2409                                         (unsigned long)__kprobes_text_end);
2410         if (ret)
2411                 return ret;
2412
2413         /* Symbols in 'noinstr' section are blacklisted */
2414         ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
2415                                         (unsigned long)__noinstr_text_end);
2416
2417         return ret ? : arch_populate_kprobe_blacklist();
2418 }
2419
2420 static void add_module_kprobe_blacklist(struct module *mod)
2421 {
2422         unsigned long start, end;
2423         int i;
2424
2425         if (mod->kprobe_blacklist) {
2426                 for (i = 0; i < mod->num_kprobe_blacklist; i++)
2427                         kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
2428         }
2429
2430         start = (unsigned long)mod->kprobes_text_start;
2431         if (start) {
2432                 end = start + mod->kprobes_text_size;
2433                 kprobe_add_area_blacklist(start, end);
2434         }
2435
2436         start = (unsigned long)mod->noinstr_text_start;
2437         if (start) {
2438                 end = start + mod->noinstr_text_size;
2439                 kprobe_add_area_blacklist(start, end);
2440         }
2441 }
2442
2443 static void remove_module_kprobe_blacklist(struct module *mod)
2444 {
2445         unsigned long start, end;
2446         int i;
2447
2448         if (mod->kprobe_blacklist) {
2449                 for (i = 0; i < mod->num_kprobe_blacklist; i++)
2450                         kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
2451         }
2452
2453         start = (unsigned long)mod->kprobes_text_start;
2454         if (start) {
2455                 end = start + mod->kprobes_text_size;
2456                 kprobe_remove_area_blacklist(start, end);
2457         }
2458
2459         start = (unsigned long)mod->noinstr_text_start;
2460         if (start) {
2461                 end = start + mod->noinstr_text_size;
2462                 kprobe_remove_area_blacklist(start, end);
2463         }
2464 }
2465
2466 /* Module notifier call back, checking kprobes on the module */
2467 static int kprobes_module_callback(struct notifier_block *nb,
2468                                    unsigned long val, void *data)
2469 {
2470         struct module *mod = data;
2471         struct hlist_head *head;
2472         struct kprobe *p;
2473         unsigned int i;
2474         int checkcore = (val == MODULE_STATE_GOING);
2475
2476         if (val == MODULE_STATE_COMING) {
2477                 mutex_lock(&kprobe_mutex);
2478                 add_module_kprobe_blacklist(mod);
2479                 mutex_unlock(&kprobe_mutex);
2480         }
2481         if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
2482                 return NOTIFY_DONE;
2483
2484         /*
2485          * When 'MODULE_STATE_GOING' was notified, both of module '.text' and
2486          * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was
2487          * notified, only '.init.text' section would be freed. We need to
2488          * disable kprobes which have been inserted in the sections.
2489          */
2490         mutex_lock(&kprobe_mutex);
2491         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2492                 head = &kprobe_table[i];
2493                 hlist_for_each_entry(p, head, hlist)
2494                         if (within_module_init((unsigned long)p->addr, mod) ||
2495                             (checkcore &&
2496                              within_module_core((unsigned long)p->addr, mod))) {
2497                                 /*
2498                                  * The vaddr this probe is installed will soon
2499                                  * be vfreed buy not synced to disk. Hence,
2500                                  * disarming the breakpoint isn't needed.
2501                                  *
2502                                  * Note, this will also move any optimized probes
2503                                  * that are pending to be removed from their
2504                                  * corresponding lists to the 'freeing_list' and
2505                                  * will not be touched by the delayed
2506                                  * kprobe_optimizer() work handler.
2507                                  */
2508                                 kill_kprobe(p);
2509                         }
2510         }
2511         if (val == MODULE_STATE_GOING)
2512                 remove_module_kprobe_blacklist(mod);
2513         mutex_unlock(&kprobe_mutex);
2514         return NOTIFY_DONE;
2515 }
2516
2517 static struct notifier_block kprobe_module_nb = {
2518         .notifier_call = kprobes_module_callback,
2519         .priority = 0
2520 };
2521
2522 void kprobe_free_init_mem(void)
2523 {
2524         void *start = (void *)(&__init_begin);
2525         void *end = (void *)(&__init_end);
2526         struct hlist_head *head;
2527         struct kprobe *p;
2528         int i;
2529
2530         mutex_lock(&kprobe_mutex);
2531
2532         /* Kill all kprobes on initmem because the target code has been freed. */
2533         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2534                 head = &kprobe_table[i];
2535                 hlist_for_each_entry(p, head, hlist) {
2536                         if (start <= (void *)p->addr && (void *)p->addr < end)
2537                                 kill_kprobe(p);
2538                 }
2539         }
2540
2541         mutex_unlock(&kprobe_mutex);
2542 }
2543
2544 static int __init init_kprobes(void)
2545 {
2546         int i, err = 0;
2547
2548         /* FIXME allocate the probe table, currently defined statically */
2549         /* initialize all list heads */
2550         for (i = 0; i < KPROBE_TABLE_SIZE; i++)
2551                 INIT_HLIST_HEAD(&kprobe_table[i]);
2552
2553         err = populate_kprobe_blacklist(__start_kprobe_blacklist,
2554                                         __stop_kprobe_blacklist);
2555         if (err)
2556                 pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
2557
2558         if (kretprobe_blacklist_size) {
2559                 /* lookup the function address from its name */
2560                 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2561                         kretprobe_blacklist[i].addr =
2562                                 kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
2563                         if (!kretprobe_blacklist[i].addr)
2564                                 pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
2565                                        kretprobe_blacklist[i].name);
2566                 }
2567         }
2568
2569         /* By default, kprobes are armed */
2570         kprobes_all_disarmed = false;
2571
2572 #if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
2573         /* Init 'kprobe_optinsn_slots' for allocation */
2574         kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
2575 #endif
2576
2577         err = arch_init_kprobes();
2578         if (!err)
2579                 err = register_die_notifier(&kprobe_exceptions_nb);
2580         if (!err)
2581                 err = register_module_notifier(&kprobe_module_nb);
2582
2583         kprobes_initialized = (err == 0);
2584         return err;
2585 }
2586 early_initcall(init_kprobes);
2587
2588 #if defined(CONFIG_OPTPROBES)
2589 static int __init init_optprobes(void)
2590 {
2591         /*
2592          * Enable kprobe optimization - this kicks the optimizer which
2593          * depends on synchronize_rcu_tasks() and ksoftirqd, that is
2594          * not spawned in early initcall. So delay the optimization.
2595          */
2596         optimize_all_kprobes();
2597
2598         return 0;
2599 }
2600 subsys_initcall(init_optprobes);
2601 #endif
2602
2603 #ifdef CONFIG_DEBUG_FS
2604 static void report_probe(struct seq_file *pi, struct kprobe *p,
2605                 const char *sym, int offset, char *modname, struct kprobe *pp)
2606 {
2607         char *kprobe_type;
2608         void *addr = p->addr;
2609
2610         if (p->pre_handler == pre_handler_kretprobe)
2611                 kprobe_type = "r";
2612         else
2613                 kprobe_type = "k";
2614
2615         if (!kallsyms_show_value(pi->file->f_cred))
2616                 addr = NULL;
2617
2618         if (sym)
2619                 seq_printf(pi, "%px  %s  %s+0x%x  %s ",
2620                         addr, kprobe_type, sym, offset,
2621                         (modname ? modname : " "));
2622         else    /* try to use %pS */
2623                 seq_printf(pi, "%px  %s  %pS ",
2624                         addr, kprobe_type, p->addr);
2625
2626         if (!pp)
2627                 pp = p;
2628         seq_printf(pi, "%s%s%s%s\n",
2629                 (kprobe_gone(p) ? "[GONE]" : ""),
2630                 ((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
2631                 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2632                 (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2633 }
2634
2635 static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
2636 {
2637         return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
2638 }
2639
2640 static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2641 {
2642         (*pos)++;
2643         if (*pos >= KPROBE_TABLE_SIZE)
2644                 return NULL;
2645         return pos;
2646 }
2647
/* seq_file stop: kprobe_seq_start() acquires nothing, so nothing is released. */
static void kprobe_seq_stop(struct seq_file *f, void *v)
{
}
2652
2653 static int show_kprobe_addr(struct seq_file *pi, void *v)
2654 {
2655         struct hlist_head *head;
2656         struct kprobe *p, *kp;
2657         const char *sym = NULL;
2658         unsigned int i = *(loff_t *) v;
2659         unsigned long offset = 0;
2660         char *modname, namebuf[KSYM_NAME_LEN];
2661
2662         head = &kprobe_table[i];
2663         preempt_disable();
2664         hlist_for_each_entry_rcu(p, head, hlist) {
2665                 sym = kallsyms_lookup((unsigned long)p->addr, NULL,
2666                                         &offset, &modname, namebuf);
2667                 if (kprobe_aggrprobe(p)) {
2668                         list_for_each_entry_rcu(kp, &p->list, list)
2669                                 report_probe(pi, kp, sym, offset, modname, p);
2670                 } else
2671                         report_probe(pi, p, sym, offset, modname, NULL);
2672         }
2673         preempt_enable();
2674         return 0;
2675 }
2676
2677 static const struct seq_operations kprobes_sops = {
2678         .start = kprobe_seq_start,
2679         .next  = kprobe_seq_next,
2680         .stop  = kprobe_seq_stop,
2681         .show  = show_kprobe_addr
2682 };
2683
2684 DEFINE_SEQ_ATTRIBUTE(kprobes);
2685
2686 /* kprobes/blacklist -- shows which functions can not be probed */
2687 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
2688 {
2689         mutex_lock(&kprobe_mutex);
2690         return seq_list_start(&kprobe_blacklist, *pos);
2691 }
2692
2693 static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
2694 {
2695         return seq_list_next(v, &kprobe_blacklist, pos);
2696 }
2697
2698 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2699 {
2700         struct kprobe_blacklist_entry *ent =
2701                 list_entry(v, struct kprobe_blacklist_entry, list);
2702
2703         /*
2704          * If '/proc/kallsyms' is not showing kernel address, we won't
2705          * show them here either.
2706          */
2707         if (!kallsyms_show_value(m->file->f_cred))
2708                 seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
2709                            (void *)ent->start_addr);
2710         else
2711                 seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
2712                            (void *)ent->end_addr, (void *)ent->start_addr);
2713         return 0;
2714 }
2715
2716 static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
2717 {
2718         mutex_unlock(&kprobe_mutex);
2719 }
2720
2721 static const struct seq_operations kprobe_blacklist_sops = {
2722         .start = kprobe_blacklist_seq_start,
2723         .next  = kprobe_blacklist_seq_next,
2724         .stop  = kprobe_blacklist_seq_stop,
2725         .show  = kprobe_blacklist_seq_show,
2726 };
2727 DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
2728
2729 static int arm_all_kprobes(void)
2730 {
2731         struct hlist_head *head;
2732         struct kprobe *p;
2733         unsigned int i, total = 0, errors = 0;
2734         int err, ret = 0;
2735
2736         mutex_lock(&kprobe_mutex);
2737
2738         /* If kprobes are armed, just return */
2739         if (!kprobes_all_disarmed)
2740                 goto already_enabled;
2741
2742         /*
2743          * optimize_kprobe() called by arm_kprobe() checks
2744          * kprobes_all_disarmed, so set kprobes_all_disarmed before
2745          * arm_kprobe.
2746          */
2747         kprobes_all_disarmed = false;
2748         /* Arming kprobes doesn't optimize kprobe itself */
2749         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2750                 head = &kprobe_table[i];
2751                 /* Arm all kprobes on a best-effort basis */
2752                 hlist_for_each_entry(p, head, hlist) {
2753                         if (!kprobe_disabled(p)) {
2754                                 err = arm_kprobe(p);
2755                                 if (err)  {
2756                                         errors++;
2757                                         ret = err;
2758                                 }
2759                                 total++;
2760                         }
2761                 }
2762         }
2763
2764         if (errors)
2765                 pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
2766                         errors, total);
2767         else
2768                 pr_info("Kprobes globally enabled\n");
2769
2770 already_enabled:
2771         mutex_unlock(&kprobe_mutex);
2772         return ret;
2773 }
2774
2775 static int disarm_all_kprobes(void)
2776 {
2777         struct hlist_head *head;
2778         struct kprobe *p;
2779         unsigned int i, total = 0, errors = 0;
2780         int err, ret = 0;
2781
2782         mutex_lock(&kprobe_mutex);
2783
2784         /* If kprobes are already disarmed, just return */
2785         if (kprobes_all_disarmed) {
2786                 mutex_unlock(&kprobe_mutex);
2787                 return 0;
2788         }
2789
2790         kprobes_all_disarmed = true;
2791
2792         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2793                 head = &kprobe_table[i];
2794                 /* Disarm all kprobes on a best-effort basis */
2795                 hlist_for_each_entry(p, head, hlist) {
2796                         if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
2797                                 err = disarm_kprobe(p, false);
2798                                 if (err) {
2799                                         errors++;
2800                                         ret = err;
2801                                 }
2802                                 total++;
2803                         }
2804                 }
2805         }
2806
2807         if (errors)
2808                 pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
2809                         errors, total);
2810         else
2811                 pr_info("Kprobes globally disabled\n");
2812
2813         mutex_unlock(&kprobe_mutex);
2814
2815         /* Wait for disarming all kprobes by optimizer */
2816         wait_for_kprobe_optimizer();
2817
2818         return ret;
2819 }
2820
2821 /*
2822  * XXX: The debugfs bool file interface doesn't allow for callbacks
2823  * when the bool state is switched. We can reuse that facility when
2824  * available
2825  */
2826 static ssize_t read_enabled_file_bool(struct file *file,
2827                char __user *user_buf, size_t count, loff_t *ppos)
2828 {
2829         char buf[3];
2830
2831         if (!kprobes_all_disarmed)
2832                 buf[0] = '1';
2833         else
2834                 buf[0] = '0';
2835         buf[1] = '\n';
2836         buf[2] = 0x00;
2837         return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
2838 }
2839
2840 static ssize_t write_enabled_file_bool(struct file *file,
2841                const char __user *user_buf, size_t count, loff_t *ppos)
2842 {
2843         bool enable;
2844         int ret;
2845
2846         ret = kstrtobool_from_user(user_buf, count, &enable);
2847         if (ret)
2848                 return ret;
2849
2850         ret = enable ? arm_all_kprobes() : disarm_all_kprobes();
2851         if (ret)
2852                 return ret;
2853
2854         return count;
2855 }
2856
2857 static const struct file_operations fops_kp = {
2858         .read =         read_enabled_file_bool,
2859         .write =        write_enabled_file_bool,
2860         .llseek =       default_llseek,
2861 };
2862
2863 static int __init debugfs_kprobe_init(void)
2864 {
2865         struct dentry *dir;
2866
2867         dir = debugfs_create_dir("kprobes", NULL);
2868
2869         debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
2870
2871         debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
2872
2873         debugfs_create_file("blacklist", 0400, dir, NULL,
2874                             &kprobe_blacklist_fops);
2875
2876         return 0;
2877 }
2878
2879 late_initcall(debugfs_kprobe_init);
2880 #endif /* CONFIG_DEBUG_FS */