Linux 6.9-rc1
linux-2.6-microblaze.git: tools/perf/util/thread-stack.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * thread-stack.c: Synthesize a thread's stack using call / return events
4  * Copyright (c) 2014, Intel Corporation.
5  */
6
7 #include <linux/rbtree.h>
8 #include <linux/list.h>
9 #include <linux/log2.h>
10 #include <linux/zalloc.h>
11 #include <errno.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include "thread.h"
15 #include "event.h"
16 #include "machine.h"
17 #include "env.h"
18 #include "debug.h"
19 #include "symbol.h"
20 #include "comm.h"
21 #include "call-path.h"
22 #include "thread-stack.h"
23
24 #define STACK_GROWTH 2048
25
26 /*
27  * State of retpoline detection.
28  *
29  * RETPOLINE_NONE: no retpoline detection
30  * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
31  * X86_RETPOLINE_DETECTED: x86 retpoline detected
32  */
33 enum retpoline_state_t {
34         RETPOLINE_NONE,
35         X86_RETPOLINE_POSSIBLE,
36         X86_RETPOLINE_DETECTED,
37 };
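/*
 * Illustrative summary (see thread_stack__init() and thread_stack__process()
 * below): on x86 the state starts as X86_RETPOLINE_POSSIBLE, moves to
 * X86_RETPOLINE_DETECTED when a 'call' lands inside the symbol it came from
 * (but not at its start), and is put back to X86_RETPOLINE_POSSIBLE on the
 * next sample, so the retpoline-specific 'return' handling applies only to
 * the branch that immediately follows the detected retpoline call.
 */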
38
39 /**
40  * struct thread_stack_entry - thread stack entry.
41  * @ret_addr: return address
42  * @timestamp: timestamp (if known)
43  * @ref: external reference (e.g. db_id of sample)
44  * @branch_count: the branch count when the entry was created
45  * @insn_count: the instruction count when the entry was created
46  * @cyc_count: the cycle count when the entry was created
47  * @db_id: id used for db-export
48  * @cp: call path
49  * @no_call: a 'call' was not seen
50  * @trace_end: a 'call' but trace ended
51  * @non_call: a branch but not a 'call' to the start of a different symbol
52  */
53 struct thread_stack_entry {
54         u64 ret_addr;
55         u64 timestamp;
56         u64 ref;
57         u64 branch_count;
58         u64 insn_count;
59         u64 cyc_count;
60         u64 db_id;
61         struct call_path *cp;
62         bool no_call;
63         bool trace_end;
64         bool non_call;
65 };
66
67 /**
68  * struct thread_stack - thread stack constructed from 'call' and 'return'
69  *                       branch samples.
70  * @stack: array that holds the stack
71  * @cnt: number of entries in the stack
72  * @sz: current maximum stack size
73  * @trace_nr: current trace number
74  * @branch_count: running branch count
75  * @insn_count: running instruction count
76  * @cyc_count: running cycle count
77  * @kernel_start: kernel start address
78  * @last_time: last timestamp
79  * @crp: call/return processor
80  * @comm: current comm
81  * @arr_sz: size of array if this is the first element of an array
82  * @rstate: used to detect retpolines
83  * @br_stack_rb: branch stack (ring buffer)
84  * @br_stack_sz: maximum branch stack size
85  * @br_stack_pos: current position in @br_stack_rb
86  * @mispred_all: mark all branches as mispredicted
87  */
88 struct thread_stack {
89         struct thread_stack_entry *stack;
90         size_t cnt;
91         size_t sz;
92         u64 trace_nr;
93         u64 branch_count;
94         u64 insn_count;
95         u64 cyc_count;
96         u64 kernel_start;
97         u64 last_time;
98         struct call_return_processor *crp;
99         struct comm *comm;
100         unsigned int arr_sz;
101         enum retpoline_state_t rstate;
102         struct branch_stack *br_stack_rb;
103         unsigned int br_stack_sz;
104         unsigned int br_stack_pos;
105         bool mispred_all;
106 };
107
108 /*
109  * Assume pid == tid == 0 identifies the idle task as defined by
110  * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
111  * and therefore requires a stack for each cpu.
112  */
113 static inline bool thread_stack__per_cpu(struct thread *thread)
114 {
115         return !(thread__tid(thread) || thread__pid(thread));
116 }
117
118 static int thread_stack__grow(struct thread_stack *ts)
119 {
120         struct thread_stack_entry *new_stack;
121         size_t sz, new_sz;
122
123         new_sz = ts->sz + STACK_GROWTH;
124         sz = new_sz * sizeof(struct thread_stack_entry);
125
126         new_stack = realloc(ts->stack, sz);
127         if (!new_stack)
128                 return -ENOMEM;
129
130         ts->stack = new_stack;
131         ts->sz = new_sz;
132
133         return 0;
134 }
135
136 static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
137                               struct call_return_processor *crp,
138                               bool callstack, unsigned int br_stack_sz)
139 {
140         int err;
141
142         if (callstack) {
143                 err = thread_stack__grow(ts);
144                 if (err)
145                         return err;
146         }
147
148         if (br_stack_sz) {
149                 size_t sz = sizeof(struct branch_stack);
150
151                 sz += br_stack_sz * sizeof(struct branch_entry);
152                 ts->br_stack_rb = zalloc(sz);
153                 if (!ts->br_stack_rb)
154                         return -ENOMEM;
155                 ts->br_stack_sz = br_stack_sz;
156         }
157
158         if (thread__maps(thread) && maps__machine(thread__maps(thread))) {
159                 struct machine *machine = maps__machine(thread__maps(thread));
160                 const char *arch = perf_env__arch(machine->env);
161
162                 ts->kernel_start = machine__kernel_start(machine);
163                 if (!strcmp(arch, "x86"))
164                         ts->rstate = X86_RETPOLINE_POSSIBLE;
165         } else {
166                 ts->kernel_start = 1ULL << 63;
167         }
168         ts->crp = crp;
169
170         return 0;
171 }
172
173 static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
174                                               struct call_return_processor *crp,
175                                               bool callstack,
176                                               unsigned int br_stack_sz)
177 {
178         struct thread_stack *ts = thread__ts(thread), *new_ts;
179         unsigned int old_sz = ts ? ts->arr_sz : 0;
180         unsigned int new_sz = 1;
181
182         if (thread_stack__per_cpu(thread) && cpu > 0)
183                 new_sz = roundup_pow_of_two(cpu + 1);
184
185         if (!ts || new_sz > old_sz) {
186                 new_ts = calloc(new_sz, sizeof(*ts));
187                 if (!new_ts)
188                         return NULL;
189                 if (ts)
190                         memcpy(new_ts, ts, old_sz * sizeof(*ts));
191                 new_ts->arr_sz = new_sz;
192                 free(thread__ts(thread));
193                 thread__set_ts(thread, new_ts);
194                 ts = new_ts;
195         }
196
197         if (thread_stack__per_cpu(thread) && cpu > 0 &&
198             (unsigned int)cpu < ts->arr_sz)
199                 ts += cpu;
200
201         if (!ts->stack &&
202             thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
203                 return NULL;
204
205         return ts;
206 }
207
208 static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
209 {
210         struct thread_stack *ts = thread__ts(thread);
211
212         if (cpu < 0)
213                 cpu = 0;
214
215         if (!ts || (unsigned int)cpu >= ts->arr_sz)
216                 return NULL;
217
218         ts += cpu;
219
220         if (!ts->stack)
221                 return NULL;
222
223         return ts;
224 }
225
226 static inline struct thread_stack *thread__stack(struct thread *thread,
227                                                     int cpu)
228 {
229         if (!thread)
230                 return NULL;
231
232         if (thread_stack__per_cpu(thread))
233                 return thread__cpu_stack(thread, cpu);
234
235         return thread__ts(thread);
236 }
237
238 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
239                               bool trace_end)
240 {
241         int err = 0;
242
243         if (ts->cnt == ts->sz) {
244                 err = thread_stack__grow(ts);
245                 if (err) {
246                         pr_warning("Out of memory: discarding thread stack\n");
247                         ts->cnt = 0;
248                 }
249         }
250
251         ts->stack[ts->cnt].trace_end = trace_end;
252         ts->stack[ts->cnt++].ret_addr = ret_addr;
253
254         return err;
255 }
256
257 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
258 {
259         size_t i;
260
261         /*
262          * In some cases there may be functions which are not seen to return,
263          * for example when setjmp / longjmp has been used, or when the perf
264          * context switch in the kernel does not stop and start tracing in
265          * exactly the same code path.  When that happens the return address
266          * will be further down the stack.  If the return address is not found
267          * at all, assume the opposite (i.e. this is a return for a call that
268          * wasn't seen for some reason) and leave the stack alone.
269          */
270         for (i = ts->cnt; i; ) {
271                 if (ts->stack[--i].ret_addr == ret_addr) {
272                         ts->cnt = i;
273                         return;
274                 }
275         }
276 }
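/*
 * For example (illustrative): with return addresses A, B, C pushed in that
 * order, thread_stack__pop(ts, B) discards both C and B, leaving only A,
 * whereas popping an address that is not on the stack leaves all three
 * entries in place.
 */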
277
278 static void thread_stack__pop_trace_end(struct thread_stack *ts)
279 {
280         size_t i;
281
282         for (i = ts->cnt; i; ) {
283                 if (ts->stack[--i].trace_end)
284                         ts->cnt = i;
285                 else
286                         return;
287         }
288 }
289
290 static bool thread_stack__in_kernel(struct thread_stack *ts)
291 {
292         if (!ts->cnt)
293                 return false;
294
295         return ts->stack[ts->cnt - 1].cp->in_kernel;
296 }
297
298 static int thread_stack__call_return(struct thread *thread,
299                                      struct thread_stack *ts, size_t idx,
300                                      u64 timestamp, u64 ref, bool no_return)
301 {
302         struct call_return_processor *crp = ts->crp;
303         struct thread_stack_entry *tse;
304         struct call_return cr = {
305                 .thread = thread,
306                 .comm = ts->comm,
307                 .db_id = 0,
308         };
309         u64 *parent_db_id;
310
311         tse = &ts->stack[idx];
312         cr.cp = tse->cp;
313         cr.call_time = tse->timestamp;
314         cr.return_time = timestamp;
315         cr.branch_count = ts->branch_count - tse->branch_count;
316         cr.insn_count = ts->insn_count - tse->insn_count;
317         cr.cyc_count = ts->cyc_count - tse->cyc_count;
318         cr.db_id = tse->db_id;
319         cr.call_ref = tse->ref;
320         cr.return_ref = ref;
321         if (tse->no_call)
322                 cr.flags |= CALL_RETURN_NO_CALL;
323         if (no_return)
324                 cr.flags |= CALL_RETURN_NO_RETURN;
325         if (tse->non_call)
326                 cr.flags |= CALL_RETURN_NON_CALL;
327
328         /*
329          * The parent db_id must be assigned before exporting the child. Note
330          * it is not possible to export the parent first: its information is
331          * not yet complete because its 'return' has not yet been processed.
332          */
333         parent_db_id = idx ? &(tse - 1)->db_id : NULL;
334
335         return crp->process(&cr, parent_db_id, crp->data);
336 }
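/*
 * Note (descriptive): the branch, instruction and cycle counts placed in
 * 'cr' above are deltas, i.e. the running totals now minus the totals
 * recorded when the entry was pushed, so they cover only the lifetime of
 * this particular call.
 */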
337
338 static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
339 {
340         struct call_return_processor *crp = ts->crp;
341         int err;
342
343         if (!crp) {
344                 ts->cnt = 0;
345                 ts->br_stack_pos = 0;
346                 if (ts->br_stack_rb)
347                         ts->br_stack_rb->nr = 0;
348                 return 0;
349         }
350
351         while (ts->cnt) {
352                 err = thread_stack__call_return(thread, ts, --ts->cnt,
353                                                 ts->last_time, 0, true);
354                 if (err) {
355                         pr_err("Error flushing thread stack!\n");
356                         ts->cnt = 0;
357                         return err;
358                 }
359         }
360
361         return 0;
362 }
363
364 int thread_stack__flush(struct thread *thread)
365 {
366         struct thread_stack *ts = thread__ts(thread);
367         unsigned int pos;
368         int err = 0;
369
370         if (ts) {
371                 for (pos = 0; pos < ts->arr_sz; pos++) {
372                         int ret = __thread_stack__flush(thread, ts + pos);
373
374                         if (ret)
375                                 err = ret;
376                 }
377         }
378
379         return err;
380 }
381
382 static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
383                                           u64 from_ip, u64 to_ip)
384 {
385         struct branch_stack *bs = ts->br_stack_rb;
386         struct branch_entry *be;
387
388         if (!ts->br_stack_pos)
389                 ts->br_stack_pos = ts->br_stack_sz;
390
391         ts->br_stack_pos -= 1;
392
393         be              = &bs->entries[ts->br_stack_pos];
394         be->from        = from_ip;
395         be->to          = to_ip;
396         be->flags.value = 0;
397         be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
398         be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
399         /* No support for mispredict */
400         be->flags.mispred = ts->mispred_all;
401
402         if (bs->nr < ts->br_stack_sz)
403                 bs->nr += 1;
404 }
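/*
 * Illustrative example of the ring buffer layout: with br_stack_sz == 3 and
 * branches b1, b2, b3, b4 recorded in that order, the entries are written at
 * positions 2, 1, 0 and then 2 again, so br_stack_pos always indexes the
 * most recent branch and the buffer is read newest-first by
 * thread_stack__br_sample().
 */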
405
406 int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
407                         u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
408                         unsigned int br_stack_sz, bool mispred_all)
409 {
410         struct thread_stack *ts = thread__stack(thread, cpu);
411
412         if (!thread)
413                 return -EINVAL;
414
415         if (!ts) {
416                 ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
417                 if (!ts) {
418                         pr_warning("Out of memory: no thread stack\n");
419                         return -ENOMEM;
420                 }
421                 ts->trace_nr = trace_nr;
422                 ts->mispred_all = mispred_all;
423         }
424
425         /*
426          * When the trace is discontinuous, the trace_nr changes.  In that case
427          * the stack might be completely invalid.  Better to report nothing than
428          * to report something misleading, so flush the stack.
429          */
430         if (trace_nr != ts->trace_nr) {
431                 if (ts->trace_nr)
432                         __thread_stack__flush(thread, ts);
433                 ts->trace_nr = trace_nr;
434         }
435
436         if (br_stack_sz)
437                 thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
438
439         /*
440          * Stop here if thread_stack__process() is in use, or not recording call
441          * stack.
442          */
443         if (ts->crp || !callstack)
444                 return 0;
445
446         if (flags & PERF_IP_FLAG_CALL) {
447                 u64 ret_addr;
448
449                 if (!to_ip)
450                         return 0;
451                 ret_addr = from_ip + insn_len;
452                 if (ret_addr == to_ip)
453                         return 0; /* Zero-length calls are excluded */
454                 return thread_stack__push(ts, ret_addr,
455                                           flags & PERF_IP_FLAG_TRACE_END);
456         } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
457                 /*
458                  * If the caller did not change the trace number (which would
459                  * have flushed the stack) then try to make sense of the stack.
460                  * Possibly, tracing began after returning to the current
461                  * address, so try to pop that. Also, a call made when the
462                  * trace ended is not expected to return, so pop that too.
463                  */
464                 thread_stack__pop(ts, to_ip);
465                 thread_stack__pop_trace_end(ts);
466         } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
467                 thread_stack__pop(ts, to_ip);
468         }
469
470         return 0;
471 }
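/*
 * Rough usage sketch (illustrative, not taken verbatim from any caller): a
 * branch decoder such as Intel PT feeds each decoded branch through this
 * function, e.g.:
 *
 *	err = thread_stack__event(thread, cpu, flags, from_ip, to_ip,
 *				  insn_len, trace_nr, callstack,
 *				  br_stack_sz, mispred_all);
 *
 * When a call/return processor is in use (see thread_stack__process()), only
 * the branch stack is maintained here; the call stack is built there instead.
 */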
472
473 void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
474 {
475         struct thread_stack *ts = thread__stack(thread, cpu);
476
477         if (!ts)
478                 return;
479
480         if (trace_nr != ts->trace_nr) {
481                 if (ts->trace_nr)
482                         __thread_stack__flush(thread, ts);
483                 ts->trace_nr = trace_nr;
484         }
485 }
486
487 static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
488 {
489         __thread_stack__flush(thread, ts);
490         zfree(&ts->stack);
491         zfree(&ts->br_stack_rb);
492 }
493
494 static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
495 {
496         unsigned int arr_sz = ts->arr_sz;
497
498         __thread_stack__free(thread, ts);
499         memset(ts, 0, sizeof(*ts));
500         ts->arr_sz = arr_sz;
501 }
502
503 void thread_stack__free(struct thread *thread)
504 {
505         struct thread_stack *ts = thread__ts(thread);
506         unsigned int pos;
507
508         if (ts) {
509                 for (pos = 0; pos < ts->arr_sz; pos++)
510                         __thread_stack__free(thread, ts + pos);
511                 free(thread__ts(thread));
512                 thread__set_ts(thread, NULL);
513         }
514 }
515
516 static inline u64 callchain_context(u64 ip, u64 kernel_start)
517 {
518         return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
519 }
520
521 void thread_stack__sample(struct thread *thread, int cpu,
522                           struct ip_callchain *chain,
523                           size_t sz, u64 ip, u64 kernel_start)
524 {
525         struct thread_stack *ts = thread__stack(thread, cpu);
526         u64 context = callchain_context(ip, kernel_start);
527         u64 last_context;
528         size_t i, j;
529
530         if (sz < 2) {
531                 chain->nr = 0;
532                 return;
533         }
534
535         chain->ips[0] = context;
536         chain->ips[1] = ip;
537
538         if (!ts) {
539                 chain->nr = 2;
540                 return;
541         }
542
543         last_context = context;
544
545         for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
546                 ip = ts->stack[ts->cnt - j].ret_addr;
547                 context = callchain_context(ip, kernel_start);
548                 if (context != last_context) {
549                         if (i >= sz - 1)
550                                 break;
551                         chain->ips[i++] = context;
552                         last_context = context;
553                 }
554                 chain->ips[i] = ip;
555         }
556
557         chain->nr = i;
558 }
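/*
 * Illustrative example of the resulting layout: for a user space ip with
 * return addresses r1 (newest) and r2 on the stack, the chain becomes
 * { PERF_CONTEXT_USER, ip, r1, r2 }, with an extra PERF_CONTEXT_KERNEL or
 * PERF_CONTEXT_USER marker inserted wherever consecutive addresses cross
 * the kernel boundary.
 */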
559
560 /*
561  * Hardware sample records, created some time after the event occurred, need to
562  * have subsequent addresses removed from the call chain.
563  */
564 void thread_stack__sample_late(struct thread *thread, int cpu,
565                                struct ip_callchain *chain, size_t sz,
566                                u64 sample_ip, u64 kernel_start)
567 {
568         struct thread_stack *ts = thread__stack(thread, cpu);
569         u64 sample_context = callchain_context(sample_ip, kernel_start);
570         u64 last_context, context, ip;
571         size_t nr = 0, j;
572
573         if (sz < 2) {
574                 chain->nr = 0;
575                 return;
576         }
577
578         if (!ts)
579                 goto out;
580
581         /*
582          * When tracing kernel space, kernel addresses occur at the top of the
583          * call chain after the event occurred but before tracing stopped.
584          * Skip them.
585          */
586         for (j = 1; j <= ts->cnt; j++) {
587                 ip = ts->stack[ts->cnt - j].ret_addr;
588                 context = callchain_context(ip, kernel_start);
589                 if (context == PERF_CONTEXT_USER ||
590                     (context == sample_context && ip == sample_ip))
591                         break;
592         }
593
594         last_context = sample_ip; /* Use sample_ip as an invalid context */
595
596         for (; nr < sz && j <= ts->cnt; nr++, j++) {
597                 ip = ts->stack[ts->cnt - j].ret_addr;
598                 context = callchain_context(ip, kernel_start);
599                 if (context != last_context) {
600                         if (nr >= sz - 1)
601                                 break;
602                         chain->ips[nr++] = context;
603                         last_context = context;
604                 }
605                 chain->ips[nr] = ip;
606         }
607 out:
608         if (nr) {
609                 chain->nr = nr;
610         } else {
611                 chain->ips[0] = sample_context;
612                 chain->ips[1] = sample_ip;
613                 chain->nr = 2;
614         }
615 }
616
617 void thread_stack__br_sample(struct thread *thread, int cpu,
618                              struct branch_stack *dst, unsigned int sz)
619 {
620         struct thread_stack *ts = thread__stack(thread, cpu);
621         const size_t bsz = sizeof(struct branch_entry);
622         struct branch_stack *src;
623         struct branch_entry *be;
624         unsigned int nr;
625
626         dst->nr = 0;
627
628         if (!ts)
629                 return;
630
631         src = ts->br_stack_rb;
632         if (!src->nr)
633                 return;
634
635         dst->nr = min((unsigned int)src->nr, sz);
636
637         be = &dst->entries[0];
638         nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
639         memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
640
641         if (src->nr >= ts->br_stack_sz) {
642                 sz -= nr;
643                 be = &dst->entries[nr];
644                 nr = min(ts->br_stack_pos, sz);
645                 memcpy(be, &src->entries[0], bsz * nr);
646         }
647 }
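/*
 * Illustrative example: with br_stack_sz == 4 and six branches e1..e6
 * recorded, the ring holds { e4, e3, e6, e5 } at indices 0..3 with
 * br_stack_pos == 2, and the two memcpy()s above produce
 * dst == { e6, e5, e4, e3 }, i.e. newest first, copying from br_stack_pos to
 * the end of the ring and then wrapping to its start.
 */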
648
649 /* Start of user space branch entries */
650 static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
651 {
652         if (!*start)
653                 *start = be->to && be->to < kernel_start;
654
655         return *start;
656 }
657
658 /*
659  * Start of branch entries after the ip fell in between 2 branches, or user
660  * space branch entries.
661  */
662 static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
663                      bool *start, struct branch_entry *nb)
664 {
665         if (!*start) {
666                 *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
667                          be->from < kernel_start ||
668                          (be->to && be->to < kernel_start);
669         }
670
671         return *start;
672 }
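/*
 * E.g. (illustrative): walking the ring from newest to oldest entry, copying
 * starts at the first entry whose 'to' address and the previously visited
 * (newer) entry's 'from' address bracket the sample ip, meaning the ip was
 * executing between those two branches; a branch that touches user space
 * also starts the copy, since the true starting point must have been missed.
 */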
673
674 /*
675  * Hardware sample records, created some time after the event occurred, need to
676  * have subsequent addresses removed from the branch stack.
677  */
678 void thread_stack__br_sample_late(struct thread *thread, int cpu,
679                                   struct branch_stack *dst, unsigned int sz,
680                                   u64 ip, u64 kernel_start)
681 {
682         struct thread_stack *ts = thread__stack(thread, cpu);
683         struct branch_entry *d, *s, *spos, *ssz;
684         struct branch_stack *src;
685         unsigned int nr = 0;
686         bool start = false;
687
688         dst->nr = 0;
689
690         if (!ts)
691                 return;
692
693         src = ts->br_stack_rb;
694         if (!src->nr)
695                 return;
696
697         spos = &src->entries[ts->br_stack_pos];
698         ssz  = &src->entries[ts->br_stack_sz];
699
700         d = &dst->entries[0];
701         s = spos;
702
703         if (ip < kernel_start) {
704                 /*
705                  * User space sample: start copying branch entries when the
706                  * branch is in user space.
707                  */
708                 for (s = spos; s < ssz && nr < sz; s++) {
709                         if (us_start(s, kernel_start, &start)) {
710                                 *d++ = *s;
711                                 nr += 1;
712                         }
713                 }
714
715                 if (src->nr >= ts->br_stack_sz) {
716                         for (s = &src->entries[0]; s < spos && nr < sz; s++) {
717                                 if (us_start(s, kernel_start, &start)) {
718                                         *d++ = *s;
719                                         nr += 1;
720                                 }
721                         }
722                 }
723         } else {
724                 struct branch_entry *nb = NULL;
725
726                 /*
727                  * Kernel space sample: start copying branch entries when the ip
728                  * falls in between 2 branches (or the branch is in user space
729                  * because then the start must have been missed).
730                  */
731                 for (s = spos; s < ssz && nr < sz; s++) {
732                         if (ks_start(s, ip, kernel_start, &start, nb)) {
733                                 *d++ = *s;
734                                 nr += 1;
735                         }
736                         nb = s;
737                 }
738
739                 if (src->nr >= ts->br_stack_sz) {
740                         for (s = &src->entries[0]; s < spos && nr < sz; s++) {
741                                 if (ks_start(s, ip, kernel_start, &start, nb)) {
742                                         *d++ = *s;
743                                         nr += 1;
744                                 }
745                                 nb = s;
746                         }
747                 }
748         }
749
750         dst->nr = nr;
751 }
752
753 struct call_return_processor *
754 call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
755                            void *data)
756 {
757         struct call_return_processor *crp;
758
759         crp = zalloc(sizeof(struct call_return_processor));
760         if (!crp)
761                 return NULL;
762         crp->cpr = call_path_root__new();
763         if (!crp->cpr)
764                 goto out_free;
765         crp->process = process;
766         crp->data = data;
767         return crp;
768
769 out_free:
770         free(crp);
771         return NULL;
772 }
773
774 void call_return_processor__free(struct call_return_processor *crp)
775 {
776         if (crp) {
777                 call_path_root__free(crp->cpr);
778                 free(crp);
779         }
780 }
781
782 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
783                                  u64 timestamp, u64 ref, struct call_path *cp,
784                                  bool no_call, bool trace_end)
785 {
786         struct thread_stack_entry *tse;
787         int err;
788
789         if (!cp)
790                 return -ENOMEM;
791
792         if (ts->cnt == ts->sz) {
793                 err = thread_stack__grow(ts);
794                 if (err)
795                         return err;
796         }
797
798         tse = &ts->stack[ts->cnt++];
799         tse->ret_addr = ret_addr;
800         tse->timestamp = timestamp;
801         tse->ref = ref;
802         tse->branch_count = ts->branch_count;
803         tse->insn_count = ts->insn_count;
804         tse->cyc_count = ts->cyc_count;
805         tse->cp = cp;
806         tse->no_call = no_call;
807         tse->trace_end = trace_end;
808         tse->non_call = false;
809         tse->db_id = 0;
810
811         return 0;
812 }
813
814 static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
815                                 u64 ret_addr, u64 timestamp, u64 ref,
816                                 struct symbol *sym)
817 {
818         int err;
819
820         if (!ts->cnt)
821                 return 1;
822
823         if (ts->cnt == 1) {
824                 struct thread_stack_entry *tse = &ts->stack[0];
825
826                 if (tse->cp->sym == sym)
827                         return thread_stack__call_return(thread, ts, --ts->cnt,
828                                                          timestamp, ref, false);
829         }
830
831         if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
832             !ts->stack[ts->cnt - 1].non_call) {
833                 return thread_stack__call_return(thread, ts, --ts->cnt,
834                                                  timestamp, ref, false);
835         } else {
836                 size_t i = ts->cnt - 1;
837
838                 while (i--) {
839                         if (ts->stack[i].ret_addr != ret_addr ||
840                             ts->stack[i].non_call)
841                                 continue;
842                         i += 1;
843                         while (ts->cnt > i) {
844                                 err = thread_stack__call_return(thread, ts,
845                                                                 --ts->cnt,
846                                                                 timestamp, ref,
847                                                                 true);
848                                 if (err)
849                                         return err;
850                         }
851                         return thread_stack__call_return(thread, ts, --ts->cnt,
852                                                          timestamp, ref, false);
853                 }
854         }
855
856         return 1;
857 }
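/*
 * For example (illustrative): with return addresses A, B, C on the stack and
 * a 'return' to B, the C entry is emitted with the CALL_RETURN_NO_RETURN
 * flag (its return was never seen) and then B is emitted as a normal
 * call/return, leaving only A on the stack.
 */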
858
859 static int thread_stack__bottom(struct thread_stack *ts,
860                                 struct perf_sample *sample,
861                                 struct addr_location *from_al,
862                                 struct addr_location *to_al, u64 ref)
863 {
864         struct call_path_root *cpr = ts->crp->cpr;
865         struct call_path *cp;
866         struct symbol *sym;
867         u64 ip;
868
869         if (sample->ip) {
870                 ip = sample->ip;
871                 sym = from_al->sym;
872         } else if (sample->addr) {
873                 ip = sample->addr;
874                 sym = to_al->sym;
875         } else {
876                 return 0;
877         }
878
879         cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
880                                 ts->kernel_start);
881
882         return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
883                                      true, false);
884 }
885
886 static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
887                                 struct perf_sample *sample, u64 ref)
888 {
889         u64 tm = sample->time;
890         int err;
891
892         /* Return to userspace, so pop all kernel addresses */
893         while (thread_stack__in_kernel(ts)) {
894                 err = thread_stack__call_return(thread, ts, --ts->cnt,
895                                                 tm, ref, true);
896                 if (err)
897                         return err;
898         }
899
900         return 0;
901 }
902
903 static int thread_stack__no_call_return(struct thread *thread,
904                                         struct thread_stack *ts,
905                                         struct perf_sample *sample,
906                                         struct addr_location *from_al,
907                                         struct addr_location *to_al, u64 ref)
908 {
909         struct call_path_root *cpr = ts->crp->cpr;
910         struct call_path *root = &cpr->call_path;
911         struct symbol *fsym = from_al->sym;
912         struct symbol *tsym = to_al->sym;
913         struct call_path *cp, *parent;
914         u64 ks = ts->kernel_start;
915         u64 addr = sample->addr;
916         u64 tm = sample->time;
917         u64 ip = sample->ip;
918         int err;
919
920         if (ip >= ks && addr < ks) {
921                 /* Return to userspace, so pop all kernel addresses */
922                 err = thread_stack__pop_ks(thread, ts, sample, ref);
923                 if (err)
924                         return err;
925
926                 /* If the stack is empty, push the userspace address */
927                 if (!ts->cnt) {
928                         cp = call_path__findnew(cpr, root, tsym, addr, ks);
929                         return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
930                                                      false);
931                 }
932         } else if (thread_stack__in_kernel(ts) && ip < ks) {
933                 /* Return to userspace, so pop all kernel addresses */
934                 err = thread_stack__pop_ks(thread, ts, sample, ref);
935                 if (err)
936                         return err;
937         }
938
939         if (ts->cnt)
940                 parent = ts->stack[ts->cnt - 1].cp;
941         else
942                 parent = root;
943
944         if (parent->sym == from_al->sym) {
945                 /*
946                  * At the bottom of the stack, assume the missing 'call' was
947                  * before the trace started. So, pop the current symbol and push
948                  * the 'to' symbol.
949                  */
950                 if (ts->cnt == 1) {
951                         err = thread_stack__call_return(thread, ts, --ts->cnt,
952                                                         tm, ref, false);
953                         if (err)
954                                 return err;
955                 }
956
957                 if (!ts->cnt) {
958                         cp = call_path__findnew(cpr, root, tsym, addr, ks);
959
960                         return thread_stack__push_cp(ts, addr, tm, ref, cp,
961                                                      true, false);
962                 }
963
964                 /*
965                  * Otherwise assume the 'return' is being used as a jump (e.g.
966                  * retpoline) and just push the 'to' symbol.
967                  */
968                 cp = call_path__findnew(cpr, parent, tsym, addr, ks);
969
970                 err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
971                 if (!err)
972                         ts->stack[ts->cnt - 1].non_call = true;
973
974                 return err;
975         }
976
977         /*
978          * Assume 'parent' has not yet returned, so push 'to', and then push and
979          * pop 'from'.
980          */
981
982         cp = call_path__findnew(cpr, parent, tsym, addr, ks);
983
984         err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
985         if (err)
986                 return err;
987
988         cp = call_path__findnew(cpr, cp, fsym, ip, ks);
989
990         err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
991         if (err)
992                 return err;
993
994         return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
995 }
996
997 static int thread_stack__trace_begin(struct thread *thread,
998                                      struct thread_stack *ts, u64 timestamp,
999                                      u64 ref)
1000 {
1001         struct thread_stack_entry *tse;
1002         int err;
1003
1004         if (!ts->cnt)
1005                 return 0;
1006
1007         /* Pop trace end */
1008         tse = &ts->stack[ts->cnt - 1];
1009         if (tse->trace_end) {
1010                 err = thread_stack__call_return(thread, ts, --ts->cnt,
1011                                                 timestamp, ref, false);
1012                 if (err)
1013                         return err;
1014         }
1015
1016         return 0;
1017 }
1018
1019 static int thread_stack__trace_end(struct thread_stack *ts,
1020                                    struct perf_sample *sample, u64 ref)
1021 {
1022         struct call_path_root *cpr = ts->crp->cpr;
1023         struct call_path *cp;
1024         u64 ret_addr;
1025
1026         /* No point having 'trace end' on the bottom of the stack */
1027         if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
1028                 return 0;
1029
1030         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
1031                                 ts->kernel_start);
1032
1033         ret_addr = sample->ip + sample->insn_len;
1034
1035         return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
1036                                      false, true);
1037 }
1038
1039 static bool is_x86_retpoline(const char *name)
1040 {
1041         return strstr(name, "__x86_indirect_thunk_") == name;
1042 }
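/* E.g. matches symbols such as "__x86_indirect_thunk_rax". */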
1043
1044 /*
1045  * x86 retpoline functions pollute the call graph. This function removes them.
1046  * This does not handle function return thunks, nor is there any improvement
1047  * for the handling of inline thunks or extern thunks.
1048  */
1049 static int thread_stack__x86_retpoline(struct thread_stack *ts,
1050                                        struct perf_sample *sample,
1051                                        struct addr_location *to_al)
1052 {
1053         struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
1054         struct call_path_root *cpr = ts->crp->cpr;
1055         struct symbol *sym = tse->cp->sym;
1056         struct symbol *tsym = to_al->sym;
1057         struct call_path *cp;
1058
1059         if (sym && is_x86_retpoline(sym->name)) {
1060                 /*
1061                  * This is an x86 retpoline function. It pollutes the call graph by
1062                  * showing up everywhere there is an indirect branch, but does
1063                  * not itself mean anything. Here the top-of-stack is removed,
1064                  * by decrementing the stack count, and then further down, the
1065                  * resulting top-of-stack is replaced with the actual target.
1066                  * The result is that the retpoline functions will no longer
1067                  * appear in the call graph. Note this only affects the call
1068                  * graph, since all the original branches are left unchanged.
1069                  */
1070                 ts->cnt -= 1;
1071                 sym = ts->stack[ts->cnt - 2].cp->sym;
1072                 if (sym && sym == tsym && to_al->addr != tsym->start) {
1073                         /*
1074                          * Target is back to the middle of the symbol we came
1075                          * from so assume it is an indirect jmp and forget it
1076                          * altogether.
1077                          */
1078                         ts->cnt -= 1;
1079                         return 0;
1080                 }
1081         } else if (sym && sym == tsym) {
1082                 /*
1083                  * Target is back to the symbol we came from so assume it is an
1084                  * indirect jmp and forget it altogether.
1085                  */
1086                 ts->cnt -= 1;
1087                 return 0;
1088         }
1089
1090         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
1091                                 sample->addr, ts->kernel_start);
1092         if (!cp)
1093                 return -ENOMEM;
1094
1095         /* Replace the top-of-stack with the actual target */
1096         ts->stack[ts->cnt - 1].cp = cp;
1097
1098         return 0;
1099 }
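/*
 * Illustrative effect: for an indirect call made through a retpoline, the
 * frame pushed for the 'call' into the thunk and the frame pushed by the
 * call inside the thunk are collapsed into a single frame for the real
 * branch target, so the call graph reads as if the caller had called the
 * target directly.
 */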
1100
1101 int thread_stack__process(struct thread *thread, struct comm *comm,
1102                           struct perf_sample *sample,
1103                           struct addr_location *from_al,
1104                           struct addr_location *to_al, u64 ref,
1105                           struct call_return_processor *crp)
1106 {
1107         struct thread_stack *ts = thread__stack(thread, sample->cpu);
1108         enum retpoline_state_t rstate;
1109         int err = 0;
1110
1111         if (ts && !ts->crp) {
1112                 /* Supersede thread_stack__event() */
1113                 thread_stack__reset(thread, ts);
1114                 ts = NULL;
1115         }
1116
1117         if (!ts) {
1118                 ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
1119                 if (!ts)
1120                         return -ENOMEM;
1121                 ts->comm = comm;
1122         }
1123
1124         rstate = ts->rstate;
1125         if (rstate == X86_RETPOLINE_DETECTED)
1126                 ts->rstate = X86_RETPOLINE_POSSIBLE;
1127
1128         /* Flush stack on exec */
1129         if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) {
1130                 err = __thread_stack__flush(thread, ts);
1131                 if (err)
1132                         return err;
1133                 ts->comm = comm;
1134         }
1135
1136         /* If the stack is empty, put the current symbol on the stack */
1137         if (!ts->cnt) {
1138                 err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
1139                 if (err)
1140                         return err;
1141         }
1142
1143         ts->branch_count += 1;
1144         ts->insn_count += sample->insn_cnt;
1145         ts->cyc_count += sample->cyc_cnt;
1146         ts->last_time = sample->time;
1147
1148         if (sample->flags & PERF_IP_FLAG_CALL) {
1149                 bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
1150                 struct call_path_root *cpr = ts->crp->cpr;
1151                 struct call_path *cp;
1152                 u64 ret_addr;
1153
1154                 if (!sample->ip || !sample->addr)
1155                         return 0;
1156
1157                 ret_addr = sample->ip + sample->insn_len;
1158                 if (ret_addr == sample->addr)
1159                         return 0; /* Zero-length calls are excluded */
1160
1161                 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1162                                         to_al->sym, sample->addr,
1163                                         ts->kernel_start);
1164                 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
1165                                             cp, false, trace_end);
1166
1167                 /*
1168                  * A call to the same symbol but not the start of the symbol,
1169                  * may be the start of a x86 retpoline.
1170                  */
1171                 if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
1172                     from_al->sym == to_al->sym &&
1173                     to_al->addr != to_al->sym->start)
1174                         ts->rstate = X86_RETPOLINE_DETECTED;
1175
1176         } else if (sample->flags & PERF_IP_FLAG_RETURN) {
1177                 if (!sample->addr) {
1178                         u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
1179                                                  PERF_IP_FLAG_INTERRUPT;
1180
1181                         if (!(sample->flags & return_from_kernel))
1182                                 return 0;
1183
1184                         /* Pop kernel stack */
1185                         return thread_stack__pop_ks(thread, ts, sample, ref);
1186                 }
1187
1188                 if (!sample->ip)
1189                         return 0;
1190
1191                 /* x86 retpoline 'return' doesn't match the stack */
1192                 if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
1193                     ts->stack[ts->cnt - 1].ret_addr != sample->addr)
1194                         return thread_stack__x86_retpoline(ts, sample, to_al);
1195
1196                 err = thread_stack__pop_cp(thread, ts, sample->addr,
1197                                            sample->time, ref, from_al->sym);
1198                 if (err) {
1199                         if (err < 0)
1200                                 return err;
1201                         err = thread_stack__no_call_return(thread, ts, sample,
1202                                                            from_al, to_al, ref);
1203                 }
1204         } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
1205                 err = thread_stack__trace_begin(thread, ts, sample->time, ref);
1206         } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
1207                 err = thread_stack__trace_end(ts, sample, ref);
1208         } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
1209                    from_al->sym != to_al->sym && to_al->sym &&
1210                    to_al->addr == to_al->sym->start) {
1211                 struct call_path_root *cpr = ts->crp->cpr;
1212                 struct call_path *cp;
1213
1214                 /*
1215                  * The compiler might optimize a call/ret combination by making
1216                  * it a jmp. Make that visible by recording on the stack a
1217                  * branch to the start of a different symbol. Note, that means
1218                  * when a ret pops the stack, all jmps must be popped off first.
1219                  */
1220                 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1221                                         to_al->sym, sample->addr,
1222                                         ts->kernel_start);
1223                 err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
1224                                             false);
1225                 if (!err)
1226                         ts->stack[ts->cnt - 1].non_call = true;
1227         }
1228
1229         return err;
1230 }
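/*
 * Rough usage sketch (illustrative; my_process and my_data are hypothetical
 * names): a consumer such as perf's db-export path creates a processor with
 * call_return_processor__new(my_process, my_data), then feeds every branch
 * sample through thread_stack__process(); my_process() is called once per
 * completed call/return pair, and thread_stack__flush() /
 * thread_stack__free() emit and release whatever remains at the end of the
 * session.
 */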
1231
1232 size_t thread_stack__depth(struct thread *thread, int cpu)
1233 {
1234         struct thread_stack *ts = thread__stack(thread, cpu);
1235
1236         if (!ts)
1237                 return 0;
1238         return ts->cnt;
1239 }