tools/perf/util/thread-stack.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * thread-stack.c: Synthesize a thread's stack using call / return events
4  * Copyright (c) 2014, Intel Corporation.
5  */
6
7 #include <linux/rbtree.h>
8 #include <linux/list.h>
9 #include <linux/log2.h>
10 #include <linux/zalloc.h>
11 #include <errno.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include "thread.h"
15 #include "event.h"
16 #include "machine.h"
17 #include "env.h"
18 #include "debug.h"
19 #include "symbol.h"
20 #include "comm.h"
21 #include "call-path.h"
22 #include "thread-stack.h"
23
24 #define STACK_GROWTH 2048
25
26 /*
27  * State of retpoline detection.
28  *
29  * RETPOLINE_NONE: no retpoline detection
30  * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
31  * X86_RETPOLINE_DETECTED: x86 retpoline detected
32  */
33 enum retpoline_state_t {
34         RETPOLINE_NONE,
35         X86_RETPOLINE_POSSIBLE,
36         X86_RETPOLINE_DETECTED,
37 };
38
39 /**
40  * struct thread_stack_entry - thread stack entry.
41  * @ret_addr: return address
42  * @timestamp: timestamp (if known)
43  * @ref: external reference (e.g. db_id of sample)
44  * @branch_count: the branch count when the entry was created
45  * @insn_count: the instruction count when the entry was created
46  * @cyc_count: the cycle count when the entry was created
47  * @db_id: id used for db-export
48  * @cp: call path
49  * @no_call: a 'call' was not seen
50  * @trace_end: a 'call' but trace ended
51  * @non_call: a branch but not a 'call' to the start of a different symbol
52  */
53 struct thread_stack_entry {
54         u64 ret_addr;
55         u64 timestamp;
56         u64 ref;
57         u64 branch_count;
58         u64 insn_count;
59         u64 cyc_count;
60         u64 db_id;
61         struct call_path *cp;
62         bool no_call;
63         bool trace_end;
64         bool non_call;
65 };
66
67 /**
68  * struct thread_stack - thread stack constructed from 'call' and 'return'
69  *                       branch samples.
70  * @stack: array that holds the stack
71  * @cnt: number of entries in the stack
72  * @sz: current maximum stack size
73  * @trace_nr: current trace number
74  * @branch_count: running branch count
75  * @insn_count: running instruction count
76  * @cyc_count: running cycle count
77  * @kernel_start: kernel start address
78  * @last_time: last timestamp
79  * @crp: call/return processor
80  * @comm: current comm
81  * @arr_sz: size of array if this is the first element of an array
82  * @rstate: used to detect retpolines
83  * @br_stack_rb: branch stack (ring buffer)
84  * @br_stack_sz: maximum branch stack size
85  * @br_stack_pos: current position in @br_stack_rb
86  * @mispred_all: mark all branches as mispredicted
87  */
88 struct thread_stack {
89         struct thread_stack_entry *stack;
90         size_t cnt;
91         size_t sz;
92         u64 trace_nr;
93         u64 branch_count;
94         u64 insn_count;
95         u64 cyc_count;
96         u64 kernel_start;
97         u64 last_time;
98         struct call_return_processor *crp;
99         struct comm *comm;
100         unsigned int arr_sz;
101         enum retpoline_state_t rstate;
102         struct branch_stack *br_stack_rb;
103         unsigned int br_stack_sz;
104         unsigned int br_stack_pos;
105         bool mispred_all;
106 };
107
108 /*
109  * Assume pid == tid == 0 identifies the idle task as defined by
110  * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
111  * and therefore requires a stack for each cpu.
112  */
113 static inline bool thread_stack__per_cpu(struct thread *thread)
114 {
115         return !(thread->tid || thread->pid_);
116 }
117
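/* Grow the stack array by STACK_GROWTH entries, preserving existing entries. */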
118 static int thread_stack__grow(struct thread_stack *ts)
119 {
120         struct thread_stack_entry *new_stack;
121         size_t sz, new_sz;
122
123         new_sz = ts->sz + STACK_GROWTH;
124         sz = new_sz * sizeof(struct thread_stack_entry);
125
126         new_stack = realloc(ts->stack, sz);
127         if (!new_stack)
128                 return -ENOMEM;
129
130         ts->stack = new_stack;
131         ts->sz = new_sz;
132
133         return 0;
134 }
135
136 static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
137                               struct call_return_processor *crp,
138                               bool callstack, unsigned int br_stack_sz)
139 {
140         int err;
141
142         if (callstack) {
143                 err = thread_stack__grow(ts);
144                 if (err)
145                         return err;
146         }
147
148         if (br_stack_sz) {
149                 size_t sz = sizeof(struct branch_stack);
150
151                 sz += br_stack_sz * sizeof(struct branch_entry);
152                 ts->br_stack_rb = zalloc(sz);
153                 if (!ts->br_stack_rb)
154                         return -ENOMEM;
155                 ts->br_stack_sz = br_stack_sz;
156         }
157
158         if (thread->maps && thread->maps->machine) {
159                 struct machine *machine = thread->maps->machine;
160                 const char *arch = perf_env__arch(machine->env);
161
162                 ts->kernel_start = machine__kernel_start(machine);
163                 if (!strcmp(arch, "x86"))
164                         ts->rstate = X86_RETPOLINE_POSSIBLE;
165         } else {
166                 ts->kernel_start = 1ULL << 63;
167         }
168         ts->crp = crp;
169
170         return 0;
171 }
172
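/*
 * Allocate thread->ts if necessary (for the idle task it is an array indexed
 * by cpu, sized to a power of 2 that covers @cpu), and initialize the element
 * that will be used for @cpu.
 */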
173 static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
174                                               struct call_return_processor *crp,
175                                               bool callstack,
176                                               unsigned int br_stack_sz)
177 {
178         struct thread_stack *ts = thread->ts, *new_ts;
179         unsigned int old_sz = ts ? ts->arr_sz : 0;
180         unsigned int new_sz = 1;
181
182         if (thread_stack__per_cpu(thread) && cpu > 0)
183                 new_sz = roundup_pow_of_two(cpu + 1);
184
185         if (!ts || new_sz > old_sz) {
186                 new_ts = calloc(new_sz, sizeof(*ts));
187                 if (!new_ts)
188                         return NULL;
189                 if (ts)
190                         memcpy(new_ts, ts, old_sz * sizeof(*ts));
191                 new_ts->arr_sz = new_sz;
192                 zfree(&thread->ts);
193                 thread->ts = new_ts;
194                 ts = new_ts;
195         }
196
197         if (thread_stack__per_cpu(thread) && cpu > 0 &&
198             (unsigned int)cpu < ts->arr_sz)
199                 ts += cpu;
200
201         if (!ts->stack &&
202             thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
203                 return NULL;
204
205         return ts;
206 }
207
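/* Get the idle task's stack for @cpu, or NULL if it has not been initialized */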
208 static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
209 {
210         struct thread_stack *ts = thread->ts;
211
212         if (cpu < 0)
213                 cpu = 0;
214
215         if (!ts || (unsigned int)cpu >= ts->arr_sz)
216                 return NULL;
217
218         ts += cpu;
219
220         if (!ts->stack)
221                 return NULL;
222
223         return ts;
224 }
225
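/*
 * Look up the stack to use for @thread: one stack per cpu for the idle task
 * (see thread_stack__per_cpu()), otherwise the single stack at thread->ts.
 */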
226 static inline struct thread_stack *thread__stack(struct thread *thread,
227                                                     int cpu)
228 {
229         if (!thread)
230                 return NULL;
231
232         if (thread_stack__per_cpu(thread))
233                 return thread__cpu_stack(thread, cpu);
234
235         return thread->ts;
236 }
237
238 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
239                               bool trace_end)
240 {
241         int err = 0;
242
243         if (ts->cnt == ts->sz) {
244                 err = thread_stack__grow(ts);
245                 if (err) {
246                         pr_warning("Out of memory: discarding thread stack\n");
247                         ts->cnt = 0;
248                 }
249         }
250
251         ts->stack[ts->cnt].trace_end = trace_end;
252         ts->stack[ts->cnt++].ret_addr = ret_addr;
253
254         return err;
255 }
256
257 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
258 {
259         size_t i;
260
261         /*
262          * In some cases there may be functions which are not seen to return,
263          * for example when setjmp / longjmp has been used, or when the perf
264          * context switch in the kernel does not stop and start tracing in
265          * exactly the same code path.  When that happens the return address
266          * will be further down the stack.  If the return address is not found
267          * at all, assume the opposite (i.e. this is a return for a call that
268          * was not seen for some reason) and leave the stack alone.
269          */
270         for (i = ts->cnt; i; ) {
271                 if (ts->stack[--i].ret_addr == ret_addr) {
272                         ts->cnt = i;
273                         return;
274                 }
275         }
276 }
277
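/* Pop consecutive 'trace end' entries from the top of the stack */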
278 static void thread_stack__pop_trace_end(struct thread_stack *ts)
279 {
280         size_t i;
281
282         for (i = ts->cnt; i; ) {
283                 if (ts->stack[--i].trace_end)
284                         ts->cnt = i;
285                 else
286                         return;
287         }
288 }
289
290 static bool thread_stack__in_kernel(struct thread_stack *ts)
291 {
292         if (!ts->cnt)
293                 return false;
294
295         return ts->stack[ts->cnt - 1].cp->in_kernel;
296 }
297
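/*
 * Report one call/return pair to the call/return processor, including the
 * branch, instruction and cycle counts accumulated since the call was pushed.
 */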
298 static int thread_stack__call_return(struct thread *thread,
299                                      struct thread_stack *ts, size_t idx,
300                                      u64 timestamp, u64 ref, bool no_return)
301 {
302         struct call_return_processor *crp = ts->crp;
303         struct thread_stack_entry *tse;
304         struct call_return cr = {
305                 .thread = thread,
306                 .comm = ts->comm,
307                 .db_id = 0,
308         };
309         u64 *parent_db_id;
310
311         tse = &ts->stack[idx];
312         cr.cp = tse->cp;
313         cr.call_time = tse->timestamp;
314         cr.return_time = timestamp;
315         cr.branch_count = ts->branch_count - tse->branch_count;
316         cr.insn_count = ts->insn_count - tse->insn_count;
317         cr.cyc_count = ts->cyc_count - tse->cyc_count;
318         cr.db_id = tse->db_id;
319         cr.call_ref = tse->ref;
320         cr.return_ref = ref;
321         if (tse->no_call)
322                 cr.flags |= CALL_RETURN_NO_CALL;
323         if (no_return)
324                 cr.flags |= CALL_RETURN_NO_RETURN;
325         if (tse->non_call)
326                 cr.flags |= CALL_RETURN_NON_CALL;
327
328         /*
329          * The parent db_id must be assigned before exporting the child. Note
330          * it is not possible to export the parent first: its information is
331          * not complete until its 'return' has been processed.
332          */
333         parent_db_id = idx ? &(tse - 1)->db_id : NULL;
334
335         return crp->process(&cr, parent_db_id, crp->data);
336 }
337
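/*
 * Flush the stack: without a call/return processor simply reset it (and the
 * branch stack ring buffer), otherwise report every remaining entry as a call
 * whose return was not seen.
 */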
338 static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
339 {
340         struct call_return_processor *crp = ts->crp;
341         int err;
342
343         if (!crp) {
344                 ts->cnt = 0;
345                 ts->br_stack_pos = 0;
346                 if (ts->br_stack_rb)
347                         ts->br_stack_rb->nr = 0;
348                 return 0;
349         }
350
351         while (ts->cnt) {
352                 err = thread_stack__call_return(thread, ts, --ts->cnt,
353                                                 ts->last_time, 0, true);
354                 if (err) {
355                         pr_err("Error flushing thread stack!\n");
356                         ts->cnt = 0;
357                         return err;
358                 }
359         }
360
361         return 0;
362 }
363
364 int thread_stack__flush(struct thread *thread)
365 {
366         struct thread_stack *ts = thread->ts;
367         unsigned int pos;
368         int err = 0;
369
370         if (ts) {
371                 for (pos = 0; pos < ts->arr_sz; pos++) {
372                         int ret = __thread_stack__flush(thread, ts + pos);
373
374                         if (ret)
375                                 err = ret;
376                 }
377         }
378
379         return err;
380 }
381
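/*
 * Add a branch to the ring buffer.  Entries are filled backwards from the end
 * of the array, so the newest branch is always at @br_stack_pos and reading
 * forwards (wrapping at @br_stack_sz) gives newest-to-oldest order, which is
 * the order used when copying out a branch stack sample.
 */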
382 static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
383                                           u64 from_ip, u64 to_ip)
384 {
385         struct branch_stack *bs = ts->br_stack_rb;
386         struct branch_entry *be;
387
388         if (!ts->br_stack_pos)
389                 ts->br_stack_pos = ts->br_stack_sz;
390
391         ts->br_stack_pos -= 1;
392
393         be              = &bs->entries[ts->br_stack_pos];
394         be->from        = from_ip;
395         be->to          = to_ip;
396         be->flags.value = 0;
397         be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
398         be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
399         /* No support for mispredict */
400         be->flags.mispred = ts->mispred_all;
401
402         if (bs->nr < ts->br_stack_sz)
403                 bs->nr += 1;
404 }
405
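/*
 * Process one branch for call stack and/or branch stack maintenance.  Typical
 * usage (an illustrative sketch based on how trace decoders such as Intel PT
 * drive this API; the surrounding variables are hypothetical):
 *
 *	// for every decoded branch:
 *	err = thread_stack__event(thread, cpu, flags, from_ip, to_ip, insn_len,
 *				  trace_nr, callstack, br_stack_sz, mispred_all);
 *
 *	// later, when synthesizing a sample that wants a call chain:
 *	thread_stack__sample(thread, cpu, chain, chain_sz, sample_ip,
 *			     kernel_start);
 */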
406 int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
407                         u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
408                         unsigned int br_stack_sz, bool mispred_all)
409 {
410         struct thread_stack *ts = thread__stack(thread, cpu);
411
412         if (!thread)
413                 return -EINVAL;
414
415         if (!ts) {
416                 ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
417                 if (!ts) {
418                         pr_warning("Out of memory: no thread stack\n");
419                         return -ENOMEM;
420                 }
421                 ts->trace_nr = trace_nr;
422                 ts->mispred_all = mispred_all;
423         }
424
425         /*
426          * When the trace is discontinuous, the trace_nr changes.  In that case
427          * the stack might be completely invalid.  Better to report nothing than
428          * to report something misleading, so flush the stack.
429          */
430         if (trace_nr != ts->trace_nr) {
431                 if (ts->trace_nr)
432                         __thread_stack__flush(thread, ts);
433                 ts->trace_nr = trace_nr;
434         }
435
436         if (br_stack_sz)
437                 thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
438
439         /*
440          * Stop here if thread_stack__process() is in use, or if a call stack
441          * is not being recorded.
442          */
443         if (ts->crp || !callstack)
444                 return 0;
445
446         if (flags & PERF_IP_FLAG_CALL) {
447                 u64 ret_addr;
448
449                 if (!to_ip)
450                         return 0;
451                 ret_addr = from_ip + insn_len;
452                 if (ret_addr == to_ip)
453                         return 0; /* Zero-length calls are excluded */
454                 return thread_stack__push(ts, ret_addr,
455                                           flags & PERF_IP_FLAG_TRACE_END);
456         } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
457                 /*
458                  * If the caller did not change the trace number (which would
459                  * have flushed the stack) then try to make sense of the stack.
460                  * Possibly, tracing began after returning to the current
461                  * address, so try to pop that. Also, a call made just before
462                  * the trace ended is not expected to return, so pop that too.
463                  */
464                 thread_stack__pop(ts, to_ip);
465                 thread_stack__pop_trace_end(ts);
466         } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
467                 thread_stack__pop(ts, to_ip);
468         }
469
470         return 0;
471 }
472
473 void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
474 {
475         struct thread_stack *ts = thread__stack(thread, cpu);
476
477         if (!ts)
478                 return;
479
480         if (trace_nr != ts->trace_nr) {
481                 if (ts->trace_nr)
482                         __thread_stack__flush(thread, ts);
483                 ts->trace_nr = trace_nr;
484         }
485 }
486
487 static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
488 {
489         __thread_stack__flush(thread, ts);
490         zfree(&ts->stack);
491         zfree(&ts->br_stack_rb);
492 }
493
494 static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
495 {
496         unsigned int arr_sz = ts->arr_sz;
497
498         __thread_stack__free(thread, ts);
499         memset(ts, 0, sizeof(*ts));
500         ts->arr_sz = arr_sz;
501 }
502
503 void thread_stack__free(struct thread *thread)
504 {
505         struct thread_stack *ts = thread->ts;
506         unsigned int pos;
507
508         if (ts) {
509                 for (pos = 0; pos < ts->arr_sz; pos++)
510                         __thread_stack__free(thread, ts + pos);
511                 zfree(&thread->ts);
512         }
513 }
514
515 static inline u64 callchain_context(u64 ip, u64 kernel_start)
516 {
517         return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
518 }
519
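/*
 * Synthesize a call chain from the current stack: ips[0] is the context
 * (PERF_CONTEXT_USER or PERF_CONTEXT_KERNEL), ips[1] is the sample ip, and the
 * remaining entries are the stacked return addresses, innermost first, with a
 * new context marker inserted whenever the user/kernel context changes.
 */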
520 void thread_stack__sample(struct thread *thread, int cpu,
521                           struct ip_callchain *chain,
522                           size_t sz, u64 ip, u64 kernel_start)
523 {
524         struct thread_stack *ts = thread__stack(thread, cpu);
525         u64 context = callchain_context(ip, kernel_start);
526         u64 last_context;
527         size_t i, j;
528
529         if (sz < 2) {
530                 chain->nr = 0;
531                 return;
532         }
533
534         chain->ips[0] = context;
535         chain->ips[1] = ip;
536
537         if (!ts) {
538                 chain->nr = 2;
539                 return;
540         }
541
542         last_context = context;
543
544         for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
545                 ip = ts->stack[ts->cnt - j].ret_addr;
546                 context = callchain_context(ip, kernel_start);
547                 if (context != last_context) {
548                         if (i >= sz - 1)
549                                 break;
550                         chain->ips[i++] = context;
551                         last_context = context;
552                 }
553                 chain->ips[i] = ip;
554         }
555
556         chain->nr = i;
557 }
558
559 /*
560  * Hardware sample records, created some time after the event occurred, need to
561  * have subsequent addresses removed from the call chain.
562  */
563 void thread_stack__sample_late(struct thread *thread, int cpu,
564                                struct ip_callchain *chain, size_t sz,
565                                u64 sample_ip, u64 kernel_start)
566 {
567         struct thread_stack *ts = thread__stack(thread, cpu);
568         u64 sample_context = callchain_context(sample_ip, kernel_start);
569         u64 last_context, context, ip;
570         size_t nr = 0, j;
571
572         if (sz < 2) {
573                 chain->nr = 0;
574                 return;
575         }
576
577         if (!ts)
578                 goto out;
579
580         /*
581          * When tracing kernel space, kernel addresses occur at the top of the
582          * call chain after the event occurred but before tracing stopped.
583          * Skip them.
584          */
585         for (j = 1; j <= ts->cnt; j++) {
586                 ip = ts->stack[ts->cnt - j].ret_addr;
587                 context = callchain_context(ip, kernel_start);
588                 if (context == PERF_CONTEXT_USER ||
589                     (context == sample_context && ip == sample_ip))
590                         break;
591         }
592
593         last_context = sample_ip; /* Use sample_ip as an invalid context */
594
595         for (; nr < sz && j <= ts->cnt; nr++, j++) {
596                 ip = ts->stack[ts->cnt - j].ret_addr;
597                 context = callchain_context(ip, kernel_start);
598                 if (context != last_context) {
599                         if (nr >= sz - 1)
600                                 break;
601                         chain->ips[nr++] = context;
602                         last_context = context;
603                 }
604                 chain->ips[nr] = ip;
605         }
606 out:
607         if (nr) {
608                 chain->nr = nr;
609         } else {
610                 chain->ips[0] = sample_context;
611                 chain->ips[1] = sample_ip;
612                 chain->nr = 2;
613         }
614 }
615
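/*
 * Copy the branch stack ring buffer into @dst, newest entry first.  When the
 * ring buffer has wrapped, the copy is done in two chunks: from @br_stack_pos
 * to the end of the buffer, then from the start of the buffer.
 */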
616 void thread_stack__br_sample(struct thread *thread, int cpu,
617                              struct branch_stack *dst, unsigned int sz)
618 {
619         struct thread_stack *ts = thread__stack(thread, cpu);
620         const size_t bsz = sizeof(struct branch_entry);
621         struct branch_stack *src;
622         struct branch_entry *be;
623         unsigned int nr;
624
625         dst->nr = 0;
626
627         if (!ts)
628                 return;
629
630         src = ts->br_stack_rb;
631         if (!src->nr)
632                 return;
633
634         dst->nr = min((unsigned int)src->nr, sz);
635
636         be = &dst->entries[0];
637         nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
638         memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
639
640         if (src->nr >= ts->br_stack_sz) {
641                 sz -= nr;
642                 be = &dst->entries[nr];
643                 nr = min(ts->br_stack_pos, sz);
644                 memcpy(be, &src->entries[0], bsz * nr);
645         }
646 }
647
648 /* Start of user space branch entries */
649 static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
650 {
651         if (!*start)
652                 *start = be->to && be->to < kernel_start;
653
654         return *start;
655 }
656
657 /*
658  * Start of branch entries: either after the ip fell in between 2 branches,
659  * or at user space branch entries.
660  */
661 static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
662                      bool *start, struct branch_entry *nb)
663 {
664         if (!*start) {
665                 *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
666                          be->from < kernel_start ||
667                          (be->to && be->to < kernel_start);
668         }
669
670         return *start;
671 }
672
673 /*
674  * Hardware sample records, created some time after the event occurred, need to
675  * have subsequent addresses removed from the branch stack.
676  */
677 void thread_stack__br_sample_late(struct thread *thread, int cpu,
678                                   struct branch_stack *dst, unsigned int sz,
679                                   u64 ip, u64 kernel_start)
680 {
681         struct thread_stack *ts = thread__stack(thread, cpu);
682         struct branch_entry *d, *s, *spos, *ssz;
683         struct branch_stack *src;
684         unsigned int nr = 0;
685         bool start = false;
686
687         dst->nr = 0;
688
689         if (!ts)
690                 return;
691
692         src = ts->br_stack_rb;
693         if (!src->nr)
694                 return;
695
696         spos = &src->entries[ts->br_stack_pos];
697         ssz  = &src->entries[ts->br_stack_sz];
698
699         d = &dst->entries[0];
700         s = spos;
701
702         if (ip < kernel_start) {
703                 /*
704                  * User space sample: start copying branch entries when the
705                  * branch is in user space.
706                  */
707                 for (s = spos; s < ssz && nr < sz; s++) {
708                         if (us_start(s, kernel_start, &start)) {
709                                 *d++ = *s;
710                                 nr += 1;
711                         }
712                 }
713
714                 if (src->nr >= ts->br_stack_sz) {
715                         for (s = &src->entries[0]; s < spos && nr < sz; s++) {
716                                 if (us_start(s, kernel_start, &start)) {
717                                         *d++ = *s;
718                                         nr += 1;
719                                 }
720                         }
721                 }
722         } else {
723                 struct branch_entry *nb = NULL;
724
725                 /*
726                  * Kernel space sample: start copying branch entries when the ip
727                  * falls in between 2 branches (or the branch is in user space
728                  * because then the start must have been missed).
729                  */
730                 for (s = spos; s < ssz && nr < sz; s++) {
731                         if (ks_start(s, ip, kernel_start, &start, nb)) {
732                                 *d++ = *s;
733                                 nr += 1;
734                         }
735                         nb = s;
736                 }
737
738                 if (src->nr >= ts->br_stack_sz) {
739                         for (s = &src->entries[0]; s < spos && nr < sz; s++) {
740                                 if (ks_start(s, ip, kernel_start, &start, nb)) {
741                                         *d++ = *s;
742                                         nr += 1;
743                                 }
744                                 nb = s;
745                         }
746                 }
747         }
748
749         dst->nr = nr;
750 }
751
752 struct call_return_processor *
753 call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
754                            void *data)
755 {
756         struct call_return_processor *crp;
757
758         crp = zalloc(sizeof(struct call_return_processor));
759         if (!crp)
760                 return NULL;
761         crp->cpr = call_path_root__new();
762         if (!crp->cpr)
763                 goto out_free;
764         crp->process = process;
765         crp->data = data;
766         return crp;
767
768 out_free:
769         free(crp);
770         return NULL;
771 }
772
773 void call_return_processor__free(struct call_return_processor *crp)
774 {
775         if (crp) {
776                 call_path_root__free(crp->cpr);
777                 free(crp);
778         }
779 }
780
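/*
 * Push a call, recording its call path and the counts at the time of the call,
 * growing the stack if necessary.
 */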
781 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
782                                  u64 timestamp, u64 ref, struct call_path *cp,
783                                  bool no_call, bool trace_end)
784 {
785         struct thread_stack_entry *tse;
786         int err;
787
788         if (!cp)
789                 return -ENOMEM;
790
791         if (ts->cnt == ts->sz) {
792                 err = thread_stack__grow(ts);
793                 if (err)
794                         return err;
795         }
796
797         tse = &ts->stack[ts->cnt++];
798         tse->ret_addr = ret_addr;
799         tse->timestamp = timestamp;
800         tse->ref = ref;
801         tse->branch_count = ts->branch_count;
802         tse->insn_count = ts->insn_count;
803         tse->cyc_count = ts->cyc_count;
804         tse->cp = cp;
805         tse->no_call = no_call;
806         tse->trace_end = trace_end;
807         tse->non_call = false;
808         tse->db_id = 0;
809
810         return 0;
811 }
812
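/*
 * Pop entries down to the one matching @ret_addr (or, for the last entry,
 * matching @sym) and report them.  Returns negative on error, zero on success,
 * or 1 if no matching entry was found, in which case the caller treats this as
 * a return without a call.
 */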
813 static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
814                                 u64 ret_addr, u64 timestamp, u64 ref,
815                                 struct symbol *sym)
816 {
817         int err;
818
819         if (!ts->cnt)
820                 return 1;
821
822         if (ts->cnt == 1) {
823                 struct thread_stack_entry *tse = &ts->stack[0];
824
825                 if (tse->cp->sym == sym)
826                         return thread_stack__call_return(thread, ts, --ts->cnt,
827                                                          timestamp, ref, false);
828         }
829
830         if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
831             !ts->stack[ts->cnt - 1].non_call) {
832                 return thread_stack__call_return(thread, ts, --ts->cnt,
833                                                  timestamp, ref, false);
834         } else {
835                 size_t i = ts->cnt - 1;
836
837                 while (i--) {
838                         if (ts->stack[i].ret_addr != ret_addr ||
839                             ts->stack[i].non_call)
840                                 continue;
841                         i += 1;
842                         while (ts->cnt > i) {
843                                 err = thread_stack__call_return(thread, ts,
844                                                                 --ts->cnt,
845                                                                 timestamp, ref,
846                                                                 true);
847                                 if (err)
848                                         return err;
849                         }
850                         return thread_stack__call_return(thread, ts, --ts->cnt,
851                                                          timestamp, ref, false);
852                 }
853         }
854
855         return 1;
856 }
857
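/*
 * The stack is empty, so put the current symbol at the bottom, marked as a
 * call that was not seen (because it must have happened before the trace
 * started).
 */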
858 static int thread_stack__bottom(struct thread_stack *ts,
859                                 struct perf_sample *sample,
860                                 struct addr_location *from_al,
861                                 struct addr_location *to_al, u64 ref)
862 {
863         struct call_path_root *cpr = ts->crp->cpr;
864         struct call_path *cp;
865         struct symbol *sym;
866         u64 ip;
867
868         if (sample->ip) {
869                 ip = sample->ip;
870                 sym = from_al->sym;
871         } else if (sample->addr) {
872                 ip = sample->addr;
873                 sym = to_al->sym;
874         } else {
875                 return 0;
876         }
877
878         cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
879                                 ts->kernel_start);
880
881         return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
882                                      true, false);
883 }
884
885 static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
886                                 struct perf_sample *sample, u64 ref)
887 {
888         u64 tm = sample->time;
889         int err;
890
891         /* Return to userspace, so pop all kernel addresses */
892         while (thread_stack__in_kernel(ts)) {
893                 err = thread_stack__call_return(thread, ts, --ts->cnt,
894                                                 tm, ref, true);
895                 if (err)
896                         return err;
897         }
898
899         return 0;
900 }
901
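/*
 * Handle a 'return' for which no matching 'call' was found on the stack,
 * fixing up kernel/user transitions and 'return'-used-as-jump cases (e.g.
 * retpolines) so that the call graph still makes sense.
 */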
902 static int thread_stack__no_call_return(struct thread *thread,
903                                         struct thread_stack *ts,
904                                         struct perf_sample *sample,
905                                         struct addr_location *from_al,
906                                         struct addr_location *to_al, u64 ref)
907 {
908         struct call_path_root *cpr = ts->crp->cpr;
909         struct call_path *root = &cpr->call_path;
910         struct symbol *fsym = from_al->sym;
911         struct symbol *tsym = to_al->sym;
912         struct call_path *cp, *parent;
913         u64 ks = ts->kernel_start;
914         u64 addr = sample->addr;
915         u64 tm = sample->time;
916         u64 ip = sample->ip;
917         int err;
918
919         if (ip >= ks && addr < ks) {
920                 /* Return to userspace, so pop all kernel addresses */
921                 err = thread_stack__pop_ks(thread, ts, sample, ref);
922                 if (err)
923                         return err;
924
925                 /* If the stack is empty, push the userspace address */
926                 if (!ts->cnt) {
927                         cp = call_path__findnew(cpr, root, tsym, addr, ks);
928                         return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
929                                                      false);
930                 }
931         } else if (thread_stack__in_kernel(ts) && ip < ks) {
932                 /* Return to userspace, so pop all kernel addresses */
933                 err = thread_stack__pop_ks(thread, ts, sample, ref);
934                 if (err)
935                         return err;
936         }
937
938         if (ts->cnt)
939                 parent = ts->stack[ts->cnt - 1].cp;
940         else
941                 parent = root;
942
943         if (parent->sym == from_al->sym) {
944                 /*
945                  * At the bottom of the stack, assume the missing 'call' was
946                  * before the trace started. So, pop the current symbol and push
947                  * the 'to' symbol.
948                  */
949                 if (ts->cnt == 1) {
950                         err = thread_stack__call_return(thread, ts, --ts->cnt,
951                                                         tm, ref, false);
952                         if (err)
953                                 return err;
954                 }
955
956                 if (!ts->cnt) {
957                         cp = call_path__findnew(cpr, root, tsym, addr, ks);
958
959                         return thread_stack__push_cp(ts, addr, tm, ref, cp,
960                                                      true, false);
961                 }
962
963                 /*
964                  * Otherwise assume the 'return' is being used as a jump (e.g.
965                  * retpoline) and just push the 'to' symbol.
966                  */
967                 cp = call_path__findnew(cpr, parent, tsym, addr, ks);
968
969                 err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
970                 if (!err)
971                         ts->stack[ts->cnt - 1].non_call = true;
972
973                 return err;
974         }
975
976         /*
977          * Assume 'parent' has not yet returned, so push 'to', and then push and
978          * pop 'from'.
979          */
980
981         cp = call_path__findnew(cpr, parent, tsym, addr, ks);
982
983         err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
984         if (err)
985                 return err;
986
987         cp = call_path__findnew(cpr, cp, fsym, ip, ks);
988
989         err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
990         if (err)
991                 return err;
992
993         return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
994 }
995
996 static int thread_stack__trace_begin(struct thread *thread,
997                                      struct thread_stack *ts, u64 timestamp,
998                                      u64 ref)
999 {
1000         struct thread_stack_entry *tse;
1001         int err;
1002
1003         if (!ts->cnt)
1004                 return 0;
1005
1006         /* Pop trace end */
1007         tse = &ts->stack[ts->cnt - 1];
1008         if (tse->trace_end) {
1009                 err = thread_stack__call_return(thread, ts, --ts->cnt,
1010                                                 timestamp, ref, false);
1011                 if (err)
1012                         return err;
1013         }
1014
1015         return 0;
1016 }
1017
1018 static int thread_stack__trace_end(struct thread_stack *ts,
1019                                    struct perf_sample *sample, u64 ref)
1020 {
1021         struct call_path_root *cpr = ts->crp->cpr;
1022         struct call_path *cp;
1023         u64 ret_addr;
1024
1025         /* No point having 'trace end' on the bottom of the stack */
1026         if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
1027                 return 0;
1028
1029         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
1030                                 ts->kernel_start);
1031
1032         ret_addr = sample->ip + sample->insn_len;
1033
1034         return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
1035                                      false, true);
1036 }
1037
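/* Identify x86 retpoline thunks by symbol name */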
1038 static bool is_x86_retpoline(const char *name)
1039 {
1040         const char *p = strstr(name, "__x86_indirect_thunk_");
1041
1042         return p == name || !strcmp(name, "__indirect_thunk_start");
1043 }
1044
1045 /*
1046  * x86 retpoline functions pollute the call graph. This function removes them.
1047  * This does not handle function return thunks, nor is there any improvement
1048  * for the handling of inline thunks or extern thunks.
1049  */
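/*
 * Illustrative example: a decoded sequence
 *	foo() -> __x86_indirect_thunk_rax -> bar()
 * ends up in the exported call graph as simply foo() calling bar().
 */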
1050 static int thread_stack__x86_retpoline(struct thread_stack *ts,
1051                                        struct perf_sample *sample,
1052                                        struct addr_location *to_al)
1053 {
1054         struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
1055         struct call_path_root *cpr = ts->crp->cpr;
1056         struct symbol *sym = tse->cp->sym;
1057         struct symbol *tsym = to_al->sym;
1058         struct call_path *cp;
1059
1060         if (sym && is_x86_retpoline(sym->name)) {
1061                 /*
1062                  * This is an x86 retpoline function. It pollutes the call graph by
1063                  * showing up everywhere there is an indirect branch, but does
1064                  * not itself mean anything. Here the top-of-stack is removed,
1065                  * by decrementing the stack count, and then further down, the
1066                  * resulting top-of-stack is replaced with the actual target.
1067                  * The result is that the retpoline functions will no longer
1068                  * appear in the call graph. Note this only affects the call
1069                  * graph, since all the original branches are left unchanged.
1070                  */
1071                 ts->cnt -= 1;
1072                 sym = ts->stack[ts->cnt - 2].cp->sym;
1073                 if (sym && sym == tsym && to_al->addr != tsym->start) {
1074                         /*
1075                          * Target is back to the middle of the symbol we came
1076                          * from so assume it is an indirect jmp and forget it
1077                          * altogether.
1078                          */
1079                         ts->cnt -= 1;
1080                         return 0;
1081                 }
1082         } else if (sym && sym == tsym) {
1083                 /*
1084                  * Target is back to the symbol we came from so assume it is an
1085                  * indirect jmp and forget it altogether.
1086                  */
1087                 ts->cnt -= 1;
1088                 return 0;
1089         }
1090
1091         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
1092                                 sample->addr, ts->kernel_start);
1093         if (!cp)
1094                 return -ENOMEM;
1095
1096         /* Replace the top-of-stack with the actual target */
1097         ts->stack[ts->cnt - 1].cp = cp;
1098
1099         return 0;
1100 }
1101
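/*
 * Process one branch sample that has call/return flag information, pushing and
 * popping call paths and reporting completed call/return pairs via @crp.  This
 * supersedes the simpler call stack maintained by thread_stack__event().
 */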
1102 int thread_stack__process(struct thread *thread, struct comm *comm,
1103                           struct perf_sample *sample,
1104                           struct addr_location *from_al,
1105                           struct addr_location *to_al, u64 ref,
1106                           struct call_return_processor *crp)
1107 {
1108         struct thread_stack *ts = thread__stack(thread, sample->cpu);
1109         enum retpoline_state_t rstate;
1110         int err = 0;
1111
1112         if (ts && !ts->crp) {
1113                 /* Supersede thread_stack__event() */
1114                 thread_stack__reset(thread, ts);
1115                 ts = NULL;
1116         }
1117
1118         if (!ts) {
1119                 ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
1120                 if (!ts)
1121                         return -ENOMEM;
1122                 ts->comm = comm;
1123         }
1124
1125         rstate = ts->rstate;
1126         if (rstate == X86_RETPOLINE_DETECTED)
1127                 ts->rstate = X86_RETPOLINE_POSSIBLE;
1128
1129         /* Flush stack on exec */
1130         if (ts->comm != comm && thread->pid_ == thread->tid) {
1131                 err = __thread_stack__flush(thread, ts);
1132                 if (err)
1133                         return err;
1134                 ts->comm = comm;
1135         }
1136
1137         /* If the stack is empty, put the current symbol on the stack */
1138         if (!ts->cnt) {
1139                 err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
1140                 if (err)
1141                         return err;
1142         }
1143
1144         ts->branch_count += 1;
1145         ts->insn_count += sample->insn_cnt;
1146         ts->cyc_count += sample->cyc_cnt;
1147         ts->last_time = sample->time;
1148
1149         if (sample->flags & PERF_IP_FLAG_CALL) {
1150                 bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
1151                 struct call_path_root *cpr = ts->crp->cpr;
1152                 struct call_path *cp;
1153                 u64 ret_addr;
1154
1155                 if (!sample->ip || !sample->addr)
1156                         return 0;
1157
1158                 ret_addr = sample->ip + sample->insn_len;
1159                 if (ret_addr == sample->addr)
1160                         return 0; /* Zero-length calls are excluded */
1161
1162                 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1163                                         to_al->sym, sample->addr,
1164                                         ts->kernel_start);
1165                 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
1166                                             cp, false, trace_end);
1167
1168                 /*
1169                  * A call to the same symbol, but not to the start of the
1170                  * symbol, may be the start of an x86 retpoline.
1171                  */
1172                 if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
1173                     from_al->sym == to_al->sym &&
1174                     to_al->addr != to_al->sym->start)
1175                         ts->rstate = X86_RETPOLINE_DETECTED;
1176
1177         } else if (sample->flags & PERF_IP_FLAG_RETURN) {
1178                 if (!sample->addr) {
1179                         u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
1180                                                  PERF_IP_FLAG_INTERRUPT;
1181
1182                         if (!(sample->flags & return_from_kernel))
1183                                 return 0;
1184
1185                         /* Pop kernel stack */
1186                         return thread_stack__pop_ks(thread, ts, sample, ref);
1187                 }
1188
1189                 if (!sample->ip)
1190                         return 0;
1191
1192                 /* x86 retpoline 'return' doesn't match the stack */
1193                 if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
1194                     ts->stack[ts->cnt - 1].ret_addr != sample->addr)
1195                         return thread_stack__x86_retpoline(ts, sample, to_al);
1196
1197                 err = thread_stack__pop_cp(thread, ts, sample->addr,
1198                                            sample->time, ref, from_al->sym);
1199                 if (err) {
1200                         if (err < 0)
1201                                 return err;
1202                         err = thread_stack__no_call_return(thread, ts, sample,
1203                                                            from_al, to_al, ref);
1204                 }
1205         } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
1206                 err = thread_stack__trace_begin(thread, ts, sample->time, ref);
1207         } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
1208                 err = thread_stack__trace_end(ts, sample, ref);
1209         } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
1210                    from_al->sym != to_al->sym && to_al->sym &&
1211                    to_al->addr == to_al->sym->start) {
1212                 struct call_path_root *cpr = ts->crp->cpr;
1213                 struct call_path *cp;
1214
1215                 /*
1216                  * The compiler might optimize a call/ret combination by making
1217                  * it a jmp. Make that visible by recording on the stack a
1218                  * branch to the start of a different symbol. Note that this means
1219                  * when a ret pops the stack, all jmps must be popped off first.
1220                  */
1221                 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1222                                         to_al->sym, sample->addr,
1223                                         ts->kernel_start);
1224                 err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
1225                                             false);
1226                 if (!err)
1227                         ts->stack[ts->cnt - 1].non_call = true;
1228         }
1229
1230         return err;
1231 }
1232
1233 size_t thread_stack__depth(struct thread *thread, int cpu)
1234 {
1235         struct thread_stack *ts = thread__stack(thread, cpu);
1236
1237         if (!ts)
1238                 return 0;
1239         return ts->cnt;
1240 }