1 // SPDX-License-Identifier: GPL-2.0
5 * Builtin annotate command: Analyze the perf.data input file,
6 * look up and read DSOs and symbol information and display
7 * a histogram of results, along various sorting keys.
11 #include "util/color.h"
12 #include <linux/list.h>
13 #include "util/cache.h"
14 #include <linux/rbtree.h>
15 #include <linux/zalloc.h>
16 #include "util/symbol.h"
18 #include "util/debug.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/annotate.h"
23 #include "util/annotate-data.h"
24 #include "util/event.h"
25 #include <subcmd/parse-options.h>
26 #include "util/parse-events.h"
27 #include "util/sort.h"
28 #include "util/hist.h"
30 #include "util/machine.h"
32 #include "util/session.h"
33 #include "util/tool.h"
34 #include "util/data.h"
35 #include "arch/common.h"
36 #include "util/block-range.h"
37 #include "util/map_symbol.h"
38 #include "util/branch.h"
39 #include "util/util.h"
43 #include <linux/bitmap.h>
44 #include <linux/err.h>
/*
 * Per-invocation state of 'perf annotate': the embedded perf_tool (the
 * sample callback recovers this struct from it with container_of()), the
 * session being read, the output-mode flags, and the filters used later in
 * this file (symbol name, target data type name, CPU bitmap).
 */
46 struct perf_annotate {
47 struct perf_tool tool;
48 struct perf_session *session;
49 #ifdef HAVE_SLANG_SUPPORT
52 bool use_stdio, use_stdio2;
53 #ifdef HAVE_GTK2_SUPPORT
62 const char *sym_hist_filter;
64 const char *target_data_type;
65 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
69 * Given one basic block:
79 * where the horizontal are the branches and the vertical is the executed
80 * block of instructions.
82 * We count, for each 'instruction', the number of blocks that covered it as
83 * well as count the ratio each branch is taken.
85 * We can do this without knowing the actual instruction stream by keeping
86 * track of the address ranges. We break down ranges such that there is no
87 * overlap and iterate from the start until the end.
89 * @acme: once we parse the objdump output _before_ processing the samples,
90 * we can easily fold the branch.cycles IPC bits in.
92 static void process_basic_block(struct addr_map_symbol *start,
93 struct addr_map_symbol *end,
94 struct branch_flags *flags)
96 struct symbol *sym = start->ms.sym;
97 struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
98 struct block_range_iter iter;
99 struct block_range *entry;
100 struct annotated_branch *branch;
103 * Sanity; NULL isn't executable and the CPU cannot execute backwards
105 if (!start->addr || start->addr > end->addr)
108 iter = block_range__create(start->addr, end->addr);
109 if (!block_range_iter__valid(&iter))
112 branch = annotation__get_branch(notes);
115 * First block in range is a branch target.
117 entry = block_range_iter(&iter);
118 assert(entry->is_target);
122 entry = block_range_iter(&iter);
128 branch->max_coverage = max(branch->max_coverage, entry->coverage);
130 } while (block_range_iter__next(&iter));
133 * Last block in range is a branch.
135 entry = block_range_iter(&iter);
136 assert(entry->is_branch);
138 if (flags->predicted)
/*
 * Resolve the sample's branch stack with sample__resolve_bstack() and walk
 * the entries from index bs->nr - 1 down to 0, handing the range between
 * 'prev' and the current entry's 'from' address (plus the entry's flags) to
 * process_basic_block(). 'prev' starts out as NULL.
 */
142 static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
143 struct perf_sample *sample)
145 struct addr_map_symbol *prev = NULL;
146 struct branch_info *bi;
152 bi = sample__resolve_bstack(sample, al);
156 for (i = bs->nr - 1; i >= 0; i--) {
158 * XXX filter against symbol
161 process_basic_block(prev, &bi[i].from, &bi[i].flags);
/*
 * Per-entry callback installed as hist_entry_iter.add_entry_cb by
 * process_branch_callback(): accounts the sample against both ends of the
 * hist entry's branch info, first the 'from' address and then the 'to'
 * address, via addr_map_symbol__inc_samples(). The al, single and arg
 * parameters are unused.
 */
168 static int hist_iter__branch_callback(struct hist_entry_iter *iter,
169 struct addr_location *al __maybe_unused,
170 bool single __maybe_unused,
171 void *arg __maybe_unused)
173 struct hist_entry *he = iter->he;
174 struct branch_info *bi;
175 struct perf_sample *sample = iter->sample;
176 struct evsel *evsel = iter->evsel;
179 bi = he->branch_info;
180 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
185 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
/*
 * Branch-stack path for one sample: resolve the sample into a local
 * addr_location, mark the DSO of the resolved map as hit, account branch
 * cycles with hist__account_cycles(), and add hist entries through the
 * hist_iter_branch iterator ops, with hist_iter__branch_callback() as the
 * per-entry hook and 'ann' as its argument. The local addr_location is set
 * up with addr_location__init() and released with addr_location__exit().
 */
191 static int process_branch_callback(struct evsel *evsel,
192 struct perf_sample *sample,
193 struct addr_location *al,
194 struct perf_annotate *ann,
195 struct machine *machine)
197 struct hist_entry_iter iter = {
200 .add_entry_cb = hist_iter__branch_callback,
201 .hide_unresolved = symbol_conf.hide_unresolved,
202 .ops = &hist_iter_branch,
204 struct addr_location a;
207 addr_location__init(&a);
208 if (machine__resolve(machine, &a, sample) < 0) {
219 map__dso(a.map)->hit = 1;
221 hist__account_cycles(sample->branch_stack, al, sample, false, NULL);
223 ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
225 addr_location__exit(&a);
/*
 * True when ui__has_annotation() reports annotation support or when the
 * --stdio2 output mode was requested.
 */
229 static bool has_annotation(struct perf_annotate *ann)
231 return ui__has_annotation() || ann->use_stdio2;
/*
 * Account one resolved sample for 'evsel'.
 *
 * When a symbol filter is set and the branch-stack annotation path is not in
 * effect, samples whose symbol name differs from the filter take the
 * rejection branch (part of that condition is not visible in this view). For
 * such a sample with a resolved symbol, the symbol is erased from the DSO's
 * symbol tree, deleted, and the DSO's find-symbol cache is reset.
 *
 * Otherwise the sample's branch stack is processed, and the sample is added
 * either through process_branch_callback() (perf.data has branch stacks and
 * annotation is available) or as a plain hist entry whose per-address sample
 * count and the hists' sample counter are incremented.
 */
234 static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
235 struct addr_location *al, struct perf_annotate *ann,
236 struct machine *machine)
238 struct hists *hists = evsel__hists(evsel);
239 struct hist_entry *he;
242 if ((!ann->has_br_stack || !has_annotation(ann)) &&
243 ann->sym_hist_filter != NULL &&
245 strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
246 /* We're only interested in a symbol named sym_hist_filter */
248 * FIXME: why isn't this done in the symbol_filter when loading
251 if (al->sym != NULL) {
252 struct dso *dso = map__dso(al->map);
254 rb_erase_cached(&al->sym->rb_node, &dso->symbols);
255 symbol__delete(al->sym);
256 dso__reset_find_symbol_cache(dso);
262 * XXX filtered samples can still have branch entries pointing into our
263 * symbol and are missed.
265 process_branch_stack(sample->branch_stack, al, sample);
267 if (ann->has_br_stack && has_annotation(ann))
268 return process_branch_callback(evsel, sample, al, ann, machine);
270 he = hists__add_entry(hists, al, NULL, NULL, NULL, NULL, sample, true);
274 ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
275 hists__inc_nr_samples(hists, true);
/*
 * perf_tool sample callback. Recovers the enclosing perf_annotate from
 * 'tool', resolves the sample into a local addr_location, applies the
 * optional CPU filter (cpu_list / cpu_bitmap) and passes the sample to
 * evsel__add_sample(). A warning is printed when resolving the sample or
 * incrementing the symbol count fails. The addr_location is released with
 * addr_location__exit().
 */
279 static int process_sample_event(struct perf_tool *tool,
280 union perf_event *event,
281 struct perf_sample *sample,
283 struct machine *machine)
285 struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
286 struct addr_location al;
289 addr_location__init(&al);
290 if (machine__resolve(machine, &al, sample) < 0) {
291 pr_warning("problem processing %d event, skipping it.\n",
297 if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
301 evsel__add_sample(evsel, sample, &al, ann, machine)) {
302 pr_warning("problem incrementing symbol count, "
307 addr_location__exit(&al);
/*
 * Feature-event callback: events whose feat_id is below HEADER_LAST_FEATURE
 * are forwarded to perf_event__process_feature().
 */
311 static int process_feature_event(struct perf_session *session,
312 union perf_event *event)
314 if (event->feat.feat_id < HEADER_LAST_FEATURE)
315 return perf_event__process_feature(session, event);
/*
 * Print the annotation of a hist entry's map/symbol on the terminal:
 * symbol__tty_annotate() by default, symbol__tty_annotate2() when --stdio2
 * was selected.
 */
319 static int hist_entry__tty_annotate(struct hist_entry *he,
321 struct perf_annotate *ann)
323 if (!ann->use_stdio2)
324 return symbol__tty_annotate(&he->ms, evsel);
326 return symbol__tty_annotate2(&he->ms, evsel);
/*
 * Print the header of a data-type annotation for one hist entry: the type
 * name, the DSO name and the sample count (for a group event the counts of
 * the paired entries are added in), then one line per event of the group,
 * and finally the separator and the column titles. The width of the
 * "samples" column is 11 characters per group member.
 */
329 static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel)
331 struct dso *dso = map__dso(he->ms.map);
333 int nr_samples = he->stat.nr_events;
335 if (evsel__is_group_event(evsel)) {
336 struct hist_entry *pair;
338 list_for_each_entry(pair, &he->pairs.head, pairs.node)
339 nr_samples += pair->stat.nr_events;
342 printf("Annotate type: '%s' in %s (%d samples):\n",
343 he->mem_type->self.type_name, dso->name, nr_samples);
345 if (evsel__is_group_event(evsel)) {
349 for_each_group_evsel(pos, evsel)
350 printf(" event[%d] = %s\n", i++, pos->name);
352 nr_members = evsel->core.nr_members;
355 printf("============================================================================\n");
356 printf("%*s %10s %10s %s\n", 11 * nr_members, "samples", "offset", "size", "field");
/*
 * Print one member of an annotated data type and, recursively, its children.
 *
 * For the given evsel (and for every other member of its group, when it is a
 * group event) the sample count is the sum of nr_samples over the bytes
 * [member->offset, member->offset + member->size) of that event's histogram.
 * The counts are followed by the member's offset, size, type name and
 * variable name (empty string when var_name is NULL). Children are printed
 * with the indentation increased by 4; a member that has children gets a
 * closing "}" aligned past the sample columns.
 */
359 static void print_annotated_data_type(struct annotated_data_type *mem_type,
360 struct annotated_member *member,
361 struct evsel *evsel, int indent)
363 struct annotated_member *child;
364 struct type_hist *h = mem_type->histograms[evsel->core.idx];
365 int i, nr_events = 1, samples = 0;
367 for (i = 0; i < member->size; i++)
368 samples += h->addr[member->offset + i].nr_samples;
369 printf(" %10d", samples);
371 if (evsel__is_group_event(evsel)) {
374 for_each_group_member(pos, evsel) {
375 h = mem_type->histograms[pos->core.idx];
378 for (i = 0; i < member->size; i++)
379 samples += h->addr[member->offset + i].nr_samples;
380 printf(" %10d", samples);
382 nr_events = evsel->core.nr_members;
385 printf(" %10d %10d %*s%s\t%s",
386 member->offset, member->size, indent, "", member->type_name,
387 member->var_name ?: "");
389 if (!list_empty(&member->children))
392 list_for_each_entry(child, &member->children, node)
393 print_annotated_data_type(mem_type, child, evsel, indent + 4);
395 if (!list_empty(&member->children))
396 printf("%*s}", 11 * nr_events + 24 + indent, "");
/*
 * Print the data-type annotation statistics in 's': a summary line with the
 * total, the number and percentage of successful ("ok" = total - bad) and
 * failed lookups, followed by the individual failure counters. PRINT_STAT()
 * prints a counter only when it is non-zero. The percentages divide by
 * (s->total ?: 1), so a zero total does not divide by zero.
 */
400 static void print_annotate_data_stat(struct annotated_data_stat *s)
402 #define PRINT_STAT(fld) if (s->fld) printf("%10d : %s\n", s->fld, #fld)
404 int bad = s->no_sym +
415 int ok = s->total - bad;
417 printf("Annotate data type stats:\n");
418 printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n",
419 s->total, ok, 100.0 * ok / (s->total ?: 1), bad, 100.0 * bad / (s->total ?: 1));
420 printf("-----------------------------------------------------------\n");
423 PRINT_STAT(no_insn_ops);
424 PRINT_STAT(no_mem_ops);
426 PRINT_STAT(no_dbginfo);
427 PRINT_STAT(no_cuinfo);
429 PRINT_STAT(no_typeinfo);
430 PRINT_STAT(invalid_size);
431 PRINT_STAT(bad_offset);
/*
 * Walk the hist entries of 'hists' (starting at rb_first_cached()) and show
 * the annotation of each eligible entry for 'evsel'.
 *
 * Per-entry checks visible here: the entry must have a symbol and its DSO
 * must not already be flagged annotate_warned; with a symbol filter the
 * symbol name must match; with a minimum percent the entry's share of the
 * hists' total period must reach it; and the symbol's annotation must have
 * source data (notes->src).
 *
 * Output modes:
 *  - data-type mode (ann->data_type): entries whose type has no histograms
 *    are skipped; with a target type name, a leading "struct " / "union " on
 *    the entry's type name is handled when the target name does not carry
 *    that prefix, and the names are compared with strcmp(); then the header
 *    and the type layout are printed.
 *  - use_browser == 2: the GTK annotate entry point is looked up with
 *    dlsym() on perf_gtk_handle and called.
 *  - use_browser == 1: the TUI annotate browser is run and returns a key.
 *  - otherwise: hist_entry__tty_annotate().
 */
437 static void hists__find_annotations(struct hists *hists,
439 struct perf_annotate *ann)
441 struct rb_node *nd = rb_first_cached(&hists->entries), *next;
445 print_annotate_data_stat(&ann_data_stat);
448 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
449 struct annotation *notes;
451 if (he->ms.sym == NULL || map__dso(he->ms.map)->annotate_warned)
454 if (ann->sym_hist_filter &&
455 (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
458 if (ann->min_percent) {
460 u64 total = hists__total_period(hists);
463 percent = 100.0 * he->stat.period / total;
465 if (percent < ann->min_percent)
469 notes = symbol__annotation(he->ms.sym);
470 if (notes->src == NULL) {
472 if (key == K_LEFT || key == '<')
479 if (ann->data_type) {
480 /* skip unknown type */
481 if (he->mem_type->histograms == NULL)
484 if (ann->target_data_type) {
485 const char *type_name = he->mem_type->self.type_name;
487 /* skip 'struct ' prefix in the type name */
488 if (strncmp(ann->target_data_type, "struct ", 7) &&
489 !strncmp(type_name, "struct ", 7))
492 /* skip 'union ' prefix in the type name */
493 if (strncmp(ann->target_data_type, "union ", 6) &&
494 !strncmp(type_name, "union ", 6))
497 if (strcmp(ann->target_data_type, type_name))
501 print_annotated_data_header(he, evsel);
502 print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0);
507 if (use_browser == 2) {
509 int (*annotate)(struct hist_entry *he,
511 struct hist_browser_timer *hbt);
513 annotate = dlsym(perf_gtk_handle,
514 "hist_entry__gtk_annotate");
515 if (annotate == NULL) {
516 ui__error("GTK browser not found!\n");
520 ret = annotate(he, evsel, NULL);
521 if (!ret || !ann->skip_missing)
524 /* skip missing symbols */
526 } else if (use_browser == 1) {
527 key = hist_entry__tui_annotate(he, evsel, NULL);
531 if (!ann->skip_missing)
549 hist_entry__tty_annotate(he, evsel, ann);
/*
 * Run the annotate session described by 'ann'.
 *
 * Visible steps: build the CPU bitmap from ann->cpu_list, look up an objdump
 * path from the session environment when none was configured, process all
 * events, and print event/session/DSO dumps to stdout. Then, for every evsel
 * with samples, its hists are collapsed and resorted (the CALLCHAIN sample
 * bit is reset first so callchains are not sorted). With
 * symbol_conf.event_group set, a non-leader's hists are matched and linked
 * to the leader's hists and printing is delayed: a second pass over the
 * evlist displays the group leaders. An error is reported when no evsel had
 * any samples. In GTK mode (use_browser == 2) the
 * "perf_gtk__show_annotations" entry point is looked up with dlsym().
 */
555 static int __cmd_annotate(struct perf_annotate *ann)
558 struct perf_session *session = ann->session;
560 u64 total_nr_samples;
563 ret = perf_session__cpu_bitmap(session, ann->cpu_list,
569 if (!annotate_opts.objdump_path) {
570 ret = perf_env__lookup_objdump(&session->header.env,
571 &annotate_opts.objdump_path);
576 ret = perf_session__process_events(session);
581 perf_session__fprintf_nr_events(session, stdout, false);
582 evlist__fprintf_nr_events(session->evlist, stdout, false);
587 perf_session__fprintf(session, stdout);
590 perf_session__fprintf_dsos(session, stdout);
592 total_nr_samples = 0;
593 evlist__for_each_entry(session->evlist, pos) {
594 struct hists *hists = evsel__hists(pos);
595 u32 nr_samples = hists->stats.nr_samples;
597 if (nr_samples > 0) {
598 total_nr_samples += nr_samples;
599 hists__collapse_resort(hists, NULL);
600 /* Don't sort callchain */
601 evsel__reset_sample_bit(pos, CALLCHAIN);
602 evsel__output_resort(pos, NULL);
605 * An event group needs to display other events too.
606 * Let's delay printing until other events are processed.
608 if (symbol_conf.event_group) {
609 if (!evsel__is_group_leader(pos)) {
610 struct hists *leader_hists;
612 leader_hists = evsel__hists(evsel__leader(pos));
613 hists__match(leader_hists, hists);
614 hists__link(leader_hists, hists);
619 hists__find_annotations(hists, pos, ann);
623 if (total_nr_samples == 0) {
624 ui__error("The %s data has no samples!\n", session->data->path);
628 /* Display group events together */
629 evlist__for_each_entry(session->evlist, pos) {
630 struct hists *hists = evsel__hists(pos);
631 u32 nr_samples = hists->stats.nr_samples;
636 if (!symbol_conf.event_group || !evsel__is_group_leader(pos))
639 hists__find_annotations(hists, pos, ann);
642 if (use_browser == 2) {
643 void (*show_annotations)(void);
645 show_annotations = dlsym(perf_gtk_handle,
646 "perf_gtk__show_annotations");
647 if (show_annotations == NULL) {
648 ui__error("GTK browser not found!\n");
/*
 * Option callback for --percent-limit: converts 'str' to a number and stores
 * it in the perf_annotate's min_percent (opt->value points at the
 * perf_annotate). 'unset' is ignored.
 *
 * NOTE(review): the value is parsed with strtof() into a double and no end
 * pointer is passed, so trailing garbage or a non-numeric argument is not
 * detected here; strtod() with an end-pointer check may be preferable.
 */
658 static int parse_percent_limit(const struct option *opt, const char *str,
659 int unset __maybe_unused)
661 struct perf_annotate *ann = opt->value;
662 double pcnt = strtof(str, NULL);
664 ann->min_percent = pcnt;
/*
 * Option callback for --data-type: sets ann->data_type to !unset and stores
 * a strdup()'d copy of the optional type name in ann->target_data_type.
 *
 * NOTE(review): no check of the strdup() result is visible in this view;
 * confirm whether an allocation failure should be reported to the caller.
 */
668 static int parse_data_type(const struct option *opt, const char *str, int unset)
670 struct perf_annotate *ann = opt->value;
672 ann->data_type = !unset;
674 ann->target_data_type = strdup(str);
/* Usage strings passed to parse_options() and usage_with_options(). */
679 static const char * const annotate_usage[] = {
680 "perf annotate [<options>]",
/*
 * Entry point of 'perf annotate'.
 *
 * Sets up the perf_annotate state (tool callbacks, with ordered events that
 * require timestamps), declares the command-line options, parses them, and
 * treats a leftover argument as the symbol filter. String options given on
 * the command line (disassembler style, objdump path, addr2line path) are
 * copied with strdup() into annotate_opts / symbol_conf. A perf session is
 * then opened on the input file, branch-stack availability is read from the
 * header, symbol and annotation support are initialised, the output mode is
 * chosen from the --stdio/--stdio2/--tui/--gtk flags, and the sort order is
 * set to "dso,type" for data-type annotation or "dso,symbol" otherwise
 * before __cmd_annotate() runs. The session is deleted and the annotation
 * options are released at the end.
 */
684 int cmd_annotate(int argc, const char **argv)
686 struct perf_annotate annotate = {
688 .sample = process_sample_event,
689 .mmap = perf_event__process_mmap,
690 .mmap2 = perf_event__process_mmap2,
691 .comm = perf_event__process_comm,
692 .exit = perf_event__process_exit,
693 .fork = perf_event__process_fork,
694 .namespaces = perf_event__process_namespaces,
695 .attr = perf_event__process_attr,
696 .build_id = perf_event__process_build_id,
697 #ifdef HAVE_LIBTRACEEVENT
698 .tracing_data = perf_event__process_tracing_data,
700 .id_index = perf_event__process_id_index,
701 .auxtrace_info = perf_event__process_auxtrace_info,
702 .auxtrace = perf_event__process_auxtrace,
703 .feature = process_feature_event,
704 .ordered_events = true,
705 .ordering_requires_timestamps = true,
708 struct perf_data data = {
709 .mode = PERF_DATA_MODE_READ,
711 struct itrace_synth_opts itrace_synth_opts = {
714 const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
715 struct option options[] = {
716 OPT_STRING('i', "input", &input_name, "file",
718 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
719 "only consider symbols in these dsos"),
720 OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
721 "symbol to annotate"),
722 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
723 OPT_INCR('v', "verbose", &verbose,
724 "be more verbose (show symbol address, etc)"),
/* NOTE(review): the help text below reads "do now show"; presumably "do not show" was meant. */
725 OPT_BOOLEAN('q', "quiet", &quiet, "do now show any warnings or messages"),
726 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
727 "dump raw trace in ASCII"),
728 #ifdef HAVE_GTK2_SUPPORT
729 OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
731 #ifdef HAVE_SLANG_SUPPORT
732 OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
734 OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
735 OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"),
736 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
737 "don't load vmlinux even if found"),
738 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
739 "file", "vmlinux pathname"),
740 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
741 "load module symbols - WARNING: use only with -k and LIVE kernel"),
742 OPT_BOOLEAN('l', "print-line", &annotate_opts.print_lines,
743 "print matching source lines (may be slow)"),
744 OPT_BOOLEAN('P', "full-paths", &annotate_opts.full_path,
745 "Don't shorten the displayed pathnames"),
746 OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
747 "Skip symbols that cannot be annotated"),
748 OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
750 "Show event group information together"),
751 OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
752 OPT_CALLBACK(0, "symfs", NULL, "directory",
753 "Look for files with symbols relative to this directory",
754 symbol__config_symfs),
755 OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
756 "Interleave source code with assembly code (default)"),
757 OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw,
758 "Display raw encoding of assembly instructions (default)"),
759 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
760 "Specify disassembler style (e.g. -M intel for intel syntax)"),
761 OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix",
762 "Add prefix to source file path names in programs (with --prefix-strip)"),
763 OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N",
764 "Strip first N entries of source file path name in programs (with --prefix)"),
765 OPT_STRING(0, "objdump", &objdump_path, "path",
766 "objdump binary to use for disassembly and annotations"),
767 OPT_STRING(0, "addr2line", &addr2line_path, "path",
768 "addr2line binary to use for line numbers"),
769 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
770 "Enable symbol demangling"),
771 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
772 "Enable kernel symbol demangling"),
/* NOTE(review): "group" is already registered above via OPT_BOOLEAN_SET; this second entry looks redundant - confirm. */
773 OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
774 "Show event group information together"),
775 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
776 "Show a column with the sum of periods"),
777 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
778 "Show a column with the number of samples"),
779 OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
780 "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
781 stdio__config_color, "always"),
782 OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period",
783 "Set percent type local/global-period/hits",
784 annotate_parse_percent_type),
785 OPT_CALLBACK(0, "percent-limit", &annotate, "percent",
786 "Don't show entries under that percent", parse_percent_limit),
787 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
788 "Instruction Tracing options\n" ITRACE_HELP,
789 itrace_parse_synth_opts),
790 OPT_CALLBACK_OPTARG(0, "data-type", &annotate, NULL, "name",
791 "Show data type annotate for the memory accesses",
793 OPT_BOOLEAN(0, "type-stat", &annotate.type_stat,
794 "Show stats for the data type annotation"),
799 set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
800 set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);
802 annotation_options__init();
808 annotation_config__init();
810 argc = parse_options(argc, argv, options, annotate_usage, 0);
813 * Special case: if there's an argument left then assume that
814 * it's a symbol filter:
817 usage_with_options(annotate_usage, options);
819 annotate.sym_hist_filter = argv[0];
822 if (disassembler_style) {
823 annotate_opts.disassembler_style = strdup(disassembler_style);
824 if (!annotate_opts.disassembler_style)
828 annotate_opts.objdump_path = strdup(objdump_path);
829 if (!annotate_opts.objdump_path)
832 if (addr2line_path) {
833 symbol_conf.addr2line_path = strdup(addr2line_path);
834 if (!symbol_conf.addr2line_path)
838 if (annotate_check_args() < 0)
841 #ifdef HAVE_GTK2_SUPPORT
842 if (symbol_conf.show_nr_samples && annotate.use_gtk) {
843 pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
848 #ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT
849 if (annotate.data_type) {
850 pr_err("Error: Data type profiling is disabled due to missing DWARF support\n");
855 ret = symbol__validate_sym_arguments();
862 data.path = input_name;
864 annotate.session = perf_session__new(&data, &annotate.tool);
865 if (IS_ERR(annotate.session))
866 return PTR_ERR(annotate.session);
868 annotate.session->itrace_synth_opts = &itrace_synth_opts;
870 annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
871 HEADER_BRANCH_STACK);
873 if (annotate.group_set)
874 evlist__force_leader(annotate.session->evlist);
876 ret = symbol__annotation_init();
880 symbol_conf.try_vmlinux_path = true;
882 ret = symbol__init(&annotate.session->header.env);
886 if (annotate.use_stdio || annotate.use_stdio2)
888 #ifdef HAVE_SLANG_SUPPORT
889 else if (annotate.use_tui)
892 #ifdef HAVE_GTK2_SUPPORT
893 else if (annotate.use_gtk)
897 /* FIXME: only support stdio for now */
898 if (annotate.data_type) {
900 annotate_opts.annotate_src = false;
901 symbol_conf.annotate_data_member = true;
902 symbol_conf.annotate_data_sample = true;
908 * Events of different processes may correspond to the same
909 * symbol, we do not care about the processes in annotate,
910 * set sort order to avoid repeated output.
912 if (annotate.data_type)
913 sort_order = "dso,type";
915 sort_order = "dso,symbol";
918 * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle
919 * if branch info is in perf data in TUI mode.
921 if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack)
922 sort__mode = SORT_MODE__BRANCH;
924 if (setup_sorting(NULL) < 0)
925 usage_with_options(annotate_usage, options);
927 ret = __cmd_annotate(&annotate);
931 * Speed up the exit process by only deleting for debug builds. For
932 * large files this can save time.
935 perf_session__delete(annotate.session);
937 annotation_options__exit();