1 // SPDX-License-Identifier: GPL-2.0
5 * Builtin inject command: Examine the live mode (stdin) event stream
6 * and repipe it to stdout while optionally injecting additional
11 #include "util/color.h"
13 #include "util/vdso.h"
14 #include "util/evlist.h"
15 #include "util/evsel.h"
17 #include "util/session.h"
18 #include "util/tool.h"
19 #include "util/debug.h"
20 #include "util/build-id.h"
21 #include "util/data.h"
22 #include "util/auxtrace.h"
24 #include "util/symbol.h"
25 #include "util/synthetic-events.h"
26 #include "util/thread.h"
27 #include "util/namespaces.h"
28 #include "util/util.h"
31 #include <internal/lib.h>
33 #include <linux/err.h>
34 #include <subcmd/parse-options.h>
35 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
37 #include <linux/list.h>
38 #include <linux/string.h>
39 #include <linux/zalloc.h>
40 #include <linux/hash.h>
/*
 * Injection-state data structures.  NOTE(review): struct declaration
 * headers and several members are elided in this view; the comments
 * below describe only what is visible.
 */
/* Stashed guest event: parsed sample, pointer to the raw event, and the
 * backing buffer (presumably struct guest_event — header elided). */
46 struct perf_sample sample;
47 union perf_event *event;
48 char event_buf[PERF_SAMPLE_MAX_SIZE];
/* Hash-table entry keyed by guest sample ID (presumably struct guest_id). */
52 /* hlist_node must be first, see free_hlist() */
53 struct hlist_node node;
/* Hash-table entry keyed by host thread ID (presumably struct guest_tid). */
60 /* hlist_node must be first, see free_hlist() */
61 struct hlist_node node;
62 /* Thread ID of QEMU thread */
/* Per-VCPU state: current host CPU and the QEMU thread servicing it
 * (presumably struct guest_vcpu — header elided). */
68 /* Current host CPU */
70 /* Thread ID of QEMU thread */
/* State for merging a guest perf.data file into the host session. */
74 struct guest_session {
79 struct perf_tool tool;
80 struct perf_data data;
81 struct perf_session *session;
/* TSC conversion parameters for translating guest timestamps to host. */
84 struct perf_tsc_conversion host_tc;
85 struct perf_tsc_conversion guest_tc;
93 /* Array of guest_vcpu */
94 struct guest_vcpu *vcpu;
96 /* Hash table for guest_id */
97 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
98 /* Hash table for guest_tid */
99 struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
100 /* Place to stash next guest event */
101 struct guest_event ev;
/* Top-level state for the inject command (presumably struct perf_inject —
 * header elided). */
105 struct perf_tool tool;
106 struct perf_session *session;
113 bool in_place_update;
114 bool in_place_update_dry_run;
117 const char *input_name;
118 struct perf_data output;
121 struct list_head samples;
122 struct itrace_synth_opts itrace_synth_opts;
123 char event_copy[PERF_SAMPLE_MAX_SIZE];
124 struct perf_file_section secs[HEADER_FEAT_BITS];
125 struct guest_session guest_session;
/* List node + flexible-array copy of a saved event, see sched handlers. */
129 struct list_head node;
131 union perf_event event[];
/* Forward declaration: dso__inject_build_id() is used by the mmap repipe
 * handlers before its definition below. */
134 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
135 struct machine *machine, u8 cpumode, u32 flags);
/*
 * Write raw bytes to the inject output file/pipe and account them in
 * bytes_written.  (Error-check lines elided in this view.)
 */
137 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
141 size = perf_data__write(&inject->output, buf, sz);
145 inject->bytes_written += size;
/* Pass a synthesized/control event straight through to the output. */
149 static int perf_event__repipe_synth(struct perf_tool *tool,
150 union perf_event *event)
152 struct perf_inject *inject = container_of(tool, struct perf_inject,
155 return output_bytes(inject, event, event->header.size);
/* ordered_events variant: identical to repipe_synth, ordering arg unused. */
158 static int perf_event__repipe_oe_synth(struct perf_tool *tool,
159 union perf_event *event,
160 struct ordered_events *oe __maybe_unused)
162 return perf_event__repipe_synth(tool, event);
/* Deliberately discard the event (body elided; presumably returns 0). */
166 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
167 union perf_event *event __maybe_unused,
168 struct ordered_events *oe __maybe_unused)
/* Session-level (op2/op4) repipe wrappers around repipe_synth. */
174 static int perf_event__repipe_op2_synth(struct perf_session *session,
175 union perf_event *event)
177 return perf_event__repipe_synth(session->tool, event);
180 static int perf_event__repipe_op4_synth(struct perf_session *session,
181 union perf_event *event,
182 u64 data __maybe_unused,
183 const char *str __maybe_unused)
185 return perf_event__repipe_synth(session->tool, event);
/*
 * Process an attr event into the evlist, then repipe it.  The
 * !inject->is_pipe branch body is elided — presumably the repipe is
 * skipped in file mode (attrs go to the header instead); TODO confirm.
 */
188 static int perf_event__repipe_attr(struct perf_tool *tool,
189 union perf_event *event,
190 struct evlist **pevlist)
192 struct perf_inject *inject = container_of(tool, struct perf_inject,
196 ret = perf_event__process_attr(tool, event, pevlist);
200 if (!inject->is_pipe)
203 return perf_event__repipe_synth(tool, event);
/* Repipe an event-update event unchanged. */
206 static int perf_event__repipe_event_update(struct perf_tool *tool,
207 union perf_event *event,
208 struct evlist **pevlist __maybe_unused)
210 return perf_event__repipe_synth(tool, event);
213 #ifdef HAVE_AUXTRACE_SUPPORT
/*
 * Copy 'size' bytes from fd to the inject output in bounded chunks
 * via a stack buffer.  (Loop framing and error handling elided.)
 */
215 static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
222 ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
225 ret = output_bytes(inject, buf, ssz);
/*
 * Repipe an AUXTRACE event plus its trailing data.  For file (non-pipe)
 * output, the current output offset is recorded in the auxtrace index.
 * The payload is copied from the input fd when the input is a pipe or
 * not a single mmap; otherwise header+payload are written directly from
 * the mapped event.  Returns the auxtrace payload size so the caller
 * can skip past it in the input stream.
 */
234 static s64 perf_event__repipe_auxtrace(struct perf_session *session,
235 union perf_event *event)
237 struct perf_tool *tool = session->tool;
238 struct perf_inject *inject = container_of(tool, struct perf_inject,
242 inject->have_auxtrace = true;
244 if (!inject->output.is_pipe) {
247 offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
250 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
256 if (perf_data__is_pipe(session->data) || !session->one_mmap) {
257 ret = output_bytes(inject, event, event->header.size);
260 ret = copy_bytes(inject, perf_data__fd(session->data),
261 event->auxtrace.size);
263 ret = output_bytes(inject, event,
264 event->header.size + event->auxtrace.size);
269 return event->auxtrace.size;
/* Stub used when auxtrace support is compiled out: report and fail. */
275 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
276 union perf_event *event __maybe_unused)
278 pr_err("AUX area tracing not supported\n");
/* Generic per-sample repipe: forward the event to the output unchanged. */
284 static int perf_event__repipe(struct perf_tool *tool,
285 union perf_event *event,
286 struct perf_sample *sample __maybe_unused,
287 struct machine *machine __maybe_unused)
289 return perf_event__repipe_synth(tool, event);
/* Silently discard the event (body elided; presumably returns 0). */
292 static int perf_event__drop(struct perf_tool *tool __maybe_unused,
293 union perf_event *event __maybe_unused,
294 struct perf_sample *sample __maybe_unused,
295 struct machine *machine __maybe_unused)
/* Remember the sample ID of a dropped AUX event in inject->aux_id. */
300 static int perf_event__drop_aux(struct perf_tool *tool,
301 union perf_event *event __maybe_unused,
302 struct perf_sample *sample,
303 struct machine *machine __maybe_unused)
305 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
308 inject->aux_id = sample->id;
/*
 * Remove the aux_sample payload from a sample event: copy the bytes
 * before (sz1) and after (sz2) the payload into inject->event_copy,
 * shrink the header size to sz1 + sz2, and zero the u64 immediately
 * before the cut point (the stale aux_sample size word).  The sizes are
 * bounds-checked against the original event; the failure branch body is
 * elided in this view.
 */
313 static union perf_event *
314 perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
315 union perf_event *event,
316 struct perf_sample *sample)
318 size_t sz1 = sample->aux_sample.data - (void *)event;
319 size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
320 union perf_event *ev = (union perf_event *)inject->event_copy;
322 if (sz1 > event->header.size || sz2 > event->header.size ||
323 sz1 + sz2 > event->header.size ||
324 sz1 < sizeof(struct perf_event_header) + sizeof(u64))
327 memcpy(ev, event, sz1);
328 memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
329 ev->header.size = sz1 + sz2;
330 ((u64 *)((void *)ev + sz1))[-1] = 0;
/* Per-evsel handler signature installed on evsel->handler. */
335 typedef int (*inject_handler)(struct perf_tool *tool,
336 union perf_event *event,
337 struct perf_sample *sample,
339 struct machine *machine);
/*
 * Main sample path: dispatch to a per-evsel handler if one is set;
 * otherwise mark the sample's DSO as hit (for build-id injection),
 * strip the aux_sample payload when itrace synthesis is requested,
 * then repipe the (possibly trimmed) event.
 */
341 static int perf_event__repipe_sample(struct perf_tool *tool,
342 union perf_event *event,
343 struct perf_sample *sample,
345 struct machine *machine)
347 struct perf_inject *inject = container_of(tool, struct perf_inject,
350 if (evsel && evsel->handler) {
351 inject_handler f = evsel->handler;
352 return f(tool, event, sample, evsel, machine);
355 build_id__mark_dso_hit(tool, event, sample, evsel, machine);
357 if (inject->itrace_synth_opts.set && sample->aux_sample.size)
358 event = perf_inject__cut_auxtrace_sample(inject, event, sample);
360 return perf_event__repipe_synth(tool, event);
/* Process an MMAP event into machine state, then repipe it. */
363 static int perf_event__repipe_mmap(struct perf_tool *tool,
364 union perf_event *event,
365 struct perf_sample *sample,
366 struct machine *machine)
370 err = perf_event__process_mmap(tool, event, sample, machine);
371 perf_event__repipe(tool, event, sample, machine);
/*
 * jitdump variant: if the mmap names a jit marker, jit_process() injects
 * the jit mmaps / generated ELF images (n bytes accounted); otherwise
 * fall through to the plain mmap repipe.  (ret handling elided.)
 */
377 static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
378 union perf_event *event,
379 struct perf_sample *sample,
380 struct machine *machine)
382 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
387 * if jit marker, then inject jit mmaps and generate ELF images
389 ret = jit_process(inject->session, &inject->output, machine,
390 event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
394 inject->bytes_written += n;
397 return perf_event__repipe_mmap(tool, event, sample, machine);
/*
 * Find or create the DSO for 'filename' as seen by task pid/tid,
 * honouring the task's namespace.  vdso maps always live on the host,
 * so a copy of the nsinfo with setns disabled is used for them.
 * (Refcount/cleanup lines partly elided in this view.)
 */
401 static struct dso *findnew_dso(int pid, int tid, const char *filename,
402 struct dso_id *id, struct machine *machine)
404 struct thread *thread;
405 struct nsinfo *nsi = NULL;
410 thread = machine__findnew_thread(machine, pid, tid);
411 if (thread == NULL) {
412 pr_err("cannot find or create a task %d/%d.\n", tid, pid);
416 vdso = is_vdso_map(filename);
417 nsi = nsinfo__get(thread->nsinfo);
420 /* The vdso maps are always on the host and not the
421 * container. Ensure that we don't use setns to look
424 nnsi = nsinfo__copy(nsi);
427 nsinfo__clear_need_setns(nnsi);
430 dso = machine__findnew_vdso(machine, thread);
432 dso = machine__findnew_dso_id(machine, filename, id);
436 nsinfo__put(dso->nsinfo);
/*
 * MMAP handler for --buildid: inject a build-id event for the mapped
 * DSO the first time it is seen (dso->hit not yet set), then repipe
 * the mmap event itself.
 */
445 static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
446 union perf_event *event,
447 struct perf_sample *sample,
448 struct machine *machine)
452 dso = findnew_dso(event->mmap.pid, event->mmap.tid,
453 event->mmap.filename, NULL, machine);
455 if (dso && !dso->hit) {
457 dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
461 return perf_event__repipe(tool, event, sample, machine);
/*
 * Process and repipe an MMAP2 event.  When the event carries an inline
 * build ID (PERF_RECORD_MISC_MMAP_BUILD_ID) the DSO is looked up by
 * name and marked so that no duplicate build-id event is injected
 * later.  (Marking line elided in this view.)
 */
464 static int perf_event__repipe_mmap2(struct perf_tool *tool,
465 union perf_event *event,
466 struct perf_sample *sample,
467 struct machine *machine)
471 err = perf_event__process_mmap2(tool, event, sample, machine);
472 perf_event__repipe(tool, event, sample, machine);
474 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
477 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
478 event->mmap2.filename, NULL, machine);
480 /* mark it not to inject build-id */
/* jitdump variant of the MMAP2 handler; see perf_event__jit_repipe_mmap(). */
490 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
491 union perf_event *event,
492 struct perf_sample *sample,
493 struct machine *machine)
495 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
500 * if jit marker, then inject jit mmaps and generate ELF images
502 ret = jit_process(inject->session, &inject->output, machine,
503 event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
507 inject->bytes_written += n;
510 return perf_event__repipe_mmap2(tool, event, sample, machine);
/*
 * MMAP2 handler for --buildid: when the event already carries a build
 * ID the dso_id fields are invalid, so the DSO is looked up by name
 * only and marked; otherwise it is looked up with the full dso_id and
 * a build-id event is injected on first hit.  The event is then
 * repiped either way.
 */
514 static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
515 union perf_event *event,
516 struct perf_sample *sample,
517 struct machine *machine)
519 struct dso_id dso_id = {
520 .maj = event->mmap2.maj,
521 .min = event->mmap2.min,
522 .ino = event->mmap2.ino,
523 .ino_generation = event->mmap2.ino_generation,
527 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
528 /* cannot use dso_id since it'd have invalid info */
529 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
530 event->mmap2.filename, NULL, machine);
532 /* mark it not to inject build-id */
539 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
540 event->mmap2.filename, &dso_id, machine);
542 if (dso && !dso->hit) {
544 dso__inject_build_id(dso, tool, machine, sample->cpumode,
549 perf_event__repipe(tool, event, sample, machine);
/* Process-then-repipe wrappers for task lifetime events: each feeds the
 * event into the session machinery, then forwards it to the output. */
554 static int perf_event__repipe_fork(struct perf_tool *tool,
555 union perf_event *event,
556 struct perf_sample *sample,
557 struct machine *machine)
561 err = perf_event__process_fork(tool, event, sample, machine);
562 perf_event__repipe(tool, event, sample, machine);
567 static int perf_event__repipe_comm(struct perf_tool *tool,
568 union perf_event *event,
569 struct perf_sample *sample,
570 struct machine *machine)
574 err = perf_event__process_comm(tool, event, sample, machine);
575 perf_event__repipe(tool, event, sample, machine);
580 static int perf_event__repipe_namespaces(struct perf_tool *tool,
581 union perf_event *event,
582 struct perf_sample *sample,
583 struct machine *machine)
585 int err = perf_event__process_namespaces(tool, event, sample, machine);
587 perf_event__repipe(tool, event, sample, machine);
592 static int perf_event__repipe_exit(struct perf_tool *tool,
593 union perf_event *event,
594 struct perf_sample *sample,
595 struct machine *machine)
599 err = perf_event__process_exit(tool, event, sample, machine);
600 perf_event__repipe(tool, event, sample, machine);
/* Tracing data: repipe first, then let the session consume it. */
605 static int perf_event__repipe_tracing_data(struct perf_session *session,
606 union perf_event *event)
608 perf_event__repipe_synth(session->tool, event);
610 return perf_event__process_tracing_data(session, event);
/*
 * Read the DSO's build ID from its file, entering the owning task's
 * mount namespace first; on failure, retry with a chroot-relative path
 * for files inside a container.  Returns 0 iff the DSO ends up with a
 * build ID.
 */
613 static int dso__read_build_id(struct dso *dso)
617 if (dso->has_build_id)
620 nsinfo__mountns_enter(dso->nsinfo, &nsc);
621 if (filename__read_build_id(dso->long_name, &dso->bid) > 0)
622 dso->has_build_id = true;
623 else if (dso->nsinfo) {
626 new_name = filename_with_chroot(dso->nsinfo->pid,
628 if (new_name && filename__read_build_id(new_name, &dso->bid) > 0)
629 dso->has_build_id = true;
632 nsinfo__mountns_exit(&nsc);
634 return dso->has_build_id ? 0 : -1;
/*
 * Synthesize and repipe a build-id event for the DSO.  Anon, hugetlb
 * and "no dso" mappings are skipped — no build ID can exist for them.
 */
637 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
638 struct machine *machine, u8 cpumode, u32 flags)
642 if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB)
644 if (is_no_dso_memory(dso->long_name))
647 if (dso__read_build_id(dso) < 0) {
648 pr_debug("no build_id found for %s\n", dso->long_name);
652 err = perf_event__synthesize_build_id(tool, dso, cpumode,
653 perf_event__repipe, machine);
655 pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
/*
 * Sample handler for build-id injection: resolve the sample IP to its
 * map/DSO and inject that DSO's build ID the first time it is hit,
 * then repipe the sample event.
 */
662 int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
663 struct perf_sample *sample,
664 struct evsel *evsel __maybe_unused,
665 struct machine *machine)
667 struct addr_location al;
668 struct thread *thread;
670 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
671 if (thread == NULL) {
672 pr_err("problem processing %d event, skipping it.\n",
677 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
678 if (!al.map->dso->hit) {
679 al.map->dso->hit = 1;
680 dso__inject_build_id(al.map->dso, tool, machine,
681 sample->cpumode, al.map->flags);
687 perf_event__repipe(tool, event, sample, machine);
/*
 * sched:sched_process_exit handler: unlink (and presumably free — line
 * elided) any stashed sched_switch event for the exiting thread.
 */
691 static int perf_inject__sched_process_exit(struct perf_tool *tool,
692 union perf_event *event __maybe_unused,
693 struct perf_sample *sample,
694 struct evsel *evsel __maybe_unused,
695 struct machine *machine __maybe_unused)
697 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
698 struct event_entry *ent;
700 list_for_each_entry(ent, &inject->samples, node) {
701 if (sample->tid == ent->tid) {
702 list_del_init(&ent->node);
/*
 * sched:sched_switch handler: drop any earlier stashed event for this
 * tid, then save a copy of this switch event so a later sched_stat
 * sample for the same task can be rewritten against it.
 */
711 static int perf_inject__sched_switch(struct perf_tool *tool,
712 union perf_event *event,
713 struct perf_sample *sample,
715 struct machine *machine)
717 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
718 struct event_entry *ent;
720 perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
722 ent = malloc(event->header.size + sizeof(struct event_entry));
724 color_fprintf(stderr, PERF_COLOR_RED,
725 "Not enough memory to process sched switch event!");
729 ent->tid = sample->tid;
730 memcpy(&ent->event, event, event->header.size);
731 list_add(&ent->node, &inject->samples);
/*
 * sched_stat handler: find the stashed sched_switch event for the pid
 * in this stat sample, graft the stat's period and time onto it,
 * re-synthesize the sample fields in place, mark its DSO hit and
 * repipe the rewritten switch event.
 */
735 static int perf_inject__sched_stat(struct perf_tool *tool,
736 union perf_event *event __maybe_unused,
737 struct perf_sample *sample,
739 struct machine *machine)
741 struct event_entry *ent;
742 union perf_event *event_sw;
743 struct perf_sample sample_sw;
744 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
745 u32 pid = evsel__intval(evsel, sample, "pid");
747 list_for_each_entry(ent, &inject->samples, node) {
754 event_sw = &ent->event[0];
755 evsel__parse_sample(evsel, event_sw, &sample_sw);
757 sample_sw.period = sample->period;
758 sample_sw.time = sample->time;
759 perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
760 evsel->core.attr.read_format, &sample_sw);
761 build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
762 return perf_event__repipe(tool, event_sw, &sample_sw, machine);
/* Return the guest_vcpu slot for 'vcpu', growing the array as needed. */
765 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
767 if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
769 return &gs->vcpu[vcpu];
/* Write guest event bytes to the temporary spool file (0 on success). */
772 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
774 ssize_t ret = writen(gs->tmp_fd, buf, sz);
776 return ret < 0 ? ret : 0;
/* Tool callback: spool a guest event to the temporary file verbatim. */
779 static int guest_session__repipe(struct perf_tool *tool,
780 union perf_event *event,
781 struct perf_sample *sample __maybe_unused,
782 struct machine *machine __maybe_unused)
784 struct guest_session *gs = container_of(tool, struct guest_session, tool);
786 return guest_session__output_bytes(gs, event, event->header.size);
/* Record a host tid -> guest VCPU mapping in the tids hash table. */
789 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
791 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
797 guest_tid->tid = tid;
798 guest_tid->vcpu = vcpu;
799 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
800 hlist_add_head(&guest_tid->node, &gs->tids[hash]);
/*
 * Peek callback over host COMM events for the QEMU process
 * (gs->machine_pid): QEMU names its VCPU threads "CPU <n>/KVM", so
 * parse that to map each VCPU number to its host thread, rejecting a
 * second thread claiming the same VCPU.
 */
805 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
806 union perf_event *event,
807 u64 offset __maybe_unused, void *data)
809 struct guest_session *gs = data;
811 struct guest_vcpu *guest_vcpu;
814 if (event->header.type != PERF_RECORD_COMM ||
815 event->comm.pid != gs->machine_pid)
819 * QEMU option -name debug-threads=on, causes thread names formatted as
820 * below, although it is not an ABI. Also libvirt seems to use this by
821 * default. Here we rely on it to tell us which thread is which VCPU.
823 ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
826 pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
827 event->comm.tid, event->comm.comm, vcpu);
828 if (vcpu > INT_MAX) {
829 pr_err("Invalid VCPU %u\n", vcpu);
832 guest_vcpu = guest_session__vcpu(gs, vcpu);
835 if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
836 pr_err("Fatal error: Two threads found with the same VCPU\n");
839 guest_vcpu->tid = event->comm.tid;
841 return guest_session__map_tid(gs, event->comm.tid, vcpu);
/* Scan all host events in the data section for QEMU VCPU thread COMMs. */
844 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
846 return perf_session__peek_events(session, session->header.data_offset,
847 session->header.data_size,
848 host_peek_vm_comms_cb, gs);
/* True if the sample ID already belongs to the host evlist. */
851 static bool evlist__is_id_used(struct evlist *evlist, u64 id)
853 return evlist__id2sid(evlist, id);
/* Hand out the next ID above highest_id that the host is not using
 * (the increment line is elided in this view). */
856 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
860 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
862 return gs->highest_id;
/* Record a guest id -> (host id, vcpu) mapping in the guest_id table. */
865 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
867 struct guest_id *guest_id = zalloc(sizeof(*guest_id));
874 guest_id->host_id = host_id;
875 guest_id->vcpu = vcpu;
876 hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
877 hlist_add_head(&guest_id->node, &gs->heads[hash]);
/* Highest sample ID in the evlist (linear scan over all evsel IDs). */
882 static u64 evlist__find_highest_id(struct evlist *evlist)
887 evlist__for_each_entry(evlist, evsel) {
890 for (j = 0; j < evsel->core.ids; j++) {
891 u64 id = evsel->core.id[j];
/*
 * For every per-CPU guest sample ID, allocate an unused host ID and
 * record the mapping keyed by the guest ID; IDs with no CPU are
 * skipped (the VCPU is taken from sid->cpu.cpu).
 */
901 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
903 struct evlist *evlist = gs->session->evlist;
907 evlist__for_each_entry(evlist, evsel) {
910 for (j = 0; j < evsel->core.ids; j++) {
911 struct perf_sample_id *sid;
915 id = evsel->core.id[j];
916 sid = evlist__id2sid(evlist, id);
917 if (!sid || sid->cpu.cpu == -1)
919 host_id = guest_session__allocate_new_id(gs, host_evlist);
920 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
/* Look up the guest_id entry for a guest sample ID (NULL if absent —
 * return line elided in this view). */
929 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
931 struct hlist_head *head;
932 struct guest_id *guest_id;
935 hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
936 head = &gs->heads[hash];
938 hlist_for_each_entry(guest_id, head, node)
939 if (guest_id->id == id)
/* Attr handler used while synthesizing guest attrs into the host evlist. */
945 static int process_attr(struct perf_tool *tool, union perf_event *event,
946 struct perf_sample *sample __maybe_unused,
947 struct machine *machine __maybe_unused)
949 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
951 return perf_event__process_attr(tool, event, &inject->session->evlist);
/*
 * Re-synthesize one guest evsel's attr into the host session:
 * translate its guest IDs to the mapped host IDs, force IDENTIFIER
 * sampling and guest-only counting (exclude_host=1, exclude_guest=0),
 * then fix up each new perf_sample_id with the VCPU's host tid, the
 * guest machine pid and the VCPU number.  (calloc failure paths and
 * frees elided in this view.)
 */
954 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
956 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
957 struct perf_event_attr attr = evsel->core.attr;
963 id_array = calloc(evsel->core.ids, sizeof(*id_array));
967 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
971 for (i = 0; i < evsel->core.ids; i++) {
972 u64 id = evsel->core.id[i];
973 struct guest_id *guest_id = guest_session__lookup_id(gs, id);
976 pr_err("Failed to find guest id %"PRIu64"\n", id);
980 id_array[i] = guest_id->host_id;
981 vcpu_array[i] = guest_id->vcpu;
984 attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
985 attr.exclude_host = 1;
986 attr.exclude_guest = 0;
988 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
989 id_array, process_attr);
991 pr_err("Failed to add guest attr.\n");
993 for (i = 0; i < evsel->core.ids; i++) {
994 struct perf_sample_id *sid;
995 u32 vcpu = vcpu_array[i];
997 sid = evlist__id2sid(inject->session->evlist, id_array[i]);
998 /* Guest event is per-thread from the host point of view */
1000 sid->tid = gs->vcpu[vcpu].tid;
1001 sid->machine_pid = gs->machine_pid;
1002 sid->vcpu.cpu = vcpu;
/* Add every guest evsel's attr to the host session. */
1010 static int guest_session__add_attrs(struct guest_session *gs)
1012 struct evlist *evlist = gs->session->evlist;
1013 struct evsel *evsel;
1016 evlist__for_each_entry(evlist, evsel) {
1017 ret = guest_session__add_attr(gs, evsel);
/* Synthesize an id_index covering only the newly added guest entries
 * (the last new_cnt entries of the host evlist). */
1025 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
1027 struct perf_session *session = inject->session;
1028 struct evlist *evlist = session->evlist;
1029 struct machine *machine = &session->machines.host;
1030 size_t from = evlist->core.nr_entries - new_cnt;
1032 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
1033 evlist, machine, from);
/* Look up the guest_tid entry for a host thread ID (NULL if absent —
 * return line elided in this view). */
1036 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
1038 struct hlist_head *head;
1039 struct guest_tid *guest_tid;
1042 hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
1043 head = &gs->tids[hash];
1045 hlist_for_each_entry(guest_tid, head, node)
1046 if (guest_tid->tid == tid)
/* Does this DSO live in kernel space? vdso is explicitly not kernel. */
1052 static bool dso__is_in_kernel_space(struct dso *dso)
1054 if (dso__is_vdso(dso))
1057 return dso__is_kcore(dso) ||
1059 is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
/* First sample ID found in the evlist (fallback return elided). */
1062 static u64 evlist__first_id(struct evlist *evlist)
1064 struct evsel *evsel;
1066 evlist__for_each_entry(evlist, evsel) {
1067 if (evsel->core.ids)
1068 return evsel->core.id[0];
/* Build-id handler that feeds events into the host inject session. */
1073 static int process_build_id(struct perf_tool *tool,
1074 union perf_event *event,
1075 struct perf_sample *sample __maybe_unused,
1076 struct machine *machine __maybe_unused)
1078 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1080 return perf_event__process_build_id(inject->session, event);
/*
 * Synthesize a build-id event for a guest DSO against the guest
 * machine (machine_pid), choosing guest kernel/user cpumode from the
 * DSO's location.
 */
1083 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
1085 struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
1086 u8 cpumode = dso__is_in_kernel_space(dso) ?
1087 PERF_RECORD_MISC_GUEST_KERNEL :
1088 PERF_RECORD_MISC_GUEST_USER;
1095 return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
1096 process_build_id, machine);
/*
 * Feed all guest DSO build IDs into the host session so they land in
 * the Build ID feature section of the output header.
 */
1099 static int guest_session__add_build_ids(struct guest_session *gs)
1101 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1102 struct machine *machine = &gs->session->machines.host;
1106 /* Build IDs will be put in the Build ID feature section */
1107 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
1109 dsos__for_each_with_build_id(dso, &machine->dsos.head) {
1110 ret = synthesize_build_id(inject, dso, gs->machine_pid);
/* Spool guest KSYMBOL events — out-of-line symbols only (no BPF). */
1118 static int guest_session__ksymbol_event(struct perf_tool *tool,
1119 union perf_event *event,
1120 struct perf_sample *sample __maybe_unused,
1121 struct machine *machine __maybe_unused)
1123 struct guest_session *gs = container_of(tool, struct guest_session, tool);
1125 /* Only support out-of-line i.e. no BPF support */
1126 if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
1129 return guest_session__output_bytes(gs, event, event->header.size);
/*
 * Open and pre-process the guest perf.data file: install spool
 * handlers for the injectable event types, read the default sample-ID
 * size/ID, then process all guest events into a temporary file (perf
 * cannot process two sessions simultaneously) and rewind it for
 * replay.  (Several error-return lines elided in this view.)
 */
1132 static int guest_session__start(struct guest_session *gs, const char *name, bool force)
1134 char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
1135 struct perf_session *session;
1138 /* Only these events will be injected */
1139 gs->tool.mmap = guest_session__repipe;
1140 gs->tool.mmap2 = guest_session__repipe;
1141 gs->tool.comm = guest_session__repipe;
1142 gs->tool.fork = guest_session__repipe;
1143 gs->tool.exit = guest_session__repipe;
1144 gs->tool.lost = guest_session__repipe;
1145 gs->tool.context_switch = guest_session__repipe;
1146 gs->tool.ksymbol = guest_session__ksymbol_event;
1147 gs->tool.text_poke = guest_session__repipe;
1149 * Processing a build ID creates a struct dso with that build ID. Later,
1150 * all guest dsos are iterated and the build IDs processed into the host
1151 * session where they will be output to the Build ID feature section
1152 * when the perf.data file header is written.
1154 gs->tool.build_id = perf_event__process_build_id;
1155 /* Process the id index to know what VCPU an ID belongs to */
1156 gs->tool.id_index = perf_event__process_id_index;
1158 gs->tool.ordered_events = true;
1159 gs->tool.ordering_requires_timestamps = true;
1161 gs->data.path = name;
1162 gs->data.force = force;
1163 gs->data.mode = PERF_DATA_MODE_READ;
1165 session = perf_session__new(&gs->data, &gs->tool);
1166 if (IS_ERR(session))
1167 return PTR_ERR(session);
1168 gs->session = session;
1171 * Initial events have zero'd ID samples. Get default ID sample size
1172 * used for removing them.
1174 gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
1175 /* And default ID for adding back a host-compatible ID sample */
1176 gs->dflt_id = evlist__first_id(session->evlist);
1178 pr_err("Guest data has no sample IDs");
1182 /* Temporary file for guest events */
1183 gs->tmp_file_name = strdup(tmp_file_name);
1184 if (!gs->tmp_file_name)
1186 gs->tmp_fd = mkstemp(gs->tmp_file_name);
1190 if (zstd_init(&gs->session->zstd_data, 0) < 0)
1191 pr_warning("Guest session decompression initialization failed.\n");
1194 * perf does not support processing 2 sessions simultaneously, so output
1195 * guest events to a temporary file.
1197 ret = perf_session__process_events(gs->session);
1201 if (lseek(gs->tmp_fd, 0, SEEK_SET))
1207 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
1208 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1210 struct hlist_node *pos, *n;
1213 for (i = 0; i < hlist_sz; ++i) {
1214 hlist_for_each_safe(pos, n, &heads[i]) {
/* Tear down the guest session: session object, both hash tables, the
 * temporary spool file (close/unlink) and the data-file path string. */
1221 static void guest_session__exit(struct guest_session *gs)
1224 perf_session__delete(gs->session);
1225 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
1226 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
1228 if (gs->tmp_file_name) {
1229 if (gs->tmp_fd >= 0)
1231 unlink(gs->tmp_file_name);
1232 free(gs->tmp_file_name);
1235 free(gs->perf_data_file);
/* Copy TSC conversion parameters out of a TIME_CONV record. */
1238 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
1240 tc->time_shift = time_conv->time_shift;
1241 tc->time_mult = time_conv->time_mult;
1242 tc->time_zero = time_conv->time_zero;
1243 tc->time_cycles = time_conv->time_cycles;
1244 tc->time_mask = time_conv->time_mask;
1245 tc->cap_user_time_zero = time_conv->cap_user_time_zero;
1246 tc->cap_user_time_short = time_conv->cap_user_time_short;
/* Capture host and guest TSC conversions from their respective sessions. */
1249 static void guest_session__get_tc(struct guest_session *gs)
1251 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1253 get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
1254 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
/*
 * Convert a guest perf timestamp to a host one via TSC: guest time ->
 * guest TSC, undo the guest's TSC offset/scale (correct ordering for
 * x86 per the comment below), then TSC -> host time.  The branches
 * taken when cap_user_time_zero is not set are elided in this view.
 */
1257 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
1266 if (gs->guest_tc.cap_user_time_zero)
1267 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
1272 * This is the correct order of operations for x86 if the TSC Offset and
1273 * Multiplier values are used.
1275 tsc -= gs->time_offset;
1276 tsc /= gs->time_scale;
1278 if (gs->host_tc.cap_user_time_zero)
1279 *host_time = tsc_to_perf_time(tsc, &gs->host_tc);
/*
 * Read the next event from the spool file into gs->ev: header first,
 * then the remainder; reject user-space synthesized record types,
 * parse the sample and convert its timestamp to host time.  A
 * zero-size header means EOF (handling elided in this view).
 */
1284 static int guest_session__fetch(struct guest_session *gs)
1286 void *buf = gs->ev.event_buf;
1287 struct perf_event_header *hdr = buf;
1288 size_t hdr_sz = sizeof(*hdr);
1291 ret = readn(gs->tmp_fd, buf, hdr_sz);
1296 /* Zero size means EOF */
1303 ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
1307 gs->ev.event = (union perf_event *)gs->ev.event_buf;
1308 gs->ev.sample.time = 0;
1310 if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
1311 pr_err("Unexpected type fetching guest event");
1315 ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
1317 pr_err("Parse failed fetching guest event");
1322 guest_session__get_tc(gs);
1326 guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);
/*
 * Append a synthesized ID sample (for the host evsel matching
 * sample->id) to the end of the event and grow its header size by the
 * synthesized length.
 */
1331 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1332 const struct perf_sample *sample)
1334 struct evsel *evsel;
1338 evsel = evlist__id2evsel(evlist, sample->id);
1342 pr_err("No evsel for id %"PRIu64"\n", sample->id);
1346 array += ev->header.size;
1347 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1352 pr_err("Bad id sample size %d\n", ret);
1356 ev->header.size += ret;
/*
 * Inject spooled guest events with timestamps up to 'timestamp' into
 * the host output: change the cpumode bits to guest user/kernel, strip
 * the guest ID sample from the tail, remap sample/stream ID and VCPU
 * to their host equivalents, append a fresh host-compatible ID sample
 * and write the event out.  Alignment (size & 7) is checked at each
 * rewrite step.  (Loop framing and several checks are elided in this
 * view.)
 */
1361 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
1363 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1370 struct perf_sample *sample;
1371 struct guest_id *guest_id;
1372 union perf_event *ev;
1378 ret = guest_session__fetch(gs);
1385 sample = &gs->ev.sample;
1387 if (!ev->header.size)
1390 if (sample->time > timestamp)
1393 /* Change cpumode to guest */
1394 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1395 if (cpumode & PERF_RECORD_MISC_USER)
1396 cpumode = PERF_RECORD_MISC_GUEST_USER;
1398 cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
1399 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
1400 ev->header.misc |= cpumode;
1405 id_hdr_size = gs->dflt_id_hdr_size;
1407 struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);
1409 id_hdr_size = evsel__id_hdr_size(evsel);
1412 if (id_hdr_size & 7) {
1413 pr_err("Bad id_hdr_size %u\n", id_hdr_size);
1417 if (ev->header.size & 7) {
1418 pr_err("Bad event size %u\n", ev->header.size);
1422 /* Remove guest id sample */
1423 ev->header.size -= id_hdr_size;
1425 if (ev->header.size & 7) {
1426 pr_err("Bad raw event size %u\n", ev->header.size);
1430 guest_id = guest_session__lookup_id(gs, id);
1432 pr_err("Guest event with unknown id %llu\n",
1433 (unsigned long long)id);
1437 /* Change to host ID to avoid conflicting ID values */
1438 sample->id = guest_id->host_id;
1439 sample->stream_id = guest_id->host_id;
1441 if (sample->cpu != (u32)-1) {
1442 if (sample->cpu >= gs->vcpu_cnt) {
1443 pr_err("Guest event with unknown VCPU %u\n",
1447 /* Change to host CPU instead of guest VCPU */
1448 sample->cpu = gs->vcpu[sample->cpu].cpu;
1451 /* New id sample with new ID and CPU */
1452 ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
1456 if (ev->header.size & 7) {
1457 pr_err("Bad new event size %u\n", ev->header.size);
1461 gs->fetched = false;
1463 ret = output_bytes(inject, ev, ev->header.size);
/* Inject all remaining guest events ((u64)-1 timestamp == no limit). */
1469 static int guest_session__flush_events(struct guest_session *gs)
1471 return guest_session__inject_events(gs, -1);
/*
 * Host sample repipe used when merging a guest session: first inject
 * guest events up to this sample's timestamp, then repipe the host
 * event so the merged stream stays time-ordered.
 */
1474 static int host__repipe(struct perf_tool *tool,
1475 union perf_event *event,
1476 struct perf_sample *sample,
1477 struct machine *machine)
1479 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1482 ret = guest_session__inject_events(&inject->guest_session, sample->time);
1486 return perf_event__repipe(tool, event, sample, machine);
1489 static int host__finished_init(struct perf_session *session, union perf_event *event)
1491 struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
1492 struct guest_session *gs = &inject->guest_session;
1496 * Peek through host COMM events to find QEMU threads and the VCPU they
1499 ret = host_peek_vm_comms(session, gs);
1503 if (!gs->vcpu_cnt) {
1504 pr_err("No VCPU theads found for pid %u\n", gs->machine_pid);
1509 * Allocate new (unused) host sample IDs and map them to the guest IDs.
1511 gs->highest_id = evlist__find_highest_id(session->evlist);
1512 ret = guest_session__map_ids(gs, session->evlist);
1516 ret = guest_session__add_attrs(gs);
1520 ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
1522 pr_err("Failed to synthesize id_index\n");
1526 ret = guest_session__add_build_ids(gs);
1528 pr_err("Failed to add guest build IDs\n");
1534 ret = guest_session__inject_events(gs, 0);
1538 return perf_event__repipe_op2_synth(session, event);
1542 * Obey finished-round ordering. The FINISHED_ROUND event is first processed
1543 * which flushes host events to file up until the last flush time. Then inject
1544 * guest events up to the same time. Finally write out the FINISHED_ROUND event
1547 static int host__finished_round(struct perf_tool *tool,
1548 union perf_event *event,
1549 struct ordered_events *oe)
1551 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1552 int ret = perf_event__process_finished_round(tool, event, oe);
1553 u64 timestamp = ordered_events__last_flush_time(oe);
1558 ret = guest_session__inject_events(&inject->guest_session, timestamp);
1562 return perf_event__repipe_oe_synth(tool, event, oe);
1565 static int host__context_switch(struct perf_tool *tool,
1566 union perf_event *event,
1567 struct perf_sample *sample,
1568 struct machine *machine)
1570 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1571 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1572 struct guest_session *gs = &inject->guest_session;
1573 u32 pid = event->context_switch.next_prev_pid;
1574 u32 tid = event->context_switch.next_prev_tid;
1575 struct guest_tid *guest_tid;
1578 if (out || pid != gs->machine_pid)
1581 guest_tid = guest_session__lookup_tid(gs, tid);
1585 if (sample->cpu == (u32)-1) {
1586 pr_err("Switch event does not have CPU\n");
1590 vcpu = guest_tid->vcpu;
1591 if (vcpu >= gs->vcpu_cnt)
1594 /* Guest is switching in, record which CPU the VCPU is now running on */
1595 gs->vcpu[vcpu].cpu = sample->cpu;
1597 return host__repipe(tool, event, sample, machine);
/* SIGINT handler — installed by __cmd_inject() via signal(SIGINT, ...). */
/* NOTE(review): body presumably sets a "stop processing" flag — confirm. */
static void sig_handler(int sig __maybe_unused)
1605 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
1607 struct perf_event_attr *attr = &evsel->core.attr;
1608 const char *name = evsel__name(evsel);
1610 if (!(attr->sample_type & sample_type)) {
1611 pr_err("Samples for %s event do not have %s attribute set.",
1619 static int drop_sample(struct perf_tool *tool __maybe_unused,
1620 union perf_event *event __maybe_unused,
1621 struct perf_sample *sample __maybe_unused,
1622 struct evsel *evsel __maybe_unused,
1623 struct machine *machine __maybe_unused)
1628 static void strip_init(struct perf_inject *inject)
1630 struct evlist *evlist = inject->session->evlist;
1631 struct evsel *evsel;
1633 inject->tool.context_switch = perf_event__drop;
1635 evlist__for_each_entry(evlist, evsel)
1636 evsel->handler = drop_sample;
1639 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
1641 struct perf_inject *inject = opt->value;
1648 inject->itrace_synth_opts.set = true;
1649 inject->itrace_synth_opts.vm_time_correlation = true;
1650 inject->in_place_update = true;
1655 dry_run = skip_spaces(str);
1656 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
1657 inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
1658 inject->in_place_update_dry_run = true;
1659 args = dry_run + strlen("dry-run");
1664 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
1666 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
/*
 * Parse the --guest-data option:
 *   <guest perf.data file>,<guest machine PID>[,<time offset>[,<time scale>]]
 * (the accepted format is described by the pr_err message below).
 */
static int parse_guest_data(const struct option *opt, const char *str, int unset)
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;

	/* First comma-separated field: guest perf.data file name */
	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)

	/* A guest file with a kcore_dir makes the output a directory too */
	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	/* Second field: guest machine (QEMU) PID — must be non-zero */
	tok = strsep(&s, ",");
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)

	/* Optional third field: guest timestamp offset */
	tok = strsep(&s, ",");
	gs->time_offset = strtoull(tok, NULL, 0);

	/* Optional fourth field: guest timestamp scale factor — must be non-zero */
	tok = strsep(&s, ",");
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)

	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
1724 static int save_section_info_cb(struct perf_file_section *section,
1725 struct perf_header *ph __maybe_unused,
1726 int feat, int fd __maybe_unused, void *data)
1728 struct perf_inject *inject = data;
1730 inject->secs[feat] = *section;
1734 static int save_section_info(struct perf_inject *inject)
1736 struct perf_header *header = &inject->session->header;
1737 int fd = perf_data__fd(inject->session->data);
1739 return perf_header__process_sections(header, fd, inject, save_section_info_cb);
1742 static bool keep_feat(int feat)
1745 /* Keep original information that describes the machine or software */
1746 case HEADER_TRACING_DATA:
1747 case HEADER_HOSTNAME:
1748 case HEADER_OSRELEASE:
1749 case HEADER_VERSION:
1752 case HEADER_CPUDESC:
1754 case HEADER_TOTAL_MEM:
1755 case HEADER_CPU_TOPOLOGY:
1756 case HEADER_NUMA_TOPOLOGY:
1757 case HEADER_PMU_MAPPINGS:
1759 case HEADER_MEM_TOPOLOGY:
1760 case HEADER_CLOCKID:
1761 case HEADER_BPF_PROG_INFO:
1762 case HEADER_BPF_BTF:
1763 case HEADER_CPU_PMU_CAPS:
1764 case HEADER_CLOCK_DATA:
1765 case HEADER_HYBRID_TOPOLOGY:
1766 case HEADER_PMU_CAPS:
1768 /* Information that can be updated */
1769 case HEADER_BUILD_ID:
1770 case HEADER_CMDLINE:
1771 case HEADER_EVENT_DESC:
1772 case HEADER_BRANCH_STACK:
1773 case HEADER_GROUP_DESC:
1774 case HEADER_AUXTRACE:
1776 case HEADER_SAMPLE_TIME:
1777 case HEADER_DIR_FORMAT:
1778 case HEADER_COMPRESSED:
1784 static int read_file(int fd, u64 offs, void *buf, size_t sz)
1786 ssize_t ret = preadn(fd, buf, sz, offs);
1790 if ((size_t)ret != sz)
1795 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
1797 int fd = perf_data__fd(inject->session->data);
1798 u64 offs = inject->secs[feat].offset;
1799 size_t sz = inject->secs[feat].size;
1800 void *buf = malloc(sz);
1806 ret = read_file(fd, offs, buf, sz);
1810 ret = fw->write(fw, buf, sz);
	/* Embedded first so feat_copy_cb() can recover it with container_of() */
	struct feat_copier fc;
	/* Back-pointer to the owning perf_inject */
	struct perf_inject *inject;
1821 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
1823 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
1824 struct perf_inject *inject = inj_fc->inject;
1827 if (!inject->secs[feat].offset ||
1831 ret = feat_copy(inject, feat, fw);
1835 return 1; /* Feature section copied */
1838 static int copy_kcore_dir(struct perf_inject *inject)
1843 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
1844 inject->input_name, inject->output.path);
1847 pr_debug("%s\n", cmd);
1853 static int guest_session__copy_kcore_dir(struct guest_session *gs)
1855 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1859 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
1860 gs->perf_data_file, inject->output.path, gs->machine_pid);
1863 pr_debug("%s\n", cmd);
1869 static int output_fd(struct perf_inject *inject)
1871 return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
/*
 * Core of 'perf inject': select the tool callbacks according to the requested
 * mode (build-ids, sched_stat merging, itrace synthesis, VM time correlation,
 * or guest perf.data injection), process the input session through them, and
 * finalize the output header.
 */
static int __cmd_inject(struct perf_inject *inject)
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);	/* -1 when updating in place */
	u64 output_data_offset;

	/* Allow Ctrl-C to stop event processing */
	signal(SIGINT, sig_handler);

	/* These modes rewrite events, so they cannot blindly repipe them */
	if (inject->build_ids || inject->sched_stat ||
	    inject->itrace_synth_opts.set || inject->build_id_all) {
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.fork = perf_event__repipe_fork;
		inject->tool.tracing_data = perf_event__repipe_tracing_data;

	output_data_offset = perf_session__data_offset(session->evlist);

	if (inject->build_id_all) {
		inject->tool.mmap = perf_event__repipe_buildid_mmap;
		inject->tool.mmap2 = perf_event__repipe_buildid_mmap2;
	} else if (inject->build_ids) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		/* Route sched tracepoints to the sched-stat merge handlers */
		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		/* Only the callbacks set below are needed; drop the rest */
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.namespaces = perf_event__repipe_namespaces;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		/* AUX area data is replaced by synthesized events */
		inject->tool.aux = perf_event__drop_aux;
		inject->tool.itrace_start = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * injected guest events.
		 */
		inject->tool.mmap = host__repipe;
		inject->tool.mmap2 = host__repipe;
		inject->tool.comm = host__repipe;
		inject->tool.fork = host__repipe;
		inject->tool.exit = host__repipe;
		inject->tool.lost = host__repipe;
		inject->tool.context_switch = host__repipe;
		inject->tool.ksymbol = host__repipe;
		inject->tool.text_poke = host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init = host__finished_init;
		/* Obey finished round ordering */
		/*
		 * NOTE(review): trailing ',' below is the comma operator; it
		 * only works because the next statement is an expression —
		 * should be ';'.
		 */
		inject->tool.finished_round = host__finished_round,
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch = host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);

			pr_err("Failed to process %s, error %d\n", name, ret);

		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	/* Skip past the header area before writing event data */
	if (!inject->is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);

	/*
	 * Remaining guest events have later timestamps. Flush them
	 * out to file.
	 */
	ret = guest_session__flush_events(gs);

		pr_err("Failed to flush guest events\n");

	if (!inject->is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,

		if (inject->build_ids)
			perf_header__set_feat(&session->header,
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			dsos__hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,

			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);

		/* Rewrite the header to match the injected data */
		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);

				pr_err("Failed to copy kcore\n");

		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);

				pr_err("Failed to copy guest kcore\n");
2058 int cmd_inject(int argc, const char **argv)
2060 struct perf_inject inject = {
2062 .sample = perf_event__repipe_sample,
2063 .read = perf_event__repipe_sample,
2064 .mmap = perf_event__repipe,
2065 .mmap2 = perf_event__repipe,
2066 .comm = perf_event__repipe,
2067 .namespaces = perf_event__repipe,
2068 .cgroup = perf_event__repipe,
2069 .fork = perf_event__repipe,
2070 .exit = perf_event__repipe,
2071 .lost = perf_event__repipe,
2072 .lost_samples = perf_event__repipe,
2073 .aux = perf_event__repipe,
2074 .itrace_start = perf_event__repipe,
2075 .aux_output_hw_id = perf_event__repipe,
2076 .context_switch = perf_event__repipe,
2077 .throttle = perf_event__repipe,
2078 .unthrottle = perf_event__repipe,
2079 .ksymbol = perf_event__repipe,
2080 .bpf = perf_event__repipe,
2081 .text_poke = perf_event__repipe,
2082 .attr = perf_event__repipe_attr,
2083 .event_update = perf_event__repipe_event_update,
2084 .tracing_data = perf_event__repipe_op2_synth,
2085 .finished_round = perf_event__repipe_oe_synth,
2086 .build_id = perf_event__repipe_op2_synth,
2087 .id_index = perf_event__repipe_op2_synth,
2088 .auxtrace_info = perf_event__repipe_op2_synth,
2089 .auxtrace_error = perf_event__repipe_op2_synth,
2090 .time_conv = perf_event__repipe_op2_synth,
2091 .thread_map = perf_event__repipe_op2_synth,
2092 .cpu_map = perf_event__repipe_op2_synth,
2093 .stat_config = perf_event__repipe_op2_synth,
2094 .stat = perf_event__repipe_op2_synth,
2095 .stat_round = perf_event__repipe_op2_synth,
2096 .feature = perf_event__repipe_op2_synth,
2097 .finished_init = perf_event__repipe_op2_synth,
2098 .compressed = perf_event__repipe_op4_synth,
2099 .auxtrace = perf_event__repipe_auxtrace,
2102 .samples = LIST_HEAD_INIT(inject.samples),
2105 .mode = PERF_DATA_MODE_WRITE,
2109 struct perf_data data = {
2110 .mode = PERF_DATA_MODE_READ,
2116 struct option options[] = {
2117 OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
2118 "Inject build-ids into the output stream"),
2119 OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
2120 "Inject build-ids of all DSOs into the output stream"),
2121 OPT_STRING('i', "input", &inject.input_name, "file",
2123 OPT_STRING('o', "output", &inject.output.path, "file",
2124 "output file name"),
2125 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2126 "Merge sched-stat and sched-switch for getting events "
2127 "where and how long tasks slept"),
2129 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2131 OPT_INCR('v', "verbose", &verbose,
2132 "be more verbose (show build ids, etc)"),
2133 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2134 "file", "vmlinux pathname"),
2135 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2136 "don't load vmlinux even if found"),
2137 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2138 "kallsyms pathname"),
2139 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2140 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2141 NULL, "opts", "Instruction Tracing options\n"
2143 itrace_parse_synth_opts),
2144 OPT_BOOLEAN(0, "strip", &inject.strip,
2145 "strip non-synthesized events (use with --itrace)"),
2146 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2147 "correlate time between VM guests and the host",
2148 parse_vm_time_correlation),
2149 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2150 "inject events from a guest perf.data file",
2152 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2153 "guest mount directory under which every guest os"
2154 " instance has a subdir"),
2157 const char * const inject_usage[] = {
2158 "perf inject [<options>]",
2161 #ifndef HAVE_JITDUMP
2162 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2164 argc = parse_options(argc, argv, options, inject_usage, 0);
2167 * Any (unrecognized) arguments left?
2170 usage_with_options(inject_usage, options);
2172 if (inject.strip && !inject.itrace_synth_opts.set) {
2173 pr_err("--strip option requires --itrace option\n");
2177 if (symbol__validate_sym_arguments())
2180 if (inject.in_place_update) {
2181 if (!strcmp(inject.input_name, "-")) {
2182 pr_err("Input file name required for in-place updating\n");
2185 if (strcmp(inject.output.path, "-")) {
2186 pr_err("Output file name must not be specified for in-place updating\n");
2189 if (!data.force && !inject.in_place_update_dry_run) {
2190 pr_err("The input file would be updated in place, "
2191 "the --force option is required.\n");
2194 if (!inject.in_place_update_dry_run)
2195 data.in_place_update = true;
2197 if (strcmp(inject.output.path, "-") && !inject.strip &&
2198 has_kcore_dir(inject.input_name)) {
2199 inject.output.is_dir = true;
2200 inject.copy_kcore_dir = true;
2202 if (perf_data__open(&inject.output)) {
2203 perror("failed to create output file");
2208 data.path = inject.input_name;
2209 if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
2210 inject.is_pipe = true;
2212 * Do not repipe header when input is a regular file
2213 * since either it can rewrite the header at the end
2214 * or write a new pipe header.
2216 if (strcmp(inject.input_name, "-"))
2220 inject.session = __perf_session__new(&data, repipe,
2223 if (IS_ERR(inject.session)) {
2224 ret = PTR_ERR(inject.session);
2225 goto out_close_output;
2228 if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2229 pr_warning("Decompression initialization failed.\n");
2231 /* Save original section info before feature bits change */
2232 ret = save_section_info(&inject);
2236 if (!data.is_pipe && inject.output.is_pipe) {
2237 ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2239 pr_err("Couldn't write a new pipe header.\n");
2243 ret = perf_event__synthesize_for_pipe(&inject.tool,
2246 perf_event__repipe);
2251 if (inject.build_ids && !inject.build_id_all) {
2253 * to make sure the mmap records are ordered correctly
2254 * and so that the correct especially due to jitted code
2255 * mmaps. We cannot generate the buildid hit list and
2256 * inject the jit mmaps at the same time for now.
2258 inject.tool.ordered_events = true;
2259 inject.tool.ordering_requires_timestamps = true;
2262 if (inject.sched_stat) {
2263 inject.tool.ordered_events = true;
2267 if (inject.jit_mode) {
2268 inject.tool.mmap2 = perf_event__jit_repipe_mmap2;
2269 inject.tool.mmap = perf_event__jit_repipe_mmap;
2270 inject.tool.ordered_events = true;
2271 inject.tool.ordering_requires_timestamps = true;
2273 * JIT MMAP injection injects all MMAP events in one go, so it
2274 * does not obey finished_round semantics.
2276 inject.tool.finished_round = perf_event__drop_oe;
2279 ret = symbol__init(&inject.session->header.env);
2283 ret = __cmd_inject(&inject);
2285 guest_session__exit(&inject.guest_session);
2288 zstd_fini(&(inject.session->zstd_data));
2289 perf_session__delete(inject.session);
2291 if (!inject.in_place_update)
2292 perf_data__close(&inject.output);
2293 free(inject.itrace_synth_opts.vm_tm_corr_args);