Merge branch 'linus' into x86/mm, to refresh the branch
[linux-2.6-microblaze.git] / tools / perf / builtin-inject.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-inject.c
4  *
5  * Builtin inject command: Examine the live mode (stdin) event stream
6  * and repipe it to stdout while optionally injecting additional
7  * events into it.
8  */
9 #include "builtin.h"
10
11 #include "util/color.h"
12 #include "util/dso.h"
13 #include "util/vdso.h"
14 #include "util/evlist.h"
15 #include "util/evsel.h"
16 #include "util/map.h"
17 #include "util/session.h"
18 #include "util/tool.h"
19 #include "util/debug.h"
20 #include "util/build-id.h"
21 #include "util/data.h"
22 #include "util/auxtrace.h"
23 #include "util/jit.h"
24 #include "util/symbol.h"
25 #include "util/synthetic-events.h"
26 #include "util/thread.h"
27 #include "util/namespaces.h"
28 #include "util/util.h"
29 #include "util/tsc.h"
30
31 #include <internal/lib.h>
32
33 #include <linux/err.h>
34 #include <subcmd/parse-options.h>
35 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
36
37 #include <linux/list.h>
38 #include <linux/string.h>
39 #include <linux/zalloc.h>
40 #include <linux/hash.h>
41 #include <errno.h>
42 #include <signal.h>
43 #include <inttypes.h>
44
45 struct guest_event {
46         struct perf_sample              sample;
47         union perf_event                *event;
48         char                            event_buf[PERF_SAMPLE_MAX_SIZE];
49 };
50
51 struct guest_id {
52         /* hlist_node must be first, see free_hlist() */
53         struct hlist_node               node;
54         u64                             id;
55         u64                             host_id;
56         u32                             vcpu;
57 };
58
59 struct guest_tid {
60         /* hlist_node must be first, see free_hlist() */
61         struct hlist_node               node;
62         /* Thread ID of QEMU thread */
63         u32                             tid;
64         u32                             vcpu;
65 };
66
67 struct guest_vcpu {
68         /* Current host CPU */
69         u32                             cpu;
70         /* Thread ID of QEMU thread */
71         u32                             tid;
72 };
73
74 struct guest_session {
75         char                            *perf_data_file;
76         u32                             machine_pid;
77         u64                             time_offset;
78         double                          time_scale;
79         struct perf_tool                tool;
80         struct perf_data                data;
81         struct perf_session             *session;
82         char                            *tmp_file_name;
83         int                             tmp_fd;
84         struct perf_tsc_conversion      host_tc;
85         struct perf_tsc_conversion      guest_tc;
86         bool                            copy_kcore_dir;
87         bool                            have_tc;
88         bool                            fetched;
89         bool                            ready;
90         u16                             dflt_id_hdr_size;
91         u64                             dflt_id;
92         u64                             highest_id;
93         /* Array of guest_vcpu */
94         struct guest_vcpu               *vcpu;
95         size_t                          vcpu_cnt;
96         /* Hash table for guest_id */
97         struct hlist_head               heads[PERF_EVLIST__HLIST_SIZE];
98         /* Hash table for guest_tid */
99         struct hlist_head               tids[PERF_EVLIST__HLIST_SIZE];
100         /* Place to stash next guest event */
101         struct guest_event              ev;
102 };
103
104 struct perf_inject {
105         struct perf_tool        tool;
106         struct perf_session     *session;
107         bool                    build_ids;
108         bool                    build_id_all;
109         bool                    sched_stat;
110         bool                    have_auxtrace;
111         bool                    strip;
112         bool                    jit_mode;
113         bool                    in_place_update;
114         bool                    in_place_update_dry_run;
115         bool                    is_pipe;
116         bool                    copy_kcore_dir;
117         const char              *input_name;
118         struct perf_data        output;
119         u64                     bytes_written;
120         u64                     aux_id;
121         struct list_head        samples;
122         struct itrace_synth_opts itrace_synth_opts;
123         char                    event_copy[PERF_SAMPLE_MAX_SIZE];
124         struct perf_file_section secs[HEADER_FEAT_BITS];
125         struct guest_session    guest_session;
126 };
127
128 struct event_entry {
129         struct list_head node;
130         u32              tid;
131         union perf_event event[];
132 };
133
134 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
135                                 struct machine *machine, u8 cpumode, u32 flags);
136
137 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
138 {
139         ssize_t size;
140
141         size = perf_data__write(&inject->output, buf, sz);
142         if (size < 0)
143                 return -errno;
144
145         inject->bytes_written += size;
146         return 0;
147 }
148
149 static int perf_event__repipe_synth(struct perf_tool *tool,
150                                     union perf_event *event)
151 {
152         struct perf_inject *inject = container_of(tool, struct perf_inject,
153                                                   tool);
154
155         return output_bytes(inject, event, event->header.size);
156 }
157
158 static int perf_event__repipe_oe_synth(struct perf_tool *tool,
159                                        union perf_event *event,
160                                        struct ordered_events *oe __maybe_unused)
161 {
162         return perf_event__repipe_synth(tool, event);
163 }
164
165 #ifdef HAVE_JITDUMP
166 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
167                                union perf_event *event __maybe_unused,
168                                struct ordered_events *oe __maybe_unused)
169 {
170         return 0;
171 }
172 #endif
173
174 static int perf_event__repipe_op2_synth(struct perf_session *session,
175                                         union perf_event *event)
176 {
177         return perf_event__repipe_synth(session->tool, event);
178 }
179
180 static int perf_event__repipe_op4_synth(struct perf_session *session,
181                                         union perf_event *event,
182                                         u64 data __maybe_unused,
183                                         const char *str __maybe_unused)
184 {
185         return perf_event__repipe_synth(session->tool, event);
186 }
187
188 static int perf_event__repipe_attr(struct perf_tool *tool,
189                                    union perf_event *event,
190                                    struct evlist **pevlist)
191 {
192         struct perf_inject *inject = container_of(tool, struct perf_inject,
193                                                   tool);
194         int ret;
195
196         ret = perf_event__process_attr(tool, event, pevlist);
197         if (ret)
198                 return ret;
199
200         if (!inject->is_pipe)
201                 return 0;
202
203         return perf_event__repipe_synth(tool, event);
204 }
205
206 static int perf_event__repipe_event_update(struct perf_tool *tool,
207                                            union perf_event *event,
208                                            struct evlist **pevlist __maybe_unused)
209 {
210         return perf_event__repipe_synth(tool, event);
211 }
212
213 #ifdef HAVE_AUXTRACE_SUPPORT
214
/*
 * Copy @size bytes from file descriptor @fd to the inject output, using a
 * 4 KiB bounce buffer.
 *
 * Returns 0 on success, -errno if read() fails, the error from
 * output_bytes() if writing fails, or -EIO if @fd reaches end-of-file before
 * @size bytes have been read.
 */
static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
{
	char buf[4096];
	ssize_t ssz;
	int ret;

	while (size > 0) {
		ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
		if (ssz < 0)
			return -errno;
		/*
		 * read() returns 0 at end-of-file.  Without this check a
		 * truncated input would never decrease 'size' and the loop
		 * would spin forever.
		 */
		if (ssz == 0) {
			pr_err("Unexpected end of input while copying data\n");
			return -EIO;
		}
		ret = output_bytes(inject, buf, ssz);
		if (ret)
			return ret;
		size -= ssz;
	}

	return 0;
}
233
234 static s64 perf_event__repipe_auxtrace(struct perf_session *session,
235                                        union perf_event *event)
236 {
237         struct perf_tool *tool = session->tool;
238         struct perf_inject *inject = container_of(tool, struct perf_inject,
239                                                   tool);
240         int ret;
241
242         inject->have_auxtrace = true;
243
244         if (!inject->output.is_pipe) {
245                 off_t offset;
246
247                 offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
248                 if (offset == -1)
249                         return -errno;
250                 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
251                                                      event, offset);
252                 if (ret < 0)
253                         return ret;
254         }
255
256         if (perf_data__is_pipe(session->data) || !session->one_mmap) {
257                 ret = output_bytes(inject, event, event->header.size);
258                 if (ret < 0)
259                         return ret;
260                 ret = copy_bytes(inject, perf_data__fd(session->data),
261                                  event->auxtrace.size);
262         } else {
263                 ret = output_bytes(inject, event,
264                                    event->header.size + event->auxtrace.size);
265         }
266         if (ret < 0)
267                 return ret;
268
269         return event->auxtrace.size;
270 }
271
272 #else
273
274 static s64
275 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
276                             union perf_event *event __maybe_unused)
277 {
278         pr_err("AUX area tracing not supported\n");
279         return -EINVAL;
280 }
281
282 #endif
283
284 static int perf_event__repipe(struct perf_tool *tool,
285                               union perf_event *event,
286                               struct perf_sample *sample __maybe_unused,
287                               struct machine *machine __maybe_unused)
288 {
289         return perf_event__repipe_synth(tool, event);
290 }
291
292 static int perf_event__drop(struct perf_tool *tool __maybe_unused,
293                             union perf_event *event __maybe_unused,
294                             struct perf_sample *sample __maybe_unused,
295                             struct machine *machine __maybe_unused)
296 {
297         return 0;
298 }
299
300 static int perf_event__drop_aux(struct perf_tool *tool,
301                                 union perf_event *event __maybe_unused,
302                                 struct perf_sample *sample,
303                                 struct machine *machine __maybe_unused)
304 {
305         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
306
307         if (!inject->aux_id)
308                 inject->aux_id = sample->id;
309
310         return 0;
311 }
312
313 static union perf_event *
314 perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
315                                  union perf_event *event,
316                                  struct perf_sample *sample)
317 {
318         size_t sz1 = sample->aux_sample.data - (void *)event;
319         size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
320         union perf_event *ev = (union perf_event *)inject->event_copy;
321
322         if (sz1 > event->header.size || sz2 > event->header.size ||
323             sz1 + sz2 > event->header.size ||
324             sz1 < sizeof(struct perf_event_header) + sizeof(u64))
325                 return event;
326
327         memcpy(ev, event, sz1);
328         memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
329         ev->header.size = sz1 + sz2;
330         ((u64 *)((void *)ev + sz1))[-1] = 0;
331
332         return ev;
333 }
334
/*
 * Signature of the per-evsel sample handlers stored in evsel->handler and
 * invoked from perf_event__repipe_sample().
 */
typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event,
			      struct perf_sample *sample, struct evsel *evsel,
			      struct machine *machine);
340
341 static int perf_event__repipe_sample(struct perf_tool *tool,
342                                      union perf_event *event,
343                                      struct perf_sample *sample,
344                                      struct evsel *evsel,
345                                      struct machine *machine)
346 {
347         struct perf_inject *inject = container_of(tool, struct perf_inject,
348                                                   tool);
349
350         if (evsel && evsel->handler) {
351                 inject_handler f = evsel->handler;
352                 return f(tool, event, sample, evsel, machine);
353         }
354
355         build_id__mark_dso_hit(tool, event, sample, evsel, machine);
356
357         if (inject->itrace_synth_opts.set && sample->aux_sample.size)
358                 event = perf_inject__cut_auxtrace_sample(inject, event, sample);
359
360         return perf_event__repipe_synth(tool, event);
361 }
362
/*
 * Process an MMAP event and copy it to the output.  The return value is that
 * of the processing step; the result of the repipe is not checked.
 */
static int perf_event__repipe_mmap(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_mmap(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
375
376 #ifdef HAVE_JITDUMP
377 static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
378                                        union perf_event *event,
379                                        struct perf_sample *sample,
380                                        struct machine *machine)
381 {
382         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
383         u64 n = 0;
384         int ret;
385
386         /*
387          * if jit marker, then inject jit mmaps and generate ELF images
388          */
389         ret = jit_process(inject->session, &inject->output, machine,
390                           event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
391         if (ret < 0)
392                 return ret;
393         if (ret) {
394                 inject->bytes_written += n;
395                 return 0;
396         }
397         return perf_event__repipe_mmap(tool, event, sample, machine);
398 }
399 #endif
400
401 static struct dso *findnew_dso(int pid, int tid, const char *filename,
402                                struct dso_id *id, struct machine *machine)
403 {
404         struct thread *thread;
405         struct nsinfo *nsi = NULL;
406         struct nsinfo *nnsi;
407         struct dso *dso;
408         bool vdso;
409
410         thread = machine__findnew_thread(machine, pid, tid);
411         if (thread == NULL) {
412                 pr_err("cannot find or create a task %d/%d.\n", tid, pid);
413                 return NULL;
414         }
415
416         vdso = is_vdso_map(filename);
417         nsi = nsinfo__get(thread->nsinfo);
418
419         if (vdso) {
420                 /* The vdso maps are always on the host and not the
421                  * container.  Ensure that we don't use setns to look
422                  * them up.
423                  */
424                 nnsi = nsinfo__copy(nsi);
425                 if (nnsi) {
426                         nsinfo__put(nsi);
427                         nsinfo__clear_need_setns(nnsi);
428                         nsi = nnsi;
429                 }
430                 dso = machine__findnew_vdso(machine, thread);
431         } else {
432                 dso = machine__findnew_dso_id(machine, filename, id);
433         }
434
435         if (dso) {
436                 nsinfo__put(dso->nsinfo);
437                 dso->nsinfo = nsi;
438         } else
439                 nsinfo__put(nsi);
440
441         thread__put(thread);
442         return dso;
443 }
444
445 static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
446                                            union perf_event *event,
447                                            struct perf_sample *sample,
448                                            struct machine *machine)
449 {
450         struct dso *dso;
451
452         dso = findnew_dso(event->mmap.pid, event->mmap.tid,
453                           event->mmap.filename, NULL, machine);
454
455         if (dso && !dso->hit) {
456                 dso->hit = 1;
457                 dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
458         }
459         dso__put(dso);
460
461         return perf_event__repipe(tool, event, sample, machine);
462 }
463
464 static int perf_event__repipe_mmap2(struct perf_tool *tool,
465                                    union perf_event *event,
466                                    struct perf_sample *sample,
467                                    struct machine *machine)
468 {
469         int err;
470
471         err = perf_event__process_mmap2(tool, event, sample, machine);
472         perf_event__repipe(tool, event, sample, machine);
473
474         if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
475                 struct dso *dso;
476
477                 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
478                                   event->mmap2.filename, NULL, machine);
479                 if (dso) {
480                         /* mark it not to inject build-id */
481                         dso->hit = 1;
482                 }
483                 dso__put(dso);
484         }
485
486         return err;
487 }
488
489 #ifdef HAVE_JITDUMP
490 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
491                                         union perf_event *event,
492                                         struct perf_sample *sample,
493                                         struct machine *machine)
494 {
495         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
496         u64 n = 0;
497         int ret;
498
499         /*
500          * if jit marker, then inject jit mmaps and generate ELF images
501          */
502         ret = jit_process(inject->session, &inject->output, machine,
503                           event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
504         if (ret < 0)
505                 return ret;
506         if (ret) {
507                 inject->bytes_written += n;
508                 return 0;
509         }
510         return perf_event__repipe_mmap2(tool, event, sample, machine);
511 }
512 #endif
513
514 static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
515                                             union perf_event *event,
516                                             struct perf_sample *sample,
517                                             struct machine *machine)
518 {
519         struct dso_id dso_id = {
520                 .maj = event->mmap2.maj,
521                 .min = event->mmap2.min,
522                 .ino = event->mmap2.ino,
523                 .ino_generation = event->mmap2.ino_generation,
524         };
525         struct dso *dso;
526
527         if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
528                 /* cannot use dso_id since it'd have invalid info */
529                 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
530                                   event->mmap2.filename, NULL, machine);
531                 if (dso) {
532                         /* mark it not to inject build-id */
533                         dso->hit = 1;
534                 }
535                 dso__put(dso);
536                 return 0;
537         }
538
539         dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
540                           event->mmap2.filename, &dso_id, machine);
541
542         if (dso && !dso->hit) {
543                 dso->hit = 1;
544                 dso__inject_build_id(dso, tool, machine, sample->cpumode,
545                                      event->mmap2.flags);
546         }
547         dso__put(dso);
548
549         perf_event__repipe(tool, event, sample, machine);
550
551         return 0;
552 }
553
/*
 * Process a FORK event and copy it to the output.  Returns the result of the
 * processing step; the result of the repipe is not checked.
 */
static int perf_event__repipe_fork(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_fork(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
566
/*
 * Process a COMM event and copy it to the output.  Returns the result of the
 * processing step; the result of the repipe is not checked.
 */
static int perf_event__repipe_comm(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_comm(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
579
/*
 * Process a NAMESPACES event and copy it to the output.  Returns the result
 * of the processing step; the result of the repipe is not checked.
 */
static int perf_event__repipe_namespaces(struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err;

	err = perf_event__process_namespaces(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}
591
/*
 * Process an EXIT event and copy it to the output.  Returns the result of the
 * processing step; the result of the repipe is not checked.
 */
static int perf_event__repipe_exit(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_exit(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
604
605 static int perf_event__repipe_tracing_data(struct perf_session *session,
606                                            union perf_event *event)
607 {
608         perf_event__repipe_synth(session->tool, event);
609
610         return perf_event__process_tracing_data(session, event);
611 }
612
613 static int dso__read_build_id(struct dso *dso)
614 {
615         struct nscookie nsc;
616
617         if (dso->has_build_id)
618                 return 0;
619
620         nsinfo__mountns_enter(dso->nsinfo, &nsc);
621         if (filename__read_build_id(dso->long_name, &dso->bid) > 0)
622                 dso->has_build_id = true;
623         else if (dso->nsinfo) {
624                 char *new_name;
625
626                 new_name = filename_with_chroot(dso->nsinfo->pid,
627                                                 dso->long_name);
628                 if (new_name && filename__read_build_id(new_name, &dso->bid) > 0)
629                         dso->has_build_id = true;
630                 free(new_name);
631         }
632         nsinfo__mountns_exit(&nsc);
633
634         return dso->has_build_id ? 0 : -1;
635 }
636
637 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
638                                 struct machine *machine, u8 cpumode, u32 flags)
639 {
640         int err;
641
642         if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB)
643                 return 0;
644         if (is_no_dso_memory(dso->long_name))
645                 return 0;
646
647         if (dso__read_build_id(dso) < 0) {
648                 pr_debug("no build_id found for %s\n", dso->long_name);
649                 return -1;
650         }
651
652         err = perf_event__synthesize_build_id(tool, dso, cpumode,
653                                               perf_event__repipe, machine);
654         if (err) {
655                 pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
656                 return -1;
657         }
658
659         return 0;
660 }
661
662 int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
663                                struct perf_sample *sample,
664                                struct evsel *evsel __maybe_unused,
665                                struct machine *machine)
666 {
667         struct addr_location al;
668         struct thread *thread;
669
670         thread = machine__findnew_thread(machine, sample->pid, sample->tid);
671         if (thread == NULL) {
672                 pr_err("problem processing %d event, skipping it.\n",
673                        event->header.type);
674                 goto repipe;
675         }
676
677         if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
678                 if (!al.map->dso->hit) {
679                         al.map->dso->hit = 1;
680                         dso__inject_build_id(al.map->dso, tool, machine,
681                                              sample->cpumode, al.map->flags);
682                 }
683         }
684
685         thread__put(thread);
686 repipe:
687         perf_event__repipe(tool, event, sample, machine);
688         return 0;
689 }
690
691 static int perf_inject__sched_process_exit(struct perf_tool *tool,
692                                            union perf_event *event __maybe_unused,
693                                            struct perf_sample *sample,
694                                            struct evsel *evsel __maybe_unused,
695                                            struct machine *machine __maybe_unused)
696 {
697         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
698         struct event_entry *ent;
699
700         list_for_each_entry(ent, &inject->samples, node) {
701                 if (sample->tid == ent->tid) {
702                         list_del_init(&ent->node);
703                         free(ent);
704                         break;
705                 }
706         }
707
708         return 0;
709 }
710
711 static int perf_inject__sched_switch(struct perf_tool *tool,
712                                      union perf_event *event,
713                                      struct perf_sample *sample,
714                                      struct evsel *evsel,
715                                      struct machine *machine)
716 {
717         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
718         struct event_entry *ent;
719
720         perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
721
722         ent = malloc(event->header.size + sizeof(struct event_entry));
723         if (ent == NULL) {
724                 color_fprintf(stderr, PERF_COLOR_RED,
725                              "Not enough memory to process sched switch event!");
726                 return -1;
727         }
728
729         ent->tid = sample->tid;
730         memcpy(&ent->event, event, event->header.size);
731         list_add(&ent->node, &inject->samples);
732         return 0;
733 }
734
735 static int perf_inject__sched_stat(struct perf_tool *tool,
736                                    union perf_event *event __maybe_unused,
737                                    struct perf_sample *sample,
738                                    struct evsel *evsel,
739                                    struct machine *machine)
740 {
741         struct event_entry *ent;
742         union perf_event *event_sw;
743         struct perf_sample sample_sw;
744         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
745         u32 pid = evsel__intval(evsel, sample, "pid");
746
747         list_for_each_entry(ent, &inject->samples, node) {
748                 if (pid == ent->tid)
749                         goto found;
750         }
751
752         return 0;
753 found:
754         event_sw = &ent->event[0];
755         evsel__parse_sample(evsel, event_sw, &sample_sw);
756
757         sample_sw.period = sample->period;
758         sample_sw.time   = sample->time;
759         perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
760                                       evsel->core.attr.read_format, &sample_sw);
761         build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
762         return perf_event__repipe(tool, event_sw, &sample_sw, machine);
763 }
764
765 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
766 {
767         if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
768                 return NULL;
769         return &gs->vcpu[vcpu];
770 }
771
772 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
773 {
774         ssize_t ret = writen(gs->tmp_fd, buf, sz);
775
776         return ret < 0 ? ret : 0;
777 }
778
779 static int guest_session__repipe(struct perf_tool *tool,
780                                  union perf_event *event,
781                                  struct perf_sample *sample __maybe_unused,
782                                  struct machine *machine __maybe_unused)
783 {
784         struct guest_session *gs = container_of(tool, struct guest_session, tool);
785
786         return guest_session__output_bytes(gs, event, event->header.size);
787 }
788
789 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
790 {
791         struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
792         int hash;
793
794         if (!guest_tid)
795                 return -ENOMEM;
796
797         guest_tid->tid = tid;
798         guest_tid->vcpu = vcpu;
799         hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
800         hlist_add_head(&guest_tid->node, &gs->tids[hash]);
801
802         return 0;
803 }
804
805 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
806                                  union perf_event *event,
807                                  u64 offset __maybe_unused, void *data)
808 {
809         struct guest_session *gs = data;
810         unsigned int vcpu;
811         struct guest_vcpu *guest_vcpu;
812         int ret;
813
814         if (event->header.type != PERF_RECORD_COMM ||
815             event->comm.pid != gs->machine_pid)
816                 return 0;
817
818         /*
819          * QEMU option -name debug-threads=on, causes thread names formatted as
820          * below, although it is not an ABI. Also libvirt seems to use this by
821          * default. Here we rely on it to tell us which thread is which VCPU.
822          */
823         ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
824         if (ret <= 0)
825                 return ret;
826         pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
827                  event->comm.tid, event->comm.comm, vcpu);
828         if (vcpu > INT_MAX) {
829                 pr_err("Invalid VCPU %u\n", vcpu);
830                 return -EINVAL;
831         }
832         guest_vcpu = guest_session__vcpu(gs, vcpu);
833         if (!guest_vcpu)
834                 return -ENOMEM;
835         if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
836                 pr_err("Fatal error: Two threads found with the same VCPU\n");
837                 return -EINVAL;
838         }
839         guest_vcpu->tid = event->comm.tid;
840
841         return guest_session__map_tid(gs, event->comm.tid, vcpu);
842 }
843
844 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
845 {
846         return perf_session__peek_events(session, session->header.data_offset,
847                                          session->header.data_size,
848                                          host_peek_vm_comms_cb, gs);
849 }
850
851 static bool evlist__is_id_used(struct evlist *evlist, u64 id)
852 {
853         return evlist__id2sid(evlist, id);
854 }
855
856 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
857 {
858         do {
859                 gs->highest_id += 1;
860         } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
861
862         return gs->highest_id;
863 }
864
865 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
866 {
867         struct guest_id *guest_id = zalloc(sizeof(*guest_id));
868         int hash;
869
870         if (!guest_id)
871                 return -ENOMEM;
872
873         guest_id->id = id;
874         guest_id->host_id = host_id;
875         guest_id->vcpu = vcpu;
876         hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
877         hlist_add_head(&guest_id->node, &gs->heads[hash]);
878
879         return 0;
880 }
881
882 static u64 evlist__find_highest_id(struct evlist *evlist)
883 {
884         struct evsel *evsel;
885         u64 highest_id = 1;
886
887         evlist__for_each_entry(evlist, evsel) {
888                 u32 j;
889
890                 for (j = 0; j < evsel->core.ids; j++) {
891                         u64 id = evsel->core.id[j];
892
893                         if (id > highest_id)
894                                 highest_id = id;
895                 }
896         }
897
898         return highest_id;
899 }
900
901 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
902 {
903         struct evlist *evlist = gs->session->evlist;
904         struct evsel *evsel;
905         int ret;
906
907         evlist__for_each_entry(evlist, evsel) {
908                 u32 j;
909
910                 for (j = 0; j < evsel->core.ids; j++) {
911                         struct perf_sample_id *sid;
912                         u64 host_id;
913                         u64 id;
914
915                         id = evsel->core.id[j];
916                         sid = evlist__id2sid(evlist, id);
917                         if (!sid || sid->cpu.cpu == -1)
918                                 continue;
919                         host_id = guest_session__allocate_new_id(gs, host_evlist);
920                         ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
921                         if (ret)
922                                 return ret;
923                 }
924         }
925
926         return 0;
927 }
928
929 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
930 {
931         struct hlist_head *head;
932         struct guest_id *guest_id;
933         int hash;
934
935         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
936         head = &gs->heads[hash];
937
938         hlist_for_each_entry(guest_id, head, node)
939                 if (guest_id->id == id)
940                         return guest_id;
941
942         return NULL;
943 }
944
945 static int process_attr(struct perf_tool *tool, union perf_event *event,
946                         struct perf_sample *sample __maybe_unused,
947                         struct machine *machine __maybe_unused)
948 {
949         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
950
951         return perf_event__process_attr(tool, event, &inject->session->evlist);
952 }
953
954 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
955 {
956         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
957         struct perf_event_attr attr = evsel->core.attr;
958         u64 *id_array;
959         u32 *vcpu_array;
960         int ret = -ENOMEM;
961         u32 i;
962
963         id_array = calloc(evsel->core.ids, sizeof(*id_array));
964         if (!id_array)
965                 return -ENOMEM;
966
967         vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
968         if (!vcpu_array)
969                 goto out;
970
971         for (i = 0; i < evsel->core.ids; i++) {
972                 u64 id = evsel->core.id[i];
973                 struct guest_id *guest_id = guest_session__lookup_id(gs, id);
974
975                 if (!guest_id) {
976                         pr_err("Failed to find guest id %"PRIu64"\n", id);
977                         ret = -EINVAL;
978                         goto out;
979                 }
980                 id_array[i] = guest_id->host_id;
981                 vcpu_array[i] = guest_id->vcpu;
982         }
983
984         attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
985         attr.exclude_host = 1;
986         attr.exclude_guest = 0;
987
988         ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
989                                           id_array, process_attr);
990         if (ret)
991                 pr_err("Failed to add guest attr.\n");
992
993         for (i = 0; i < evsel->core.ids; i++) {
994                 struct perf_sample_id *sid;
995                 u32 vcpu = vcpu_array[i];
996
997                 sid = evlist__id2sid(inject->session->evlist, id_array[i]);
998                 /* Guest event is per-thread from the host point of view */
999                 sid->cpu.cpu = -1;
1000                 sid->tid = gs->vcpu[vcpu].tid;
1001                 sid->machine_pid = gs->machine_pid;
1002                 sid->vcpu.cpu = vcpu;
1003         }
1004 out:
1005         free(vcpu_array);
1006         free(id_array);
1007         return ret;
1008 }
1009
1010 static int guest_session__add_attrs(struct guest_session *gs)
1011 {
1012         struct evlist *evlist = gs->session->evlist;
1013         struct evsel *evsel;
1014         int ret;
1015
1016         evlist__for_each_entry(evlist, evsel) {
1017                 ret = guest_session__add_attr(gs, evsel);
1018                 if (ret)
1019                         return ret;
1020         }
1021
1022         return 0;
1023 }
1024
1025 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
1026 {
1027         struct perf_session *session = inject->session;
1028         struct evlist *evlist = session->evlist;
1029         struct machine *machine = &session->machines.host;
1030         size_t from = evlist->core.nr_entries - new_cnt;
1031
1032         return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
1033                                                  evlist, machine, from);
1034 }
1035
1036 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
1037 {
1038         struct hlist_head *head;
1039         struct guest_tid *guest_tid;
1040         int hash;
1041
1042         hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
1043         head = &gs->tids[hash];
1044
1045         hlist_for_each_entry(guest_tid, head, node)
1046                 if (guest_tid->tid == tid)
1047                         return guest_tid;
1048
1049         return NULL;
1050 }
1051
1052 static bool dso__is_in_kernel_space(struct dso *dso)
1053 {
1054         if (dso__is_vdso(dso))
1055                 return false;
1056
1057         return dso__is_kcore(dso) ||
1058                dso->kernel ||
1059                is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
1060 }
1061
1062 static u64 evlist__first_id(struct evlist *evlist)
1063 {
1064         struct evsel *evsel;
1065
1066         evlist__for_each_entry(evlist, evsel) {
1067                 if (evsel->core.ids)
1068                         return evsel->core.id[0];
1069         }
1070         return 0;
1071 }
1072
1073 static int process_build_id(struct perf_tool *tool,
1074                             union perf_event *event,
1075                             struct perf_sample *sample __maybe_unused,
1076                             struct machine *machine __maybe_unused)
1077 {
1078         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1079
1080         return perf_event__process_build_id(inject->session, event);
1081 }
1082
1083 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
1084 {
1085         struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
1086         u8 cpumode = dso__is_in_kernel_space(dso) ?
1087                         PERF_RECORD_MISC_GUEST_KERNEL :
1088                         PERF_RECORD_MISC_GUEST_USER;
1089
1090         if (!machine)
1091                 return -ENOMEM;
1092
1093         dso->hit = 1;
1094
1095         return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
1096                                                process_build_id, machine);
1097 }
1098
1099 static int guest_session__add_build_ids(struct guest_session *gs)
1100 {
1101         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1102         struct machine *machine = &gs->session->machines.host;
1103         struct dso *dso;
1104         int ret;
1105
1106         /* Build IDs will be put in the Build ID feature section */
1107         perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
1108
1109         dsos__for_each_with_build_id(dso, &machine->dsos.head) {
1110                 ret = synthesize_build_id(inject, dso, gs->machine_pid);
1111                 if (ret)
1112                         return ret;
1113         }
1114
1115         return 0;
1116 }
1117
1118 static int guest_session__ksymbol_event(struct perf_tool *tool,
1119                                         union perf_event *event,
1120                                         struct perf_sample *sample __maybe_unused,
1121                                         struct machine *machine __maybe_unused)
1122 {
1123         struct guest_session *gs = container_of(tool, struct guest_session, tool);
1124
1125         /* Only support out-of-line i.e. no BPF support */
1126         if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
1127                 return 0;
1128
1129         return guest_session__output_bytes(gs, event, event->header.size);
1130 }
1131
1132 static int guest_session__start(struct guest_session *gs, const char *name, bool force)
1133 {
1134         char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
1135         struct perf_session *session;
1136         int ret;
1137
1138         /* Only these events will be injected */
1139         gs->tool.mmap           = guest_session__repipe;
1140         gs->tool.mmap2          = guest_session__repipe;
1141         gs->tool.comm           = guest_session__repipe;
1142         gs->tool.fork           = guest_session__repipe;
1143         gs->tool.exit           = guest_session__repipe;
1144         gs->tool.lost           = guest_session__repipe;
1145         gs->tool.context_switch = guest_session__repipe;
1146         gs->tool.ksymbol        = guest_session__ksymbol_event;
1147         gs->tool.text_poke      = guest_session__repipe;
1148         /*
1149          * Processing a build ID creates a struct dso with that build ID. Later,
1150          * all guest dsos are iterated and the build IDs processed into the host
1151          * session where they will be output to the Build ID feature section
1152          * when the perf.data file header is written.
1153          */
1154         gs->tool.build_id       = perf_event__process_build_id;
1155         /* Process the id index to know what VCPU an ID belongs to */
1156         gs->tool.id_index       = perf_event__process_id_index;
1157
1158         gs->tool.ordered_events = true;
1159         gs->tool.ordering_requires_timestamps = true;
1160
1161         gs->data.path   = name;
1162         gs->data.force  = force;
1163         gs->data.mode   = PERF_DATA_MODE_READ;
1164
1165         session = perf_session__new(&gs->data, &gs->tool);
1166         if (IS_ERR(session))
1167                 return PTR_ERR(session);
1168         gs->session = session;
1169
1170         /*
1171          * Initial events have zero'd ID samples. Get default ID sample size
1172          * used for removing them.
1173          */
1174         gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
1175         /* And default ID for adding back a host-compatible ID sample */
1176         gs->dflt_id = evlist__first_id(session->evlist);
1177         if (!gs->dflt_id) {
1178                 pr_err("Guest data has no sample IDs");
1179                 return -EINVAL;
1180         }
1181
1182         /* Temporary file for guest events */
1183         gs->tmp_file_name = strdup(tmp_file_name);
1184         if (!gs->tmp_file_name)
1185                 return -ENOMEM;
1186         gs->tmp_fd = mkstemp(gs->tmp_file_name);
1187         if (gs->tmp_fd < 0)
1188                 return -errno;
1189
1190         if (zstd_init(&gs->session->zstd_data, 0) < 0)
1191                 pr_warning("Guest session decompression initialization failed.\n");
1192
1193         /*
1194          * perf does not support processing 2 sessions simultaneously, so output
1195          * guest events to a temporary file.
1196          */
1197         ret = perf_session__process_events(gs->session);
1198         if (ret)
1199                 return ret;
1200
1201         if (lseek(gs->tmp_fd, 0, SEEK_SET))
1202                 return -errno;
1203
1204         return 0;
1205 }
1206
1207 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
1208 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1209 {
1210         struct hlist_node *pos, *n;
1211         size_t i;
1212
1213         for (i = 0; i < hlist_sz; ++i) {
1214                 hlist_for_each_safe(pos, n, &heads[i]) {
1215                         hlist_del(pos);
1216                         free(pos);
1217                 }
1218         }
1219 }
1220
1221 static void guest_session__exit(struct guest_session *gs)
1222 {
1223         if (gs->session) {
1224                 perf_session__delete(gs->session);
1225                 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
1226                 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
1227         }
1228         if (gs->tmp_file_name) {
1229                 if (gs->tmp_fd >= 0)
1230                         close(gs->tmp_fd);
1231                 unlink(gs->tmp_file_name);
1232                 free(gs->tmp_file_name);
1233         }
1234         free(gs->vcpu);
1235         free(gs->perf_data_file);
1236 }
1237
1238 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
1239 {
1240         tc->time_shift          = time_conv->time_shift;
1241         tc->time_mult           = time_conv->time_mult;
1242         tc->time_zero           = time_conv->time_zero;
1243         tc->time_cycles         = time_conv->time_cycles;
1244         tc->time_mask           = time_conv->time_mask;
1245         tc->cap_user_time_zero  = time_conv->cap_user_time_zero;
1246         tc->cap_user_time_short = time_conv->cap_user_time_short;
1247 }
1248
1249 static void guest_session__get_tc(struct guest_session *gs)
1250 {
1251         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1252
1253         get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
1254         get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
1255 }
1256
1257 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
1258 {
1259         u64 tsc;
1260
1261         if (!guest_time) {
1262                 *host_time = 0;
1263                 return;
1264         }
1265
1266         if (gs->guest_tc.cap_user_time_zero)
1267                 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
1268         else
1269                 tsc = guest_time;
1270
1271         /*
1272          * This is the correct order of operations for x86 if the TSC Offset and
1273          * Multiplier values are used.
1274          */
1275         tsc -= gs->time_offset;
1276         tsc /= gs->time_scale;
1277
1278         if (gs->host_tc.cap_user_time_zero)
1279                 *host_time = tsc_to_perf_time(tsc, &gs->host_tc);
1280         else
1281                 *host_time = tsc;
1282 }
1283
1284 static int guest_session__fetch(struct guest_session *gs)
1285 {
1286         void *buf = gs->ev.event_buf;
1287         struct perf_event_header *hdr = buf;
1288         size_t hdr_sz = sizeof(*hdr);
1289         ssize_t ret;
1290
1291         ret = readn(gs->tmp_fd, buf, hdr_sz);
1292         if (ret < 0)
1293                 return ret;
1294
1295         if (!ret) {
1296                 /* Zero size means EOF */
1297                 hdr->size = 0;
1298                 return 0;
1299         }
1300
1301         buf += hdr_sz;
1302
1303         ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
1304         if (ret < 0)
1305                 return ret;
1306
1307         gs->ev.event = (union perf_event *)gs->ev.event_buf;
1308         gs->ev.sample.time = 0;
1309
1310         if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
1311                 pr_err("Unexpected type fetching guest event");
1312                 return 0;
1313         }
1314
1315         ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
1316         if (ret) {
1317                 pr_err("Parse failed fetching guest event");
1318                 return ret;
1319         }
1320
1321         if (!gs->have_tc) {
1322                 guest_session__get_tc(gs);
1323                 gs->have_tc = true;
1324         }
1325
1326         guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);
1327
1328         return 0;
1329 }
1330
1331 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1332                                     const struct perf_sample *sample)
1333 {
1334         struct evsel *evsel;
1335         void *array;
1336         int ret;
1337
1338         evsel = evlist__id2evsel(evlist, sample->id);
1339         array = ev;
1340
1341         if (!evsel) {
1342                 pr_err("No evsel for id %"PRIu64"\n", sample->id);
1343                 return -EINVAL;
1344         }
1345
1346         array += ev->header.size;
1347         ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1348         if (ret < 0)
1349                 return ret;
1350
1351         if (ret & 7) {
1352                 pr_err("Bad id sample size %d\n", ret);
1353                 return -EINVAL;
1354         }
1355
1356         ev->header.size += ret;
1357
1358         return 0;
1359 }
1360
1361 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
1362 {
1363         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1364         int ret;
1365
1366         if (!gs->ready)
1367                 return 0;
1368
1369         while (1) {
1370                 struct perf_sample *sample;
1371                 struct guest_id *guest_id;
1372                 union perf_event *ev;
1373                 u16 id_hdr_size;
1374                 u8 cpumode;
1375                 u64 id;
1376
1377                 if (!gs->fetched) {
1378                         ret = guest_session__fetch(gs);
1379                         if (ret)
1380                                 return ret;
1381                         gs->fetched = true;
1382                 }
1383
1384                 ev = gs->ev.event;
1385                 sample = &gs->ev.sample;
1386
1387                 if (!ev->header.size)
1388                         return 0; /* EOF */
1389
1390                 if (sample->time > timestamp)
1391                         return 0;
1392
1393                 /* Change cpumode to guest */
1394                 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1395                 if (cpumode & PERF_RECORD_MISC_USER)
1396                         cpumode = PERF_RECORD_MISC_GUEST_USER;
1397                 else
1398                         cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
1399                 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
1400                 ev->header.misc |= cpumode;
1401
1402                 id = sample->id;
1403                 if (!id) {
1404                         id = gs->dflt_id;
1405                         id_hdr_size = gs->dflt_id_hdr_size;
1406                 } else {
1407                         struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);
1408
1409                         id_hdr_size = evsel__id_hdr_size(evsel);
1410                 }
1411
1412                 if (id_hdr_size & 7) {
1413                         pr_err("Bad id_hdr_size %u\n", id_hdr_size);
1414                         return -EINVAL;
1415                 }
1416
1417                 if (ev->header.size & 7) {
1418                         pr_err("Bad event size %u\n", ev->header.size);
1419                         return -EINVAL;
1420                 }
1421
1422                 /* Remove guest id sample */
1423                 ev->header.size -= id_hdr_size;
1424
1425                 if (ev->header.size & 7) {
1426                         pr_err("Bad raw event size %u\n", ev->header.size);
1427                         return -EINVAL;
1428                 }
1429
1430                 guest_id = guest_session__lookup_id(gs, id);
1431                 if (!guest_id) {
1432                         pr_err("Guest event with unknown id %llu\n",
1433                                (unsigned long long)id);
1434                         return -EINVAL;
1435                 }
1436
1437                 /* Change to host ID to avoid conflicting ID values */
1438                 sample->id = guest_id->host_id;
1439                 sample->stream_id = guest_id->host_id;
1440
1441                 if (sample->cpu != (u32)-1) {
1442                         if (sample->cpu >= gs->vcpu_cnt) {
1443                                 pr_err("Guest event with unknown VCPU %u\n",
1444                                        sample->cpu);
1445                                 return -EINVAL;
1446                         }
1447                         /* Change to host CPU instead of guest VCPU */
1448                         sample->cpu = gs->vcpu[sample->cpu].cpu;
1449                 }
1450
1451                 /* New id sample with new ID and CPU */
1452                 ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
1453                 if (ret)
1454                         return ret;
1455
1456                 if (ev->header.size & 7) {
1457                         pr_err("Bad new event size %u\n", ev->header.size);
1458                         return -EINVAL;
1459                 }
1460
1461                 gs->fetched = false;
1462
1463                 ret = output_bytes(inject, ev, ev->header.size);
1464                 if (ret)
1465                         return ret;
1466         }
1467 }
1468
1469 static int guest_session__flush_events(struct guest_session *gs)
1470 {
1471         return guest_session__inject_events(gs, -1);
1472 }
1473
1474 static int host__repipe(struct perf_tool *tool,
1475                         union perf_event *event,
1476                         struct perf_sample *sample,
1477                         struct machine *machine)
1478 {
1479         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1480         int ret;
1481
1482         ret = guest_session__inject_events(&inject->guest_session, sample->time);
1483         if (ret)
1484                 return ret;
1485
1486         return perf_event__repipe(tool, event, sample, machine);
1487 }
1488
1489 static int host__finished_init(struct perf_session *session, union perf_event *event)
1490 {
1491         struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
1492         struct guest_session *gs = &inject->guest_session;
1493         int ret;
1494
1495         /*
1496          * Peek through host COMM events to find QEMU threads and the VCPU they
1497          * are running.
1498          */
1499         ret = host_peek_vm_comms(session, gs);
1500         if (ret)
1501                 return ret;
1502
1503         if (!gs->vcpu_cnt) {
1504                 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
1505                 return -EINVAL;
1506         }
1507
1508         /*
1509          * Allocate new (unused) host sample IDs and map them to the guest IDs.
1510          */
1511         gs->highest_id = evlist__find_highest_id(session->evlist);
1512         ret = guest_session__map_ids(gs, session->evlist);
1513         if (ret)
1514                 return ret;
1515
1516         ret = guest_session__add_attrs(gs);
1517         if (ret)
1518                 return ret;
1519
1520         ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
1521         if (ret) {
1522                 pr_err("Failed to synthesize id_index\n");
1523                 return ret;
1524         }
1525
1526         ret = guest_session__add_build_ids(gs);
1527         if (ret) {
1528                 pr_err("Failed to add guest build IDs\n");
1529                 return ret;
1530         }
1531
1532         gs->ready = true;
1533
1534         ret = guest_session__inject_events(gs, 0);
1535         if (ret)
1536                 return ret;
1537
1538         return perf_event__repipe_op2_synth(session, event);
1539 }
1540
1541 /*
1542  * Obey finished-round ordering. The FINISHED_ROUND event is first processed
1543  * which flushes host events to file up until the last flush time. Then inject
1544  * guest events up to the same time. Finally write out the FINISHED_ROUND event
1545  * itself.
1546  */
1547 static int host__finished_round(struct perf_tool *tool,
1548                                 union perf_event *event,
1549                                 struct ordered_events *oe)
1550 {
1551         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1552         int ret = perf_event__process_finished_round(tool, event, oe);
1553         u64 timestamp = ordered_events__last_flush_time(oe);
1554
1555         if (ret)
1556                 return ret;
1557
1558         ret = guest_session__inject_events(&inject->guest_session, timestamp);
1559         if (ret)
1560                 return ret;
1561
1562         return perf_event__repipe_oe_synth(tool, event, oe);
1563 }
1564
1565 static int host__context_switch(struct perf_tool *tool,
1566                                 union perf_event *event,
1567                                 struct perf_sample *sample,
1568                                 struct machine *machine)
1569 {
1570         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1571         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1572         struct guest_session *gs = &inject->guest_session;
1573         u32 pid = event->context_switch.next_prev_pid;
1574         u32 tid = event->context_switch.next_prev_tid;
1575         struct guest_tid *guest_tid;
1576         u32 vcpu;
1577
1578         if (out || pid != gs->machine_pid)
1579                 goto out;
1580
1581         guest_tid = guest_session__lookup_tid(gs, tid);
1582         if (!guest_tid)
1583                 goto out;
1584
1585         if (sample->cpu == (u32)-1) {
1586                 pr_err("Switch event does not have CPU\n");
1587                 return -EINVAL;
1588         }
1589
1590         vcpu = guest_tid->vcpu;
1591         if (vcpu >= gs->vcpu_cnt)
1592                 return -EINVAL;
1593
1594         /* Guest is switching in, record which CPU the VCPU is now running on */
1595         gs->vcpu[vcpu].cpu = sample->cpu;
1596 out:
1597         return host__repipe(tool, event, sample, machine);
1598 }
1599
1600 static void sig_handler(int sig __maybe_unused)
1601 {
1602         session_done = 1;
1603 }
1604
1605 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
1606 {
1607         struct perf_event_attr *attr = &evsel->core.attr;
1608         const char *name = evsel__name(evsel);
1609
1610         if (!(attr->sample_type & sample_type)) {
1611                 pr_err("Samples for %s event do not have %s attribute set.",
1612                         name, sample_msg);
1613                 return -EINVAL;
1614         }
1615
1616         return 0;
1617 }
1618
1619 static int drop_sample(struct perf_tool *tool __maybe_unused,
1620                        union perf_event *event __maybe_unused,
1621                        struct perf_sample *sample __maybe_unused,
1622                        struct evsel *evsel __maybe_unused,
1623                        struct machine *machine __maybe_unused)
1624 {
1625         return 0;
1626 }
1627
1628 static void strip_init(struct perf_inject *inject)
1629 {
1630         struct evlist *evlist = inject->session->evlist;
1631         struct evsel *evsel;
1632
1633         inject->tool.context_switch = perf_event__drop;
1634
1635         evlist__for_each_entry(evlist, evsel)
1636                 evsel->handler = drop_sample;
1637 }
1638
1639 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
1640 {
1641         struct perf_inject *inject = opt->value;
1642         const char *args;
1643         char *dry_run;
1644
1645         if (unset)
1646                 return 0;
1647
1648         inject->itrace_synth_opts.set = true;
1649         inject->itrace_synth_opts.vm_time_correlation = true;
1650         inject->in_place_update = true;
1651
1652         if (!str)
1653                 return 0;
1654
1655         dry_run = skip_spaces(str);
1656         if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
1657                 inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
1658                 inject->in_place_update_dry_run = true;
1659                 args = dry_run + strlen("dry-run");
1660         } else {
1661                 args = str;
1662         }
1663
1664         inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
1665
1666         return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
1667 }
1668
1669 static int parse_guest_data(const struct option *opt, const char *str, int unset)
1670 {
1671         struct perf_inject *inject = opt->value;
1672         struct guest_session *gs = &inject->guest_session;
1673         char *tok;
1674         char *s;
1675
1676         if (unset)
1677                 return 0;
1678
1679         if (!str)
1680                 goto bad_args;
1681
1682         s = strdup(str);
1683         if (!s)
1684                 return -ENOMEM;
1685
1686         gs->perf_data_file = strsep(&s, ",");
1687         if (!gs->perf_data_file)
1688                 goto bad_args;
1689
1690         gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
1691         if (gs->copy_kcore_dir)
1692                 inject->output.is_dir = true;
1693
1694         tok = strsep(&s, ",");
1695         if (!tok)
1696                 goto bad_args;
1697         gs->machine_pid = strtoul(tok, NULL, 0);
1698         if (!inject->guest_session.machine_pid)
1699                 goto bad_args;
1700
1701         gs->time_scale = 1;
1702
1703         tok = strsep(&s, ",");
1704         if (!tok)
1705                 goto out;
1706         gs->time_offset = strtoull(tok, NULL, 0);
1707
1708         tok = strsep(&s, ",");
1709         if (!tok)
1710                 goto out;
1711         gs->time_scale = strtod(tok, NULL);
1712         if (!gs->time_scale)
1713                 goto bad_args;
1714 out:
1715         return 0;
1716
1717 bad_args:
1718         pr_err("--guest-data option requires guest perf.data file name, "
1719                "guest machine PID, and optionally guest timestamp offset, "
1720                "and guest timestamp scale factor, separated by commas.\n");
1721         return -1;
1722 }
1723
1724 static int save_section_info_cb(struct perf_file_section *section,
1725                                 struct perf_header *ph __maybe_unused,
1726                                 int feat, int fd __maybe_unused, void *data)
1727 {
1728         struct perf_inject *inject = data;
1729
1730         inject->secs[feat] = *section;
1731         return 0;
1732 }
1733
1734 static int save_section_info(struct perf_inject *inject)
1735 {
1736         struct perf_header *header = &inject->session->header;
1737         int fd = perf_data__fd(inject->session->data);
1738
1739         return perf_header__process_sections(header, fd, inject, save_section_info_cb);
1740 }
1741
1742 static bool keep_feat(int feat)
1743 {
1744         switch (feat) {
1745         /* Keep original information that describes the machine or software */
1746         case HEADER_TRACING_DATA:
1747         case HEADER_HOSTNAME:
1748         case HEADER_OSRELEASE:
1749         case HEADER_VERSION:
1750         case HEADER_ARCH:
1751         case HEADER_NRCPUS:
1752         case HEADER_CPUDESC:
1753         case HEADER_CPUID:
1754         case HEADER_TOTAL_MEM:
1755         case HEADER_CPU_TOPOLOGY:
1756         case HEADER_NUMA_TOPOLOGY:
1757         case HEADER_PMU_MAPPINGS:
1758         case HEADER_CACHE:
1759         case HEADER_MEM_TOPOLOGY:
1760         case HEADER_CLOCKID:
1761         case HEADER_BPF_PROG_INFO:
1762         case HEADER_BPF_BTF:
1763         case HEADER_CPU_PMU_CAPS:
1764         case HEADER_CLOCK_DATA:
1765         case HEADER_HYBRID_TOPOLOGY:
1766         case HEADER_PMU_CAPS:
1767                 return true;
1768         /* Information that can be updated */
1769         case HEADER_BUILD_ID:
1770         case HEADER_CMDLINE:
1771         case HEADER_EVENT_DESC:
1772         case HEADER_BRANCH_STACK:
1773         case HEADER_GROUP_DESC:
1774         case HEADER_AUXTRACE:
1775         case HEADER_STAT:
1776         case HEADER_SAMPLE_TIME:
1777         case HEADER_DIR_FORMAT:
1778         case HEADER_COMPRESSED:
1779         default:
1780                 return false;
1781         };
1782 }
1783
1784 static int read_file(int fd, u64 offs, void *buf, size_t sz)
1785 {
1786         ssize_t ret = preadn(fd, buf, sz, offs);
1787
1788         if (ret < 0)
1789                 return -errno;
1790         if ((size_t)ret != sz)
1791                 return -EINVAL;
1792         return 0;
1793 }
1794
1795 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
1796 {
1797         int fd = perf_data__fd(inject->session->data);
1798         u64 offs = inject->secs[feat].offset;
1799         size_t sz = inject->secs[feat].size;
1800         void *buf = malloc(sz);
1801         int ret;
1802
1803         if (!buf)
1804                 return -ENOMEM;
1805
1806         ret = read_file(fd, offs, buf, sz);
1807         if (ret)
1808                 goto out_free;
1809
1810         ret = fw->write(fw, buf, sz);
1811 out_free:
1812         free(buf);
1813         return ret;
1814 }
1815
/*
 * Feature copier handed to perf_session__inject_header() together with the
 * perf_inject state it works on. feat_copy_cb() only receives a pointer to
 * the embedded @fc and gets back to this wrapper with container_of().
 */
1816 struct inject_fc {
1817         struct feat_copier fc;
1818         struct perf_inject *inject;
1819 };
1820
1821 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
1822 {
1823         struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
1824         struct perf_inject *inject = inj_fc->inject;
1825         int ret;
1826
1827         if (!inject->secs[feat].offset ||
1828             !keep_feat(feat))
1829                 return 0;
1830
1831         ret = feat_copy(inject, feat, fw);
1832         if (ret < 0)
1833                 return ret;
1834
1835         return 1; /* Feature section copied */
1836 }
1837
1838 static int copy_kcore_dir(struct perf_inject *inject)
1839 {
1840         char *cmd;
1841         int ret;
1842
1843         ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
1844                        inject->input_name, inject->output.path);
1845         if (ret < 0)
1846                 return ret;
1847         pr_debug("%s\n", cmd);
1848         ret = system(cmd);
1849         free(cmd);
1850         return ret;
1851 }
1852
1853 static int guest_session__copy_kcore_dir(struct guest_session *gs)
1854 {
1855         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1856         char *cmd;
1857         int ret;
1858
1859         ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
1860                        gs->perf_data_file, inject->output.path, gs->machine_pid);
1861         if (ret < 0)
1862                 return ret;
1863         pr_debug("%s\n", cmd);
1864         ret = system(cmd);
1865         free(cmd);
1866         return ret;
1867 }
1868
1869 static int output_fd(struct perf_inject *inject)
1870 {
1871         return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
1872 }
1873
1874 static int __cmd_inject(struct perf_inject *inject)
1875 {
1876         int ret = -EINVAL;
1877         struct guest_session *gs = &inject->guest_session;
1878         struct perf_session *session = inject->session;
1879         int fd = output_fd(inject);
1880         u64 output_data_offset;
1881
1882         signal(SIGINT, sig_handler);
1883
1884         if (inject->build_ids || inject->sched_stat ||
1885             inject->itrace_synth_opts.set || inject->build_id_all) {
1886                 inject->tool.mmap         = perf_event__repipe_mmap;
1887                 inject->tool.mmap2        = perf_event__repipe_mmap2;
1888                 inject->tool.fork         = perf_event__repipe_fork;
1889                 inject->tool.tracing_data = perf_event__repipe_tracing_data;
1890         }
1891
1892         output_data_offset = perf_session__data_offset(session->evlist);
1893
1894         if (inject->build_id_all) {
1895                 inject->tool.mmap         = perf_event__repipe_buildid_mmap;
1896                 inject->tool.mmap2        = perf_event__repipe_buildid_mmap2;
1897         } else if (inject->build_ids) {
1898                 inject->tool.sample = perf_event__inject_buildid;
1899         } else if (inject->sched_stat) {
1900                 struct evsel *evsel;
1901
1902                 evlist__for_each_entry(session->evlist, evsel) {
1903                         const char *name = evsel__name(evsel);
1904
1905                         if (!strcmp(name, "sched:sched_switch")) {
1906                                 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
1907                                         return -EINVAL;
1908
1909                                 evsel->handler = perf_inject__sched_switch;
1910                         } else if (!strcmp(name, "sched:sched_process_exit"))
1911                                 evsel->handler = perf_inject__sched_process_exit;
1912                         else if (!strncmp(name, "sched:sched_stat_", 17))
1913                                 evsel->handler = perf_inject__sched_stat;
1914                 }
1915         } else if (inject->itrace_synth_opts.vm_time_correlation) {
1916                 session->itrace_synth_opts = &inject->itrace_synth_opts;
1917                 memset(&inject->tool, 0, sizeof(inject->tool));
1918                 inject->tool.id_index       = perf_event__process_id_index;
1919                 inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
1920                 inject->tool.auxtrace       = perf_event__process_auxtrace;
1921                 inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
1922                 inject->tool.ordered_events = true;
1923                 inject->tool.ordering_requires_timestamps = true;
1924         } else if (inject->itrace_synth_opts.set) {
1925                 session->itrace_synth_opts = &inject->itrace_synth_opts;
1926                 inject->itrace_synth_opts.inject = true;
1927                 inject->tool.comm           = perf_event__repipe_comm;
1928                 inject->tool.namespaces     = perf_event__repipe_namespaces;
1929                 inject->tool.exit           = perf_event__repipe_exit;
1930                 inject->tool.id_index       = perf_event__process_id_index;
1931                 inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
1932                 inject->tool.auxtrace       = perf_event__process_auxtrace;
1933                 inject->tool.aux            = perf_event__drop_aux;
1934                 inject->tool.itrace_start   = perf_event__drop_aux;
1935                 inject->tool.aux_output_hw_id = perf_event__drop_aux;
1936                 inject->tool.ordered_events = true;
1937                 inject->tool.ordering_requires_timestamps = true;
1938                 /* Allow space in the header for new attributes */
1939                 output_data_offset = roundup(8192 + session->header.data_offset, 4096);
1940                 if (inject->strip)
1941                         strip_init(inject);
1942         } else if (gs->perf_data_file) {
1943                 char *name = gs->perf_data_file;
1944
1945                 /*
1946                  * Not strictly necessary, but keep these events in order wrt
1947                  * guest events.
1948                  */
1949                 inject->tool.mmap               = host__repipe;
1950                 inject->tool.mmap2              = host__repipe;
1951                 inject->tool.comm               = host__repipe;
1952                 inject->tool.fork               = host__repipe;
1953                 inject->tool.exit               = host__repipe;
1954                 inject->tool.lost               = host__repipe;
1955                 inject->tool.context_switch     = host__repipe;
1956                 inject->tool.ksymbol            = host__repipe;
1957                 inject->tool.text_poke          = host__repipe;
1958                 /*
1959                  * Once the host session has initialized, set up sample ID
1960                  * mapping and feed in guest attrs, build IDs and initial
1961                  * events.
1962                  */
1963                 inject->tool.finished_init      = host__finished_init;
1964                 /* Obey finished round ordering */
1965                 inject->tool.finished_round     = host__finished_round,
1966                 /* Keep track of which CPU a VCPU is runnng on */
1967                 inject->tool.context_switch     = host__context_switch;
1968                 /*
1969                  * Must order events to be able to obey finished round
1970                  * ordering.
1971                  */
1972                 inject->tool.ordered_events     = true;
1973                 inject->tool.ordering_requires_timestamps = true;
1974                 /* Set up a separate session to process guest perf.data file */
1975                 ret = guest_session__start(gs, name, session->data->force);
1976                 if (ret) {
1977                         pr_err("Failed to process %s, error %d\n", name, ret);
1978                         return ret;
1979                 }
1980                 /* Allow space in the header for guest attributes */
1981                 output_data_offset += gs->session->header.data_offset;
1982                 output_data_offset = roundup(output_data_offset, 4096);
1983         }
1984
1985         if (!inject->itrace_synth_opts.set)
1986                 auxtrace_index__free(&session->auxtrace_index);
1987
1988         if (!inject->is_pipe && !inject->in_place_update)
1989                 lseek(fd, output_data_offset, SEEK_SET);
1990
1991         ret = perf_session__process_events(session);
1992         if (ret)
1993                 return ret;
1994
1995         if (gs->session) {
1996                 /*
1997                  * Remaining guest events have later timestamps. Flush them
1998                  * out to file.
1999                  */
2000                 ret = guest_session__flush_events(gs);
2001                 if (ret) {
2002                         pr_err("Failed to flush guest events\n");
2003                         return ret;
2004                 }
2005         }
2006
2007         if (!inject->is_pipe && !inject->in_place_update) {
2008                 struct inject_fc inj_fc = {
2009                         .fc.copy = feat_copy_cb,
2010                         .inject = inject,
2011                 };
2012
2013                 if (inject->build_ids)
2014                         perf_header__set_feat(&session->header,
2015                                               HEADER_BUILD_ID);
2016                 /*
2017                  * Keep all buildids when there is unprocessed AUX data because
2018                  * it is not known which ones the AUX trace hits.
2019                  */
2020                 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
2021                     inject->have_auxtrace && !inject->itrace_synth_opts.set)
2022                         dsos__hit_all(session);
2023                 /*
2024                  * The AUX areas have been removed and replaced with
2025                  * synthesized hardware events, so clear the feature flag.
2026                  */
2027                 if (inject->itrace_synth_opts.set) {
2028                         perf_header__clear_feat(&session->header,
2029                                                 HEADER_AUXTRACE);
2030                         if (inject->itrace_synth_opts.last_branch ||
2031                             inject->itrace_synth_opts.add_last_branch)
2032                                 perf_header__set_feat(&session->header,
2033                                                       HEADER_BRANCH_STACK);
2034                 }
2035                 session->header.data_offset = output_data_offset;
2036                 session->header.data_size = inject->bytes_written;
2037                 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);
2038
2039                 if (inject->copy_kcore_dir) {
2040                         ret = copy_kcore_dir(inject);
2041                         if (ret) {
2042                                 pr_err("Failed to copy kcore\n");
2043                                 return ret;
2044                         }
2045                 }
2046                 if (gs->copy_kcore_dir) {
2047                         ret = guest_session__copy_kcore_dir(gs);
2048                         if (ret) {
2049                                 pr_err("Failed to copy guest kcore\n");
2050                                 return ret;
2051                         }
2052                 }
2053         }
2054
2055         return ret;
2056 }
2057
/*
 * Entry point of the "perf inject" command.
 *
 * Parses the command line, checks the option combinations, opens the output
 * (unless the input is updated in place), creates the input session and then
 * runs __cmd_inject().
 *
 * Returns -1 when option validation or opening the output fails, otherwise
 * the result of the last setup step that failed or of __cmd_inject().
 */
2058 int cmd_inject(int argc, const char **argv)
2059 {
2060         struct perf_inject inject = {
                  /*
                   * Default handlers, which repipe every event; __cmd_inject()
                   * replaces some of them depending on the selected mode.
                   */
2061                 .tool = {
2062                         .sample         = perf_event__repipe_sample,
2063                         .read           = perf_event__repipe_sample,
2064                         .mmap           = perf_event__repipe,
2065                         .mmap2          = perf_event__repipe,
2066                         .comm           = perf_event__repipe,
2067                         .namespaces     = perf_event__repipe,
2068                         .cgroup         = perf_event__repipe,
2069                         .fork           = perf_event__repipe,
2070                         .exit           = perf_event__repipe,
2071                         .lost           = perf_event__repipe,
2072                         .lost_samples   = perf_event__repipe,
2073                         .aux            = perf_event__repipe,
2074                         .itrace_start   = perf_event__repipe,
2075                         .aux_output_hw_id = perf_event__repipe,
2076                         .context_switch = perf_event__repipe,
2077                         .throttle       = perf_event__repipe,
2078                         .unthrottle     = perf_event__repipe,
2079                         .ksymbol        = perf_event__repipe,
2080                         .bpf            = perf_event__repipe,
2081                         .text_poke      = perf_event__repipe,
2082                         .attr           = perf_event__repipe_attr,
2083                         .event_update   = perf_event__repipe_event_update,
2084                         .tracing_data   = perf_event__repipe_op2_synth,
2085                         .finished_round = perf_event__repipe_oe_synth,
2086                         .build_id       = perf_event__repipe_op2_synth,
2087                         .id_index       = perf_event__repipe_op2_synth,
2088                         .auxtrace_info  = perf_event__repipe_op2_synth,
2089                         .auxtrace_error = perf_event__repipe_op2_synth,
2090                         .time_conv      = perf_event__repipe_op2_synth,
2091                         .thread_map     = perf_event__repipe_op2_synth,
2092                         .cpu_map        = perf_event__repipe_op2_synth,
2093                         .stat_config    = perf_event__repipe_op2_synth,
2094                         .stat           = perf_event__repipe_op2_synth,
2095                         .stat_round     = perf_event__repipe_op2_synth,
2096                         .feature        = perf_event__repipe_op2_synth,
2097                         .finished_init  = perf_event__repipe_op2_synth,
2098                         .compressed     = perf_event__repipe_op4_synth,
2099                         .auxtrace       = perf_event__repipe_auxtrace,
2100                 },
                  /* "-" is the default for both the input and the output path */
2101                 .input_name  = "-",
2102                 .samples = LIST_HEAD_INIT(inject.samples),
2103                 .output = {
2104                         .path = "-",
2105                         .mode = PERF_DATA_MODE_WRITE,
2106                         .use_stdio = true,
2107                 },
2108         };
2109         struct perf_data data = {
2110                 .mode = PERF_DATA_MODE_READ,
2111                 .use_stdio = true,
2112         };
2113         int ret;
2114         bool repipe = true;
2115
2116         struct option options[] = {
2117                 OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
2118                             "Inject build-ids into the output stream"),
2119                 OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
2120                             "Inject build-ids of all DSOs into the output stream"),
2121                 OPT_STRING('i', "input", &inject.input_name, "file",
2122                            "input file name"),
2123                 OPT_STRING('o', "output", &inject.output.path, "file",
2124                            "output file name"),
2125                 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2126                             "Merge sched-stat and sched-switch for getting events "
2127                             "where and how long tasks slept"),
2128 #ifdef HAVE_JITDUMP
2129                 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2130 #endif
2131                 OPT_INCR('v', "verbose", &verbose,
2132                          "be more verbose (show build ids, etc)"),
2133                 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2134                            "file", "vmlinux pathname"),
2135                 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2136                             "don't load vmlinux even if found"),
2137                 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2138                            "kallsyms pathname"),
2139                 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2140                 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2141                                     NULL, "opts", "Instruction Tracing options\n"
2142                                     ITRACE_HELP,
2143                                     itrace_parse_synth_opts),
2144                 OPT_BOOLEAN(0, "strip", &inject.strip,
2145                             "strip non-synthesized events (use with --itrace)"),
2146                 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2147                                     "correlate time between VM guests and the host",
2148                                     parse_vm_time_correlation),
2149                 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2150                                     "inject events from a guest perf.data file",
2151                                     parse_guest_data),
2152                 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2153                            "guest mount directory under which every guest os"
2154                            " instance has a subdir"),
2155                 OPT_END()
2156         };
2157         const char * const inject_usage[] = {
2158                 "perf inject [<options>]",
2159                 NULL
2160         };
2161 #ifndef HAVE_JITDUMP
2162         set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2163 #endif
2164         argc = parse_options(argc, argv, options, inject_usage, 0);
2165
2166         /*
2167          * Any (unrecognized) arguments left?
2168          */
2169         if (argc)
2170                 usage_with_options(inject_usage, options);
2171
2172         if (inject.strip && !inject.itrace_synth_opts.set) {
2173                 pr_err("--strip option requires --itrace option\n");
2174                 return -1;
2175         }
2176
2177         if (symbol__validate_sym_arguments())
2178                 return -1;
2179
          /*
           * In-place updating (requested by --vm-time-correlation) modifies
           * the input file itself: it needs a named input file, no output
           * file, and --force unless this is only a dry run.
           */
2180         if (inject.in_place_update) {
2181                 if (!strcmp(inject.input_name, "-")) {
2182                         pr_err("Input file name required for in-place updating\n");
2183                         return -1;
2184                 }
2185                 if (strcmp(inject.output.path, "-")) {
2186                         pr_err("Output file name must not be specified for in-place updating\n");
2187                         return -1;
2188                 }
2189                 if (!data.force && !inject.in_place_update_dry_run) {
2190                         pr_err("The input file would be updated in place, "
2191                                 "the --force option is required.\n");
2192                         return -1;
2193                 }
2194                 if (!inject.in_place_update_dry_run)
2195                         data.in_place_update = true;
2196         } else {
                  /*
                   * When the input has a kcore_dir, write the output as a
                   * directory and copy the kcore_dir over later, unless the
                   * output is "-" or --strip is in effect.
                   */
2197                 if (strcmp(inject.output.path, "-") && !inject.strip &&
2198                     has_kcore_dir(inject.input_name)) {
2199                         inject.output.is_dir = true;
2200                         inject.copy_kcore_dir = true;
2201                 }
2202                 if (perf_data__open(&inject.output)) {
2203                         perror("failed to create output file");
2204                         return -1;
2205                 }
2206         }
2207
2208         data.path = inject.input_name;
2209         if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
2210                 inject.is_pipe = true;
2211                 /*
2212                  * Do not repipe header when input is a regular file
2213                  * since either it can rewrite the header at the end
2214                  * or write a new pipe header.
2215                  */
2216                 if (strcmp(inject.input_name, "-"))
2217                         repipe = false;
2218         }
2219
2220         inject.session = __perf_session__new(&data, repipe,
2221                                              output_fd(&inject),
2222                                              &inject.tool);
2223         if (IS_ERR(inject.session)) {
2224                 ret = PTR_ERR(inject.session);
2225                 goto out_close_output;
2226         }
2227
          /* A failed decompression setup is reported but does not stop the run */
2228         if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2229                 pr_warning("Decompression initialization failed.\n");
2230
2231         /* Save original section info before feature bits change */
2232         ret = save_section_info(&inject);
2233         if (ret)
2234                 goto out_delete;
2235
          /* File input with pipe output: a pipe header has to be written first */
2236         if (!data.is_pipe && inject.output.is_pipe) {
2237                 ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2238                 if (ret < 0) {
2239                         pr_err("Couldn't write a new pipe header.\n");
2240                         goto out_delete;
2241                 }
2242
2243                 ret = perf_event__synthesize_for_pipe(&inject.tool,
2244                                                       inject.session,
2245                                                       &inject.output,
2246                                                       perf_event__repipe);
2247                 if (ret < 0)
2248                         goto out_delete;
2249         }
2250
2251         if (inject.build_ids && !inject.build_id_all) {
2252                 /*
2253                  * Order events to make sure the mmap records are
2254                  * processed in the correct order, especially mmaps due to
2255                  * jitted code. We cannot generate the buildid hit list and
2256                  * inject the jit mmaps at the same time for now.
2257                  */
2258                 inject.tool.ordered_events = true;
2259                 inject.tool.ordering_requires_timestamps = true;
2260         }
2261
2262         if (inject.sched_stat) {
2263                 inject.tool.ordered_events = true;
2264         }
2265
2266 #ifdef HAVE_JITDUMP
2267         if (inject.jit_mode) {
2268                 inject.tool.mmap2          = perf_event__jit_repipe_mmap2;
2269                 inject.tool.mmap           = perf_event__jit_repipe_mmap;
2270                 inject.tool.ordered_events = true;
2271                 inject.tool.ordering_requires_timestamps = true;
2272                 /*
2273                  * JIT MMAP injection injects all MMAP events in one go, so it
2274                  * does not obey finished_round semantics.
2275                  */
2276                 inject.tool.finished_round = perf_event__drop_oe;
2277         }
2278 #endif
2279         ret = symbol__init(&inject.session->header.env);
2280         if (ret < 0)
2281                 goto out_delete;
2282
2283         ret = __cmd_inject(&inject);
2284
2285         guest_session__exit(&inject.guest_session);
2286
      /* The labels below undo the setup steps above in reverse order */
2287 out_delete:
2288         zstd_fini(&(inject.session->zstd_data));
2289         perf_session__delete(inject.session);
2290 out_close_output:
          /* The output was only opened when not updating in place */
2291         if (!inject.in_place_update)
2292                 perf_data__close(&inject.output);
2293         free(inject.itrace_synth_opts.vm_tm_corr_args);
2294         return ret;
2295 }