Merge tag 'linux-kselftest-kunit-5.15-rc1' of git://git.kernel.org/pub/scm/linux...
[linux-2.6-microblaze.git] / tools / perf / util / evlist.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
4  *
5  * Parts came from builtin-{top,stat,record}.c, see those files for further
6  * copyright notes.
7  */
8 #include <api/fs/fs.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <poll.h>
12 #include "cpumap.h"
13 #include "util/mmap.h"
14 #include "thread_map.h"
15 #include "target.h"
16 #include "evlist.h"
17 #include "evsel.h"
18 #include "debug.h"
19 #include "units.h"
20 #include "bpf_counter.h"
21 #include <internal/lib.h> // page_size
22 #include "affinity.h"
23 #include "../perf.h"
24 #include "asm/bug.h"
25 #include "bpf-event.h"
26 #include "util/string2.h"
27 #include "util/perf_api_probe.h"
28 #include "util/evsel_fprintf.h"
29 #include "util/evlist-hybrid.h"
30 #include <signal.h>
31 #include <unistd.h>
32 #include <sched.h>
33 #include <stdlib.h>
34
35 #include "parse-events.h"
36 #include <subcmd/parse-options.h>
37
38 #include <fcntl.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/prctl.h>
42
43 #include <linux/bitops.h>
44 #include <linux/hash.h>
45 #include <linux/log2.h>
46 #include <linux/err.h>
47 #include <linux/string.h>
48 #include <linux/zalloc.h>
49 #include <perf/evlist.h>
50 #include <perf/evsel.h>
51 #include <perf/cpumap.h>
52 #include <perf/mmap.h>
53
54 #include <internal/xyarray.h>
55
#ifdef LACKS_SIGQUEUE_PROTOTYPE
/* Some libcs ship sigqueue() without declaring it; provide the prototype. */
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

/*
 * FD()  - the int stored at position (x, y) of evsel e's core.fd xyarray.
 * SID() - pointer to the entry at position (x, y) of e's core.sample_id xyarray.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "evsels + 1" or "a ? b : c") expand with the intended precedence.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, (x), (y)))
#define SID(e, x, y) xyarray__entry((e)->core.sample_id, (x), (y))
62
63 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
64                   struct perf_thread_map *threads)
65 {
66         perf_evlist__init(&evlist->core);
67         perf_evlist__set_maps(&evlist->core, cpus, threads);
68         evlist->workload.pid = -1;
69         evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
70         evlist->ctl_fd.fd = -1;
71         evlist->ctl_fd.ack = -1;
72         evlist->ctl_fd.pos = -1;
73 }
74
75 struct evlist *evlist__new(void)
76 {
77         struct evlist *evlist = zalloc(sizeof(*evlist));
78
79         if (evlist != NULL)
80                 evlist__init(evlist, NULL, NULL);
81
82         return evlist;
83 }
84
/*
 * Create a new evlist and populate it through evlist__add_default().
 * Returns NULL if either the allocation or adding the default event fails;
 * in the latter case the partially built evlist is deleted.
 */
struct evlist *evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist == NULL)
		return NULL;

	if (evlist__add_default(evlist) != 0) {
		evlist__delete(evlist);
		return NULL;
	}

	return evlist;
}
96
/*
 * Create a new evlist holding a single dummy event (see evlist__add_dummy()).
 * Returns NULL if the allocation or adding the dummy event fails; in the
 * latter case the partially built evlist is deleted.
 */
struct evlist *evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist == NULL)
		return NULL;

	if (evlist__add_dummy(evlist) != 0) {
		evlist__delete(evlist);
		return NULL;
	}

	return evlist;
}
108
109 /**
110  * evlist__set_id_pos - set the positions of event ids.
111  * @evlist: selected event list
112  *
113  * Events with compatible sample types all have the same id_pos
114  * and is_pos.  For convenience, put a copy on evlist.
115  */
116 void evlist__set_id_pos(struct evlist *evlist)
117 {
118         struct evsel *first = evlist__first(evlist);
119
120         evlist->id_pos = first->id_pos;
121         evlist->is_pos = first->is_pos;
122 }
123
124 static void evlist__update_id_pos(struct evlist *evlist)
125 {
126         struct evsel *evsel;
127
128         evlist__for_each_entry(evlist, evsel)
129                 evsel__calc_id_pos(evsel);
130
131         evlist__set_id_pos(evlist);
132 }
133
134 static void evlist__purge(struct evlist *evlist)
135 {
136         struct evsel *pos, *n;
137
138         evlist__for_each_entry_safe(evlist, n, pos) {
139                 list_del_init(&pos->core.node);
140                 pos->evlist = NULL;
141                 evsel__delete(pos);
142         }
143
144         evlist->core.nr_entries = 0;
145 }
146
147 void evlist__exit(struct evlist *evlist)
148 {
149         zfree(&evlist->mmap);
150         zfree(&evlist->overwrite_mmap);
151         perf_evlist__exit(&evlist->core);
152 }
153
/*
 * Tear down and free an evlist: unmap, close, purge the evsels, release
 * the remaining resources and finally free the structure.
 * Passing NULL is a no-op.
 */
void evlist__delete(struct evlist *evlist)
{
	if (!evlist)
		return;

	/* Teardown order: mmaps, then fds, then the evsels, then the rest. */
	evlist__munmap(evlist);
	evlist__close(evlist);
	evlist__purge(evlist);
	evlist__exit(evlist);

	free(evlist);
}
165
166 void evlist__add(struct evlist *evlist, struct evsel *entry)
167 {
168         perf_evlist__add(&evlist->core, &entry->core);
169         entry->evlist = evlist;
170         entry->tracking = !entry->core.idx;
171
172         if (evlist->core.nr_entries == 1)
173                 evlist__set_id_pos(evlist);
174 }
175
176 void evlist__remove(struct evlist *evlist, struct evsel *evsel)
177 {
178         evsel->evlist = NULL;
179         perf_evlist__remove(&evlist->core, &evsel->core);
180 }
181
182 void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
183 {
184         while (!list_empty(list)) {
185                 struct evsel *evsel, *temp, *leader = NULL;
186
187                 __evlist__for_each_entry_safe(list, temp, evsel) {
188                         list_del_init(&evsel->core.node);
189                         evlist__add(evlist, evsel);
190                         leader = evsel;
191                         break;
192                 }
193
194                 __evlist__for_each_entry_safe(list, temp, evsel) {
195                         if (evsel__has_leader(evsel, leader)) {
196                                 list_del_init(&evsel->core.node);
197                                 evlist__add(evlist, evsel);
198                         }
199                 }
200         }
201 }
202
203 int __evlist__set_tracepoints_handlers(struct evlist *evlist,
204                                        const struct evsel_str_handler *assocs, size_t nr_assocs)
205 {
206         size_t i;
207         int err;
208
209         for (i = 0; i < nr_assocs; i++) {
210                 // Adding a handler for an event not in this evlist, just ignore it.
211                 struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name);
212                 if (evsel == NULL)
213                         continue;
214
215                 err = -EEXIST;
216                 if (evsel->handler != NULL)
217                         goto out;
218                 evsel->handler = assocs[i].handler;
219         }
220
221         err = 0;
222 out:
223         return err;
224 }
225
226 void evlist__set_leader(struct evlist *evlist)
227 {
228         perf_evlist__set_leader(&evlist->core);
229 }
230
231 int __evlist__add_default(struct evlist *evlist, bool precise)
232 {
233         struct evsel *evsel;
234
235         evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE,
236                                   PERF_COUNT_HW_CPU_CYCLES);
237         if (evsel == NULL)
238                 return -ENOMEM;
239
240         evlist__add(evlist, evsel);
241         return 0;
242 }
243
244 int evlist__add_dummy(struct evlist *evlist)
245 {
246         struct perf_event_attr attr = {
247                 .type   = PERF_TYPE_SOFTWARE,
248                 .config = PERF_COUNT_SW_DUMMY,
249                 .size   = sizeof(attr), /* to capture ABI version */
250         };
251         struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
252
253         if (evsel == NULL)
254                 return -ENOMEM;
255
256         evlist__add(evlist, evsel);
257         return 0;
258 }
259
260 static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
261 {
262         struct evsel *evsel, *n;
263         LIST_HEAD(head);
264         size_t i;
265
266         for (i = 0; i < nr_attrs; i++) {
267                 evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
268                 if (evsel == NULL)
269                         goto out_delete_partial_list;
270                 list_add_tail(&evsel->core.node, &head);
271         }
272
273         evlist__splice_list_tail(evlist, &head);
274
275         return 0;
276
277 out_delete_partial_list:
278         __evlist__for_each_entry_safe(&head, n, evsel)
279                 evsel__delete(evsel);
280         return -1;
281 }
282
283 int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
284 {
285         size_t i;
286
287         for (i = 0; i < nr_attrs; i++)
288                 event_attr_init(attrs + i);
289
290         return evlist__add_attrs(evlist, attrs, nr_attrs);
291 }
292
293 __weak int arch_evlist__add_default_attrs(struct evlist *evlist __maybe_unused)
294 {
295         return 0;
296 }
297
298 struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
299 {
300         struct evsel *evsel;
301
302         evlist__for_each_entry(evlist, evsel) {
303                 if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
304                     (int)evsel->core.attr.config == id)
305                         return evsel;
306         }
307
308         return NULL;
309 }
310
311 struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
312 {
313         struct evsel *evsel;
314
315         evlist__for_each_entry(evlist, evsel) {
316                 if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
317                     (strcmp(evsel->name, name) == 0))
318                         return evsel;
319         }
320
321         return NULL;
322 }
323
324 int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
325 {
326         struct evsel *evsel = evsel__newtp(sys, name);
327
328         if (IS_ERR(evsel))
329                 return -1;
330
331         evsel->handler = handler;
332         evlist__add(evlist, evsel);
333         return 0;
334 }
335
336 static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
337 {
338         if (evsel->core.system_wide)
339                 return 1;
340         else
341                 return perf_thread_map__nr(evlist->core.threads);
342 }
343
344 void evlist__cpu_iter_start(struct evlist *evlist)
345 {
346         struct evsel *pos;
347
348         /*
349          * Reset the per evsel cpu_iter. This is needed because
350          * each evsel's cpumap may have a different index space,
351          * and some operations need the index to modify
352          * the FD xyarray (e.g. open, close)
353          */
354         evlist__for_each_entry(evlist, pos)
355                 pos->cpu_iter = 0;
356 }
357
358 bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
359 {
360         if (ev->cpu_iter >= ev->core.cpus->nr)
361                 return true;
362         if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
363                 return true;
364         return false;
365 }
366
367 bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
368 {
369         if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
370                 ev->cpu_iter++;
371                 return false;
372         }
373         return true;
374 }
375
376 static int evsel__strcmp(struct evsel *pos, char *evsel_name)
377 {
378         if (!evsel_name)
379                 return 0;
380         if (evsel__is_dummy_event(pos))
381                 return 1;
382         return strcmp(pos->name, evsel_name);
383 }
384
385 static int evlist__is_enabled(struct evlist *evlist)
386 {
387         struct evsel *pos;
388
389         evlist__for_each_entry(evlist, pos) {
390                 if (!evsel__is_group_leader(pos) || !pos->core.fd)
391                         continue;
392                 /* If at least one event is enabled, evlist is enabled. */
393                 if (!pos->disabled)
394                         return true;
395         }
396         return false;
397 }
398
399 static void __evlist__disable(struct evlist *evlist, char *evsel_name)
400 {
401         struct evsel *pos;
402         struct affinity affinity;
403         int cpu, i, imm = 0;
404         bool has_imm = false;
405
406         if (affinity__setup(&affinity) < 0)
407                 return;
408
409         /* Disable 'immediate' events last */
410         for (imm = 0; imm <= 1; imm++) {
411                 evlist__for_each_cpu(evlist, i, cpu) {
412                         affinity__set(&affinity, cpu);
413
414                         evlist__for_each_entry(evlist, pos) {
415                                 if (evsel__strcmp(pos, evsel_name))
416                                         continue;
417                                 if (evsel__cpu_iter_skip(pos, cpu))
418                                         continue;
419                                 if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
420                                         continue;
421                                 if (pos->immediate)
422                                         has_imm = true;
423                                 if (pos->immediate != imm)
424                                         continue;
425                                 evsel__disable_cpu(pos, pos->cpu_iter - 1);
426                         }
427                 }
428                 if (!has_imm)
429                         break;
430         }
431
432         affinity__cleanup(&affinity);
433         evlist__for_each_entry(evlist, pos) {
434                 if (evsel__strcmp(pos, evsel_name))
435                         continue;
436                 if (!evsel__is_group_leader(pos) || !pos->core.fd)
437                         continue;
438                 pos->disabled = true;
439         }
440
441         /*
442          * If we disabled only single event, we need to check
443          * the enabled state of the evlist manually.
444          */
445         if (evsel_name)
446                 evlist->enabled = evlist__is_enabled(evlist);
447         else
448                 evlist->enabled = false;
449 }
450
/* Disable every event in @evlist (no name filter). */
void evlist__disable(struct evlist *evlist)
{
	char *no_filter = NULL;

	__evlist__disable(evlist, no_filter);
}
455
/* Disable only the events of @evlist selected by @evsel_name. */
void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
{
	char *filter = evsel_name;

	__evlist__disable(evlist, filter);
}
460
461 static void __evlist__enable(struct evlist *evlist, char *evsel_name)
462 {
463         struct evsel *pos;
464         struct affinity affinity;
465         int cpu, i;
466
467         if (affinity__setup(&affinity) < 0)
468                 return;
469
470         evlist__for_each_cpu(evlist, i, cpu) {
471                 affinity__set(&affinity, cpu);
472
473                 evlist__for_each_entry(evlist, pos) {
474                         if (evsel__strcmp(pos, evsel_name))
475                                 continue;
476                         if (evsel__cpu_iter_skip(pos, cpu))
477                                 continue;
478                         if (!evsel__is_group_leader(pos) || !pos->core.fd)
479                                 continue;
480                         evsel__enable_cpu(pos, pos->cpu_iter - 1);
481                 }
482         }
483         affinity__cleanup(&affinity);
484         evlist__for_each_entry(evlist, pos) {
485                 if (evsel__strcmp(pos, evsel_name))
486                         continue;
487                 if (!evsel__is_group_leader(pos) || !pos->core.fd)
488                         continue;
489                 pos->disabled = false;
490         }
491
492         /*
493          * Even single event sets the 'enabled' for evlist,
494          * so the toggle can work properly and toggle to
495          * 'disabled' state.
496          */
497         evlist->enabled = true;
498 }
499
/* Enable every event in @evlist (no name filter). */
void evlist__enable(struct evlist *evlist)
{
	char *no_filter = NULL;

	__evlist__enable(evlist, no_filter);
}
504
/* Enable only the events of @evlist selected by @evsel_name. */
void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
{
	char *filter = evsel_name;

	__evlist__enable(evlist, filter);
}
509
510 void evlist__toggle_enable(struct evlist *evlist)
511 {
512         (evlist->enabled ? evlist__disable : evlist__enable)(evlist);
513 }
514
515 static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
516 {
517         int thread;
518         int nr_threads = evlist__nr_threads(evlist, evsel);
519
520         if (!evsel->core.fd)
521                 return -EINVAL;
522
523         for (thread = 0; thread < nr_threads; thread++) {
524                 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
525                 if (err)
526                         return err;
527         }
528         return 0;
529 }
530
531 static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread)
532 {
533         int cpu;
534         int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
535
536         if (!evsel->core.fd)
537                 return -EINVAL;
538
539         for (cpu = 0; cpu < nr_cpus; cpu++) {
540                 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
541                 if (err)
542                         return err;
543         }
544         return 0;
545 }
546
547 int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
548 {
549         bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);
550
551         if (per_cpu_mmaps)
552                 return evlist__enable_event_cpu(evlist, evsel, idx);
553
554         return evlist__enable_event_thread(evlist, evsel, idx);
555 }
556
557 int evlist__add_pollfd(struct evlist *evlist, int fd)
558 {
559         return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
560 }
561
562 int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
563 {
564         return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
565 }
566
#ifdef HAVE_EVENTFD_SUPPORT
/*
 * Register @fd for POLLIN in the evlist's poll set, flagged as
 * non-filterable. Returns what perf_evlist__add_pollfd() returns.
 */
int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
{
	struct perf_evlist *core = &evlist->core;

	return perf_evlist__add_pollfd(core, fd, NULL, POLLIN,
				       fdarray_flag__nonfilterable);
}
#endif
574
575 int evlist__poll(struct evlist *evlist, int timeout)
576 {
577         return perf_evlist__poll(&evlist->core, timeout);
578 }
579
580 struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id)
581 {
582         struct hlist_head *head;
583         struct perf_sample_id *sid;
584         int hash;
585
586         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
587         head = &evlist->core.heads[hash];
588
589         hlist_for_each_entry(sid, head, node)
590                 if (sid->id == id)
591                         return sid;
592
593         return NULL;
594 }
595
596 struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id)
597 {
598         struct perf_sample_id *sid;
599
600         if (evlist->core.nr_entries == 1 || !id)
601                 return evlist__first(evlist);
602
603         sid = evlist__id2sid(evlist, id);
604         if (sid)
605                 return container_of(sid->evsel, struct evsel, core);
606
607         if (!evlist__sample_id_all(evlist))
608                 return evlist__first(evlist);
609
610         return NULL;
611 }
612
613 struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id)
614 {
615         struct perf_sample_id *sid;
616
617         if (!id)
618                 return NULL;
619
620         sid = evlist__id2sid(evlist, id);
621         if (sid)
622                 return container_of(sid->evsel, struct evsel, core);
623
624         return NULL;
625 }
626
627 static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id)
628 {
629         const __u64 *array = event->sample.array;
630         ssize_t n;
631
632         n = (event->header.size - sizeof(event->header)) >> 3;
633
634         if (event->header.type == PERF_RECORD_SAMPLE) {
635                 if (evlist->id_pos >= n)
636                         return -1;
637                 *id = array[evlist->id_pos];
638         } else {
639                 if (evlist->is_pos > n)
640                         return -1;
641                 n -= evlist->is_pos;
642                 *id = array[n];
643         }
644         return 0;
645 }
646
647 struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event)
648 {
649         struct evsel *first = evlist__first(evlist);
650         struct hlist_head *head;
651         struct perf_sample_id *sid;
652         int hash;
653         u64 id;
654
655         if (evlist->core.nr_entries == 1)
656                 return first;
657
658         if (!first->core.attr.sample_id_all &&
659             event->header.type != PERF_RECORD_SAMPLE)
660                 return first;
661
662         if (evlist__event2id(evlist, event, &id))
663                 return NULL;
664
665         /* Synthesized events have an id of zero */
666         if (!id)
667                 return first;
668
669         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
670         head = &evlist->core.heads[hash];
671
672         hlist_for_each_entry(sid, head, node) {
673                 if (sid->id == id)
674                         return container_of(sid->evsel, struct evsel, core);
675         }
676         return NULL;
677 }
678
679 static int evlist__set_paused(struct evlist *evlist, bool value)
680 {
681         int i;
682
683         if (!evlist->overwrite_mmap)
684                 return 0;
685
686         for (i = 0; i < evlist->core.nr_mmaps; i++) {
687                 int fd = evlist->overwrite_mmap[i].core.fd;
688                 int err;
689
690                 if (fd < 0)
691                         continue;
692                 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
693                 if (err)
694                         return err;
695         }
696         return 0;
697 }
698
/* Pause output on the overwrite mmaps; see evlist__set_paused(). */
static int evlist__pause(struct evlist *evlist)
{
	const bool paused = true;

	return evlist__set_paused(evlist, paused);
}
703
/* Resume output on the overwrite mmaps; see evlist__set_paused(). */
static int evlist__resume(struct evlist *evlist)
{
	const bool paused = false;

	return evlist__set_paused(evlist, paused);
}
708
709 static void evlist__munmap_nofree(struct evlist *evlist)
710 {
711         int i;
712
713         if (evlist->mmap)
714                 for (i = 0; i < evlist->core.nr_mmaps; i++)
715                         perf_mmap__munmap(&evlist->mmap[i].core);
716
717         if (evlist->overwrite_mmap)
718                 for (i = 0; i < evlist->core.nr_mmaps; i++)
719                         perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
720 }
721
722 void evlist__munmap(struct evlist *evlist)
723 {
724         evlist__munmap_nofree(evlist);
725         zfree(&evlist->mmap);
726         zfree(&evlist->overwrite_mmap);
727 }
728
729 static void perf_mmap__unmap_cb(struct perf_mmap *map)
730 {
731         struct mmap *m = container_of(map, struct mmap, core);
732
733         mmap__munmap(m);
734 }
735
736 static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
737                                        bool overwrite)
738 {
739         int i;
740         struct mmap *map;
741
742         map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
743         if (!map)
744                 return NULL;
745
746         for (i = 0; i < evlist->core.nr_mmaps; i++) {
747                 struct perf_mmap *prev = i ? &map[i - 1].core : NULL;
748
749                 /*
750                  * When the perf_mmap() call is made we grab one refcount, plus
751                  * one extra to let perf_mmap__consume() get the last
752                  * events after all real references (perf_mmap__get()) are
753                  * dropped.
754                  *
755                  * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
756                  * thus does perf_mmap__get() on it.
757                  */
758                 perf_mmap__init(&map[i].core, prev, overwrite, perf_mmap__unmap_cb);
759         }
760
761         return map;
762 }
763
764 static void
765 perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
766                          struct perf_mmap_param *_mp,
767                          int idx, bool per_cpu)
768 {
769         struct evlist *evlist = container_of(_evlist, struct evlist, core);
770         struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
771
772         auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
773 }
774
775 static struct perf_mmap*
776 perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
777 {
778         struct evlist *evlist = container_of(_evlist, struct evlist, core);
779         struct mmap *maps;
780
781         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
782
783         if (!maps) {
784                 maps = evlist__alloc_mmap(evlist, overwrite);
785                 if (!maps)
786                         return NULL;
787
788                 if (overwrite) {
789                         evlist->overwrite_mmap = maps;
790                         if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
791                                 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
792                 } else {
793                         evlist->mmap = maps;
794                 }
795         }
796
797         return &maps[idx].core;
798 }
799
800 static int
801 perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
802                           int output, int cpu)
803 {
804         struct mmap *map = container_of(_map, struct mmap, core);
805         struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
806
807         return mmap__mmap(map, mp, output, cpu);
808 }
809
810 unsigned long perf_event_mlock_kb_in_pages(void)
811 {
812         unsigned long pages;
813         int max;
814
815         if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
816                 /*
817                  * Pick a once upon a time good value, i.e. things look
818                  * strange since we can't read a sysctl value, but lets not
819                  * die yet...
820                  */
821                 max = 512;
822         } else {
823                 max -= (page_size / 1024);
824         }
825
826         pages = (max * 1024) / page_size;
827         if (!is_power_of_2(pages))
828                 pages = rounddown_pow_of_two(pages);
829
830         return pages;
831 }
832
833 size_t evlist__mmap_size(unsigned long pages)
834 {
835         if (pages == UINT_MAX)
836                 pages = perf_event_mlock_kb_in_pages();
837         else if (!is_power_of_2(pages))
838                 return 0;
839
840         return (pages + 1) * page_size;
841 }
842
843 static long parse_pages_arg(const char *str, unsigned long min,
844                             unsigned long max)
845 {
846         unsigned long pages, val;
847         static struct parse_tag tags[] = {
848                 { .tag  = 'B', .mult = 1       },
849                 { .tag  = 'K', .mult = 1 << 10 },
850                 { .tag  = 'M', .mult = 1 << 20 },
851                 { .tag  = 'G', .mult = 1 << 30 },
852                 { .tag  = 0 },
853         };
854
855         if (str == NULL)
856                 return -EINVAL;
857
858         val = parse_tag_value(str, tags);
859         if (val != (unsigned long) -1) {
860                 /* we got file size value */
861                 pages = PERF_ALIGN(val, page_size) / page_size;
862         } else {
863                 /* we got pages count value */
864                 char *eptr;
865                 pages = strtoul(str, &eptr, 10);
866                 if (*eptr != '\0')
867                         return -EINVAL;
868         }
869
870         if (pages == 0 && min == 0) {
871                 /* leave number of pages at 0 */
872         } else if (!is_power_of_2(pages)) {
873                 char buf[100];
874
875                 /* round pages up to next power of 2 */
876                 pages = roundup_pow_of_two(pages);
877                 if (!pages)
878                         return -EINVAL;
879
880                 unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
881                 pr_info("rounding mmap pages size to %s (%lu pages)\n",
882                         buf, pages);
883         }
884
885         if (pages > max)
886                 return -EINVAL;
887
888         return pages;
889 }
890
891 int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
892 {
893         unsigned long max = UINT_MAX;
894         long pages;
895
896         if (max > SIZE_MAX / page_size)
897                 max = SIZE_MAX / page_size;
898
899         pages = parse_pages_arg(str, 1, max);
900         if (pages < 0) {
901                 pr_err("Invalid argument for --mmap_pages/-m\n");
902                 return -1;
903         }
904
905         *mmap_pages = pages;
906         return 0;
907 }
908
909 int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused)
910 {
911         return __evlist__parse_mmap_pages(opt->value, str);
912 }
913
914 /**
915  * evlist__mmap_ex - Create mmaps to receive events.
916  * @evlist: list of events
917  * @pages: map length in pages
918  * @overwrite: overwrite older events?
919  * @auxtrace_pages - auxtrace map length in pages
920  * @auxtrace_overwrite - overwrite older auxtrace data?
921  *
922  * If @overwrite is %false the user needs to signal event consumption using
923  * perf_mmap__write_tail().  Using evlist__mmap_read() does this
924  * automatically.
925  *
926  * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
927  * consumption using auxtrace_mmap__write_tail().
928  *
929  * Return: %0 on success, negative error code otherwise.
930  */
931 int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
932                          unsigned int auxtrace_pages,
933                          bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
934                          int comp_level)
935 {
936         /*
937          * Delay setting mp.prot: set it before calling perf_mmap__mmap.
938          * Its value is decided by evsel's write_backward.
939          * So &mp should not be passed through const pointer.
940          */
941         struct mmap_params mp = {
942                 .nr_cblocks     = nr_cblocks,
943                 .affinity       = affinity,
944                 .flush          = flush,
945                 .comp_level     = comp_level
946         };
947         struct perf_evlist_mmap_ops ops = {
948                 .idx  = perf_evlist__mmap_cb_idx,
949                 .get  = perf_evlist__mmap_cb_get,
950                 .mmap = perf_evlist__mmap_cb_mmap,
951         };
952
953         evlist->core.mmap_len = evlist__mmap_size(pages);
954         pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
955
956         auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
957                                    auxtrace_pages, auxtrace_overwrite);
958
959         return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
960 }
961
962 int evlist__mmap(struct evlist *evlist, unsigned int pages)
963 {
964         return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
965 }
966
967 int evlist__create_maps(struct evlist *evlist, struct target *target)
968 {
969         bool all_threads = (target->per_thread && target->system_wide);
970         struct perf_cpu_map *cpus;
971         struct perf_thread_map *threads;
972
973         /*
974          * If specify '-a' and '--per-thread' to perf record, perf record
975          * will override '--per-thread'. target->per_thread = false and
976          * target->system_wide = true.
977          *
978          * If specify '--per-thread' only to perf record,
979          * target->per_thread = true and target->system_wide = false.
980          *
981          * So target->per_thread && target->system_wide is false.
982          * For perf record, thread_map__new_str doesn't call
983          * thread_map__new_all_cpus. That will keep perf record's
984          * current behavior.
985          *
986          * For perf stat, it allows the case that target->per_thread and
987          * target->system_wide are all true. It means to collect system-wide
988          * per-thread data. thread_map__new_str will call
989          * thread_map__new_all_cpus to enumerate all threads.
990          */
991         threads = thread_map__new_str(target->pid, target->tid, target->uid,
992                                       all_threads);
993
994         if (!threads)
995                 return -1;
996
997         if (target__uses_dummy_map(target))
998                 cpus = perf_cpu_map__dummy_new();
999         else
1000                 cpus = perf_cpu_map__new(target->cpu_list);
1001
1002         if (!cpus)
1003                 goto out_delete_threads;
1004
1005         evlist->core.has_user_cpus = !!target->cpu_list;
1006
1007         perf_evlist__set_maps(&evlist->core, cpus, threads);
1008
1009         /* as evlist now has references, put count here */
1010         perf_cpu_map__put(cpus);
1011         perf_thread_map__put(threads);
1012
1013         return 0;
1014
1015 out_delete_threads:
1016         perf_thread_map__put(threads);
1017         return -1;
1018 }
1019
1020 int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
1021 {
1022         struct evsel *evsel;
1023         int err = 0;
1024
1025         evlist__for_each_entry(evlist, evsel) {
1026                 if (evsel->filter == NULL)
1027                         continue;
1028
1029                 /*
1030                  * filters only work for tracepoint event, which doesn't have cpu limit.
1031                  * So evlist and evsel should always be same.
1032                  */
1033                 err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
1034                 if (err) {
1035                         *err_evsel = evsel;
1036                         break;
1037                 }
1038         }
1039
1040         return err;
1041 }
1042
1043 int evlist__set_tp_filter(struct evlist *evlist, const char *filter)
1044 {
1045         struct evsel *evsel;
1046         int err = 0;
1047
1048         if (filter == NULL)
1049                 return -1;
1050
1051         evlist__for_each_entry(evlist, evsel) {
1052                 if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
1053                         continue;
1054
1055                 err = evsel__set_filter(evsel, filter);
1056                 if (err)
1057                         break;
1058         }
1059
1060         return err;
1061 }
1062
1063 int evlist__append_tp_filter(struct evlist *evlist, const char *filter)
1064 {
1065         struct evsel *evsel;
1066         int err = 0;
1067
1068         if (filter == NULL)
1069                 return -1;
1070
1071         evlist__for_each_entry(evlist, evsel) {
1072                 if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
1073                         continue;
1074
1075                 err = evsel__append_tp_filter(evsel, filter);
1076                 if (err)
1077                         break;
1078         }
1079
1080         return err;
1081 }
1082
/*
 * Build a tracepoint filter string that excludes every pid in @pids:
 *
 *   "common_pid != A && common_pid != B && ..."
 *
 * Returns a heap-allocated string the caller must free(), or NULL when
 * @npids is 0 or an allocation fails.
 */
char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
        /* Start from NULL so an empty pid list yields NULL, not garbage. */
        char *filter = NULL;
        size_t i;

        for (i = 0; i < npids; ++i) {
                char *next;
                int len;

                if (i == 0)
                        len = asprintf(&next, "common_pid != %d", (int)pids[i]);
                else
                        len = asprintf(&next, "%s && common_pid != %d", filter, (int)pids[i]);

                if (len < 0) {
                        /* 'next' is unspecified on failure; only 'filter' is ours */
                        free(filter);
                        return NULL;
                }

                free(filter);
                filter = next;
        }

        return filter;
}
1108
/*
 * Build a pid-exclusion filter for @pids and set it on the tracepoint
 * evsels. A NULL filter string is passed straight through, and
 * evlist__set_tp_filter() answers that with -1.
 */
int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
        char *pid_filter;
        int err;

        pid_filter = asprintf__tp_filter_pids(npids, pids);
        err = evlist__set_tp_filter(evlist, pid_filter);
        free(pid_filter);

        return err;
}
1117
/* Single-pid form of evlist__set_tp_filter_pids(). */
int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
        pid_t *only = &pid;

        return evlist__set_tp_filter_pids(evlist, 1, only);
}
1122
/*
 * Build a pid-exclusion filter for @pids and append it to the tracepoint
 * evsels. A NULL filter string is passed straight through, and
 * evlist__append_tp_filter() answers that with -1.
 */
int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
        char *pid_filter;
        int err;

        pid_filter = asprintf__tp_filter_pids(npids, pids);
        err = evlist__append_tp_filter(evlist, pid_filter);
        free(pid_filter);

        return err;
}
1131
/* Single-pid form of evlist__append_tp_filter_pids(). */
int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
        pid_t *only = &pid;

        return evlist__append_tp_filter_pids(evlist, 1, only);
}
1136
1137 bool evlist__valid_sample_type(struct evlist *evlist)
1138 {
1139         struct evsel *pos;
1140
1141         if (evlist->core.nr_entries == 1)
1142                 return true;
1143
1144         if (evlist->id_pos < 0 || evlist->is_pos < 0)
1145                 return false;
1146
1147         evlist__for_each_entry(evlist, pos) {
1148                 if (pos->id_pos != evlist->id_pos ||
1149                     pos->is_pos != evlist->is_pos)
1150                         return false;
1151         }
1152
1153         return true;
1154 }
1155
1156 u64 __evlist__combined_sample_type(struct evlist *evlist)
1157 {
1158         struct evsel *evsel;
1159
1160         if (evlist->combined_sample_type)
1161                 return evlist->combined_sample_type;
1162
1163         evlist__for_each_entry(evlist, evsel)
1164                 evlist->combined_sample_type |= evsel->core.attr.sample_type;
1165
1166         return evlist->combined_sample_type;
1167 }
1168
1169 u64 evlist__combined_sample_type(struct evlist *evlist)
1170 {
1171         evlist->combined_sample_type = 0;
1172         return __evlist__combined_sample_type(evlist);
1173 }
1174
1175 u64 evlist__combined_branch_type(struct evlist *evlist)
1176 {
1177         struct evsel *evsel;
1178         u64 branch_type = 0;
1179
1180         evlist__for_each_entry(evlist, evsel)
1181                 branch_type |= evsel->core.attr.branch_sample_type;
1182         return branch_type;
1183 }
1184
1185 bool evlist__valid_read_format(struct evlist *evlist)
1186 {
1187         struct evsel *first = evlist__first(evlist), *pos = first;
1188         u64 read_format = first->core.attr.read_format;
1189         u64 sample_type = first->core.attr.sample_type;
1190
1191         evlist__for_each_entry(evlist, pos) {
1192                 if (read_format != pos->core.attr.read_format) {
1193                         pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
1194                                  read_format, (u64)pos->core.attr.read_format);
1195                 }
1196         }
1197
1198         /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
1199         if ((sample_type & PERF_SAMPLE_READ) &&
1200             !(read_format & PERF_FORMAT_ID)) {
1201                 return false;
1202         }
1203
1204         return true;
1205 }
1206
1207 u16 evlist__id_hdr_size(struct evlist *evlist)
1208 {
1209         struct evsel *first = evlist__first(evlist);
1210         struct perf_sample *data;
1211         u64 sample_type;
1212         u16 size = 0;
1213
1214         if (!first->core.attr.sample_id_all)
1215                 goto out;
1216
1217         sample_type = first->core.attr.sample_type;
1218
1219         if (sample_type & PERF_SAMPLE_TID)
1220                 size += sizeof(data->tid) * 2;
1221
1222        if (sample_type & PERF_SAMPLE_TIME)
1223                 size += sizeof(data->time);
1224
1225         if (sample_type & PERF_SAMPLE_ID)
1226                 size += sizeof(data->id);
1227
1228         if (sample_type & PERF_SAMPLE_STREAM_ID)
1229                 size += sizeof(data->stream_id);
1230
1231         if (sample_type & PERF_SAMPLE_CPU)
1232                 size += sizeof(data->cpu) * 2;
1233
1234         if (sample_type & PERF_SAMPLE_IDENTIFIER)
1235                 size += sizeof(data->id);
1236 out:
1237         return size;
1238 }
1239
1240 bool evlist__valid_sample_id_all(struct evlist *evlist)
1241 {
1242         struct evsel *first = evlist__first(evlist), *pos = first;
1243
1244         evlist__for_each_entry_continue(evlist, pos) {
1245                 if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
1246                         return false;
1247         }
1248
1249         return true;
1250 }
1251
1252 bool evlist__sample_id_all(struct evlist *evlist)
1253 {
1254         struct evsel *first = evlist__first(evlist);
1255         return first->core.attr.sample_id_all;
1256 }
1257
1258 void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
1259 {
1260         evlist->selected = evsel;
1261 }
1262
1263 void evlist__close(struct evlist *evlist)
1264 {
1265         struct evsel *evsel;
1266         struct affinity affinity;
1267         int cpu, i;
1268
1269         /*
1270          * With perf record core.cpus is usually NULL.
1271          * Use the old method to handle this for now.
1272          */
1273         if (!evlist->core.cpus) {
1274                 evlist__for_each_entry_reverse(evlist, evsel)
1275                         evsel__close(evsel);
1276                 return;
1277         }
1278
1279         if (affinity__setup(&affinity) < 0)
1280                 return;
1281         evlist__for_each_cpu(evlist, i, cpu) {
1282                 affinity__set(&affinity, cpu);
1283
1284                 evlist__for_each_entry_reverse(evlist, evsel) {
1285                         if (evsel__cpu_iter_skip(evsel, cpu))
1286                             continue;
1287                         perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
1288                 }
1289         }
1290         affinity__cleanup(&affinity);
1291         evlist__for_each_entry_reverse(evlist, evsel) {
1292                 perf_evsel__free_fd(&evsel->core);
1293                 perf_evsel__free_id(&evsel->core);
1294         }
1295         perf_evlist__reset_id_hash(&evlist->core);
1296 }
1297
1298 static int evlist__create_syswide_maps(struct evlist *evlist)
1299 {
1300         struct perf_cpu_map *cpus;
1301         struct perf_thread_map *threads;
1302         int err = -ENOMEM;
1303
1304         /*
1305          * Try reading /sys/devices/system/cpu/online to get
1306          * an all cpus map.
1307          *
1308          * FIXME: -ENOMEM is the best we can do here, the cpu_map
1309          * code needs an overhaul to properly forward the
1310          * error, and we may not want to do that fallback to a
1311          * default cpu identity map :-\
1312          */
1313         cpus = perf_cpu_map__new(NULL);
1314         if (!cpus)
1315                 goto out;
1316
1317         threads = perf_thread_map__new_dummy();
1318         if (!threads)
1319                 goto out_put;
1320
1321         perf_evlist__set_maps(&evlist->core, cpus, threads);
1322
1323         perf_thread_map__put(threads);
1324 out_put:
1325         perf_cpu_map__put(cpus);
1326 out:
1327         return err;
1328 }
1329
1330 int evlist__open(struct evlist *evlist)
1331 {
1332         struct evsel *evsel;
1333         int err;
1334
1335         /*
1336          * Default: one fd per CPU, all threads, aka systemwide
1337          * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
1338          */
1339         if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
1340                 err = evlist__create_syswide_maps(evlist);
1341                 if (err < 0)
1342                         goto out_err;
1343         }
1344
1345         evlist__update_id_pos(evlist);
1346
1347         evlist__for_each_entry(evlist, evsel) {
1348                 err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
1349                 if (err < 0)
1350                         goto out_err;
1351         }
1352
1353         return 0;
1354 out_err:
1355         evlist__close(evlist);
1356         errno = -err;
1357         return err;
1358 }
1359
1360 int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[],
1361                              bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1362 {
1363         int child_ready_pipe[2], go_pipe[2];
1364         char bf;
1365
1366         if (pipe(child_ready_pipe) < 0) {
1367                 perror("failed to create 'ready' pipe");
1368                 return -1;
1369         }
1370
1371         if (pipe(go_pipe) < 0) {
1372                 perror("failed to create 'go' pipe");
1373                 goto out_close_ready_pipe;
1374         }
1375
1376         evlist->workload.pid = fork();
1377         if (evlist->workload.pid < 0) {
1378                 perror("failed to fork");
1379                 goto out_close_pipes;
1380         }
1381
1382         if (!evlist->workload.pid) {
1383                 int ret;
1384
1385                 if (pipe_output)
1386                         dup2(2, 1);
1387
1388                 signal(SIGTERM, SIG_DFL);
1389
1390                 close(child_ready_pipe[0]);
1391                 close(go_pipe[1]);
1392                 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
1393
1394                 /*
1395                  * Change the name of this process not to confuse --exclude-perf users
1396                  * that sees 'perf' in the window up to the execvp() and thinks that
1397                  * perf samples are not being excluded.
1398                  */
1399                 prctl(PR_SET_NAME, "perf-exec");
1400
1401                 /*
1402                  * Tell the parent we're ready to go
1403                  */
1404                 close(child_ready_pipe[1]);
1405
1406                 /*
1407                  * Wait until the parent tells us to go.
1408                  */
1409                 ret = read(go_pipe[0], &bf, 1);
1410                 /*
1411                  * The parent will ask for the execvp() to be performed by
1412                  * writing exactly one byte, in workload.cork_fd, usually via
1413                  * evlist__start_workload().
1414                  *
1415                  * For cancelling the workload without actually running it,
1416                  * the parent will just close workload.cork_fd, without writing
1417                  * anything, i.e. read will return zero and we just exit()
1418                  * here.
1419                  */
1420                 if (ret != 1) {
1421                         if (ret == -1)
1422                                 perror("unable to read pipe");
1423                         exit(ret);
1424                 }
1425
1426                 execvp(argv[0], (char **)argv);
1427
1428                 if (exec_error) {
1429                         union sigval val;
1430
1431                         val.sival_int = errno;
1432                         if (sigqueue(getppid(), SIGUSR1, val))
1433                                 perror(argv[0]);
1434                 } else
1435                         perror(argv[0]);
1436                 exit(-1);
1437         }
1438
1439         if (exec_error) {
1440                 struct sigaction act = {
1441                         .sa_flags     = SA_SIGINFO,
1442                         .sa_sigaction = exec_error,
1443                 };
1444                 sigaction(SIGUSR1, &act, NULL);
1445         }
1446
1447         if (target__none(target)) {
1448                 if (evlist->core.threads == NULL) {
1449                         fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
1450                                 __func__, __LINE__);
1451                         goto out_close_pipes;
1452                 }
1453                 perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
1454         }
1455
1456         close(child_ready_pipe[1]);
1457         close(go_pipe[0]);
1458         /*
1459          * wait for child to settle
1460          */
1461         if (read(child_ready_pipe[0], &bf, 1) == -1) {
1462                 perror("unable to read pipe");
1463                 goto out_close_pipes;
1464         }
1465
1466         fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1467         evlist->workload.cork_fd = go_pipe[1];
1468         close(child_ready_pipe[0]);
1469         return 0;
1470
1471 out_close_pipes:
1472         close(go_pipe[0]);
1473         close(go_pipe[1]);
1474 out_close_ready_pipe:
1475         close(child_ready_pipe[0]);
1476         close(child_ready_pipe[1]);
1477         return -1;
1478 }
1479
1480 int evlist__start_workload(struct evlist *evlist)
1481 {
1482         if (evlist->workload.cork_fd > 0) {
1483                 char bf = 0;
1484                 int ret;
1485                 /*
1486                  * Remove the cork, let it rip!
1487                  */
1488                 ret = write(evlist->workload.cork_fd, &bf, 1);
1489                 if (ret < 0)
1490                         perror("unable to write to pipe");
1491
1492                 close(evlist->workload.cork_fd);
1493                 return ret;
1494         }
1495
1496         return 0;
1497 }
1498
/*
 * Look up the evsel that @event belongs to and let it parse the event into
 * @sample. Returns -EFAULT when no evsel matches.
 */
int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
{
        struct evsel *owner = evlist__event2evsel(evlist, event);

        return owner ? evsel__parse_sample(owner, event, sample) : -EFAULT;
}
1507
1508 int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp)
1509 {
1510         struct evsel *evsel = evlist__event2evsel(evlist, event);
1511
1512         if (!evsel)
1513                 return -EFAULT;
1514         return evsel__parse_sample_timestamp(evsel, event, timestamp);
1515 }
1516
1517 int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
1518 {
1519         int printed, value;
1520         char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1521
1522         switch (err) {
1523         case EACCES:
1524         case EPERM:
1525                 printed = scnprintf(buf, size,
1526                                     "Error:\t%s.\n"
1527                                     "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1528
1529                 value = perf_event_paranoid();
1530
1531                 printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1532
1533                 if (value >= 2) {
1534                         printed += scnprintf(buf + printed, size - printed,
1535                                              "For your workloads it needs to be <= 1\nHint:\t");
1536                 }
1537                 printed += scnprintf(buf + printed, size - printed,
1538                                      "For system wide tracing it needs to be set to -1.\n");
1539
1540                 printed += scnprintf(buf + printed, size - printed,
1541                                     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1542                                     "Hint:\tThe current value is %d.", value);
1543                 break;
1544         case EINVAL: {
1545                 struct evsel *first = evlist__first(evlist);
1546                 int max_freq;
1547
1548                 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
1549                         goto out_default;
1550
1551                 if (first->core.attr.sample_freq < (u64)max_freq)
1552                         goto out_default;
1553
1554                 printed = scnprintf(buf, size,
1555                                     "Error:\t%s.\n"
1556                                     "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
1557                                     "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
1558                                     emsg, max_freq, first->core.attr.sample_freq);
1559                 break;
1560         }
1561         default:
1562 out_default:
1563                 scnprintf(buf, size, "%s", emsg);
1564                 break;
1565         }
1566
1567         return 0;
1568 }
1569
1570 int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
1571 {
1572         char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1573         int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
1574
1575         switch (err) {
1576         case EPERM:
1577                 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1578                 printed += scnprintf(buf + printed, size - printed,
1579                                      "Error:\t%s.\n"
1580                                      "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1581                                      "Hint:\tTried using %zd kB.\n",
1582                                      emsg, pages_max_per_user, pages_attempted);
1583
1584                 if (pages_attempted >= pages_max_per_user) {
1585                         printed += scnprintf(buf + printed, size - printed,
1586                                              "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1587                                              pages_max_per_user + pages_attempted);
1588                 }
1589
1590                 printed += scnprintf(buf + printed, size - printed,
1591                                      "Hint:\tTry using a smaller -m/--mmap-pages value.");
1592                 break;
1593         default:
1594                 scnprintf(buf, size, "%s", emsg);
1595                 break;
1596         }
1597
1598         return 0;
1599 }
1600
1601 void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel)
1602 {
1603         struct evsel *evsel, *n;
1604         LIST_HEAD(move);
1605
1606         if (move_evsel == evlist__first(evlist))
1607                 return;
1608
1609         evlist__for_each_entry_safe(evlist, n, evsel) {
1610                 if (evsel__leader(evsel) == evsel__leader(move_evsel))
1611                         list_move_tail(&evsel->core.node, &move);
1612         }
1613
1614         list_splice(&move, &evlist->core.entries);
1615 }
1616
1617 struct evsel *evlist__get_tracking_event(struct evlist *evlist)
1618 {
1619         struct evsel *evsel;
1620
1621         evlist__for_each_entry(evlist, evsel) {
1622                 if (evsel->tracking)
1623                         return evsel;
1624         }
1625
1626         return evlist__first(evlist);
1627 }
1628
1629 void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel)
1630 {
1631         struct evsel *evsel;
1632
1633         if (tracking_evsel->tracking)
1634                 return;
1635
1636         evlist__for_each_entry(evlist, evsel) {
1637                 if (evsel != tracking_evsel)
1638                         evsel->tracking = false;
1639         }
1640
1641         tracking_evsel->tracking = true;
1642 }
1643
1644 struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str)
1645 {
1646         struct evsel *evsel;
1647
1648         evlist__for_each_entry(evlist, evsel) {
1649                 if (!evsel->name)
1650                         continue;
1651                 if (strcmp(str, evsel->name) == 0)
1652                         return evsel;
1653         }
1654
1655         return NULL;
1656 }
1657
1658 void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state)
1659 {
1660         enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
1661         enum action {
1662                 NONE,
1663                 PAUSE,
1664                 RESUME,
1665         } action = NONE;
1666
1667         if (!evlist->overwrite_mmap)
1668                 return;
1669
1670         switch (old_state) {
1671         case BKW_MMAP_NOTREADY: {
1672                 if (state != BKW_MMAP_RUNNING)
1673                         goto state_err;
1674                 break;
1675         }
1676         case BKW_MMAP_RUNNING: {
1677                 if (state != BKW_MMAP_DATA_PENDING)
1678                         goto state_err;
1679                 action = PAUSE;
1680                 break;
1681         }
1682         case BKW_MMAP_DATA_PENDING: {
1683                 if (state != BKW_MMAP_EMPTY)
1684                         goto state_err;
1685                 break;
1686         }
1687         case BKW_MMAP_EMPTY: {
1688                 if (state != BKW_MMAP_RUNNING)
1689                         goto state_err;
1690                 action = RESUME;
1691                 break;
1692         }
1693         default:
1694                 WARN_ONCE(1, "Shouldn't get there\n");
1695         }
1696
1697         evlist->bkw_mmap_state = state;
1698
1699         switch (action) {
1700         case PAUSE:
1701                 evlist__pause(evlist);
1702                 break;
1703         case RESUME:
1704                 evlist__resume(evlist);
1705                 break;
1706         case NONE:
1707         default:
1708                 break;
1709         }
1710
1711 state_err:
1712         return;
1713 }
1714
1715 bool evlist__exclude_kernel(struct evlist *evlist)
1716 {
1717         struct evsel *evsel;
1718
1719         evlist__for_each_entry(evlist, evsel) {
1720                 if (!evsel->core.attr.exclude_kernel)
1721                         return false;
1722         }
1723
1724         return true;
1725 }
1726
1727 /*
1728  * Events in data file are not collect in groups, but we still want
1729  * the group display. Set the artificial group and set the leader's
1730  * forced_leader flag to notify the display code.
1731  */
1732 void evlist__force_leader(struct evlist *evlist)
1733 {
1734         if (!evlist->core.nr_groups) {
1735                 struct evsel *leader = evlist__first(evlist);
1736
1737                 evlist__set_leader(evlist);
1738                 leader->forced_leader = true;
1739         }
1740 }
1741
1742 struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close)
1743 {
1744         struct evsel *c2, *leader;
1745         bool is_open = true;
1746
1747         leader = evsel__leader(evsel);
1748
1749         pr_debug("Weak group for %s/%d failed\n",
1750                         leader->name, leader->core.nr_members);
1751
1752         /*
1753          * for_each_group_member doesn't work here because it doesn't
1754          * include the first entry.
1755          */
1756         evlist__for_each_entry(evsel_list, c2) {
1757                 if (c2 == evsel)
1758                         is_open = false;
1759                 if (evsel__has_leader(c2, leader)) {
1760                         if (is_open && close)
1761                                 perf_evsel__close(&c2->core);
1762                         evsel__set_leader(c2, c2);
1763                         c2->core.nr_members = 0;
1764                         /*
1765                          * Set this for all former members of the group
1766                          * to indicate they get reopened.
1767                          */
1768                         c2->reset_group = true;
1769                 }
1770         }
1771         return leader;
1772 }
1773
1774 static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
1775 {
1776         char *s, *p;
1777         int ret = 0, fd;
1778
1779         if (strncmp(str, "fifo:", 5))
1780                 return -EINVAL;
1781
1782         str += 5;
1783         if (!*str || *str == ',')
1784                 return -EINVAL;
1785
1786         s = strdup(str);
1787         if (!s)
1788                 return -ENOMEM;
1789
1790         p = strchr(s, ',');
1791         if (p)
1792                 *p = '\0';
1793
1794         /*
1795          * O_RDWR avoids POLLHUPs which is necessary to allow the other
1796          * end of a FIFO to be repeatedly opened and closed.
1797          */
1798         fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC);
1799         if (fd < 0) {
1800                 pr_err("Failed to open '%s'\n", s);
1801                 ret = -errno;
1802                 goto out_free;
1803         }
1804         *ctl_fd = fd;
1805         *ctl_fd_close = true;
1806
1807         if (p && *++p) {
1808                 /* O_RDWR | O_NONBLOCK means the other end need not be open */
1809                 fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC);
1810                 if (fd < 0) {
1811                         pr_err("Failed to open '%s'\n", p);
1812                         ret = -errno;
1813                         goto out_free;
1814                 }
1815                 *ctl_fd_ack = fd;
1816         }
1817
1818 out_free:
1819         free(s);
1820         return ret;
1821 }
1822
/*
 * Parse a control specification of the form "fd:ctl-fd[,ack-fd]".
 * Anything not starting with "fd:" is handed to
 * evlist__parse_control_fifo().
 *
 * *ctl_fd_close is reset to false first. *ctl_fd is written as soon as
 * the first number is converted, even if -EINVAL is returned afterwards.
 *
 * Returns 0 on success, -EINVAL when a number is missing or is followed
 * by unexpected characters.
 */
int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
{
        char *comma = NULL, *endptr = NULL;

        *ctl_fd_close = false;

        if (strncmp(str, "fd:", 3))
                return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close);

        *ctl_fd = strtoul(&str[3], &endptr, 0);
        if (endptr == &str[3])
                return -EINVAL;

        comma = strchr(str, ',');
        if (!comma) {
                /* no ack fd: the ctl fd number must run to the end of the string */
                return *endptr == '\0' ? 0 : -EINVAL;
        }

        /* the ctl fd number must end exactly at the comma */
        if (endptr != comma)
                return -EINVAL;

        *ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
        if (endptr == comma + 1 || *endptr != '\0')
                return -EINVAL;

        return 0;
}
1848
/*
 * Close the control descriptors when *ctl_fd_close says they are ours to
 * close, and clear that flag. @ctl_fd_ack is only closed when it is
 * non-negative.
 */
void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close)
{
        if (!*ctl_fd_close)
                return;

        *ctl_fd_close = false;
        close(ctl_fd);

        if (ctl_fd_ack < 0)
                return;
        close(ctl_fd_ack);
}
1858
1859 int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
1860 {
1861         if (fd == -1) {
1862                 pr_debug("Control descriptor is not initialized\n");
1863                 return 0;
1864         }
1865
1866         evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
1867                                                      fdarray_flag__nonfilterable);
1868         if (evlist->ctl_fd.pos < 0) {
1869                 evlist->ctl_fd.pos = -1;
1870                 pr_err("Failed to add ctl fd entry: %m\n");
1871                 return -1;
1872         }
1873
1874         evlist->ctl_fd.fd = fd;
1875         evlist->ctl_fd.ack = ack;
1876
1877         return 0;
1878 }
1879
1880 bool evlist__ctlfd_initialized(struct evlist *evlist)
1881 {
1882         return evlist->ctl_fd.pos >= 0;
1883 }
1884
1885 int evlist__finalize_ctlfd(struct evlist *evlist)
1886 {
1887         struct pollfd *entries = evlist->core.pollfd.entries;
1888
1889         if (!evlist__ctlfd_initialized(evlist))
1890                 return 0;
1891
1892         entries[evlist->ctl_fd.pos].fd = -1;
1893         entries[evlist->ctl_fd.pos].events = 0;
1894         entries[evlist->ctl_fd.pos].revents = 0;
1895
1896         evlist->ctl_fd.pos = -1;
1897         evlist->ctl_fd.ack = -1;
1898         evlist->ctl_fd.fd = -1;
1899
1900         return 0;
1901 }
1902
1903 static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
1904                               char *cmd_data, size_t data_size)
1905 {
1906         int err;
1907         char c;
1908         size_t bytes_read = 0;
1909
1910         *cmd = EVLIST_CTL_CMD_UNSUPPORTED;
1911         memset(cmd_data, 0, data_size);
1912         data_size--;
1913
1914         do {
1915                 err = read(evlist->ctl_fd.fd, &c, 1);
1916                 if (err > 0) {
1917                         if (c == '\n' || c == '\0')
1918                                 break;
1919                         cmd_data[bytes_read++] = c;
1920                         if (bytes_read == data_size)
1921                                 break;
1922                         continue;
1923                 } else if (err == -1) {
1924                         if (errno == EINTR)
1925                                 continue;
1926                         if (errno == EAGAIN || errno == EWOULDBLOCK)
1927                                 err = 0;
1928                         else
1929                                 pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
1930                 }
1931                 break;
1932         } while (1);
1933
1934         pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
1935                  bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
1936
1937         if (bytes_read > 0) {
1938                 if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
1939                              (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
1940                         *cmd = EVLIST_CTL_CMD_ENABLE;
1941                 } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
1942                                     (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
1943                         *cmd = EVLIST_CTL_CMD_DISABLE;
1944                 } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG,
1945                                     (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) {
1946                         *cmd = EVLIST_CTL_CMD_SNAPSHOT;
1947                         pr_debug("is snapshot\n");
1948                 } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG,
1949                                     (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) {
1950                         *cmd = EVLIST_CTL_CMD_EVLIST;
1951                 } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG,
1952                                     (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) {
1953                         *cmd = EVLIST_CTL_CMD_STOP;
1954                 } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_PING_TAG,
1955                                     (sizeof(EVLIST_CTL_CMD_PING_TAG)-1))) {
1956                         *cmd = EVLIST_CTL_CMD_PING;
1957                 }
1958         }
1959
1960         return bytes_read ? (int)bytes_read : err;
1961 }
1962
1963 int evlist__ctlfd_ack(struct evlist *evlist)
1964 {
1965         int err;
1966
1967         if (evlist->ctl_fd.ack == -1)
1968                 return 0;
1969
1970         err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG,
1971                     sizeof(EVLIST_CTL_CMD_ACK_TAG));
1972         if (err == -1)
1973                 pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack);
1974
1975         return err;
1976 }
1977
/*
 * Locate the optional argument that follows a command tag.
 *
 * @cmd_data: NUL-terminated command text starting with the tag
 * @cmd_size: length of the tag (without its NUL)
 * @arg:      set to the text after the separating space when one is present;
 *            left untouched otherwise
 *
 * Returns 0 when the command ends right after the tag, 1 when the tag is
 * followed by a space (and *@arg was set), -1 when any other character
 * follows the tag.
 */
static int get_cmd_arg(char *cmd_data, size_t cmd_size, char **arg)
{
	char *tail = &cmd_data[cmd_size];

	switch (*tail) {
	case '\0':
		/* no argument */
		return 0;
	case ' ':
		/* argument starts right after the separator */
		*arg = tail + 1;
		return 1;
	default:
		/* malformed */
		return -1;
	}
}
1995
1996 static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enable)
1997 {
1998         struct evsel *evsel;
1999         char *name;
2000         int err;
2001
2002         err = get_cmd_arg(cmd_data,
2003                           enable ? sizeof(EVLIST_CTL_CMD_ENABLE_TAG) - 1 :
2004                                    sizeof(EVLIST_CTL_CMD_DISABLE_TAG) - 1,
2005                           &name);
2006         if (err < 0) {
2007                 pr_info("failed: wrong command\n");
2008                 return -1;
2009         }
2010
2011         if (err) {
2012                 evsel = evlist__find_evsel_by_str(evlist, name);
2013                 if (evsel) {
2014                         if (enable)
2015                                 evlist__enable_evsel(evlist, name);
2016                         else
2017                                 evlist__disable_evsel(evlist, name);
2018                         pr_info("Event %s %s\n", evsel->name,
2019                                 enable ? "enabled" : "disabled");
2020                 } else {
2021                         pr_info("failed: can't find '%s' event\n", name);
2022                 }
2023         } else {
2024                 if (enable) {
2025                         evlist__enable(evlist);
2026                         pr_info(EVLIST_ENABLED_MSG);
2027                 } else {
2028                         evlist__disable(evlist);
2029                         pr_info(EVLIST_DISABLED_MSG);
2030                 }
2031         }
2032
2033         return 0;
2034 }
2035
2036 static int evlist__ctlfd_list(struct evlist *evlist, char *cmd_data)
2037 {
2038         struct perf_attr_details details = { .verbose = false, };
2039         struct evsel *evsel;
2040         char *arg;
2041         int err;
2042
2043         err = get_cmd_arg(cmd_data,
2044                           sizeof(EVLIST_CTL_CMD_EVLIST_TAG) - 1,
2045                           &arg);
2046         if (err < 0) {
2047                 pr_info("failed: wrong command\n");
2048                 return -1;
2049         }
2050
2051         if (err) {
2052                 if (!strcmp(arg, "-v")) {
2053                         details.verbose = true;
2054                 } else if (!strcmp(arg, "-g")) {
2055                         details.event_group = true;
2056                 } else if (!strcmp(arg, "-F")) {
2057                         details.freq = true;
2058                 } else {
2059                         pr_info("failed: wrong command\n");
2060                         return -1;
2061                 }
2062         }
2063
2064         evlist__for_each_entry(evlist, evsel)
2065                 evsel__fprintf(evsel, &details, stderr);
2066
2067         return 0;
2068 }
2069
2070 int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
2071 {
2072         int err = 0;
2073         char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
2074         int ctlfd_pos = evlist->ctl_fd.pos;
2075         struct pollfd *entries = evlist->core.pollfd.entries;
2076
2077         if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
2078                 return 0;
2079
2080         if (entries[ctlfd_pos].revents & POLLIN) {
2081                 err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
2082                                          EVLIST_CTL_CMD_MAX_LEN);
2083                 if (err > 0) {
2084                         switch (*cmd) {
2085                         case EVLIST_CTL_CMD_ENABLE:
2086                         case EVLIST_CTL_CMD_DISABLE:
2087                                 err = evlist__ctlfd_enable(evlist, cmd_data,
2088                                                            *cmd == EVLIST_CTL_CMD_ENABLE);
2089                                 break;
2090                         case EVLIST_CTL_CMD_EVLIST:
2091                                 err = evlist__ctlfd_list(evlist, cmd_data);
2092                                 break;
2093                         case EVLIST_CTL_CMD_SNAPSHOT:
2094                         case EVLIST_CTL_CMD_STOP:
2095                         case EVLIST_CTL_CMD_PING:
2096                                 break;
2097                         case EVLIST_CTL_CMD_ACK:
2098                         case EVLIST_CTL_CMD_UNSUPPORTED:
2099                         default:
2100                                 pr_debug("ctlfd: unsupported %d\n", *cmd);
2101                                 break;
2102                         }
2103                         if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED ||
2104                               *cmd == EVLIST_CTL_CMD_SNAPSHOT))
2105                                 evlist__ctlfd_ack(evlist);
2106                 }
2107         }
2108
2109         if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
2110                 evlist__finalize_ctlfd(evlist);
2111         else
2112                 entries[ctlfd_pos].revents = 0;
2113
2114         return err;
2115 }
2116
2117 struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
2118 {
2119         struct evsel *evsel;
2120
2121         evlist__for_each_entry(evlist, evsel) {
2122                 if (evsel->core.idx == idx)
2123                         return evsel;
2124         }
2125         return NULL;
2126 }
2127
2128 int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf)
2129 {
2130         struct evsel *evsel;
2131         int printed = 0;
2132
2133         evlist__for_each_entry(evlist, evsel) {
2134                 if (evsel__is_dummy_event(evsel))
2135                         continue;
2136                 if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) {
2137                         printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel));
2138                 } else {
2139                         printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : "");
2140                         break;
2141                 }
2142         }
2143
2144         return printed;
2145 }
2146
2147 void evlist__check_mem_load_aux(struct evlist *evlist)
2148 {
2149         struct evsel *leader, *evsel, *pos;
2150
2151         /*
2152          * For some platforms, the 'mem-loads' event is required to use
2153          * together with 'mem-loads-aux' within a group and 'mem-loads-aux'
2154          * must be the group leader. Now we disable this group before reporting
2155          * because 'mem-loads-aux' is just an auxiliary event. It doesn't carry
2156          * any valid memory load information.
2157          */
2158         evlist__for_each_entry(evlist, evsel) {
2159                 leader = evsel__leader(evsel);
2160                 if (leader == evsel)
2161                         continue;
2162
2163                 if (leader->name && strstr(leader->name, "mem-loads-aux")) {
2164                         for_each_group_evsel(pos, leader) {
2165                                 evsel__set_leader(pos, pos);
2166                                 pos->core.nr_members = 0;
2167                         }
2168                 }
2169         }
2170 }