1 // SPDX-License-Identifier: GPL-2.0
3 * Arm Statistical Profiling Extensions (SPE) support
4 * Copyright (c) 2017-2018, Arm Ltd.
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
28 #include "thread-stack.h"
30 #include "util/synthetic-events.h"
33 #include "arm-spe-decoder/arm-spe-decoder.h"
34 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
/*
 * All-ones unsigned long long. arm_spe_flush() passes it to
 * arm_spe_process_queues() as the timestamp limit.
 */
36 #define MAX_TIMESTAMP (~0ULL)
/*
 * Members of the per-session SPE state (accessed as "struct arm_spe" by the
 * functions in this file; the struct's opening line and several members are
 * not part of this excerpt).
 */
/* Embedded auxtrace object; container_of() on session->auxtrace recovers the arm_spe. */
39 struct auxtrace auxtrace;
/* Per-queue trace buffers; queue_array[] is indexed by queue number. */
40 struct auxtrace_queues queues;
/* Heap of (queue_nr, timestamp) entries used by arm_spe_process_queues(). */
41 struct auxtrace_heap heap;
/* Copy of the session's itrace options, or the defaults when none were set. */
42 struct itrace_synth_opts synth_opts;
/* Owning session; set in arm_spe_process_auxtrace_info(). */
44 struct perf_session *session;
/* Set to &session->machines.host in arm_spe_process_auxtrace_info(). */
45 struct machine *machine;
/* Set when synth_opts.remote_access is requested; gates remote-access samples. */
55 u8 sample_remote_access;
/* NOTE(review): not referenced by any line visible in this excerpt. */
68 unsigned long num_events;
/*
 * Per-queue decoding state, stored in auxtrace_queue->priv.
 * NOTE(review): several members used elsewhere in this file (spe, pid, tid,
 * cpu, timestamp, on_heap) are declared on lines omitted from this excerpt.
 */
71 struct arm_spe_queue {
/* Index of this queue in spe->queues.queue_array[]. */
73 unsigned int queue_nr;
/* Buffer most recently returned by auxtrace_buffer__next() in arm_spe_get_trace(). */
74 struct auxtrace_buffer *buffer;
/* Previously handed-out buffer; its data is dropped when a newer one is supplied. */
75 struct auxtrace_buffer *old_buffer;
/* PERF_SAMPLE_MAX_SIZE scratch buffer used to build synthesized sample events. */
76 union perf_event *event_buf;
/* Decoder created by arm_spe_decoder_new(); freed in arm_spe_free_queue(). */
82 struct arm_spe_decoder *decoder;
/* Thread reference looked up in arm_spe_set_pid_tid_cpu(); released with thread__zput(). */
85 struct thread *thread;
/*
 * Print a raw SPE trace buffer to stdout in blue: a size banner, then for
 * each packet its offset, its bytes in hex and either the packet description
 * or " Bad packet!".
 *
 * @spe: unused (marked __maybe_unused).
 * @buf: start of the raw trace data.
 * @len: number of bytes at @buf.
 *
 * NOTE(review): this excerpt omits the loop header, the declarations of
 * pos/pkt_len/ret/i and the code that advances through the buffer, so the
 * exact control flow must be confirmed against the full file.
 */
88 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
89 unsigned char *buf, size_t len)
91 struct arm_spe_pkt packet;
94 char desc[ARM_SPE_PKT_DESC_MAX];
95 const char *color = PERF_COLOR_BLUE;
97 color_fprintf(stdout, color,
98 ". ... ARM SPE data: size %zu bytes\n",
/* Decode one packet starting at buf. */
102 ret = arm_spe_get_packet(buf, len, &packet);
/*
 * NOTE(review): pos is formatted with %08x; its declaration is not visible
 * here - confirm its type matches the conversion specifier.
 */
108 color_fprintf(stdout, color, " %08x: ", pos);
109 for (i = 0; i < pkt_len; i++)
110 color_fprintf(stdout, color, " %02x", buf[i]);
112 color_fprintf(stdout, color, " ");
/* Render the decoded packet as text into desc. */
114 ret = arm_spe_pkt_desc(&packet, desc,
115 ARM_SPE_PKT_DESC_MAX);
117 color_fprintf(stdout, color, " %s\n", desc);
119 color_fprintf(stdout, color, " Bad packet!\n");
/*
 * Dump helper used by arm_spe_process_auxtrace_event(); forwards @buf and
 * its length to arm_spe_dump().
 * NOTE(review): the rest of the signature and any lines before the call are
 * not part of this excerpt.
 */
127 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
131 arm_spe_dump(spe, buf, len);
/*
 * Decoder callback (installed as params.get_trace in arm_spe__alloc_queue())
 * that supplies the next chunk of trace data for a queue.
 *
 * @b:    output descriptor; b->len and b->buf are set from the next buffer.
 * @data: the struct arm_spe_queue the decoder belongs to.
 *
 * NOTE(review): the conditions guarding the individual steps and the return
 * statements are on lines omitted from this excerpt.
 */
134 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
136 struct arm_spe_queue *speq = data;
137 struct auxtrace_buffer *buffer = speq->buffer;
138 struct auxtrace_buffer *old_buffer = speq->old_buffer;
139 struct auxtrace_queue *queue;
141 queue = &speq->spe->queues.queue_array[speq->queue_nr];
/* Advance from the current buffer to the following one in this queue. */
143 buffer = auxtrace_buffer__next(queue, buffer);
144 /* If no more data, drop the previous auxtrace_buffer and return */
147 auxtrace_buffer__drop_data(old_buffer);
152 speq->buffer = buffer;
154 /* If the aux_buffer doesn't have data associated, try to load it */
156 /* get the file desc associated with the perf data file */
157 int fd = perf_data__fd(speq->spe->session->data);
159 buffer->data = auxtrace_buffer__get_data(buffer, fd);
/* Hand the buffer's size and data pointer to the decoder. */
164 b->len = buffer->size;
165 b->buf = buffer->data;
/* The previously supplied buffer is released; this one becomes old_buffer. */
169 auxtrace_buffer__drop_data(old_buffer);
170 speq->old_buffer = buffer;
/*
 * NOTE(review): this path drops the buffer and recurses to fetch the next
 * one; the condition selecting it is not visible in this excerpt.
 */
172 auxtrace_buffer__drop_data(buffer);
173 return arm_spe_get_trace(b, data);
/*
 * Allocate the per-queue decoding state for queue @queue_nr of @spe: the
 * arm_spe_queue itself, a PERF_SAMPLE_MAX_SIZE event buffer for synthesized
 * samples, and a decoder whose get_trace callback is arm_spe_get_trace().
 *
 * NOTE(review): the NULL checks' bodies, the remaining field
 * initialisation, the return statements and the error label are on lines
 * omitted from this excerpt.
 */
179 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
180 unsigned int queue_nr)
182 struct arm_spe_params params = { .get_trace = 0, };
183 struct arm_spe_queue *speq;
185 speq = zalloc(sizeof(*speq));
189 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
190 if (!speq->event_buf)
194 speq->queue_nr = queue_nr;
/* The decoder pulls trace data through this callback. */
200 params.get_trace = arm_spe_get_trace;
203 /* create new decoder */
204 speq->decoder = arm_spe_decoder_new(&params);
/* Cleanup: release the event buffer allocated above. */
211 zfree(&speq->event_buf);
/*
 * Classify an instruction address: addresses at or above spe->kernel_start
 * yield PERF_RECORD_MISC_KERNEL, all others PERF_RECORD_MISC_USER.
 */
217 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
219 return ip >= spe->kernel_start ?
220 PERF_RECORD_MISC_KERNEL :
221 PERF_RECORD_MISC_USER;
/*
 * Fill the fields of @sample and the header of @event that are common to
 * every synthesized SPE sample, using the decoder's current record and the
 * queue's pid/tid/cpu/timestamp.
 *
 * @spe:    session-wide SPE state (timeless flag, kernel start address).
 * @speq:   queue whose decoder record is being turned into a sample.
 * @event:  event buffer whose sample header is initialised.
 * @sample: sample to populate.
 */
224 static void arm_spe_prep_sample(struct arm_spe *spe,
225 struct arm_spe_queue *speq,
226 union perf_event *event,
227 struct perf_sample *sample)
229 struct arm_spe_record *record = &speq->decoder->record;
/* A timestamp is only attached when decoding is not timeless. */
231 if (!spe->timeless_decoding)
232 sample->time = speq->timestamp;
234 sample->ip = record->from_ip;
235 sample->cpumode = arm_spe_cpumode(spe, sample->ip);
236 sample->pid = speq->pid;
237 sample->tid = speq->tid;
238 sample->addr = record->to_ip;
240 sample->cpu = speq->cpu;
/* The header describes a PERF_RECORD_SAMPLE whose size is just the header. */
242 event->sample.header.type = PERF_RECORD_SAMPLE;
243 event->sample.header.misc = sample->cpumode;
244 event->sample.header.size = sizeof(struct perf_event_header);
/*
 * Deliver a prepared synthesized event/sample pair to the session via
 * perf_session__deliver_synth_event() and log an error message on failure.
 * @speq is unused (marked __maybe_unused).
 * NOTE(review): the return type, the failure check and the return statement
 * are on lines omitted from this excerpt.
 */
248 arm_spe_deliver_synth_event(struct arm_spe *spe,
249 struct arm_spe_queue *speq __maybe_unused,
250 union perf_event *event,
251 struct perf_sample *sample)
255 ret = perf_session__deliver_synth_event(spe->session, event, sample);
257 pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
/*
 * Build one synthesized sample for the queue's current decoder record,
 * tag it with the event id spe_events_id (used for both id and stream_id)
 * and deliver it. Returns the result of arm_spe_deliver_synth_event().
 * NOTE(review): the return type and the declaration of the spe_events_id
 * parameter are on lines omitted from this excerpt.
 */
263 arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
266 struct arm_spe *spe = speq->spe;
/* The sample is built in the queue's preallocated event buffer. */
267 union perf_event *event = speq->event_buf;
268 struct perf_sample sample = { .ip = 0, };
270 arm_spe_prep_sample(spe, speq, event, &sample);
272 sample.id = spe_events_id;
273 sample.stream_id = spe_events_id;
275 return arm_spe_deliver_synth_event(spe, speq, event, &sample);
/*
 * Emit synthesized samples for the queue's current decoder record. For each
 * enabled category (first-level cache, last-level cache, TLB, branch,
 * remote access) the record's type bits select which event ids receive a
 * sample; one record can therefore produce several samples.
 * NOTE(review): the handling of err after each call and the final return
 * are on lines omitted from this excerpt.
 */
278 static int arm_spe_sample(struct arm_spe_queue *speq)
280 const struct arm_spe_record *record = &speq->decoder->record;
281 struct arm_spe *spe = speq->spe;
/* L1 data cache miss / access samples. */
284 if (spe->sample_flc) {
285 if (record->type & ARM_SPE_L1D_MISS) {
286 err = arm_spe_synth_spe_events_sample(
287 speq, spe->l1d_miss_id);
292 if (record->type & ARM_SPE_L1D_ACCESS) {
293 err = arm_spe_synth_spe_events_sample(
294 speq, spe->l1d_access_id);
/* Last level cache miss / access samples. */
300 if (spe->sample_llc) {
301 if (record->type & ARM_SPE_LLC_MISS) {
302 err = arm_spe_synth_spe_events_sample(
303 speq, spe->llc_miss_id);
308 if (record->type & ARM_SPE_LLC_ACCESS) {
309 err = arm_spe_synth_spe_events_sample(
310 speq, spe->llc_access_id);
/* TLB miss / access samples. */
316 if (spe->sample_tlb) {
317 if (record->type & ARM_SPE_TLB_MISS) {
318 err = arm_spe_synth_spe_events_sample(
319 speq, spe->tlb_miss_id);
324 if (record->type & ARM_SPE_TLB_ACCESS) {
325 err = arm_spe_synth_spe_events_sample(
326 speq, spe->tlb_access_id);
/* Branch miss samples. */
332 if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
333 err = arm_spe_synth_spe_events_sample(speq,
334 spe->branch_miss_id);
/* Remote access samples. */
339 if (spe->sample_remote_access &&
340 (record->type & ARM_SPE_REMOTE_ACCESS)) {
341 err = arm_spe_synth_spe_events_sample(speq,
342 spe->remote_access_id);
/*
 * Decode records from the queue's decoder and synthesize samples for them.
 *
 * @speq:      queue to decode.
 * @timestamp: in/out limit; when decoding is not timeless and the queue's
 *             timestamp has reached *timestamp, *timestamp is updated to the
 *             queue's timestamp.
 *
 * NOTE(review): the loop construct, the checks on ret and the return
 * statements are on lines omitted from this excerpt.
 */
350 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
352 struct arm_spe *spe = speq->spe;
/* Resolve the kernel start address once, on first use. */
355 if (!spe->kernel_start)
356 spe->kernel_start = machine__kernel_start(spe->machine);
359 ret = arm_spe_decode(speq->decoder);
361 pr_debug("No data or all data has been processed.\n");
366 * Error is detected when decode SPE trace data, continue to
367 * the next trace data and find out more records.
372 ret = arm_spe_sample(speq);
376 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
377 *timestamp = speq->timestamp;
/*
 * Prepare one auxtrace queue for decoding: allocate its arm_spe_queue if it
 * has none, inherit the queue's CPU, and - when the queue is not yet on the
 * heap - decode a first record and add the queue to spe->heap keyed by that
 * record's timestamp.
 *
 * @spe:      session-wide SPE state.
 * @queue:    auxtrace queue to set up.
 * @queue_nr: index of @queue in spe->queues.queue_array[].
 *
 * NOTE(review): the early returns, the checks on ret and the storing of
 * speq into queue->priv are on lines omitted from this excerpt.
 */
385 static int arm_spe__setup_queue(struct arm_spe *spe,
386 struct auxtrace_queue *queue,
387 unsigned int queue_nr)
389 struct arm_spe_queue *speq = queue->priv;
390 struct arm_spe_record *record;
/* Nothing to do for an empty queue or one that already has its state. */
392 if (list_empty(&queue->head) || speq)
395 speq = arm_spe__alloc_queue(spe, queue_nr);
402 if (queue->cpu != -1)
403 speq->cpu = queue->cpu;
405 if (!speq->on_heap) {
408 if (spe->timeless_decoding)
412 ret = arm_spe_decode(speq->decoder);
420 record = &speq->decoder->record;
/* The first decoded record's timestamp orders this queue on the heap. */
422 speq->timestamp = record->timestamp;
423 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
426 speq->on_heap = true;
/*
 * Call arm_spe__setup_queue() for every queue in spe->queues.
 * NOTE(review): the handling of ret and the final return are on lines
 * omitted from this excerpt.
 */
432 static int arm_spe__setup_queues(struct arm_spe *spe)
437 for (i = 0; i < spe->queues.nr_queues; i++) {
438 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
/*
 * If new data has been queued since the last call, clear the new_data flag
 * and (re)run queue setup; returns the result of arm_spe__setup_queues() in
 * that case.
 * NOTE(review): the return value for the no-new-data case is on a line
 * omitted from this excerpt.
 */
446 static int arm_spe__update_queues(struct arm_spe *spe)
448 if (spe->queues.new_data) {
449 spe->queues.new_data = false;
450 return arm_spe__setup_queues(spe);
/*
 * Decide whether the session must be decoded without timestamps.
 * Returns true unless at least one event in the session's evlist has
 * PERF_SAMPLE_TIME set in its sample_type.
 */
456 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
459 struct evlist *evlist = spe->session->evlist;
460 bool timeless_decoding = true;
463 * Circle through the list of event and complain if we find one
464 * with the time bit set.
466 evlist__for_each_entry(evlist, evsel) {
467 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
468 timeless_decoding = false;
471 return timeless_decoding;
/*
 * Refresh the pid/tid/cpu/thread fields of the arm_spe_queue attached to
 * @queue before it is decoded.
 * NOTE(review): the conditions that choose between the tid sources below
 * and the NULL check on speq->thread are on lines omitted from this excerpt.
 */
474 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
475 struct auxtrace_queue *queue)
477 struct arm_spe_queue *speq = queue->priv;
/* Ask the machine which tid is currently running on the queue's CPU. */
480 tid = machine__get_current_tid(spe->machine, speq->cpu);
/* Drop the cached thread reference. */
483 thread__zput(speq->thread);
485 speq->tid = queue->tid;
/* Look the thread up again when there is no cached one and the tid is known. */
487 if ((!speq->thread) && (speq->tid != -1)) {
488 speq->thread = machine__find_thread(spe->machine, -1,
493 speq->pid = speq->thread->pid_;
/* A queue without a fixed CPU takes the thread's CPU. */
494 if (queue->cpu == -1)
495 speq->cpu = speq->thread->cpu;
/*
 * Timestamp-ordered decoding: take the queue at the top of spe->heap, decode
 * it, and put it back on the heap with its new timestamp.
 *
 * @spe:       session-wide SPE state.
 * @timestamp: the heap-top ordinal is compared against this value
 *             (">= timestamp") before a queue is processed.
 *
 * NOTE(review): the enclosing loop, the actions taken by the two checks at
 * the top, the assignment of speq, the alternative value of ts and the
 * handling of ret are on lines omitted from this excerpt.
 */
499 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
501 unsigned int queue_nr;
506 struct auxtrace_queue *queue;
507 struct arm_spe_queue *speq;
/* Heap empty. */
509 if (!spe->heap.heap_cnt)
/* Top entry is not older than the requested timestamp. */
512 if (spe->heap.heap_array[0].ordinal >= timestamp)
515 queue_nr = spe->heap.heap_array[0].queue_nr;
516 queue = &spe->queues.queue_array[queue_nr];
519 auxtrace_heap__pop(&spe->heap);
/* With other queues pending, the decode limit is just past the new heap top. */
521 if (spe->heap.heap_cnt) {
522 ts = spe->heap.heap_array[0].ordinal + 1;
529 arm_spe_set_pid_tid_cpu(spe, queue);
531 ret = arm_spe_run_decoder(speq, &ts);
/* The queue is re-added to the heap with the timestamp the decoder reached. */
533 auxtrace_heap__add(&spe->heap, queue_nr, ts);
538 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
542 speq->on_heap = false;
/*
 * Timeless decoding: run the decoder on every queue that has decoding state
 * and whose tid matches @tid (a @tid of -1 selects all queues).
 * The result of arm_spe_run_decoder() is not captured in the visible code.
 * NOTE(review): the remaining parameter(s), the assignment of ts and the
 * return statement are on lines omitted from this excerpt.
 */
549 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
552 struct auxtrace_queues *queues = &spe->queues;
556 for (i = 0; i < queues->nr_queues; i++) {
557 struct auxtrace_queue *queue = &spe->queues.queue_array[i];
558 struct arm_spe_queue *speq = queue->priv;
560 if (speq && (tid == -1 || speq->tid == tid)) {
562 arm_spe_set_pid_tid_cpu(spe, queue);
563 arm_spe_run_decoder(speq, &ts);
/*
 * auxtrace process_event callback: invoked for each perf event so that SPE
 * trace decoding can be interleaved with the regular event stream.
 *
 * @session: session owning the SPE state (recovered with container_of()).
 * @event:   the perf event being processed.
 * @sample:  its parsed sample; sample->time supplies the timestamp.
 * @tool:    must have ordered_events set, otherwise an error is logged.
 *
 * In the visible code, queue decoding in both the timeless and the
 * timestamped mode is triggered only by PERF_RECORD_EXIT events.
 * NOTE(review): the early-exit checks, the initial value of timestamp, the
 * error handling and the return statements are on lines omitted from this
 * excerpt.
 */
569 static int arm_spe_process_event(struct perf_session *session,
570 union perf_event *event,
571 struct perf_sample *sample,
572 struct perf_tool *tool)
576 struct arm_spe *spe = container_of(session->auxtrace,
577 struct arm_spe, auxtrace);
582 if (!tool->ordered_events) {
583 pr_err("SPE trace requires ordered events\n");
/* Use the sample time only when it is neither 0 nor (u64)-1. */
587 if (sample->time && (sample->time != (u64) -1))
588 timestamp = sample->time;
/* Pick up newly queued trace data before decoding. */
592 if (timestamp || spe->timeless_decoding) {
593 err = arm_spe__update_queues(spe);
598 if (spe->timeless_decoding) {
599 if (event->header.type == PERF_RECORD_EXIT) {
600 err = arm_spe_process_timeless_queues(spe,
604 } else if (timestamp) {
605 if (event->header.type == PERF_RECORD_EXIT) {
606 err = arm_spe_process_queues(spe, timestamp);
/*
 * auxtrace process_auxtrace_event callback: when the trace data has not
 * already been queued (spe->data_queued is false), add the AUXTRACE event's
 * data to spe->queues and optionally dump it.
 *
 * @session: session owning the SPE state.
 * @event:   the AUXTRACE event.
 * @tool:    unused (marked __maybe_unused).
 *
 * NOTE(review): the pipe-mode branch body, the error handling, the dump
 * condition and the return statements are on lines omitted from this
 * excerpt.
 */
615 static int arm_spe_process_auxtrace_event(struct perf_session *session,
616 union perf_event *event,
617 struct perf_tool *tool __maybe_unused)
619 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
622 if (!spe->data_queued) {
623 struct auxtrace_buffer *buffer;
625 int fd = perf_data__fd(session->data);
628 if (perf_data__is_pipe(session->data)) {
/* The current file position is recorded as the trace data's offset. */
631 data_offset = lseek(fd, 0, SEEK_CUR);
632 if (data_offset == -1)
636 err = auxtrace_queues__add_event(&spe->queues, session, event,
637 data_offset, &buffer);
641 /* Dump here now we have copied a piped trace out of the pipe */
643 if (auxtrace_buffer__get_data(buffer, fd)) {
644 arm_spe_dump_event(spe, buffer->data,
646 auxtrace_buffer__put_data(buffer);
/*
 * auxtrace flush_events callback: pick up any newly queued data and decode
 * everything that is still pending - all threads in timeless mode, or all
 * queues up to MAX_TIMESTAMP otherwise.
 *
 * NOTE(review): both parameters are annotated __maybe_unused although
 * session and tool are each dereferenced below.
 * NOTE(review): the early returns and the check on ret are on lines omitted
 * from this excerpt.
 */
654 static int arm_spe_flush(struct perf_session *session __maybe_unused,
655 struct perf_tool *tool __maybe_unused)
657 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
664 if (!tool->ordered_events)
667 ret = arm_spe__update_queues(spe);
671 if (spe->timeless_decoding)
672 return arm_spe_process_timeless_queues(spe, -1,
675 return arm_spe_process_queues(spe, MAX_TIMESTAMP);
/*
 * Release everything owned by one arm_spe_queue: its thread reference, its
 * decoder and its event buffer.
 * @priv is the queue's private pointer (a struct arm_spe_queue).
 * NOTE(review): any NULL check on speq and the freeing of speq itself are
 * on lines omitted from this excerpt.
 */
678 static void arm_spe_free_queue(void *priv)
680 struct arm_spe_queue *speq = priv;
684 thread__zput(speq->thread);
685 arm_spe_decoder_free(speq->decoder);
686 zfree(&speq->event_buf);
/*
 * auxtrace free_events callback: free the per-queue decoding state of every
 * queue, clear the priv pointers, then free the queues themselves.
 */
690 static void arm_spe_free_events(struct perf_session *session)
692 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
694 struct auxtrace_queues *queues = &spe->queues;
697 for (i = 0; i < queues->nr_queues; i++) {
698 arm_spe_free_queue(queues->queue_array[i].priv);
/* Cleared so the freed state cannot be reached through the queue again. */
699 queues->queue_array[i].priv = NULL;
701 auxtrace_queues__free(queues);
/*
 * auxtrace free callback: tear down the SPE state of a session - free the
 * heap, free all queued events, and detach from the session.
 * NOTE(review): the freeing of the arm_spe structure itself is presumably
 * on a line omitted from this excerpt - confirm.
 */
704 static void arm_spe_free(struct perf_session *session)
706 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
709 auxtrace_heap__free(&spe->heap);
710 arm_spe_free_events(session);
711 session->auxtrace = NULL;
/*
 * auxtrace evsel_is_auxtrace callback: an evsel belongs to the SPE trace
 * when its attr.type equals the SPE PMU type recorded in spe->pmu_type.
 */
715 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
718 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
720 return evsel->core.attr.type == spe->pmu_type;
/*
 * printf formats for the AUXTRACE_INFO private values, indexed by the same
 * constants as the priv[] array; used by arm_spe_print_info().
 */
723 static const char * const arm_spe_info_fmts[] = {
724 [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
/*
 * Print the SPE PMU type stored in @arr (the AUXTRACE_INFO private array)
 * to stdout.
 * NOTE(review): any condition guarding the print is on lines omitted from
 * this excerpt.
 */
727 static void arm_spe_print_info(__u64 *arr)
732 fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
/*
 * Context passed through perf_event__synthesize_attr(): the tool pointer
 * handed to the callback is &dummy_tool, from which container_of() recovers
 * this structure and thus the session.
 */
735 struct arm_spe_synth {
736 struct perf_tool dummy_tool;
737 struct perf_session *session;
/*
 * Callback for perf_event__synthesize_attr(): recover the enclosing
 * arm_spe_synth from @tool and deliver @event to its session.
 * @sample and @machine are unused (marked __maybe_unused).
 */
740 static int arm_spe_event_synth(struct perf_tool *tool,
741 union perf_event *event,
742 struct perf_sample *sample __maybe_unused,
743 struct machine *machine __maybe_unused)
745 struct arm_spe_synth *arm_spe_synth =
746 container_of(tool, struct arm_spe_synth, dummy_tool);
748 return perf_session__deliver_synth_event(arm_spe_synth->session,
/*
 * Synthesize an attribute event for @attr with the single id @id and
 * deliver it to @session through arm_spe_event_synth().
 * Returns the result of perf_event__synthesize_attr().
 */
752 static int arm_spe_synth_event(struct perf_session *session,
753 struct perf_event_attr *attr, u64 id)
755 struct arm_spe_synth arm_spe_synth;
/* Zero the on-stack context (including the dummy tool) before use. */
757 memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
758 arm_spe_synth.session = session;
760 return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
761 &id, arm_spe_event_synth);
/*
 * Give the evsel whose first id equals @id a heap-allocated copy of the
 * supplied name.
 * NOTE(review): the name parameter's declaration, any release of a previous
 * evsel->name, any check of the strdup() result and the loop exit are on
 * lines omitted from this excerpt.
 */
764 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
769 evlist__for_each_entry(evlist, evsel) {
770 if (evsel->core.id && evsel->core.id[0] == id) {
773 evsel->name = strdup(name);
/*
 * Create the synthesized events requested by spe->synth_opts. The attribute
 * template is derived from the session's SPE evsel; for every enabled
 * category an attribute event is synthesized, its id is remembered in @spe,
 * the matching sample_* flag is set and the new evsel is named.
 *
 * NOTE(review): the same id variable is passed for every event here; the
 * lines that advance it between events, the error checks after each
 * arm_spe_synth_event() call and the return statements are on lines omitted
 * from this excerpt - confirm against the full file.
 */
780 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
782 struct evlist *evlist = session->evlist;
784 struct perf_event_attr attr;
/* Find the evsel that belongs to the SPE PMU. */
789 evlist__for_each_entry(evlist, evsel) {
790 if (evsel->core.attr.type == spe->pmu_type) {
797 pr_debug("No selected events with SPE trace data\n");
/* Build the attribute template shared by all synthesized events. */
801 memset(&attr, 0, sizeof(struct perf_event_attr));
802 attr.size = sizeof(struct perf_event_attr);
803 attr.type = PERF_TYPE_HARDWARE;
804 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
805 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
/* The time bit is cleared for timeless decoding. */
807 if (spe->timeless_decoding)
808 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
810 attr.sample_type |= PERF_SAMPLE_TIME;
/* Exclusion and format settings are copied from the SPE evsel. */
812 attr.exclude_user = evsel->core.attr.exclude_user;
813 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
814 attr.exclude_hv = evsel->core.attr.exclude_hv;
815 attr.exclude_host = evsel->core.attr.exclude_host;
816 attr.exclude_guest = evsel->core.attr.exclude_guest;
817 attr.sample_id_all = evsel->core.attr.sample_id_all;
818 attr.read_format = evsel->core.attr.read_format;
820 /* create new id val to be a fixed offset from evsel id */
821 id = evsel->core.id[0] + 1000000000;
826 if (spe->synth_opts.flc) {
827 spe->sample_flc = true;
829 /* Level 1 data cache miss */
830 err = arm_spe_synth_event(session, &attr, id);
833 spe->l1d_miss_id = id;
834 arm_spe_set_event_name(evlist, id, "l1d-miss");
837 /* Level 1 data cache access */
838 err = arm_spe_synth_event(session, &attr, id);
841 spe->l1d_access_id = id;
842 arm_spe_set_event_name(evlist, id, "l1d-access");
846 if (spe->synth_opts.llc) {
847 spe->sample_llc = true;
849 /* Last level cache miss */
850 err = arm_spe_synth_event(session, &attr, id);
853 spe->llc_miss_id = id;
854 arm_spe_set_event_name(evlist, id, "llc-miss");
857 /* Last level cache access */
858 err = arm_spe_synth_event(session, &attr, id);
861 spe->llc_access_id = id;
862 arm_spe_set_event_name(evlist, id, "llc-access");
866 if (spe->synth_opts.tlb) {
867 spe->sample_tlb = true;
/* TLB miss event. */
870 err = arm_spe_synth_event(session, &attr, id);
873 spe->tlb_miss_id = id;
874 arm_spe_set_event_name(evlist, id, "tlb-miss");
/* TLB access event. */
878 err = arm_spe_synth_event(session, &attr, id);
881 spe->tlb_access_id = id;
882 arm_spe_set_event_name(evlist, id, "tlb-access");
886 if (spe->synth_opts.branches) {
887 spe->sample_branch = true;
/* Branch miss event. */
890 err = arm_spe_synth_event(session, &attr, id);
893 spe->branch_miss_id = id;
894 arm_spe_set_event_name(evlist, id, "branch-miss");
898 if (spe->synth_opts.remote_access) {
899 spe->sample_remote_access = true;
/* Remote access event. */
902 err = arm_spe_synth_event(session, &attr, id);
905 spe->remote_access_id = id;
906 arm_spe_set_event_name(evlist, id, "remote-access");
/*
 * Entry point for a PERF_RECORD_AUXTRACE_INFO event describing Arm SPE
 * data: allocate the session's arm_spe state, initialise its queues,
 * register the auxtrace callbacks on @session, set up the synthesized
 * events and process the auxtrace index.
 *
 * @event:   the AUXTRACE_INFO event; priv[ARM_SPE_PMU_TYPE] carries the PMU
 *           type.
 * @session: session the trace belongs to.
 *
 * On the err_free_queues path the queues are freed and session->auxtrace is
 * reset to NULL.
 * NOTE(review): the return value for an undersized event, the allocation
 * failure handling, the dump-mode handling, the labels and the return
 * statements are on lines omitted from this excerpt.
 */
913 int arm_spe_process_auxtrace_info(union perf_event *event,
914 struct perf_session *session)
916 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
/* Minimum size of the private area that must follow the fixed header. */
917 size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
921 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
925 spe = zalloc(sizeof(struct arm_spe));
929 err = auxtrace_queues__init(&spe->queues);
933 spe->session = session;
934 spe->machine = &session->machines.host; /* No kvm support */
935 spe->auxtrace_type = auxtrace_info->type;
936 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
938 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
/* Register the SPE implementation of the auxtrace callbacks. */
939 spe->auxtrace.process_event = arm_spe_process_event;
940 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
941 spe->auxtrace.flush_events = arm_spe_flush;
942 spe->auxtrace.free_events = arm_spe_free_events;
943 spe->auxtrace.free = arm_spe_free;
944 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
945 session->auxtrace = &spe->auxtrace;
947 arm_spe_print_info(&auxtrace_info->priv[0]);
/* Use the options given for the session when set, otherwise the defaults. */
952 if (session->itrace_synth_opts && session->itrace_synth_opts->set)
953 spe->synth_opts = *session->itrace_synth_opts;
955 itrace_synth_opts__set_default(&spe->synth_opts, false);
957 err = arm_spe_synth_events(spe, session);
959 goto err_free_queues;
961 err = auxtrace_queues__process_index(&spe->queues, session);
963 goto err_free_queues;
/* Data already queued from the index need not be queued again per event. */
965 if (spe->queues.populated)
966 spe->data_queued = true;
971 auxtrace_queues__free(&spe->queues);
972 session->auxtrace = NULL;