Merge tag 'drm-misc-next-2021-07-22' of git://anongit.freedesktop.org/drm/drm-misc...
[linux-2.6-microblaze.git] / tools / perf / util / arm-spe.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36
37 #define MAX_TIMESTAMP (~0ULL)
38
/*
 * Per-session Arm SPE decoding state.  The embedded 'auxtrace' member is
 * what the session points at; the callbacks in this file recover the
 * enclosing structure from it with container_of().
 */
struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	/* Queues ordered by the timestamp they were added with */
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	/* Compared against evsel attr.type to recognise SPE events */
	u32				pmu_type;

	/* Parameters for converting between perf time and record timestamps */
	struct perf_tsc_conversion	tc;

	/* Non-zero: samples carry no time and queues are not time-ordered */
	u8				timeless_decoding;
	/* Non-zero: auxtrace events are not queued again when processed */
	u8				data_queued;

	/* Switches selecting which kinds of sample arm_spe_sample() emits */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	/* Sample id / stream id used for each kind of synthesized sample */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	/* Filled in lazily from machine__kernel_start(); 0 means not yet read */
	u64				kernel_start;

	unsigned long			num_events;
};
75
/*
 * Decoding state attached to one auxtrace queue (stored in queue->priv).
 */
struct arm_spe_queue {
	struct arm_spe			*spe;
	/* Index of this queue in spe->queues.queue_array */
	unsigned int			queue_nr;
	/* Buffer most recently fetched by arm_spe_get_trace() */
	struct auxtrace_buffer		*buffer;
	/* Last non-empty buffer handed to the decoder; dropped on the next fetch */
	struct auxtrace_buffer		*old_buffer;
	/* Scratch event of PERF_SAMPLE_MAX_SIZE bytes used for synthesized samples */
	union perf_event		*event_buf;
	/* True while the queue has an entry on spe->heap */
	bool				on_heap;
	bool				done;
	/* -1 until known */
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	/* Largest record timestamp seen so far on this queue */
	u64				timestamp;
	struct thread			*thread;
};
92
93 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
94                          unsigned char *buf, size_t len)
95 {
96         struct arm_spe_pkt packet;
97         size_t pos = 0;
98         int ret, pkt_len, i;
99         char desc[ARM_SPE_PKT_DESC_MAX];
100         const char *color = PERF_COLOR_BLUE;
101
102         color_fprintf(stdout, color,
103                       ". ... ARM SPE data: size %zu bytes\n",
104                       len);
105
106         while (len) {
107                 ret = arm_spe_get_packet(buf, len, &packet);
108                 if (ret > 0)
109                         pkt_len = ret;
110                 else
111                         pkt_len = 1;
112                 printf(".");
113                 color_fprintf(stdout, color, "  %08x: ", pos);
114                 for (i = 0; i < pkt_len; i++)
115                         color_fprintf(stdout, color, " %02x", buf[i]);
116                 for (; i < 16; i++)
117                         color_fprintf(stdout, color, "   ");
118                 if (ret > 0) {
119                         ret = arm_spe_pkt_desc(&packet, desc,
120                                                ARM_SPE_PKT_DESC_MAX);
121                         if (!ret)
122                                 color_fprintf(stdout, color, " %s\n", desc);
123                 } else {
124                         color_fprintf(stdout, color, " Bad packet!\n");
125                 }
126                 pos += pkt_len;
127                 buf += pkt_len;
128                 len -= pkt_len;
129         }
130 }
131
/* Print a "." separator line and then dump the SPE trace data in buf. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
138
139 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
140 {
141         struct arm_spe_queue *speq = data;
142         struct auxtrace_buffer *buffer = speq->buffer;
143         struct auxtrace_buffer *old_buffer = speq->old_buffer;
144         struct auxtrace_queue *queue;
145
146         queue = &speq->spe->queues.queue_array[speq->queue_nr];
147
148         buffer = auxtrace_buffer__next(queue, buffer);
149         /* If no more data, drop the previous auxtrace_buffer and return */
150         if (!buffer) {
151                 if (old_buffer)
152                         auxtrace_buffer__drop_data(old_buffer);
153                 b->len = 0;
154                 return 0;
155         }
156
157         speq->buffer = buffer;
158
159         /* If the aux_buffer doesn't have data associated, try to load it */
160         if (!buffer->data) {
161                 /* get the file desc associated with the perf data file */
162                 int fd = perf_data__fd(speq->spe->session->data);
163
164                 buffer->data = auxtrace_buffer__get_data(buffer, fd);
165                 if (!buffer->data)
166                         return -ENOMEM;
167         }
168
169         b->len = buffer->size;
170         b->buf = buffer->data;
171
172         if (b->len) {
173                 if (old_buffer)
174                         auxtrace_buffer__drop_data(old_buffer);
175                 speq->old_buffer = buffer;
176         } else {
177                 auxtrace_buffer__drop_data(buffer);
178                 return arm_spe_get_trace(b, data);
179         }
180
181         return 0;
182 }
183
184 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
185                 unsigned int queue_nr)
186 {
187         struct arm_spe_params params = { .get_trace = 0, };
188         struct arm_spe_queue *speq;
189
190         speq = zalloc(sizeof(*speq));
191         if (!speq)
192                 return NULL;
193
194         speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
195         if (!speq->event_buf)
196                 goto out_free;
197
198         speq->spe = spe;
199         speq->queue_nr = queue_nr;
200         speq->pid = -1;
201         speq->tid = -1;
202         speq->cpu = -1;
203
204         /* params set */
205         params.get_trace = arm_spe_get_trace;
206         params.data = speq;
207
208         /* create new decoder */
209         speq->decoder = arm_spe_decoder_new(&params);
210         if (!speq->decoder)
211                 goto out_free;
212
213         return speq;
214
215 out_free:
216         zfree(&speq->event_buf);
217         free(speq);
218
219         return NULL;
220 }
221
222 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
223 {
224         return ip >= spe->kernel_start ?
225                 PERF_RECORD_MISC_KERNEL :
226                 PERF_RECORD_MISC_USER;
227 }
228
229 static void arm_spe_prep_sample(struct arm_spe *spe,
230                                 struct arm_spe_queue *speq,
231                                 union perf_event *event,
232                                 struct perf_sample *sample)
233 {
234         struct arm_spe_record *record = &speq->decoder->record;
235
236         if (!spe->timeless_decoding)
237                 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
238
239         sample->ip = record->from_ip;
240         sample->cpumode = arm_spe_cpumode(spe, sample->ip);
241         sample->pid = speq->pid;
242         sample->tid = speq->tid;
243         sample->period = 1;
244         sample->cpu = speq->cpu;
245
246         event->sample.header.type = PERF_RECORD_SAMPLE;
247         event->sample.header.misc = sample->cpumode;
248         event->sample.header.size = sizeof(struct perf_event_header);
249 }
250
251 static inline int
252 arm_spe_deliver_synth_event(struct arm_spe *spe,
253                             struct arm_spe_queue *speq __maybe_unused,
254                             union perf_event *event,
255                             struct perf_sample *sample)
256 {
257         int ret;
258
259         ret = perf_session__deliver_synth_event(spe->session, event, sample);
260         if (ret)
261                 pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
262
263         return ret;
264 }
265
266 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
267                                      u64 spe_events_id, u64 data_src)
268 {
269         struct arm_spe *spe = speq->spe;
270         struct arm_spe_record *record = &speq->decoder->record;
271         union perf_event *event = speq->event_buf;
272         struct perf_sample sample = { .ip = 0, };
273
274         arm_spe_prep_sample(spe, speq, event, &sample);
275
276         sample.id = spe_events_id;
277         sample.stream_id = spe_events_id;
278         sample.addr = record->virt_addr;
279         sample.phys_addr = record->phys_addr;
280         sample.data_src = data_src;
281
282         return arm_spe_deliver_synth_event(spe, speq, event, &sample);
283 }
284
285 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
286                                         u64 spe_events_id)
287 {
288         struct arm_spe *spe = speq->spe;
289         struct arm_spe_record *record = &speq->decoder->record;
290         union perf_event *event = speq->event_buf;
291         struct perf_sample sample = { .ip = 0, };
292
293         arm_spe_prep_sample(spe, speq, event, &sample);
294
295         sample.id = spe_events_id;
296         sample.stream_id = spe_events_id;
297         sample.addr = record->to_ip;
298
299         return arm_spe_deliver_synth_event(spe, speq, event, &sample);
300 }
301
302 #define SPE_MEM_TYPE    (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
303                          ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
304                          ARM_SPE_REMOTE_ACCESS)
305
306 static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
307 {
308         if (type & SPE_MEM_TYPE)
309                 return true;
310
311         return false;
312 }
313
314 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
315 {
316         union perf_mem_data_src data_src = { 0 };
317
318         if (record->op == ARM_SPE_LD)
319                 data_src.mem_op = PERF_MEM_OP_LOAD;
320         else
321                 data_src.mem_op = PERF_MEM_OP_STORE;
322
323         if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
324                 data_src.mem_lvl = PERF_MEM_LVL_L3;
325
326                 if (record->type & ARM_SPE_LLC_MISS)
327                         data_src.mem_lvl |= PERF_MEM_LVL_MISS;
328                 else
329                         data_src.mem_lvl |= PERF_MEM_LVL_HIT;
330         } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
331                 data_src.mem_lvl = PERF_MEM_LVL_L1;
332
333                 if (record->type & ARM_SPE_L1D_MISS)
334                         data_src.mem_lvl |= PERF_MEM_LVL_MISS;
335                 else
336                         data_src.mem_lvl |= PERF_MEM_LVL_HIT;
337         }
338
339         if (record->type & ARM_SPE_REMOTE_ACCESS)
340                 data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
341
342         if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
343                 data_src.mem_dtlb = PERF_MEM_TLB_WK;
344
345                 if (record->type & ARM_SPE_TLB_MISS)
346                         data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
347                 else
348                         data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
349         }
350
351         return data_src.val;
352 }
353
354 static int arm_spe_sample(struct arm_spe_queue *speq)
355 {
356         const struct arm_spe_record *record = &speq->decoder->record;
357         struct arm_spe *spe = speq->spe;
358         u64 data_src;
359         int err;
360
361         data_src = arm_spe__synth_data_source(record);
362
363         if (spe->sample_flc) {
364                 if (record->type & ARM_SPE_L1D_MISS) {
365                         err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
366                                                         data_src);
367                         if (err)
368                                 return err;
369                 }
370
371                 if (record->type & ARM_SPE_L1D_ACCESS) {
372                         err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
373                                                         data_src);
374                         if (err)
375                                 return err;
376                 }
377         }
378
379         if (spe->sample_llc) {
380                 if (record->type & ARM_SPE_LLC_MISS) {
381                         err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
382                                                         data_src);
383                         if (err)
384                                 return err;
385                 }
386
387                 if (record->type & ARM_SPE_LLC_ACCESS) {
388                         err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
389                                                         data_src);
390                         if (err)
391                                 return err;
392                 }
393         }
394
395         if (spe->sample_tlb) {
396                 if (record->type & ARM_SPE_TLB_MISS) {
397                         err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
398                                                         data_src);
399                         if (err)
400                                 return err;
401                 }
402
403                 if (record->type & ARM_SPE_TLB_ACCESS) {
404                         err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
405                                                         data_src);
406                         if (err)
407                                 return err;
408                 }
409         }
410
411         if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
412                 err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
413                 if (err)
414                         return err;
415         }
416
417         if (spe->sample_remote_access &&
418             (record->type & ARM_SPE_REMOTE_ACCESS)) {
419                 err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
420                                                 data_src);
421                 if (err)
422                         return err;
423         }
424
425         if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
426                 err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
427                 if (err)
428                         return err;
429         }
430
431         return 0;
432 }
433
434 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
435 {
436         struct arm_spe *spe = speq->spe;
437         struct arm_spe_record *record;
438         int ret;
439
440         if (!spe->kernel_start)
441                 spe->kernel_start = machine__kernel_start(spe->machine);
442
443         while (1) {
444                 /*
445                  * The usual logic is firstly to decode the packets, and then
446                  * based the record to synthesize sample; but here the flow is
447                  * reversed: it calls arm_spe_sample() for synthesizing samples
448                  * prior to arm_spe_decode().
449                  *
450                  * Two reasons for this code logic:
451                  * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
452                  * has decoded trace data and generated a record, but the record
453                  * is left to generate sample until run to here, so it's correct
454                  * to synthesize sample for the left record.
455                  * 2. After decoding trace data, it needs to compare the record
456                  * timestamp with the coming perf event, if the record timestamp
457                  * is later than the perf event, it needs bail out and pushs the
458                  * record into auxtrace heap, thus the record can be deferred to
459                  * synthesize sample until run to here at the next time; so this
460                  * can correlate samples between Arm SPE trace data and other
461                  * perf events with correct time ordering.
462                  */
463                 ret = arm_spe_sample(speq);
464                 if (ret)
465                         return ret;
466
467                 ret = arm_spe_decode(speq->decoder);
468                 if (!ret) {
469                         pr_debug("No data or all data has been processed.\n");
470                         return 1;
471                 }
472
473                 /*
474                  * Error is detected when decode SPE trace data, continue to
475                  * the next trace data and find out more records.
476                  */
477                 if (ret < 0)
478                         continue;
479
480                 record = &speq->decoder->record;
481
482                 /* Update timestamp for the last record */
483                 if (record->timestamp > speq->timestamp)
484                         speq->timestamp = record->timestamp;
485
486                 /*
487                  * If the timestamp of the queue is later than timestamp of the
488                  * coming perf event, bail out so can allow the perf event to
489                  * be processed ahead.
490                  */
491                 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
492                         *timestamp = speq->timestamp;
493                         return 0;
494                 }
495         }
496
497         return 0;
498 }
499
500 static int arm_spe__setup_queue(struct arm_spe *spe,
501                                struct auxtrace_queue *queue,
502                                unsigned int queue_nr)
503 {
504         struct arm_spe_queue *speq = queue->priv;
505         struct arm_spe_record *record;
506
507         if (list_empty(&queue->head) || speq)
508                 return 0;
509
510         speq = arm_spe__alloc_queue(spe, queue_nr);
511
512         if (!speq)
513                 return -ENOMEM;
514
515         queue->priv = speq;
516
517         if (queue->cpu != -1)
518                 speq->cpu = queue->cpu;
519
520         if (!speq->on_heap) {
521                 int ret;
522
523                 if (spe->timeless_decoding)
524                         return 0;
525
526 retry:
527                 ret = arm_spe_decode(speq->decoder);
528
529                 if (!ret)
530                         return 0;
531
532                 if (ret < 0)
533                         goto retry;
534
535                 record = &speq->decoder->record;
536
537                 speq->timestamp = record->timestamp;
538                 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
539                 if (ret)
540                         return ret;
541                 speq->on_heap = true;
542         }
543
544         return 0;
545 }
546
547 static int arm_spe__setup_queues(struct arm_spe *spe)
548 {
549         unsigned int i;
550         int ret;
551
552         for (i = 0; i < spe->queues.nr_queues; i++) {
553                 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
554                 if (ret)
555                         return ret;
556         }
557
558         return 0;
559 }
560
561 static int arm_spe__update_queues(struct arm_spe *spe)
562 {
563         if (spe->queues.new_data) {
564                 spe->queues.new_data = false;
565                 return arm_spe__setup_queues(spe);
566         }
567
568         return 0;
569 }
570
571 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
572 {
573         struct evsel *evsel;
574         struct evlist *evlist = spe->session->evlist;
575         bool timeless_decoding = true;
576
577         /*
578          * Circle through the list of event and complain if we find one
579          * with the time bit set.
580          */
581         evlist__for_each_entry(evlist, evsel) {
582                 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
583                         timeless_decoding = false;
584         }
585
586         return timeless_decoding;
587 }
588
589 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
590                                     struct auxtrace_queue *queue)
591 {
592         struct arm_spe_queue *speq = queue->priv;
593         pid_t tid;
594
595         tid = machine__get_current_tid(spe->machine, speq->cpu);
596         if (tid != -1) {
597                 speq->tid = tid;
598                 thread__zput(speq->thread);
599         } else
600                 speq->tid = queue->tid;
601
602         if ((!speq->thread) && (speq->tid != -1)) {
603                 speq->thread = machine__find_thread(spe->machine, -1,
604                                                     speq->tid);
605         }
606
607         if (speq->thread) {
608                 speq->pid = speq->thread->pid_;
609                 if (queue->cpu == -1)
610                         speq->cpu = speq->thread->cpu;
611         }
612 }
613
614 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
615 {
616         unsigned int queue_nr;
617         u64 ts;
618         int ret;
619
620         while (1) {
621                 struct auxtrace_queue *queue;
622                 struct arm_spe_queue *speq;
623
624                 if (!spe->heap.heap_cnt)
625                         return 0;
626
627                 if (spe->heap.heap_array[0].ordinal >= timestamp)
628                         return 0;
629
630                 queue_nr = spe->heap.heap_array[0].queue_nr;
631                 queue = &spe->queues.queue_array[queue_nr];
632                 speq = queue->priv;
633
634                 auxtrace_heap__pop(&spe->heap);
635
636                 if (spe->heap.heap_cnt) {
637                         ts = spe->heap.heap_array[0].ordinal + 1;
638                         if (ts > timestamp)
639                                 ts = timestamp;
640                 } else {
641                         ts = timestamp;
642                 }
643
644                 arm_spe_set_pid_tid_cpu(spe, queue);
645
646                 ret = arm_spe_run_decoder(speq, &ts);
647                 if (ret < 0) {
648                         auxtrace_heap__add(&spe->heap, queue_nr, ts);
649                         return ret;
650                 }
651
652                 if (!ret) {
653                         ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
654                         if (ret < 0)
655                                 return ret;
656                 } else {
657                         speq->on_heap = false;
658                 }
659         }
660
661         return 0;
662 }
663
664 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
665                                             u64 time_)
666 {
667         struct auxtrace_queues *queues = &spe->queues;
668         unsigned int i;
669         u64 ts = 0;
670
671         for (i = 0; i < queues->nr_queues; i++) {
672                 struct auxtrace_queue *queue = &spe->queues.queue_array[i];
673                 struct arm_spe_queue *speq = queue->priv;
674
675                 if (speq && (tid == -1 || speq->tid == tid)) {
676                         speq->time = time_;
677                         arm_spe_set_pid_tid_cpu(spe, queue);
678                         arm_spe_run_decoder(speq, &ts);
679                 }
680         }
681         return 0;
682 }
683
684 static int arm_spe_process_event(struct perf_session *session,
685                                  union perf_event *event,
686                                  struct perf_sample *sample,
687                                  struct perf_tool *tool)
688 {
689         int err = 0;
690         u64 timestamp;
691         struct arm_spe *spe = container_of(session->auxtrace,
692                         struct arm_spe, auxtrace);
693
694         if (dump_trace)
695                 return 0;
696
697         if (!tool->ordered_events) {
698                 pr_err("SPE trace requires ordered events\n");
699                 return -EINVAL;
700         }
701
702         if (sample->time && (sample->time != (u64) -1))
703                 timestamp = perf_time_to_tsc(sample->time, &spe->tc);
704         else
705                 timestamp = 0;
706
707         if (timestamp || spe->timeless_decoding) {
708                 err = arm_spe__update_queues(spe);
709                 if (err)
710                         return err;
711         }
712
713         if (spe->timeless_decoding) {
714                 if (event->header.type == PERF_RECORD_EXIT) {
715                         err = arm_spe_process_timeless_queues(spe,
716                                         event->fork.tid,
717                                         sample->time);
718                 }
719         } else if (timestamp) {
720                 err = arm_spe_process_queues(spe, timestamp);
721         }
722
723         return err;
724 }
725
726 static int arm_spe_process_auxtrace_event(struct perf_session *session,
727                                           union perf_event *event,
728                                           struct perf_tool *tool __maybe_unused)
729 {
730         struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
731                                              auxtrace);
732
733         if (!spe->data_queued) {
734                 struct auxtrace_buffer *buffer;
735                 off_t data_offset;
736                 int fd = perf_data__fd(session->data);
737                 int err;
738
739                 if (perf_data__is_pipe(session->data)) {
740                         data_offset = 0;
741                 } else {
742                         data_offset = lseek(fd, 0, SEEK_CUR);
743                         if (data_offset == -1)
744                                 return -errno;
745                 }
746
747                 err = auxtrace_queues__add_event(&spe->queues, session, event,
748                                 data_offset, &buffer);
749                 if (err)
750                         return err;
751
752                 /* Dump here now we have copied a piped trace out of the pipe */
753                 if (dump_trace) {
754                         if (auxtrace_buffer__get_data(buffer, fd)) {
755                                 arm_spe_dump_event(spe, buffer->data,
756                                                 buffer->size);
757                                 auxtrace_buffer__put_data(buffer);
758                         }
759                 }
760         }
761
762         return 0;
763 }
764
765 static int arm_spe_flush(struct perf_session *session __maybe_unused,
766                          struct perf_tool *tool __maybe_unused)
767 {
768         struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
769                         auxtrace);
770         int ret;
771
772         if (dump_trace)
773                 return 0;
774
775         if (!tool->ordered_events)
776                 return -EINVAL;
777
778         ret = arm_spe__update_queues(spe);
779         if (ret < 0)
780                 return ret;
781
782         if (spe->timeless_decoding)
783                 return arm_spe_process_timeless_queues(spe, -1,
784                                 MAX_TIMESTAMP - 1);
785
786         return arm_spe_process_queues(spe, MAX_TIMESTAMP);
787 }
788
789 static void arm_spe_free_queue(void *priv)
790 {
791         struct arm_spe_queue *speq = priv;
792
793         if (!speq)
794                 return;
795         thread__zput(speq->thread);
796         arm_spe_decoder_free(speq->decoder);
797         zfree(&speq->event_buf);
798         free(speq);
799 }
800
801 static void arm_spe_free_events(struct perf_session *session)
802 {
803         struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
804                                              auxtrace);
805         struct auxtrace_queues *queues = &spe->queues;
806         unsigned int i;
807
808         for (i = 0; i < queues->nr_queues; i++) {
809                 arm_spe_free_queue(queues->queue_array[i].priv);
810                 queues->queue_array[i].priv = NULL;
811         }
812         auxtrace_queues__free(queues);
813 }
814
815 static void arm_spe_free(struct perf_session *session)
816 {
817         struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
818                                              auxtrace);
819
820         auxtrace_heap__free(&spe->heap);
821         arm_spe_free_events(session);
822         session->auxtrace = NULL;
823         free(spe);
824 }
825
826 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
827                                       struct evsel *evsel)
828 {
829         struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
830
831         return evsel->core.attr.type == spe->pmu_type;
832 }
833
834 static const char * const arm_spe_info_fmts[] = {
835         [ARM_SPE_PMU_TYPE]              = "  PMU Type           %"PRId64"\n",
836 };
837
838 static void arm_spe_print_info(__u64 *arr)
839 {
840         if (!dump_trace)
841                 return;
842
843         fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
844 }
845
/*
 * Context for synthesizing attribute events: arm_spe_event_synth() is
 * handed a pointer to dummy_tool and uses container_of() on it to get back
 * to the session the event must be delivered to.
 */
struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
850
851 static int arm_spe_event_synth(struct perf_tool *tool,
852                                union perf_event *event,
853                                struct perf_sample *sample __maybe_unused,
854                                struct machine *machine __maybe_unused)
855 {
856         struct arm_spe_synth *arm_spe_synth =
857                       container_of(tool, struct arm_spe_synth, dummy_tool);
858
859         return perf_session__deliver_synth_event(arm_spe_synth->session,
860                                                  event, NULL);
861 }
862
863 static int arm_spe_synth_event(struct perf_session *session,
864                                struct perf_event_attr *attr, u64 id)
865 {
866         struct arm_spe_synth arm_spe_synth;
867
868         memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
869         arm_spe_synth.session = session;
870
871         return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
872                                            &id, arm_spe_event_synth);
873 }
874
875 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
876                                     const char *name)
877 {
878         struct evsel *evsel;
879
880         evlist__for_each_entry(evlist, evsel) {
881                 if (evsel->core.id && evsel->core.id[0] == id) {
882                         if (evsel->name)
883                                 zfree(&evsel->name);
884                         evsel->name = strdup(name);
885                         break;
886                 }
887         }
888 }
889
890 static int
891 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
892 {
893         struct evlist *evlist = session->evlist;
894         struct evsel *evsel;
895         struct perf_event_attr attr;
896         bool found = false;
897         u64 id;
898         int err;
899
900         evlist__for_each_entry(evlist, evsel) {
901                 if (evsel->core.attr.type == spe->pmu_type) {
902                         found = true;
903                         break;
904                 }
905         }
906
907         if (!found) {
908                 pr_debug("No selected events with SPE trace data\n");
909                 return 0;
910         }
911
912         memset(&attr, 0, sizeof(struct perf_event_attr));
913         attr.size = sizeof(struct perf_event_attr);
914         attr.type = PERF_TYPE_HARDWARE;
915         attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
916         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
917                             PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
918         if (spe->timeless_decoding)
919                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
920         else
921                 attr.sample_type |= PERF_SAMPLE_TIME;
922
923         attr.exclude_user = evsel->core.attr.exclude_user;
924         attr.exclude_kernel = evsel->core.attr.exclude_kernel;
925         attr.exclude_hv = evsel->core.attr.exclude_hv;
926         attr.exclude_host = evsel->core.attr.exclude_host;
927         attr.exclude_guest = evsel->core.attr.exclude_guest;
928         attr.sample_id_all = evsel->core.attr.sample_id_all;
929         attr.read_format = evsel->core.attr.read_format;
930
931         /* create new id val to be a fixed offset from evsel id */
932         id = evsel->core.id[0] + 1000000000;
933
934         if (!id)
935                 id = 1;
936
937         if (spe->synth_opts.flc) {
938                 spe->sample_flc = true;
939
940                 /* Level 1 data cache miss */
941                 err = arm_spe_synth_event(session, &attr, id);
942                 if (err)
943                         return err;
944                 spe->l1d_miss_id = id;
945                 arm_spe_set_event_name(evlist, id, "l1d-miss");
946                 id += 1;
947
948                 /* Level 1 data cache access */
949                 err = arm_spe_synth_event(session, &attr, id);
950                 if (err)
951                         return err;
952                 spe->l1d_access_id = id;
953                 arm_spe_set_event_name(evlist, id, "l1d-access");
954                 id += 1;
955         }
956
957         if (spe->synth_opts.llc) {
958                 spe->sample_llc = true;
959
960                 /* Last level cache miss */
961                 err = arm_spe_synth_event(session, &attr, id);
962                 if (err)
963                         return err;
964                 spe->llc_miss_id = id;
965                 arm_spe_set_event_name(evlist, id, "llc-miss");
966                 id += 1;
967
968                 /* Last level cache access */
969                 err = arm_spe_synth_event(session, &attr, id);
970                 if (err)
971                         return err;
972                 spe->llc_access_id = id;
973                 arm_spe_set_event_name(evlist, id, "llc-access");
974                 id += 1;
975         }
976
977         if (spe->synth_opts.tlb) {
978                 spe->sample_tlb = true;
979
980                 /* TLB miss */
981                 err = arm_spe_synth_event(session, &attr, id);
982                 if (err)
983                         return err;
984                 spe->tlb_miss_id = id;
985                 arm_spe_set_event_name(evlist, id, "tlb-miss");
986                 id += 1;
987
988                 /* TLB access */
989                 err = arm_spe_synth_event(session, &attr, id);
990                 if (err)
991                         return err;
992                 spe->tlb_access_id = id;
993                 arm_spe_set_event_name(evlist, id, "tlb-access");
994                 id += 1;
995         }
996
997         if (spe->synth_opts.branches) {
998                 spe->sample_branch = true;
999
1000                 /* Branch miss */
1001                 err = arm_spe_synth_event(session, &attr, id);
1002                 if (err)
1003                         return err;
1004                 spe->branch_miss_id = id;
1005                 arm_spe_set_event_name(evlist, id, "branch-miss");
1006                 id += 1;
1007         }
1008
1009         if (spe->synth_opts.remote_access) {
1010                 spe->sample_remote_access = true;
1011
1012                 /* Remote access */
1013                 err = arm_spe_synth_event(session, &attr, id);
1014                 if (err)
1015                         return err;
1016                 spe->remote_access_id = id;
1017                 arm_spe_set_event_name(evlist, id, "remote-access");
1018                 id += 1;
1019         }
1020
1021         if (spe->synth_opts.mem) {
1022                 spe->sample_memory = true;
1023
1024                 err = arm_spe_synth_event(session, &attr, id);
1025                 if (err)
1026                         return err;
1027                 spe->memory_id = id;
1028                 arm_spe_set_event_name(evlist, id, "memory");
1029         }
1030
1031         return 0;
1032 }
1033
1034 int arm_spe_process_auxtrace_info(union perf_event *event,
1035                                   struct perf_session *session)
1036 {
1037         struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1038         size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1039         struct perf_record_time_conv *tc = &session->time_conv;
1040         struct arm_spe *spe;
1041         int err;
1042
1043         if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1044                                         min_sz)
1045                 return -EINVAL;
1046
1047         spe = zalloc(sizeof(struct arm_spe));
1048         if (!spe)
1049                 return -ENOMEM;
1050
1051         err = auxtrace_queues__init(&spe->queues);
1052         if (err)
1053                 goto err_free;
1054
1055         spe->session = session;
1056         spe->machine = &session->machines.host; /* No kvm support */
1057         spe->auxtrace_type = auxtrace_info->type;
1058         spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1059
1060         spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1061
1062         /*
1063          * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
1064          * and the parameters for hardware clock are stored in the session
1065          * context.  Passes these parameters to the struct perf_tsc_conversion
1066          * in "spe->tc", which is used for later conversion between clock
1067          * counter and timestamp.
1068          *
1069          * For backward compatibility, copies the fields starting from
1070          * "time_cycles" only if they are contained in the event.
1071          */
1072         spe->tc.time_shift = tc->time_shift;
1073         spe->tc.time_mult = tc->time_mult;
1074         spe->tc.time_zero = tc->time_zero;
1075
1076         if (event_contains(*tc, time_cycles)) {
1077                 spe->tc.time_cycles = tc->time_cycles;
1078                 spe->tc.time_mask = tc->time_mask;
1079                 spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1080                 spe->tc.cap_user_time_short = tc->cap_user_time_short;
1081         }
1082
1083         spe->auxtrace.process_event = arm_spe_process_event;
1084         spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1085         spe->auxtrace.flush_events = arm_spe_flush;
1086         spe->auxtrace.free_events = arm_spe_free_events;
1087         spe->auxtrace.free = arm_spe_free;
1088         spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1089         session->auxtrace = &spe->auxtrace;
1090
1091         arm_spe_print_info(&auxtrace_info->priv[0]);
1092
1093         if (dump_trace)
1094                 return 0;
1095
1096         if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1097                 spe->synth_opts = *session->itrace_synth_opts;
1098         else
1099                 itrace_synth_opts__set_default(&spe->synth_opts, false);
1100
1101         err = arm_spe_synth_events(spe, session);
1102         if (err)
1103                 goto err_free_queues;
1104
1105         err = auxtrace_queues__process_index(&spe->queues, session);
1106         if (err)
1107                 goto err_free_queues;
1108
1109         if (spe->queues.populated)
1110                 spe->data_queued = true;
1111
1112         return 0;
1113
1114 err_free_queues:
1115         auxtrace_queues__free(&spe->queues);
1116         session->auxtrace = NULL;
1117 err_free:
1118         free(spe);
1119         return err;
1120 }