tools/perf/util/cs-etm.c (Linux 6.9-rc1)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17
18 #include <stdlib.h>
19
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42
43 struct cs_etm_auxtrace {
44         struct auxtrace auxtrace;
45         struct auxtrace_queues queues;
46         struct auxtrace_heap heap;
47         struct itrace_synth_opts synth_opts;
48         struct perf_session *session;
49         struct perf_tsc_conversion tc;
50
51         /*
52          * Timeless decoding has no timestamps in the trace, so overlapping mmap
53          * lookups are less accurate, but the trace data is smaller. We use context IDs
54          * in the trace instead of matching timestamps with fork records so
55          * they're not really needed in the general case. Overlapping mmaps
56          * happen in cases like between a fork and an exec.
57          */
58         bool timeless_decoding;
59
60         /*
61          * Per-thread decoding ignores the trace channel ID and instead assumes that
62          * everything in a buffer comes from the same process regardless of
63          * which CPU it ran on. It also implies no context IDs so the TID is
64          * taken from the auxtrace buffer.
65          */
66         bool per_thread_decoding;
67         bool snapshot_mode;
68         bool data_queued;
69         bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70
71         int num_cpu;
72         u64 latest_kernel_timestamp;
73         u32 auxtrace_type;
74         u64 branches_sample_type;
75         u64 branches_id;
76         u64 instructions_sample_type;
77         u64 instructions_sample_period;
78         u64 instructions_id;
79         u64 **metadata;
80         unsigned int pmu_type;
81         enum cs_etm_pid_fmt pid_fmt;
82 };
83
84 struct cs_etm_traceid_queue {
85         u8 trace_chan_id;
86         u64 period_instructions;
87         size_t last_branch_pos;
88         union perf_event *event_buf;
89         struct thread *thread;
90         struct thread *prev_packet_thread;
91         ocsd_ex_level prev_packet_el;
92         ocsd_ex_level el;
93         struct branch_stack *last_branch;
94         struct branch_stack *last_branch_rb;
95         struct cs_etm_packet *prev_packet;
96         struct cs_etm_packet *packet;
97         struct cs_etm_packet_queue packet_queue;
98 };
99
100 struct cs_etm_queue {
101         struct cs_etm_auxtrace *etm;
102         struct cs_etm_decoder *decoder;
103         struct auxtrace_buffer *buffer;
104         unsigned int queue_nr;
105         u8 pending_timestamp_chan_id;
106         u64 offset;
107         const unsigned char *buf;
108         size_t buf_len, buf_used;
109         /* Conversion between traceID and index in traceid_queues array */
110         struct intlist *traceid_queues_list;
111         struct cs_etm_traceid_queue **traceid_queues;
112 };
113
114 /* RB tree for quick conversion between traceID and metadata pointers */
115 static struct intlist *traceid_list;
116
117 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
118 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
119                                            pid_t tid);
120 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
121 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
122
123 /* PTM's ETMIDR[11:8] is set to 0b0011 */
124 #define ETMIDR_PTM_VERSION 0x00000300
125
126 /*
127  * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
128  * work with.  One option is to modify the auxtrace_heap_XYZ() API, or simply
129  * encode the etm queue number as the upper 16 bits and the channel as
130  * the lower 16 bits.
131  */
132 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
133                       ((queue_nr) << 16 | (trace_chan_id))
134 #define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
135 #define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
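/*
 * Illustrative sketch, not part of the original file: a helper (name
 * invented) showing that the packing macros above round-trip, assuming
 * queue_nr fits in the upper 16 bits.
 */
static inline bool cs_etm__example_pack_roundtrip(unsigned int queue_nr,
                                                  u8 trace_chan_id)
{
        unsigned int cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);

        return TO_QUEUE_NR(cs_queue_nr) == queue_nr &&
               TO_TRACE_CHAN_ID(cs_queue_nr) == trace_chan_id;
}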
136
137 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
138 {
139         etmidr &= ETMIDR_PTM_VERSION;
140
141         if (etmidr == ETMIDR_PTM_VERSION)
142                 return CS_ETM_PROTO_PTM;
143
144         return CS_ETM_PROTO_ETMV3;
145 }
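/*
 * Illustrative sketch, not part of the original file: invented ETMIDR values
 * exercising the check above.  Only bits [11:8] matter; 0b0011 selects PTM,
 * anything else falls back to ETMv3.
 */
static inline void cs_etm__example_v7_versions(void)
{
        assert(cs_etm__get_v7_protocol_version(0x00000300) == CS_ETM_PROTO_PTM);
        assert(cs_etm__get_v7_protocol_version(0x00000100) == CS_ETM_PROTO_ETMV3);
}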
146
147 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
148 {
149         struct int_node *inode;
150         u64 *metadata;
151
152         inode = intlist__find(traceid_list, trace_chan_id);
153         if (!inode)
154                 return -EINVAL;
155
156         metadata = inode->priv;
157         *magic = metadata[CS_ETM_MAGIC];
158         return 0;
159 }
160
161 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
162 {
163         struct int_node *inode;
164         u64 *metadata;
165
166         inode = intlist__find(traceid_list, trace_chan_id);
167         if (!inode)
168                 return -EINVAL;
169
170         metadata = inode->priv;
171         *cpu = (int)metadata[CS_ETM_CPU];
172         return 0;
173 }
174
175 /*
176  * The returned PID format is presented as an enum:
177  *
178  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180  *   CS_ETM_PIDFMT_NONE: No context IDs
181  *
182  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183  * are enabled at the same time when the session runs on an EL2 kernel.
184  * This means both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be
185  * recorded in the trace data; in that case the tool selectively uses
186  * CONTEXTIDR_EL2 as the PID.
187  *
188  * The result is cached in etm->pid_fmt so this function only needs to be called
189  * when processing the aux info.
190  */
191 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
192 {
193         u64 val;
194
195         if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196                 val = metadata[CS_ETM_ETMCR];
197                 /* CONTEXTIDR is traced */
198                 if (val & BIT(ETM_OPT_CTXTID))
199                         return CS_ETM_PIDFMT_CTXTID;
200         } else {
201                 val = metadata[CS_ETMV4_TRCCONFIGR];
202                 /* CONTEXTIDR_EL2 is traced */
203                 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204                         return CS_ETM_PIDFMT_CTXTID2;
205                 /* CONTEXTIDR_EL1 is traced */
206                 else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207                         return CS_ETM_PIDFMT_CTXTID;
208         }
209
210         return CS_ETM_PIDFMT_NONE;
211 }
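/*
 * Illustrative sketch, not part of the original file: deriving the PID
 * format from invented ETMv4 metadata where only CONTEXTIDR_EL1 tracing is
 * enabled.  Indices and bit names come from cs-etm.h and coresight-pmu.h.
 */
static inline void cs_etm__example_pid_fmt(void)
{
        u64 metadata[CS_ETMV4_PRIV_MAX] = { 0 };

        metadata[CS_ETM_MAGIC] = __perf_cs_etmv4_magic;
        metadata[CS_ETMV4_TRCCONFIGR] = BIT(ETM4_CFG_BIT_CTXTID);

        /* EL1 context IDs traced, no VMID -> plain CTXTID format */
        assert(cs_etm__init_pid_fmt(metadata) == CS_ETM_PIDFMT_CTXTID);
}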
212
213 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
214 {
215         return etmq->etm->pid_fmt;
216 }
217
218 static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
219 {
220         struct int_node *inode;
221
222         /* Get an RB node for this CPU */
223         inode = intlist__findnew(traceid_list, trace_chan_id);
224
225         /* Something went wrong, no need to continue */
226         if (!inode)
227                 return -ENOMEM;
228
229         /*
230          * The node for that CPU should not be taken.
231          * Back out if that's the case.
232          */
233         if (inode->priv)
234                 return -EINVAL;
235
236         /* All good, associate the traceID with the metadata pointer */
237         inode->priv = cpu_metadata;
238
239         return 0;
240 }
241
242 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
243 {
244         u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
245
246         switch (cs_etm_magic) {
247         case __perf_cs_etmv3_magic:
248                 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249                                       CORESIGHT_TRACE_ID_VAL_MASK);
250                 break;
251         case __perf_cs_etmv4_magic:
252         case __perf_cs_ete_magic:
253                 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254                                       CORESIGHT_TRACE_ID_VAL_MASK);
255                 break;
256         default:
257                 return -EINVAL;
258         }
259         return 0;
260 }
261
262 /*
263  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264  * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
265  */
266 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
267 {
268         u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
269
270         switch (cs_etm_magic) {
271         case __perf_cs_etmv3_magic:
272                 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
273                 break;
274         case __perf_cs_etmv4_magic:
275         case __perf_cs_ete_magic:
276                 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
277                 break;
278
279         default:
280                 return -EINVAL;
281         }
282         return 0;
283 }
284
285 /*
286  * Get the index into the metadata array for a specific CPU.
287  */
289 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
290 {
291         int i;
292
293         for (i = 0; i < etm->num_cpu; i++) {
294                 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu)
295                         return i;
296         }
298
299         return -1;
300 }
301
302 /*
303  * Get the metadata for a specific CPU from the metadata array.
304  */
306 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
307 {
308         int idx = get_cpu_data_idx(etm, cpu);
309
310         return (idx != -1) ? etm->metadata[idx] : NULL;
311 }
312
313 /*
314  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
315  *
316  * The payload associates the Trace ID and the CPU.
317  * The routine is tolerant of seeing multiple packets with the same association,
318  * but a CPU / Trace ID association changing during a session is an error.
319  */
320 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321                                             union perf_event *event)
322 {
323         struct cs_etm_auxtrace *etm;
324         struct perf_sample sample;
325         struct int_node *inode;
326         struct evsel *evsel;
327         u64 *cpu_data;
328         u64 hw_id;
329         int cpu, version, err;
330         u8 trace_chan_id, curr_chan_id;
331
332         /* extract and parse the HW ID */
333         hw_id = event->aux_output_hw_id.hw_id;
334         version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335         trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
336
337         /* check that we can handle this version */
338         if (version > CS_AUX_HW_ID_CURR_VERSION)
339                 return -EINVAL;
340
341         /* get access to the etm metadata */
342         etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
343         if (!etm || !etm->metadata)
344                 return -EINVAL;
345
346         /* parse the sample to get the CPU */
347         evsel = evlist__event2evsel(session->evlist, event);
348         if (!evsel)
349                 return -EINVAL;
350         err = evsel__parse_sample(evsel, event, &sample);
351         if (err)
352                 return err;
353         cpu = sample.cpu;
354         if (cpu == -1) {
355                 /* no CPU in the sample - possibly recorded with an old version of perf */
356                 pr_err("CS_ETM: no CPU in AUX_OUTPUT_HW_ID sample. Use a compatible perf to record.\n");
357                 return -EINVAL;
358         }
359
360         /* See if the ID is mapped to a CPU, and it matches the current CPU */
361         inode = intlist__find(traceid_list, trace_chan_id);
362         if (inode) {
363                 cpu_data = inode->priv;
364                 if ((int)cpu_data[CS_ETM_CPU] != cpu) {
365                         pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
366                         return -EINVAL;
367                 }
368
369                 /* check that the mapped ID matches */
370                 err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
371                 if (err)
372                         return err;
373                 if (curr_chan_id != trace_chan_id) {
374                         pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
375                         return -EINVAL;
376                 }
377
378                 /* mapped and matched - return OK */
379                 return 0;
380         }
381
382         cpu_data = get_cpu_data(etm, cpu);
383         if (cpu_data == NULL)
384                 return -EINVAL;
385
386         /* not one we've seen before - let's map it */
387         err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
388         if (err)
389                 return err;
390
391         /*
392          * If we are picking up the association from the packet, we need to plug
393          * the correct trace ID into the metadata for setting up decoders later.
394          */
395         err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
396         return err;
397 }
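/*
 * Illustrative sketch, not part of the original file: packing a HW_ID
 * payload of the shape parsed above, using the masks from coresight-pmu.h.
 * The helper name is invented.
 */
static inline u64 cs_etm__example_make_hw_id(u8 trace_chan_id)
{
        return FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK, CS_AUX_HW_ID_CURR_VERSION) |
               FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, trace_chan_id);
}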
398
399 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
400                                               u8 trace_chan_id)
401 {
402         /*
403          * When a timestamp packet is encountered the backend code
404          * is stopped so that the front end has time to process packets
405          * that were accumulated in the traceID queue.  Since there can
406          * be more than one channel per cs_etm_queue, we need to specify
407          * what traceID queue needs servicing.
408          */
409         etmq->pending_timestamp_chan_id = trace_chan_id;
410 }
411
412 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
413                                       u8 *trace_chan_id)
414 {
415         struct cs_etm_packet_queue *packet_queue;
416
417         if (!etmq->pending_timestamp_chan_id)
418                 return 0;
419
420         if (trace_chan_id)
421                 *trace_chan_id = etmq->pending_timestamp_chan_id;
422
423         packet_queue = cs_etm__etmq_get_packet_queue(etmq,
424                                                      etmq->pending_timestamp_chan_id);
425         if (!packet_queue)
426                 return 0;
427
428         /* Acknowledge pending status */
429         etmq->pending_timestamp_chan_id = 0;
430
431         /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
432         return packet_queue->cs_timestamp;
433 }
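/*
 * Illustrative sketch, not part of the original file: the handshake the two
 * functions above implement.  The decoder back end flags the channel that
 * hit a timestamp; the front end consumes it.  The channel value is invented.
 */
static inline u64 cs_etm__example_timestamp_handshake(struct cs_etm_queue *etmq)
{
        u8 trace_chan_id;

        /* back end: a timestamp element arrived on channel 0x10 */
        cs_etm__etmq_set_traceid_queue_timestamp(etmq, 0x10);

        /* front end: fetch it and learn which traceID queue needs servicing */
        return cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
}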
434
435 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
436 {
437         int i;
438
439         queue->head = 0;
440         queue->tail = 0;
441         queue->packet_count = 0;
442         for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
443                 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
444                 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
445                 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
446                 queue->packet_buffer[i].instr_count = 0;
447                 queue->packet_buffer[i].last_instr_taken_branch = false;
448                 queue->packet_buffer[i].last_instr_size = 0;
449                 queue->packet_buffer[i].last_instr_type = 0;
450                 queue->packet_buffer[i].last_instr_subtype = 0;
451                 queue->packet_buffer[i].last_instr_cond = 0;
452                 queue->packet_buffer[i].flags = 0;
453                 queue->packet_buffer[i].exception_number = UINT32_MAX;
454                 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
455                 queue->packet_buffer[i].cpu = INT_MIN;
456         }
457 }
458
459 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
460 {
461         int idx;
462         struct int_node *inode;
463         struct cs_etm_traceid_queue *tidq;
464         struct intlist *traceid_queues_list = etmq->traceid_queues_list;
465
466         intlist__for_each_entry(inode, traceid_queues_list) {
467                 idx = (int)(intptr_t)inode->priv;
468                 tidq = etmq->traceid_queues[idx];
469                 cs_etm__clear_packet_queue(&tidq->packet_queue);
470         }
471 }
472
473 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
474                                       struct cs_etm_traceid_queue *tidq,
475                                       u8 trace_chan_id)
476 {
477         int rc = -ENOMEM;
478         struct auxtrace_queue *queue;
479         struct cs_etm_auxtrace *etm = etmq->etm;
480
481         cs_etm__clear_packet_queue(&tidq->packet_queue);
482
483         queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
484         tidq->trace_chan_id = trace_chan_id;
485         tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
486         tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
487                                                queue->tid);
488         tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
489
490         tidq->packet = zalloc(sizeof(struct cs_etm_packet));
491         if (!tidq->packet)
492                 goto out;
493
494         tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
495         if (!tidq->prev_packet)
496                 goto out_free;
497
498         if (etm->synth_opts.last_branch) {
499                 size_t sz = sizeof(struct branch_stack);
500
501                 sz += etm->synth_opts.last_branch_sz *
502                       sizeof(struct branch_entry);
503                 tidq->last_branch = zalloc(sz);
504                 if (!tidq->last_branch)
505                         goto out_free;
506                 tidq->last_branch_rb = zalloc(sz);
507                 if (!tidq->last_branch_rb)
508                         goto out_free;
509         }
510
511         tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
512         if (!tidq->event_buf)
513                 goto out_free;
514
515         return 0;
516
517 out_free:
518         zfree(&tidq->last_branch_rb);
519         zfree(&tidq->last_branch);
520         zfree(&tidq->prev_packet);
521         zfree(&tidq->packet);
522 out:
523         return rc;
524 }
525
526 static struct cs_etm_traceid_queue
527 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
528 {
529         int idx;
530         struct int_node *inode;
531         struct intlist *traceid_queues_list;
532         struct cs_etm_traceid_queue *tidq, **traceid_queues;
533         struct cs_etm_auxtrace *etm = etmq->etm;
534
535         if (etm->per_thread_decoding)
536                 trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
537
538         traceid_queues_list = etmq->traceid_queues_list;
539
540         /*
541          * Check if a traceid_queue exists for this traceID by looking
542          * in the queue list.
543          */
544         inode = intlist__find(traceid_queues_list, trace_chan_id);
545         if (inode) {
546                 idx = (int)(intptr_t)inode->priv;
547                 return etmq->traceid_queues[idx];
548         }
549
550         /* We couldn't find a traceid_queue for this traceID, allocate one */
551         tidq = zalloc(sizeof(*tidq));
552         if (!tidq)
553                 return NULL;
556
557         /* Get a valid index for the new traceid_queue */
558         idx = intlist__nr_entries(traceid_queues_list);
559         /* Memory for the inode is freed in cs_etm__free_traceid_queues() */
560         inode = intlist__findnew(traceid_queues_list, trace_chan_id);
561         if (!inode)
562                 goto out_free;
563
564         /* Associate this traceID with this index */
565         inode->priv = (void *)(intptr_t)idx;
566
567         if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
568                 goto out_free;
569
570         /* Grow the traceid_queues array by one unit */
571         traceid_queues = etmq->traceid_queues;
572         traceid_queues = reallocarray(traceid_queues,
573                                       idx + 1,
574                                       sizeof(*traceid_queues));
575
576         /*
577          * On failure reallocarray() returns NULL and the original block of
578          * memory is left untouched.
579          */
580         if (!traceid_queues)
581                 goto out_free;
582
583         traceid_queues[idx] = tidq;
584         etmq->traceid_queues = traceid_queues;
585
586         return etmq->traceid_queues[idx];
587
588 out_free:
589         /*
590          * Function intlist__remove() removes the inode from the list
591          * and deletes the memory associated with it.
592          */
593         intlist__remove(traceid_queues_list, inode);
594         free(tidq);
595
596         return NULL;
597 }
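/*
 * Illustrative sketch, not part of the original file: the grow-by-one
 * reallocarray() pattern used above, on a plain int array.  On failure the
 * original block is untouched, so assigning only on success is safe.
 */
static inline int *cs_etm__example_grow(int *arr, int nr, int new_val)
{
        int *tmp = reallocarray(arr, nr + 1, sizeof(*arr));

        if (!tmp)
                return NULL;    /* arr is still valid and unchanged */

        tmp[nr] = new_val;
        return tmp;
}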
598
599 struct cs_etm_packet_queue
600 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
601 {
602         struct cs_etm_traceid_queue *tidq;
603
604         tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
605         if (tidq)
606                 return &tidq->packet_queue;
607
608         return NULL;
609 }
610
611 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
612                                 struct cs_etm_traceid_queue *tidq)
613 {
614         struct cs_etm_packet *tmp;
615
616         if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
617             etm->synth_opts.instructions) {
618                 /*
619                  * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
620                  * the next incoming packet.
621                  *
622                  * Threads and exception levels are also tracked for both the
623                  * previous and current packets. This is because the previous
624                  * packet is used for the 'from' IP for branch samples, so the
625                  * thread at that time must also be assigned to that sample.
626                  * Across discontinuity packets the thread can change, so by
627                  * tracking the thread for the previous packet the branch sample
628                  * will have the correct info.
629                  */
630                 tmp = tidq->packet;
631                 tidq->packet = tidq->prev_packet;
632                 tidq->prev_packet = tmp;
633                 tidq->prev_packet_el = tidq->el;
634                 thread__put(tidq->prev_packet_thread);
635                 tidq->prev_packet_thread = thread__get(tidq->thread);
636         }
637 }
638
639 static void cs_etm__packet_dump(const char *pkt_string)
640 {
641         const char *color = PERF_COLOR_BLUE;
642         int len = strlen(pkt_string);
643
644         if (len && (pkt_string[len-1] == '\n'))
645                 color_fprintf(stdout, color, "  %s", pkt_string);
646         else
647                 color_fprintf(stdout, color, "  %s\n", pkt_string);
648
649         fflush(stdout);
650 }
651
652 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
653                                           struct cs_etm_auxtrace *etm, int t_idx,
654                                           int m_idx, u32 etmidr)
655 {
656         u64 **metadata = etm->metadata;
657
658         t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
659         t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
660         t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
661 }
662
663 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
664                                           struct cs_etm_auxtrace *etm, int t_idx,
665                                           int m_idx)
666 {
667         u64 **metadata = etm->metadata;
668
669         t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
670         t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
671         t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
672         t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
673         t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
674         t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
675         t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
676 }
677
678 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
679                                           struct cs_etm_auxtrace *etm, int t_idx,
680                                           int m_idx)
681 {
682         u64 **metadata = etm->metadata;
683
684         t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
685         t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
686         t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
687         t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
688         t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
689         t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
690         t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
691         t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
692 }
693
694 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
695                                      struct cs_etm_auxtrace *etm,
696                                      bool formatted,
697                                      int sample_cpu,
698                                      int decoders)
699 {
700         int t_idx, m_idx;
701         u32 etmidr;
702         u64 architecture;
703
704         for (t_idx = 0; t_idx < decoders; t_idx++) {
705                 if (formatted)
706                         m_idx = t_idx;
707                 else {
708                         m_idx = get_cpu_data_idx(etm, sample_cpu);
709                         if (m_idx == -1) {
710                                 pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
711                                 m_idx = 0;
712                         }
713                 }
714
715                 architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
716
717                 switch (architecture) {
718                 case __perf_cs_etmv3_magic:
719                         etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
720                         cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
721                         break;
722                 case __perf_cs_etmv4_magic:
723                         cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
724                         break;
725                 case __perf_cs_ete_magic:
726                         cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
727                         break;
728                 default:
729                         return -EINVAL;
730                 }
731         }
732
733         return 0;
734 }
735
736 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
737                                        struct cs_etm_queue *etmq,
738                                        enum cs_etm_decoder_operation mode,
739                                        bool formatted)
740 {
741         int ret = -EINVAL;
742
743         if (mode >= CS_ETM_OPERATION_MAX)
744                 goto out;
745
746         d_params->packet_printer = cs_etm__packet_dump;
747         d_params->operation = mode;
748         d_params->data = etmq;
749         d_params->formatted = formatted;
750         d_params->fsyncs = false;
751         d_params->hsyncs = false;
752         d_params->frame_aligned = true;
753
754         ret = 0;
755 out:
756         return ret;
757 }
758
759 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
760                                struct auxtrace_buffer *buffer)
761 {
762         int ret;
763         const char *color = PERF_COLOR_BLUE;
764         size_t buffer_used = 0;
765
766         fprintf(stdout, "\n");
767         color_fprintf(stdout, color,
768                      ". ... CoreSight %s Trace data: size %#zx bytes\n",
769                      cs_etm_decoder__get_name(etmq->decoder), buffer->size);
770
771         do {
772                 size_t consumed;
773
774                 ret = cs_etm_decoder__process_data_block(
775                                 etmq->decoder, buffer->offset,
776                                 &((u8 *)buffer->data)[buffer_used],
777                                 buffer->size - buffer_used, &consumed);
778                 if (ret)
779                         break;
780
781                 buffer_used += consumed;
782         } while (buffer_used < buffer->size);
783
784         cs_etm_decoder__reset(etmq->decoder);
785 }
786
787 static int cs_etm__flush_events(struct perf_session *session,
788                                 struct perf_tool *tool)
789 {
790         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
791                                                    struct cs_etm_auxtrace,
792                                                    auxtrace);
793         if (dump_trace)
794                 return 0;
795
796         if (!tool->ordered_events)
797                 return -EINVAL;
798
799         if (etm->timeless_decoding) {
800                 /*
801                  * Pass tid = -1 to process all queues. But likely they will have
802                  * already been processed on PERF_RECORD_EXIT anyway.
803                  */
804                 return cs_etm__process_timeless_queues(etm, -1);
805         }
806
807         return cs_etm__process_timestamped_queues(etm);
808 }
809
810 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
811 {
812         int idx;
813         uintptr_t priv;
814         struct int_node *inode, *tmp;
815         struct cs_etm_traceid_queue *tidq;
816         struct intlist *traceid_queues_list = etmq->traceid_queues_list;
817
818         intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
819                 priv = (uintptr_t)inode->priv;
820                 idx = priv;
821
822                 /* Free this traceid_queue from the array */
823                 tidq = etmq->traceid_queues[idx];
824                 thread__zput(tidq->thread);
825                 thread__zput(tidq->prev_packet_thread);
826                 zfree(&tidq->event_buf);
827                 zfree(&tidq->last_branch);
828                 zfree(&tidq->last_branch_rb);
829                 zfree(&tidq->prev_packet);
830                 zfree(&tidq->packet);
831                 zfree(&tidq);
832
833                 /*
834                  * Function intlist__remove() removes the inode from the list
835          * and deletes the memory associated with it.
836                  */
837                 intlist__remove(traceid_queues_list, inode);
838         }
839
840         /* Then the RB tree itself */
841         intlist__delete(traceid_queues_list);
842         etmq->traceid_queues_list = NULL;
843
844         /* finally free the traceid_queues array */
845         zfree(&etmq->traceid_queues);
846 }
847
848 static void cs_etm__free_queue(void *priv)
849 {
850         struct cs_etm_queue *etmq = priv;
851
852         if (!etmq)
853                 return;
854
855         cs_etm_decoder__free(etmq->decoder);
856         cs_etm__free_traceid_queues(etmq);
857         free(etmq);
858 }
859
860 static void cs_etm__free_events(struct perf_session *session)
861 {
862         unsigned int i;
863         struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
864                                                    struct cs_etm_auxtrace,
865                                                    auxtrace);
866         struct auxtrace_queues *queues = &aux->queues;
867
868         for (i = 0; i < queues->nr_queues; i++) {
869                 cs_etm__free_queue(queues->queue_array[i].priv);
870                 queues->queue_array[i].priv = NULL;
871         }
872
873         auxtrace_queues__free(queues);
874 }
875
876 static void cs_etm__free(struct perf_session *session)
877 {
878         int i;
879         struct int_node *inode, *tmp;
880         struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
881                                                    struct cs_etm_auxtrace,
882                                                    auxtrace);
883         cs_etm__free_events(session);
884         session->auxtrace = NULL;
885
886         /* First remove all traceID/metadata nodes from the RB tree */
887         intlist__for_each_entry_safe(inode, tmp, traceid_list)
888                 intlist__remove(traceid_list, inode);
889         /* Then the RB tree itself */
890         intlist__delete(traceid_list);
891
892         for (i = 0; i < aux->num_cpu; i++)
893                 zfree(&aux->metadata[i]);
894
895         zfree(&aux->metadata);
896         zfree(&aux);
897 }
898
899 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
900                                       struct evsel *evsel)
901 {
902         struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
903                                                    struct cs_etm_auxtrace,
904                                                    auxtrace);
905
906         return evsel->core.attr.type == aux->pmu_type;
907 }
908
909 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
910                                            ocsd_ex_level el)
911 {
912         enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
913
914         /*
915          * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
916          * running at EL1, assume everything is the host.
917          */
918         if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
919                 return &etmq->etm->session->machines.host;
920
921         /*
922          * Not perfect, but otherwise assume anything in EL1 is the default
923          * guest, and everything else is the host. Distinguishing between guest
924          * and host userspaces isn't currently supported either. Neither is
925          * multiple guest support. All this does is reduce the likelihood of
926          * decode errors where we look into the host kernel maps when it should
927          * have been the guest maps.
928          */
929         switch (el) {
930         case ocsd_EL1:
931                 return machines__find_guest(&etmq->etm->session->machines,
932                                             DEFAULT_GUEST_KERNEL_ID);
933         case ocsd_EL3:
934         case ocsd_EL2:
935         case ocsd_EL0:
936         case ocsd_EL_unknown:
937         default:
938                 return &etmq->etm->session->machines.host;
939         }
940 }
941
942 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
943                            ocsd_ex_level el)
944 {
945         struct machine *machine = cs_etm__get_machine(etmq, el);
946
947         if (address >= machine__kernel_start(machine)) {
948                 if (machine__is_host(machine))
949                         return PERF_RECORD_MISC_KERNEL;
950                 else
951                         return PERF_RECORD_MISC_GUEST_KERNEL;
952         } else {
953                 if (machine__is_host(machine))
954                         return PERF_RECORD_MISC_USER;
955                 else {
956                         /*
957                          * Can't really happen at the moment because
958                          * cs_etm__get_machine() will always return
959                          * machines.host for any non EL1 trace.
960                          */
961                         return PERF_RECORD_MISC_GUEST_USER;
962                 }
963         }
964 }
965
966 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
967                               u64 address, size_t size, u8 *buffer,
968                               const ocsd_mem_space_acc_t mem_space)
969 {
970         u8  cpumode;
971         u64 offset;
972         int len;
973         struct addr_location al;
974         struct dso *dso;
975         struct cs_etm_traceid_queue *tidq;
976         int ret = 0;
977
978         if (!etmq)
979                 return 0;
980
981         addr_location__init(&al);
982         tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
983         if (!tidq)
984                 goto out;
985
986         /*
987          * We've already tracked EL alongside the PID in cs_etm__set_thread()
988          * so double check that it matches what OpenCSD thinks as well. It
989          * doesn't distinguish between EL0 and EL1 for this mem access callback
990          * so we had to do the extra tracking. Skip validation if it's any of
991          * the 'any' values.
992          */
993         if (!(mem_space == OCSD_MEM_SPACE_ANY ||
994               mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
995                 if (mem_space & OCSD_MEM_SPACE_EL1N) {
996                         /* Includes both non-secure EL1 and EL0 */
997                         assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
998                 } else if (mem_space & OCSD_MEM_SPACE_EL2)
999                         assert(tidq->el == ocsd_EL2);
1000                 else if (mem_space & OCSD_MEM_SPACE_EL3)
1001                         assert(tidq->el == ocsd_EL3);
1002         }
1003
1004         cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1005
1006         if (!thread__find_map(tidq->thread, cpumode, address, &al))
1007                 goto out;
1008
1009         dso = map__dso(al.map);
1010         if (!dso)
1011                 goto out;
1012
1013         if (dso->data.status == DSO_DATA_STATUS_ERROR &&
1014             dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1015                 goto out;
1016
1017         offset = map__map_ip(al.map, address);
1018
1019         map__load(al.map);
1020
1021         len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1022                                     offset, buffer, size);
1023
1024         if (len <= 0) {
1025                 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1026                                  "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1027                 if (!dso->auxtrace_warned) {
1028                         pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1029                                     address,
1030                                     dso->long_name ? dso->long_name : "Unknown");
1031                         dso->auxtrace_warned = true;
1032                 }
1033                 goto out;
1034         }
1035         ret = len;
1036 out:
1037         addr_location__exit(&al);
1038         return ret;
1039 }
1040
1041 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1042                                                 bool formatted, int sample_cpu)
1043 {
1044         struct cs_etm_decoder_params d_params;
1045         struct cs_etm_trace_params  *t_params = NULL;
1046         struct cs_etm_queue *etmq;
1047         /*
1048          * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1049          * needed.
1050          */
1051         int decoders = formatted ? etm->num_cpu : 1;
1052
1053         etmq = zalloc(sizeof(*etmq));
1054         if (!etmq)
1055                 return NULL;
1056
1057         etmq->traceid_queues_list = intlist__new(NULL);
1058         if (!etmq->traceid_queues_list)
1059                 goto out_free;
1060
1061         /* Use metadata to fill in trace parameters for trace decoder */
1062         t_params = zalloc(sizeof(*t_params) * decoders);
1063
1064         if (!t_params)
1065                 goto out_free;
1066
1067         if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
1068                 goto out_free;
1069
1070         /* Set decoder parameters to decode trace packets */
1071         if (cs_etm__init_decoder_params(&d_params, etmq,
1072                                         dump_trace ? CS_ETM_OPERATION_PRINT :
1073                                                      CS_ETM_OPERATION_DECODE,
1074                                         formatted))
1075                 goto out_free;
1076
1077         etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1078                                             t_params);
1079
1080         if (!etmq->decoder)
1081                 goto out_free;
1082
1083         /*
1084          * Register a function to handle all memory accesses required by
1085          * the trace decoder library.
1086          */
1087         if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1088                                               0x0L, ((u64) -1L),
1089                                               cs_etm__mem_access))
1090                 goto out_free_decoder;
1091
1092         zfree(&t_params);
1093         return etmq;
1094
1095 out_free_decoder:
1096         cs_etm_decoder__free(etmq->decoder);
1097 out_free:
1098         intlist__delete(etmq->traceid_queues_list);
1099         free(etmq);
1100
1101         return NULL;
1102 }
1103
1104 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1105                                struct auxtrace_queue *queue,
1106                                unsigned int queue_nr,
1107                                bool formatted,
1108                                int sample_cpu)
1109 {
1110         struct cs_etm_queue *etmq = queue->priv;
1111
1112         if (list_empty(&queue->head) || etmq)
1113                 return 0;
1114
1115         etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
1116
1117         if (!etmq)
1118                 return -ENOMEM;
1119
1120         queue->priv = etmq;
1121         etmq->etm = etm;
1122         etmq->queue_nr = queue_nr;
1123         etmq->offset = 0;
1124
1125         return 0;
1126 }
1127
1128 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1129                                             struct cs_etm_queue *etmq,
1130                                             unsigned int queue_nr)
1131 {
1132         int ret = 0;
1133         unsigned int cs_queue_nr;
1134         u8 trace_chan_id;
1135         u64 cs_timestamp;
1136
1137         /*
1138          * We are under a CPU-wide trace scenario.  As such we need to know
1139          * when the code that generated the traces started to execute so that
1140          * it can be correlated with execution on other CPUs.  So we get a
1141          * handle on the beginning of traces and decode until we find a
1142          * timestamp.  The timestamp is then added to the auxtrace min heap
1143          * in order to know which queue (of all the etmqs) to decode first.
1144          */
1145         while (1) {
1146                 /*
1147                  * Fetch an aux_buffer from this etmq.  Bail if no more
1148                  * blocks or an error has been encountered.
1149                  */
1150                 ret = cs_etm__get_data_block(etmq);
1151                 if (ret <= 0)
1152                         goto out;
1153
1154                 /*
1155                  * Run decoder on the trace block.  The decoder will stop when
1156                  * encountering a CS timestamp, a full packet queue or the end of
1157                  * trace for that block.
1158                  */
1159                 ret = cs_etm__decode_data_block(etmq);
1160                 if (ret)
1161                         goto out;
1162
1163                 /*
1164                  * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1165                  * the timestamp calculation for us.
1166                  */
1167                 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1168
1169                 /* We found a timestamp, no need to continue. */
1170                 if (cs_timestamp)
1171                         break;
1172
1173                 /*
1174                  * We didn't find a timestamp so empty all the traceid packet
1175                  * queues before looking for another timestamp packet, either
1176                  * in the current data block or a new one.  Packets that were
1177                  * just decoded are useless since no timestamp has been
1178                  * associated with them.  As such simply discard them.
1179                  */
1180                 cs_etm__clear_all_packet_queues(etmq);
1181         }
1182
1183         /*
1184          * We have a timestamp.  Add it to the min heap to reflect when
1185          * instructions conveyed by the range packets of this traceID queue
1186          * started to execute.  Once the same has been done for all the traceID
1187          * queues of each etmq, rendering and decoding can start in
1188          * chronological order.
1189          *
1190          * Note that packets decoded above are still in the traceID's packet
1191          * queue and will be processed in cs_etm__process_timestamped_queues().
1192          */
1193         cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1194         ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1195 out:
1196         return ret;
1197 }
1198
1199 static inline
1200 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1201                                  struct cs_etm_traceid_queue *tidq)
1202 {
1203         struct branch_stack *bs_src = tidq->last_branch_rb;
1204         struct branch_stack *bs_dst = tidq->last_branch;
1205         size_t nr = 0;
1206
1207         /*
1208          * Set the number of records before early exit: ->nr is used to
1209          * determine how many branches to copy from ->entries.
1210          */
1211         bs_dst->nr = bs_src->nr;
1212
1213         /*
1214          * Early exit when there is nothing to copy.
1215          */
1216         if (!bs_src->nr)
1217                 return;
1218
1219         /*
1220          * As bs_src->entries is a circular buffer, we need to copy from it in
1221          * two steps.  First, copy the branches from the most recently inserted
1222          * branch ->last_branch_pos until the end of bs_src->entries buffer.
1223          */
1224         nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1225         memcpy(&bs_dst->entries[0],
1226                &bs_src->entries[tidq->last_branch_pos],
1227                sizeof(struct branch_entry) * nr);
1228
1229         /*
1230          * If we wrapped around at least once, the branches from the beginning
1231          * of the bs_src->entries buffer and until the ->last_branch_pos element
1232          * are older valid branches: copy them over.  The total number of
1233          * branches copied over will be equal to the number of branches asked by
1234          * the user in last_branch_sz.
1235          */
1236         if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1237                 memcpy(&bs_dst->entries[nr],
1238                        &bs_src->entries[0],
1239                        sizeof(struct branch_entry) * tidq->last_branch_pos);
1240         }
1241 }
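/*
 * Illustrative sketch, not part of the original file: the same two-step
 * circular-buffer copy on bare arrays.  With sz == 4 and pos == 1, step one
 * copies entries [1..3] (newest first), step two wraps to copy entry [0].
 */
static inline void cs_etm__example_rb_copy(struct branch_entry *dst,
                                           const struct branch_entry *src,
                                           size_t sz, size_t pos)
{
        size_t nr = sz - pos;

        memcpy(&dst[0], &src[pos], sizeof(*src) * nr);  /* pos .. end */
        memcpy(&dst[nr], &src[0], sizeof(*src) * pos);  /* wrap-around */
}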
1242
1243 static inline
1244 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1245 {
1246         tidq->last_branch_pos = 0;
1247         tidq->last_branch_rb->nr = 0;
1248 }
1249
1250 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1251                                          u8 trace_chan_id, u64 addr)
1252 {
1253         u8 instrBytes[2];
1254
1255         cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1256                            instrBytes, 0);
1257         /*
1258          * T32 instruction size is indicated by bits[15:11] of the first
1259          * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1260          * denote a 32-bit instruction.
1261          */
1262         return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1263 }
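/*
 * Illustrative sketch, not part of the original file: the same size check on
 * a full first halfword instead of its high byte.  0xE800 and above in bits
 * [15:11] covers the 0b11101/0b11110/0b11111 encodings mentioned above.
 */
static inline int cs_etm__example_t32_size(u16 first_halfword)
{
        return ((first_halfword & 0xF800) >= 0xE800) ? 4 : 2;
}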
1264
1265 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1266 {
1267         /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1268         if (packet->sample_type == CS_ETM_DISCONTINUITY)
1269                 return 0;
1270
1271         return packet->start_addr;
1272 }
1273
1274 static inline
1275 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1276 {
1277         /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1278         if (packet->sample_type == CS_ETM_DISCONTINUITY)
1279                 return 0;
1280
1281         return packet->end_addr - packet->last_instr_size;
1282 }
1283
1284 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1285                                      u64 trace_chan_id,
1286                                      const struct cs_etm_packet *packet,
1287                                      u64 offset)
1288 {
1289         if (packet->isa == CS_ETM_ISA_T32) {
1290                 u64 addr = packet->start_addr;
1291
1292                 while (offset) {
1293                         addr += cs_etm__t32_instr_size(etmq,
1294                                                        trace_chan_id, addr);
1295                         offset--;
1296                 }
1297                 return addr;
1298         }
1299
1300         /* Assume a 4 byte instruction size (A32/A64) */
1301         return packet->start_addr + offset * 4;
1302 }
1303
1304 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1305                                           struct cs_etm_traceid_queue *tidq)
1306 {
1307         struct branch_stack *bs = tidq->last_branch_rb;
1308         struct branch_entry *be;
1309
1310         /*
1311          * The branches are recorded in a circular buffer in reverse
1312          * chronological order: we start recording from the last element of the
1313          * buffer down.  After writing the first element of the stack, move the
1314          * insert position back to the end of the buffer.
1315          */
1316         if (!tidq->last_branch_pos)
1317                 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1318
1319         tidq->last_branch_pos -= 1;
1320
1321         be       = &bs->entries[tidq->last_branch_pos];
1322         be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1323         be->to   = cs_etm__first_executed_instr(tidq->packet);
1324         /* No support for mispredict */
1325         be->flags.mispred = 0;
1326         be->flags.predicted = 1;
1327
1328         /*
1329          * Increment bs->nr until reaching the number of last branches asked by
1330          * the user on the command line.
1331          */
1332         if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1333                 bs->nr += 1;
1334 }
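/*
 * Illustrative sketch, not part of the original file: the backwards-moving
 * insert position used above, in isolation.  The position wraps from 0 back
 * to sz - 1, matching the last_branch_pos update in the function above.
 */
static inline size_t cs_etm__example_rb_prev_pos(size_t pos, size_t sz)
{
        return pos ? pos - 1 : sz - 1;
}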
1335
1336 static int cs_etm__inject_event(union perf_event *event,
1337                                struct perf_sample *sample, u64 type)
1338 {
1339         event->header.size = perf_event__sample_event_size(sample, type, 0);
1340         return perf_event__synthesize_sample(event, type, 0, sample);
1341 }
1342
1344 static int
1345 cs_etm__get_trace(struct cs_etm_queue *etmq)
1346 {
1347         struct auxtrace_buffer *aux_buffer = etmq->buffer;
1348         struct auxtrace_buffer *old_buffer = aux_buffer;
1349         struct auxtrace_queue *queue;
1350
1351         queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1352
1353         aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1354
1355         /* If no more data, drop the previous auxtrace_buffer and return */
1356         if (!aux_buffer) {
1357                 if (old_buffer)
1358                         auxtrace_buffer__drop_data(old_buffer);
1359                 etmq->buf_len = 0;
1360                 return 0;
1361         }
1362
1363         etmq->buffer = aux_buffer;
1364
1365         /* If the aux_buffer doesn't have data associated, try to load it */
1366         if (!aux_buffer->data) {
1367                 /* get the file desc associated with the perf data file */
1368                 int fd = perf_data__fd(etmq->etm->session->data);
1369
1370                 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1371                 if (!aux_buffer->data)
1372                         return -ENOMEM;
1373         }
1374
1375         /* If valid, drop the previous buffer */
1376         if (old_buffer)
1377                 auxtrace_buffer__drop_data(old_buffer);
1378
1379         etmq->buf_used = 0;
1380         etmq->buf_len = aux_buffer->size;
1381         etmq->buf = aux_buffer->data;
1382
1383         return etmq->buf_len;
1384 }
1385
1386 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1387                                struct cs_etm_traceid_queue *tidq, pid_t tid,
1388                                ocsd_ex_level el)
1389 {
1390         struct machine *machine = cs_etm__get_machine(etmq, el);
1391
1392         if (tid != -1) {
1393                 thread__zput(tidq->thread);
1394                 tidq->thread = machine__find_thread(machine, -1, tid);
1395         }
1396
1397         /* Couldn't find a known thread */
1398         if (!tidq->thread)
1399                 tidq->thread = machine__idle_thread(machine);
1400
1401         tidq->el = el;
1402 }
1403
1404 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1405                             u8 trace_chan_id, ocsd_ex_level el)
1406 {
1407         struct cs_etm_traceid_queue *tidq;
1408
1409         tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1410         if (!tidq)
1411                 return -EINVAL;
1412
1413         cs_etm__set_thread(etmq, tidq, tid, el);
1414         return 0;
1415 }
1416
1417 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1418 {
1419         return !!etmq->etm->timeless_decoding;
1420 }
1421
1422 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1423                               u64 trace_chan_id,
1424                               const struct cs_etm_packet *packet,
1425                               struct perf_sample *sample)
1426 {
1427         /*
1428          * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1429          * packet, so directly bail out with 'insn_len' = 0.
1430          */
1431         if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1432                 sample->insn_len = 0;
1433                 return;
1434         }
1435
1436         /*
1437          * T32 instruction size might be 32-bit or 16-bit, decide by calling
1438          * cs_etm__t32_instr_size().
1439          */
1440         if (packet->isa == CS_ETM_ISA_T32)
1441                 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1442                                                           sample->ip);
1443         /* Otherwise, A64 and A32 instruction size are always 32-bit. */
1444         else
1445                 sample->insn_len = 4;
1446
1447         cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1448                            (void *)sample->insn, 0);
1449 }
1450
1451 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1452 {
1453         struct cs_etm_auxtrace *etm = etmq->etm;
1454
1455         if (etm->has_virtual_ts)
1456                 return tsc_to_perf_time(cs_timestamp, &etm->tc);
1457         else
1458                 return cs_timestamp;
1459 }
1460
1461 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1462                                                struct cs_etm_traceid_queue *tidq)
1463 {
1464         struct cs_etm_auxtrace *etm = etmq->etm;
1465         struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1466
1467         if (!etm->timeless_decoding && etm->has_virtual_ts)
1468                 return packet_queue->cs_timestamp;
1469         else
1470                 return etm->latest_kernel_timestamp;
1471 }
1472
1473 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1474                                             struct cs_etm_traceid_queue *tidq,
1475                                             u64 addr, u64 period)
1476 {
1477         int ret = 0;
1478         struct cs_etm_auxtrace *etm = etmq->etm;
1479         union perf_event *event = tidq->event_buf;
1480         struct perf_sample sample = {.ip = 0,};
1481
1482         event->sample.header.type = PERF_RECORD_SAMPLE;
1483         event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1484         event->sample.header.size = sizeof(struct perf_event_header);
1485
1486         /* Set time field based on etm auxtrace config. */
1487         sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1488
1489         sample.ip = addr;
1490         sample.pid = thread__pid(tidq->thread);
1491         sample.tid = thread__tid(tidq->thread);
1492         sample.id = etmq->etm->instructions_id;
1493         sample.stream_id = etmq->etm->instructions_id;
1494         sample.period = period;
1495         sample.cpu = tidq->packet->cpu;
1496         sample.flags = tidq->prev_packet->flags;
1497         sample.cpumode = event->sample.header.misc;
1498
1499         cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1500
1501         if (etm->synth_opts.last_branch)
1502                 sample.branch_stack = tidq->last_branch;
1503
1504         if (etm->synth_opts.inject) {
1505                 ret = cs_etm__inject_event(event, &sample,
1506                                            etm->instructions_sample_type);
1507                 if (ret)
1508                         return ret;
1509         }
1510
1511         ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1512
1513         if (ret)
1514                 pr_err(
1515                         "CS ETM Trace: failed to deliver instruction event, error %d\n",
1516                         ret);
1517
1518         return ret;
1519 }
1520
1521 /*
1522  * The CS ETM packet encodes an instruction range between a branch target
1523  * and the next taken branch. Generate a branch sample accordingly.
1524  */
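/*
 * As a purely hypothetical illustration: a RANGE packet covering
 * [0x400100, 0x400140) whose last instruction is a taken branch to 0x400200
 * yields a sample whose ip is the last executed instruction of the old
 * range and whose addr is the first instruction of the new range (0x400200).
 */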
1525 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1526                                        struct cs_etm_traceid_queue *tidq)
1527 {
1528         int ret = 0;
1529         struct cs_etm_auxtrace *etm = etmq->etm;
1530         struct perf_sample sample = {.ip = 0,};
1531         union perf_event *event = tidq->event_buf;
1532         struct dummy_branch_stack {
1533                 u64                     nr;
1534                 u64                     hw_idx;
1535                 struct branch_entry     entries;
1536         } dummy_bs;
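        /*
         * Note: dummy_branch_stack mirrors the layout of struct branch_stack
         * followed by a single struct branch_entry, which is what allows the
         * cast to a one-entry branch stack further down.
         */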
1537         u64 ip;
1538
1539         ip = cs_etm__last_executed_instr(tidq->prev_packet);
1540
1541         event->sample.header.type = PERF_RECORD_SAMPLE;
1542         event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1543                                                      tidq->prev_packet_el);
1544         event->sample.header.size = sizeof(struct perf_event_header);
1545
1546         /* Set time field based on etm auxtrace config. */
1547         sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1548
1549         sample.ip = ip;
1550         sample.pid = thread__pid(tidq->prev_packet_thread);
1551         sample.tid = thread__tid(tidq->prev_packet_thread);
1552         sample.addr = cs_etm__first_executed_instr(tidq->packet);
1553         sample.id = etmq->etm->branches_id;
1554         sample.stream_id = etmq->etm->branches_id;
1555         sample.period = 1;
1556         sample.cpu = tidq->packet->cpu;
1557         sample.flags = tidq->prev_packet->flags;
1558         sample.cpumode = event->sample.header.misc;
1559
1560         cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1561                           &sample);
1562
1563         /*
1564          * perf report cannot handle events without a branch stack
1565          */
1566         if (etm->synth_opts.last_branch) {
1567                 dummy_bs = (struct dummy_branch_stack){
1568                         .nr = 1,
1569                         .hw_idx = -1ULL,
1570                         .entries = {
1571                                 .from = sample.ip,
1572                                 .to = sample.addr,
1573                         },
1574                 };
1575                 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1576         }
1577
1578         if (etm->synth_opts.inject) {
1579                 ret = cs_etm__inject_event(event, &sample,
1580                                            etm->branches_sample_type);
1581                 if (ret)
1582                         return ret;
1583         }
1584
1585         ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1586
1587         if (ret)
1588                 pr_err(
1589                 "CS ETM Trace: failed to deliver branch event, error %d\n",
1590                 ret);
1591
1592         return ret;
1593 }
1594
1595 struct cs_etm_synth {
1596         struct perf_tool dummy_tool;
1597         struct perf_session *session;
1598 };
1599
1600 static int cs_etm__event_synth(struct perf_tool *tool,
1601                                union perf_event *event,
1602                                struct perf_sample *sample __maybe_unused,
1603                                struct machine *machine __maybe_unused)
1604 {
1605         struct cs_etm_synth *cs_etm_synth =
1606                       container_of(tool, struct cs_etm_synth, dummy_tool);
1607
1608         return perf_session__deliver_synth_event(cs_etm_synth->session,
1609                                                  event, NULL);
1610 }
1611
1612 static int cs_etm__synth_event(struct perf_session *session,
1613                                struct perf_event_attr *attr, u64 id)
1614 {
1615         struct cs_etm_synth cs_etm_synth;
1616
1617         memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1618         cs_etm_synth.session = session;
1619
1620         return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1621                                            &id, cs_etm__event_synth);
1622 }
1623
1624 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1625                                 struct perf_session *session)
1626 {
1627         struct evlist *evlist = session->evlist;
1628         struct evsel *evsel;
1629         struct perf_event_attr attr;
1630         bool found = false;
1631         u64 id;
1632         int err;
1633
1634         evlist__for_each_entry(evlist, evsel) {
1635                 if (evsel->core.attr.type == etm->pmu_type) {
1636                         found = true;
1637                         break;
1638                 }
1639         }
1640
1641         if (!found) {
1642                 pr_debug("No selected events with CoreSight Trace data\n");
1643                 return 0;
1644         }
1645
1646         memset(&attr, 0, sizeof(struct perf_event_attr));
1647         attr.size = sizeof(struct perf_event_attr);
1648         attr.type = PERF_TYPE_HARDWARE;
1649         attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1650         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1651                             PERF_SAMPLE_PERIOD;
1652         if (etm->timeless_decoding)
1653                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1654         else
1655                 attr.sample_type |= PERF_SAMPLE_TIME;
1656
1657         attr.exclude_user = evsel->core.attr.exclude_user;
1658         attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1659         attr.exclude_hv = evsel->core.attr.exclude_hv;
1660         attr.exclude_host = evsel->core.attr.exclude_host;
1661         attr.exclude_guest = evsel->core.attr.exclude_guest;
1662         attr.sample_id_all = evsel->core.attr.sample_id_all;
1663         attr.read_format = evsel->core.attr.read_format;
1664
1665         /* Create a new id value at a fixed offset from the evsel id */
1666         id = evsel->core.id[0] + 1000000000;
1667
1668         if (!id)
1669                 id = 1;
1670
1671         if (etm->synth_opts.branches) {
1672                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1673                 attr.sample_period = 1;
1674                 attr.sample_type |= PERF_SAMPLE_ADDR;
1675                 err = cs_etm__synth_event(session, &attr, id);
1676                 if (err)
1677                         return err;
1678                 etm->branches_sample_type = attr.sample_type;
1679                 etm->branches_id = id;
1680                 id += 1;
1681                 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1682         }
1683
1684         if (etm->synth_opts.last_branch) {
1685                 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1686                 /*
1687                  * We don't use the hardware index, but the sample generation
1688                  * code uses the new format branch_stack with this field,
1689                  * so the event attributes must indicate that it's present.
1690                  */
1691                 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1692         }
1693
1694         if (etm->synth_opts.instructions) {
1695                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1696                 attr.sample_period = etm->synth_opts.period;
1697                 etm->instructions_sample_period = attr.sample_period;
1698                 err = cs_etm__synth_event(session, &attr, id);
1699                 if (err)
1700                         return err;
1701                 etm->instructions_sample_type = attr.sample_type;
1702                 etm->instructions_id = id;
1703                 id += 1;
1704         }
1705
1706         return 0;
1707 }
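/*
 * For reference (illustrative, not exhaustive): the synth_opts consumed
 * above are typically driven by perf's --itrace option, e.g.
 *
 *   perf script --itrace=i1000i   # instruction samples every 1000 instrs
 *   perf script --itrace=b        # branch samples
 *   perf script --itrace=il      # instruction samples plus last branch
 *
 * See tools/perf/Documentation/itrace.txt for the full syntax.
 */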
1708
1709 static int cs_etm__sample(struct cs_etm_queue *etmq,
1710                           struct cs_etm_traceid_queue *tidq)
1711 {
1712         struct cs_etm_auxtrace *etm = etmq->etm;
1713         int ret;
1714         u8 trace_chan_id = tidq->trace_chan_id;
1715         u64 instrs_prev;
1716
1717         /* Get the instruction count remainder from the previous packet */
1718         instrs_prev = tidq->period_instructions;
1719
1720         tidq->period_instructions += tidq->packet->instr_count;
1721
1722         /*
1723          * Record a branch when the last instruction in
1724          * PREV_PACKET is a branch.
1725          */
1726         if (etm->synth_opts.last_branch &&
1727             tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1728             tidq->prev_packet->last_instr_taken_branch)
1729                 cs_etm__update_last_branch_rb(etmq, tidq);
1730
1731         if (etm->synth_opts.instructions &&
1732             tidq->period_instructions >= etm->instructions_sample_period) {
1733                 /*
1734                  * Emit instruction sample periodically
1735                  * TODO: allow period to be defined in cycles and clock time
1736                  */
1737
1738                 /*
1739                  * The diagram below demonstrates how instruction samples
1740                  * are generated:
1741                  *
1742                  *    Instrs     Instrs       Instrs       Instrs
1743                  *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1744                  *    |            |            |            |
1745                  *    V            V            V            V
1746                  *   --------------------------------------------------
1747                  *            ^                                  ^
1748                  *            |                                  |
1749                  *         Period                             Period
1750                  *    instructions(Pi)                   instructions(Pi')
1751                  *
1752                  *            |                                  |
1753                  *            \---------------- -----------------/
1754                  *                             V
1755                  *                 tidq->packet->instr_count
1756                  *
1757                  * Instrs Sample(n...) are the synthesised samples occurring
1758                  * every etm->instructions_sample_period instructions - as
1759                  * defined on the perf command line.  Sample(n) is the last
1760                  * sample before the current etm packet; samples n+1 to n+3
1761                  * are generated from the current etm packet.
1762                  *
1763                  * tidq->packet->instr_count represents the number of
1764                  * instructions in the current etm packet.
1765                  *
1766                  * Period instructions (Pi) contains the number of
1767                  * instructions executed after the sample point(n) from the
1768                  * previous etm packet.  This will always be less than
1769                  * etm->instructions_sample_period.
1770                  *
1771                  * When generating new samples, sample(n+1) combines two
1772                  * parts: the tail instructions of the old packet and the
1773                  * head instructions of the newly arrived packet.  Samples
1774                  * (n+2) and (n+3) each consume a full sample period of
1775                  * instructions.  After sample(n+3), the remaining
1776                  * instructions are left for later packets and are kept in
1777                  * tidq->period_instructions for the next round of calculation.
1778                  */
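                /*
                 * Hypothetical worked example (numbers chosen purely for
                 * illustration): with instructions_sample_period = 1000,
                 * instrs_prev = 300 carried over from the previous packet
                 * and a new packet with instr_count = 2500,
                 * period_instructions becomes 2800 and the initial offset
                 * below is 1000 - 300 = 700.  The loop then emits samples
                 * at packet offsets 699 and 1699, leaving 800 instructions
                 * in tidq->period_instructions for the next packet.
                 */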
1779
1780                 /*
1781                  * Get the initial offset into the current packet instructions;
1782                  * entry conditions ensure that instrs_prev is less than
1783                  * etm->instructions_sample_period.
1784                  */
1785                 u64 offset = etm->instructions_sample_period - instrs_prev;
1786                 u64 addr;
1787
1788                 /* Prepare last branches for instruction sample */
1789                 if (etm->synth_opts.last_branch)
1790                         cs_etm__copy_last_branch_rb(etmq, tidq);
1791
1792                 while (tidq->period_instructions >=
1793                                 etm->instructions_sample_period) {
1794                         /*
1795                          * Calculate the address of the sampled instruction (-1
1796                          * because the sample is reported as though the
1797                          * instruction has just been executed, but the PC has
1798                          * not advanced to the next instruction).
1799                          */
1800                         addr = cs_etm__instr_addr(etmq, trace_chan_id,
1801                                                   tidq->packet, offset - 1);
1802                         ret = cs_etm__synth_instruction_sample(
1803                                 etmq, tidq, addr,
1804                                 etm->instructions_sample_period);
1805                         if (ret)
1806                                 return ret;
1807
1808                         offset += etm->instructions_sample_period;
1809                         tidq->period_instructions -=
1810                                 etm->instructions_sample_period;
1811                 }
1812         }
1813
1814         if (etm->synth_opts.branches) {
1815                 bool generate_sample = false;
1816
1817                 /* Generate a sample for a trace-on packet (after a discontinuity) */
1818                 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1819                         generate_sample = true;
1820
1821                 /* Generate a sample when the last instruction is a taken branch */
1822                 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1823                     tidq->prev_packet->last_instr_taken_branch)
1824                         generate_sample = true;
1825
1826                 if (generate_sample) {
1827                         ret = cs_etm__synth_branch_sample(etmq, tidq);
1828                         if (ret)
1829                                 return ret;
1830                 }
1831         }
1832
1833         cs_etm__packet_swap(etm, tidq);
1834
1835         return 0;
1836 }
1837
1838 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1839 {
1840         /*
1841          * When an exception packet is inserted, force
1842          * 'prev_packet->last_instr_taken_branch' to true regardless of whether
1843          * the last instruction in the previous range packet was a taken branch.
1844          * This ensures a branch sample is generated for the instruction range
1845          * before the exception traps to the kernel or before it returns.
1846          *
1847          * The exception packet carries dummy address values, so don't swap
1848          * PACKET with PREV_PACKET; this keeps PREV_PACKET useful for
1849          * generating instruction and branch samples.
1850          */
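        /*
         * For example, if an IRQ interrupts a range whose last instruction
         * was not a branch, forcing the flag below still makes the sampling
         * code emit a branch sample for that range into the handler.
         */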
1851         if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1852                 tidq->prev_packet->last_instr_taken_branch = true;
1853
1854         return 0;
1855 }
1856
1857 static int cs_etm__flush(struct cs_etm_queue *etmq,
1858                          struct cs_etm_traceid_queue *tidq)
1859 {
1860         int err = 0;
1861         struct cs_etm_auxtrace *etm = etmq->etm;
1862
1863         /* Handle start tracing packet */
1864         if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1865                 goto swap_packet;
1866
1867         if (etmq->etm->synth_opts.last_branch &&
1868             etmq->etm->synth_opts.instructions &&
1869             tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1870                 u64 addr;
1871
1872                 /* Prepare last branches for instruction sample */
1873                 cs_etm__copy_last_branch_rb(etmq, tidq);
1874
1875                 /*
1876                  * Generate a last branch event for the branches left in the
1877                  * circular buffer at the end of the trace.
1878                  *
1879                  * Use the address of the end of the last reported execution
1880                  * range.
1881                  */
1882                 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1883
1884                 err = cs_etm__synth_instruction_sample(
1885                         etmq, tidq, addr,
1886                         tidq->period_instructions);
1887                 if (err)
1888                         return err;
1889
1890                 tidq->period_instructions = 0;
1891
1892         }
1893
1894         if (etm->synth_opts.branches &&
1895             tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1896                 err = cs_etm__synth_branch_sample(etmq, tidq);
1897                 if (err)
1898                         return err;
1899         }
1900
1901 swap_packet:
1902         cs_etm__packet_swap(etm, tidq);
1903
1904         /* Reset last branches after flushing the trace */
1905         if (etm->synth_opts.last_branch)
1906                 cs_etm__reset_last_branch_rb(tidq);
1907
1908         return err;
1909 }
1910
1911 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1912                              struct cs_etm_traceid_queue *tidq)
1913 {
1914         int err;
1915
1916         /*
1917          * No new packets are coming and 'etmq->packet' still contains the
1918          * stale packet left over from the previous packet swap, so skip
1919          * generating a branch sample to avoid using stale data.
1920          *
1921          * For this case, only flush the branch stack and generate a last
1922          * branch event for the branches left in the circular buffer at the
1923          * end of the trace.
1924          */
1925         if (etmq->etm->synth_opts.last_branch &&
1926             etmq->etm->synth_opts.instructions &&
1927             tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1928                 u64 addr;
1929
1930                 /* Prepare last branches for instruction sample */
1931                 cs_etm__copy_last_branch_rb(etmq, tidq);
1932
1933                 /*
1934                  * Use the address of the end of the last reported execution
1935                  * range.
1936                  */
1937                 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1938
1939                 err = cs_etm__synth_instruction_sample(
1940                         etmq, tidq, addr,
1941                         tidq->period_instructions);
1942                 if (err)
1943                         return err;
1944
1945                 tidq->period_instructions = 0;
1946         }
1947
1948         return 0;
1949 }
1950 /*
1951  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1952  *                         if need be.
1953  * Returns:     < 0     on error
1954  *              = 0     if there are no more auxtrace_buffers to read
1955  *              > 0     if the current buffer isn't empty yet
1956  */
1957 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1958 {
1959         int ret;
1960
1961         if (!etmq->buf_len) {
1962                 ret = cs_etm__get_trace(etmq);
1963                 if (ret <= 0)
1964                         return ret;
1965                 /*
1966                  * We cannot assume consecutive blocks in the data file
1967                  * are contiguous; reset the decoder to force a re-sync.
1968                  */
1969                 ret = cs_etm_decoder__reset(etmq->decoder);
1970                 if (ret)
1971                         return ret;
1972         }
1973
1974         return etmq->buf_len;
1975 }
1976
1977 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1978                                  struct cs_etm_packet *packet,
1979                                  u64 end_addr)
1980 {
1981         /* Initialise to keep compiler happy */
1982         u16 instr16 = 0;
1983         u32 instr32 = 0;
1984         u64 addr;
1985
1986         switch (packet->isa) {
1987         case CS_ETM_ISA_T32:
1988                 /*
1989                  * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1990                  *
1991                  *  b'15         b'8
1992                  * +-----------------+--------+
1993                  * | 1 1 0 1 1 1 1 1 |  imm8  |
1994                  * +-----------------+--------+
1995                  *
1996                  * The specification only defines a 16-bit SVC encoding for
1997                  * T32 and no 32-bit one, so read only 2 bytes below for the
1998                  * T32 instruction.
1999                  */
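                /* For instance, "svc #0" in T32 encodes as 0xdf00. */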
2000                 addr = end_addr - 2;
2001                 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2002                                    (u8 *)&instr16, 0);
2003                 if ((instr16 & 0xFF00) == 0xDF00)
2004                         return true;
2005
2006                 break;
2007         case CS_ETM_ISA_A32:
2008                 /*
2009                  * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2010                  *
2011                  *  b'31 b'28 b'27 b'24
2012                  * +---------+---------+-------------------------+
2013                  * |  !1111  | 1 1 1 1 |        imm24            |
2014                  * +---------+---------+-------------------------+
2015                  */
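                /*
                 * For instance, an unconditional "svc #0" (cond == AL)
                 * encodes as 0xef000000 and passes both checks below.
                 */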
2016                 addr = end_addr - 4;
2017                 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2018                                    (u8 *)&instr32, 0);
2019                 if ((instr32 & 0x0F000000) == 0x0F000000 &&
2020                     (instr32 & 0xF0000000) != 0xF0000000)
2021                         return true;
2022
2023                 break;
2024         case CS_ETM_ISA_A64:
2025                 /*
2026                  * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2027                  *
2028                  *  b'31               b'21           b'4     b'0
2029                  * +-----------------------+---------+-----------+
2030                  * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2031                  * +-----------------------+---------+-----------+
2032                  */
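                /* For instance, "svc #0" in A64 encodes as 0xd4000001. */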
2033                 addr = end_addr - 4;
2034                 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2035                                    (u8 *)&instr32, 0);
2036                 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2037                         return true;
2038
2039                 break;
2040         case CS_ETM_ISA_UNKNOWN:
2041         default:
2042                 break;
2043         }
2044
2045         return false;
2046 }
2047
2048 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2049                                struct cs_etm_traceid_queue *tidq, u64 magic)
2050 {
2051         u8 trace_chan_id = tidq->trace_chan_id;
2052         struct cs_etm_packet *packet = tidq->packet;
2053         struct cs_etm_packet *prev_packet = tidq->prev_packet;
2054
2055         if (magic == __perf_cs_etmv3_magic)
2056                 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2057                         return true;
2058
2059         /*
2060          * The ETMv4 exception type CS_ETMV4_EXC_CALL covers the SVC, SMC
2061          * and HVC cases; we need to check whether it's an SVC instruction
2062          * based on the packet address.
2063          */
2064         if (magic == __perf_cs_etmv4_magic) {
2065                 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2066                     cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2067                                          prev_packet->end_addr))
2068                         return true;
2069         }
2070
2071         return false;
2072 }
2073
2074 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2075                                        u64 magic)
2076 {
2077         struct cs_etm_packet *packet = tidq->packet;
2078
2079         if (magic == __perf_cs_etmv3_magic)
2080                 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2081                     packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2082                     packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2083                     packet->exception_number == CS_ETMV3_EXC_IRQ ||
2084                     packet->exception_number == CS_ETMV3_EXC_FIQ)
2085                         return true;
2086
2087         if (magic == __perf_cs_etmv4_magic)
2088                 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2089                     packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2090                     packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2091                     packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2092                     packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2093                     packet->exception_number == CS_ETMV4_EXC_IRQ ||
2094                     packet->exception_number == CS_ETMV4_EXC_FIQ)
2095                         return true;
2096
2097         return false;
2098 }
2099
2100 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2101                                       struct cs_etm_traceid_queue *tidq,
2102                                       u64 magic)
2103 {
2104         u8 trace_chan_id = tidq->trace_chan_id;
2105         struct cs_etm_packet *packet = tidq->packet;
2106         struct cs_etm_packet *prev_packet = tidq->prev_packet;
2107
2108         if (magic == __perf_cs_etmv3_magic)
2109                 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2110                     packet->exception_number == CS_ETMV3_EXC_HYP ||
2111                     packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2112                     packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2113                     packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2114                     packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2115                     packet->exception_number == CS_ETMV3_EXC_GENERIC)
2116                         return true;
2117
2118         if (magic == __perf_cs_etmv4_magic) {
2119                 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2120                     packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2121                     packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2122                     packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2123                         return true;
2124
2125                 /*
2126                  * For CS_ETMV4_EXC_CALL, instructions other than SVC
2127                  * (i.e. SMC and HVC) are taken as sync exceptions.
2128                  */
2129                 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2130                     !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2131                                           prev_packet->end_addr))
2132                         return true;
2133
2134                 /*
2135                  * ETMv4 has 5 bits for the exception number; numbers in
2136                  * the range (CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END] are
2137                  * implementation defined exceptions.
2138                  *
2139                  * In this case, simply treat them as sync exceptions.
2140                  */
2141                 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2142                     packet->exception_number <= CS_ETMV4_EXC_END)
2143                         return true;
2144         }
2145
2146         return false;
2147 }
2148
2149 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2150                                     struct cs_etm_traceid_queue *tidq)
2151 {
2152         struct cs_etm_packet *packet = tidq->packet;
2153         struct cs_etm_packet *prev_packet = tidq->prev_packet;
2154         u8 trace_chan_id = tidq->trace_chan_id;
2155         u64 magic;
2156         int ret;
2157
2158         switch (packet->sample_type) {
2159         case CS_ETM_RANGE:
2160                 /*
2161                  * An immediate branch instruction with neither link nor
2162                  * return flag is a normal branch instruction within
2163                  * a function.
2164                  */
2165                 if (packet->last_instr_type == OCSD_INSTR_BR &&
2166                     packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2167                         packet->flags = PERF_IP_FLAG_BRANCH;
2168
2169                         if (packet->last_instr_cond)
2170                                 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2171                 }
2172
2173                 /*
2174                  * An immediate branch instruction with link (e.g. BL) is a
2175                  * branch instruction for a function call.
2176                  */
2177                 if (packet->last_instr_type == OCSD_INSTR_BR &&
2178                     packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2179                         packet->flags = PERF_IP_FLAG_BRANCH |
2180                                         PERF_IP_FLAG_CALL;
2181
2182                 /*
2183                  * An indirect branch instruction with link (e.g. BLR) is a
2184                  * branch instruction for a function call.
2185                  */
2186                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2187                     packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2188                         packet->flags = PERF_IP_FLAG_BRANCH |
2189                                         PERF_IP_FLAG_CALL;
2190
2191                 /*
2192                  * An indirect branch instruction with the subtype
2193                  * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint of a
2194                  * function return for A32/T32.
2195                  */
2196                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2197                     packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2198                         packet->flags = PERF_IP_FLAG_BRANCH |
2199                                         PERF_IP_FLAG_RETURN;
2200
2201                 /*
2202                  * An indirect branch instruction without link (e.g. BR) is
2203                  * usually used for a function return, especially for functions
2204                  * within dynamically linked libraries.
2205                  */
2206                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2207                     packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2208                         packet->flags = PERF_IP_FLAG_BRANCH |
2209                                         PERF_IP_FLAG_RETURN;
2210
2211                 /* Return instruction for function return. */
2212                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2213                     packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2214                         packet->flags = PERF_IP_FLAG_BRANCH |
2215                                         PERF_IP_FLAG_RETURN;
2216
2217                 /*
2218                  * The decoder might insert a discontinuity in the middle of
2219                  * instruction packets; fix up prev_packet with the flag
2220                  * PERF_IP_FLAG_TRACE_BEGIN to indicate that tracing restarted.
2221                  */
2222                 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2223                         prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2224                                               PERF_IP_FLAG_TRACE_BEGIN;
2225
2226                 /*
2227                  * If the previous packet is an exception return packet
2228                  * and the return address directly follows an SVC
2229                  * instruction, calibrate the previous packet's sample
2230                  * flags to PERF_IP_FLAG_SYSCALLRET.
2231                  */
2232                 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2233                                            PERF_IP_FLAG_RETURN |
2234                                            PERF_IP_FLAG_INTERRUPT) &&
2235                     cs_etm__is_svc_instr(etmq, trace_chan_id,
2236                                          packet, packet->start_addr))
2237                         prev_packet->flags = PERF_IP_FLAG_BRANCH |
2238                                              PERF_IP_FLAG_RETURN |
2239                                              PERF_IP_FLAG_SYSCALLRET;
2240                 break;
2241         case CS_ETM_DISCONTINUITY:
2242                 /*
2243                  * The trace is discontinuous; if the previous packet is an
2244                  * instruction packet, set the PERF_IP_FLAG_TRACE_END flag
2245                  * on the previous packet.
2246                  */
2247                 if (prev_packet->sample_type == CS_ETM_RANGE)
2248                         prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2249                                               PERF_IP_FLAG_TRACE_END;
2250                 break;
2251         case CS_ETM_EXCEPTION:
2252                 ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2253                 if (ret)
2254                         return ret;
2255
2256                 /* The exception is for a system call. */
2257                 if (cs_etm__is_syscall(etmq, tidq, magic))
2258                         packet->flags = PERF_IP_FLAG_BRANCH |
2259                                         PERF_IP_FLAG_CALL |
2260                                         PERF_IP_FLAG_SYSCALLRET;
2261                 /*
2262                  * These exceptions are triggered by external signals from
2263                  * the bus, interrupt controller, debug module, PE reset or halt.
2264                  */
2265                 else if (cs_etm__is_async_exception(tidq, magic))
2266                         packet->flags = PERF_IP_FLAG_BRANCH |
2267                                         PERF_IP_FLAG_CALL |
2268                                         PERF_IP_FLAG_ASYNC |
2269                                         PERF_IP_FLAG_INTERRUPT;
2270                 /*
2271                  * Otherwise, the exception is caused by a trap, an
2272                  * instruction or data fault, or an alignment error.
2273                  */
2274                 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2275                         packet->flags = PERF_IP_FLAG_BRANCH |
2276                                         PERF_IP_FLAG_CALL |
2277                                         PERF_IP_FLAG_INTERRUPT;
2278
2279                 /*
2280                  * An exception packet is not used standalone for
2281                  * generating samples; it is affiliated with the previous
2282                  * instruction range packet.  So when the exception packet
2283                  * is inserted, set the previous range packet's flags to
2284                  * tell perf it is an exception taken branch.
2285                  */
2286                 if (prev_packet->sample_type == CS_ETM_RANGE)
2287                         prev_packet->flags = packet->flags;
2288                 break;
2289         case CS_ETM_EXCEPTION_RET:
2290                 /*
2291                  * An exception return packet is not used standalone for
2292                  * generating samples; it is affiliated with the previous
2293                  * instruction range packet.  So when the exception return
2294                  * packet is inserted, set the previous range packet's
2295                  * flags to tell perf it is an exception return branch.
2296                  *
2297                  * The exception return can be for either a system call or
2298                  * another exception type; unfortunately the packet doesn't
2299                  * contain exception type related info, so we cannot decide
2300                  * the exception type purely from the exception return packet.
2301                  * Recording the exception number from the exception packet
2302                  * and reusing it for the exception return packet is not
2303                  * reliable either, because the trace can be discontinuous
2304                  * or the interrupt can be nested; in those two cases the
2305                  * recorded exception number cannot be trusted.
2306                  *
2307                  * For an exception return packet, we only need to know
2308                  * whether it is for a system call or for another type.
2309                  * That decision can be deferred until the next packet,
2310                  * which contains the return address: based on the return
2311                  * address we can read out the previous instruction, check
2312                  * whether it is a system call instruction and then
2313                  * calibrate the sample flags as needed.
2314                  */
2315                 if (prev_packet->sample_type == CS_ETM_RANGE)
2316                         prev_packet->flags = PERF_IP_FLAG_BRANCH |
2317                                              PERF_IP_FLAG_RETURN |
2318                                              PERF_IP_FLAG_INTERRUPT;
2319                 break;
2320         case CS_ETM_EMPTY:
2321         default:
2322                 break;
2323         }
2324
2325         return 0;
2326 }
2327
2328 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2329 {
2330         int ret = 0;
2331         size_t processed = 0;
2332
2333         /*
2334          * Packets are decoded and added to the decoder's packet queue
2335          * until the decoder packet processing callback has requested that
2336          * processing stops or there is nothing left in the buffer.  Normal
2337          * operations that stop processing are a timestamp packet or a full
2338          * decoder buffer queue.
2339          */
2340         ret = cs_etm_decoder__process_data_block(etmq->decoder,
2341                                                  etmq->offset,
2342                                                  &etmq->buf[etmq->buf_used],
2343                                                  etmq->buf_len,
2344                                                  &processed);
2345         if (ret)
2346                 goto out;
2347
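        /*
         * 'processed' reports how many bytes the decoder consumed: advance
         * the file offset and the buffer cursor by that amount and shrink
         * the remaining length to match.
         */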
2348         etmq->offset += processed;
2349         etmq->buf_used += processed;
2350         etmq->buf_len -= processed;
2351
2352 out:
2353         return ret;
2354 }
2355
2356 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2357                                          struct cs_etm_traceid_queue *tidq)
2358 {
2359         int ret;
2360         struct cs_etm_packet_queue *packet_queue;
2361
2362         packet_queue = &tidq->packet_queue;
2363
2364         /* Process each packet in this chunk */
2365         while (1) {
2366                 ret = cs_etm_decoder__get_packet(packet_queue,
2367                                                  tidq->packet);
2368                 if (ret <= 0)
2369                         /*
2370                          * Stop processing this chunk on
2371                          * end of data or error
2372                          */
2373                         break;
2374
2375                 /*
2376                  * Packet addresses are swapped by the packet
2377                  * handling inside the switch() statement below,
2378                  * so sample flags must be set prior to the
2379                  * switch() statement, while the address
2380                  * information is still valid.
2381                  */
2382                 ret = cs_etm__set_sample_flags(etmq, tidq);
2383                 if (ret < 0)
2384                         break;
2385
2386                 switch (tidq->packet->sample_type) {
2387                 case CS_ETM_RANGE:
2388                         /*
2389                          * If the packet contains an instruction
2390                          * range, generate instruction sequence
2391                          * events.
2392                          */
2393                         cs_etm__sample(etmq, tidq);
2394                         break;
2395                 case CS_ETM_EXCEPTION:
2396                 case CS_ETM_EXCEPTION_RET:
2397                         /*
2398                          * On an exception packet, make sure
2399                          * the previous instruction range
2400                          * packet is handled properly.
2401                          */
2402                         cs_etm__exception(tidq);
2403                         break;
2404                 case CS_ETM_DISCONTINUITY:
2405                         /*
2406                          * Discontinuity in trace, flush
2407                          * previous branch stack
2408                          */
2409                         cs_etm__flush(etmq, tidq);
2410                         break;
2411                 case CS_ETM_EMPTY:
2412                         /*
2413                          * We should never receive an empty
2414                          * packet; report an error.
2415                          */
2416                         pr_err("CS ETM Trace: empty packet\n");
2417                         return -EINVAL;
2418                 default:
2419                         break;
2420                 }
2421         }
2422
2423         return ret;
2424 }
2425
2426 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2427 {
2428         int idx;
2429         struct int_node *inode;
2430         struct cs_etm_traceid_queue *tidq;
2431         struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2432
2433         intlist__for_each_entry(inode, traceid_queues_list) {
2434                 idx = (int)(intptr_t)inode->priv;
2435                 tidq = etmq->traceid_queues[idx];
2436
2437                 /* Ignore return value */
2438                 cs_etm__process_traceid_queue(etmq, tidq);
2439
2440                 /*
2441                  * Generate an instruction sample with the remaining
2442                  * branch stack entries.
2443                  */
2444                 cs_etm__flush(etmq, tidq);
2445         }
2446 }
2447
2448 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2449 {
2450         int err = 0;
2451         struct cs_etm_traceid_queue *tidq;
2452
2453         tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2454         if (!tidq)
2455                 return -EINVAL;
2456
2457         /* Go through each buffer in the queue and decode them one by one */
2458         while (1) {
2459                 err = cs_etm__get_data_block(etmq);
2460                 if (err <= 0)
2461                         return err;
2462
2463                 /* Run trace decoder until buffer consumed or end of trace */
2464                 do {
2465                         err = cs_etm__decode_data_block(etmq);
2466                         if (err)
2467                                 return err;
2468
2469                         /*
2470                          * Process each packet in this chunk; if an error
2471                          * occurs, there is nothing to do other than hope
2472                          * the next one will be better.
2473                          */
2474                         err = cs_etm__process_traceid_queue(etmq, tidq);
2475
2476                 } while (etmq->buf_len);
2477
2478                 if (err == 0)
2479                         /* Flush any remaining branch stack entries */
2480                         err = cs_etm__end_block(etmq, tidq);
2481         }
2482
2483         return err;
2484 }
2485
2486 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2487 {
2488         int idx, err = 0;
2489         struct cs_etm_traceid_queue *tidq;
2490         struct int_node *inode;
2491
2492         /* Go through each buffer in the queue and decode them one by one */
2493         while (1) {
2494                 err = cs_etm__get_data_block(etmq);
2495                 if (err <= 0)
2496                         return err;
2497
2498                 /* Run trace decoder until buffer consumed or end of trace */
2499                 do {
2500                         err = cs_etm__decode_data_block(etmq);
2501                         if (err)
2502                                 return err;
2503
2504                         /*
2505                          * cs_etm__run_per_thread_timeless_decoder() runs on a
2506                          * single traceID queue because each TID has a separate
2507                          * buffer. But here in per-cpu mode we need to iterate
2508                          * over each channel instead.
2509                          */
2510                         intlist__for_each_entry(inode,
2511                                                 etmq->traceid_queues_list) {
2512                                 idx = (int)(intptr_t)inode->priv;
2513                                 tidq = etmq->traceid_queues[idx];
2514                                 cs_etm__process_traceid_queue(etmq, tidq);
2515                         }
2516                 } while (etmq->buf_len);
2517
2518                 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2519                         idx = (int)(intptr_t)inode->priv;
2520                         tidq = etmq->traceid_queues[idx];
2521                         /* Flush any remaining branch stack entries */
2522                         err = cs_etm__end_block(etmq, tidq);
2523                         if (err)
2524                                 return err;
2525                 }
2526         }
2527
2528         return err;
2529 }
2530
2531 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2532                                            pid_t tid)
2533 {
2534         unsigned int i;
2535         struct auxtrace_queues *queues = &etm->queues;
2536
2537         for (i = 0; i < queues->nr_queues; i++) {
2538                 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2539                 struct cs_etm_queue *etmq = queue->priv;
2540                 struct cs_etm_traceid_queue *tidq;
2541
2542                 if (!etmq)
2543                         continue;
2544
2545                 if (etm->per_thread_decoding) {
2546                         tidq = cs_etm__etmq_get_traceid_queue(
2547                                 etmq, CS_ETM_PER_THREAD_TRACEID);
2548
2549                         if (!tidq)
2550                                 continue;
2551
2552                         if (tid == -1 || thread__tid(tidq->thread) == tid)
2553                                 cs_etm__run_per_thread_timeless_decoder(etmq);
2554                 } else
2555                         cs_etm__run_per_cpu_timeless_decoder(etmq);
2556         }
2557
2558         return 0;
2559 }
2560
2561 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2562 {
2563         int ret = 0;
2564         unsigned int cs_queue_nr, queue_nr, i;
2565         u8 trace_chan_id;
2566         u64 cs_timestamp;
2567         struct auxtrace_queue *queue;
2568         struct cs_etm_queue *etmq;
2569         struct cs_etm_traceid_queue *tidq;
2570
2571         /*
2572          * Pre-populate the heap with one entry from each queue so that we can
2573          * start processing in time order across all queues.
2574          */
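        /*
         * Illustrative example: if three queues start with timestamps 100,
         * 250 and 80, the min heap hands back the queue with timestamp 80
         * first, so decoding always resumes on the queue holding the oldest
         * undelivered trace.
         */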
2575         for (i = 0; i < etm->queues.nr_queues; i++) {
2576                 etmq = etm->queues.queue_array[i].priv;
2577                 if (!etmq)
2578                         continue;
2579
2580                 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2581                 if (ret)
2582                         return ret;
2583         }
2584
2585         while (1) {
2586                 if (!etm->heap.heap_cnt)
2587                         goto out;
2588
2589                 /* Take the entry at the top of the min heap */
2590                 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2591                 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2592                 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
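                /*
                 * The heap key packs both the auxtrace queue number and the
                 * trace channel ID (see TO_CS_QUEUE_NR further down), which
                 * were unpacked above into queue_nr and trace_chan_id.
                 */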
2593                 queue = &etm->queues.queue_array[queue_nr];
2594                 etmq = queue->priv;
2595
2596                 /*
2597                  * Remove the top entry from the heap since we are about
2598                  * to process it.
2599                  */
2600                 auxtrace_heap__pop(&etm->heap);
2601
2602                 tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2603                 if (!tidq) {
2604                         /*
2605                          * No traceID queue has been allocated for this traceID,
2606                          * which means something somewhere went very wrong.  No
2607                          * other choice than to simply exit.
2608                          */
2609                         ret = -EINVAL;
2610                         goto out;
2611                 }
2612
2613                 /*
2614                  * Packets associated with this timestamp are already in
2615                  * the etmq's traceID queue, so process them.
2616                  */
2617                 ret = cs_etm__process_traceid_queue(etmq, tidq);
2618                 if (ret < 0)
2619                         goto out;
2620
2621                 /*
2622                  * Packets for this timestamp have been processed, time to
2623                  * move on to the next timestamp, fetching a new auxtrace_buffer
2624                  * if need be.
2625                  */
2626 refetch:
2627                 ret = cs_etm__get_data_block(etmq);
2628                 if (ret < 0)
2629                         goto out;
2630
2631                 /*
2632                  * No more auxtrace_buffers to process in this etmq, simply
2633                  * move on to another entry in the auxtrace_heap.
2634                  */
2635                 if (!ret)
2636                         continue;
2637
2638                 ret = cs_etm__decode_data_block(etmq);
2639                 if (ret)
2640                         goto out;
2641
2642                 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2643
2644                 if (!cs_timestamp) {
2645                         /*
2646                          * Function cs_etm__decode_data_block() returns when
2647                          * there are no more traces to decode in the current
2648                          * auxtrace_buffer OR when a timestamp has been
2649                          * encountered on any of the traceID queues.  Since we
2650                          * did not get a timestamp, there are no more traces to
2651                          * process in this auxtrace_buffer.  As such, empty and
2652                          * flush all traceID queues.
2653                          */
2654                         cs_etm__clear_all_traceid_queues(etmq);
2655
2656                         /* Fetch another auxtrace_buffer for this etmq */
2657                         goto refetch;
2658                 }
2659
2660                 /*
2661                  * Add to the min heap the timestamp for packets that have
2662                  * just been decoded.  They will be processed and synthesized
2663                  * during the next call to cs_etm__process_traceid_queue() for
2664                  * this queue/traceID.
2665                  */
2666                 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2667                 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2668         }
2669
2670 out:
2671         return ret;
2672 }
2673
2674 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2675                                         union perf_event *event)
2676 {
2677         struct thread *th;
2678
2679         if (etm->timeless_decoding)
2680                 return 0;
2681
2682         /*
2683          * Add the tid/pid to the log so that we can get a match when we get a
2684          * contextID from the decoder. Only track for the host: only kernel
2685          * trace is supported for guests, which wouldn't need pids, so this
2686          * should be fine.
2687          */
2688         th = machine__findnew_thread(&etm->session->machines.host,
2689                                      event->itrace_start.pid,
2690                                      event->itrace_start.tid);
2691         if (!th)
2692                 return -ENOMEM;
2693
2694         thread__put(th);
2695
2696         return 0;
2697 }
2698
2699 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2700                                            union perf_event *event)
2701 {
2702         struct thread *th;
2703         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2704
2705         /*
2706          * Context switches in per-thread mode are irrelevant since perf
2707          * will start/stop tracing as the process is scheduled.
2708          */
2709         if (etm->timeless_decoding)
2710                 return 0;
2711
2712         /*
2713          * SWITCH_IN events carry the next process to be switched out while
2714          * SWITCH_OUT events carry the process to be switched in.  As such
2715          * we don't care about IN events.
2716          */
2717         if (!out)
2718                 return 0;
2719
2720         /*
2721          * Add the tid/pid to the log so that we can get a match when we get a
2722          * contextID from the decoder. Only track for the host: only kernel
2723          * trace is supported for guests, which wouldn't need pids, so this
2724          * should be fine.
2725          */
2726         th = machine__findnew_thread(&etm->session->machines.host,
2727                                      event->context_switch.next_prev_pid,
2728                                      event->context_switch.next_prev_tid);
2729         if (!th)
2730                 return -ENOMEM;
2731
2732         thread__put(th);
2733
2734         return 0;
2735 }
2736
2737 static int cs_etm__process_event(struct perf_session *session,
2738                                  union perf_event *event,
2739                                  struct perf_sample *sample,
2740                                  struct perf_tool *tool)
2741 {
2742         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2743                                                    struct cs_etm_auxtrace,
2744                                                    auxtrace);
2745
2746         if (dump_trace)
2747                 return 0;
2748
2749         if (!tool->ordered_events) {
2750                 pr_err("CoreSight ETM Trace requires ordered events\n");
2751                 return -EINVAL;
2752         }
2753
2754         switch (event->header.type) {
2755         case PERF_RECORD_EXIT:
2756                 /*
2757                  * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2758                  * start the decode because we know there will be no more trace from
2759                  * this thread. This only makes samples get emitted earlier than
2760                  * they would be by waiting for the flush as in other modes; with
2761                  * timestamps it makes sense to wait for the flush so that events
2762                  * from different threads are interleaved properly.
2763                  */
2764                 if (etm->per_thread_decoding && etm->timeless_decoding)
2765                         return cs_etm__process_timeless_queues(etm,
2766                                                                event->fork.tid);
2767                 break;
2768
2769         case PERF_RECORD_ITRACE_START:
2770                 return cs_etm__process_itrace_start(etm, event);
2771
2772         case PERF_RECORD_SWITCH_CPU_WIDE:
2773                 return cs_etm__process_switch_cpu_wide(etm, event);
2774
2775         case PERF_RECORD_AUX:
2776                 /*
2777                  * Record the latest kernel timestamp available in the header
2778                  * for samples so that synthesised samples occur from this point
2779                  * onwards.
2780                  */
2781                 if (sample->time && (sample->time != (u64)-1))
2782                         etm->latest_kernel_timestamp = sample->time;
2783                 break;
2784
2785         default:
2786                 break;
2787         }
2788
2789         return 0;
2790 }
2791
2792 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2793                              struct perf_record_auxtrace *event)
2794 {
2795         struct auxtrace_buffer *buf;
2796         unsigned int i;
2797         /*
2798          * Find all buffers with the same reference in the queues and dump them.
2799          * This is because the queues can contain multiple entries of the same
2800          * buffer that were split on aux records.
2801          */
2802         for (i = 0; i < etm->queues.nr_queues; ++i)
2803                 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2804                         if (buf->reference == event->reference)
2805                                 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2806 }
2807
2808 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2809                                           union perf_event *event,
2810                                           struct perf_tool *tool __maybe_unused)
2811 {
2812         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2813                                                    struct cs_etm_auxtrace,
2814                                                    auxtrace);
2815         if (!etm->data_queued) {
2816                 struct auxtrace_buffer *buffer;
2817                 off_t  data_offset;
2818                 int fd = perf_data__fd(session->data);
2819                 bool is_pipe = perf_data__is_pipe(session->data);
2820                 int err;
2821                 int idx = event->auxtrace.idx;
2822
2823                 if (is_pipe)
2824                         data_offset = 0;
2825                 else {
2826                         data_offset = lseek(fd, 0, SEEK_CUR);
2827                         if (data_offset == -1)
2828                                 return -errno;
2829                 }
2830
2831                 err = auxtrace_queues__add_event(&etm->queues, session,
2832                                                  event, data_offset, &buffer);
2833                 if (err)
2834                         return err;
2835
2836                 /*
2837                  * Knowing if the trace is formatted or not requires a lookup of
2838                  * the aux record so only works in non-piped mode where data is
2839                  * queued in cs_etm__queue_aux_records(). Always assume
2840                  * formatted in piped mode (true).
2841                  */
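                /*
                 * Note: "formatted" refers to CoreSight frame-formatted data,
                 * where trace from multiple sources is wrapped in frames
                 * tagged with trace IDs; raw (unformatted) data is a single
                 * source's stream with no framing.
                 */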
2842                 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2843                                           idx, true, -1);
2844                 if (err)
2845                         return err;
2846
2847                 if (dump_trace)
2848                         if (auxtrace_buffer__get_data(buffer, fd)) {
2849                                 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2850                                 auxtrace_buffer__put_data(buffer);
2851                         }
2852         } else if (dump_trace)
2853                 dump_queued_data(etm, &event->auxtrace);
2854
2855         return 0;
2856 }
2857
2858 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2859 {
2860         struct evsel *evsel;
2861         struct evlist *evlist = etm->session->evlist;
2862
2863         /* Override timeless mode with user input from --itrace=Z */
2864         if (etm->synth_opts.timeless_decoding) {
2865                 etm->timeless_decoding = true;
2866                 return 0;
2867         }
2868
2869         /*
2870          * Find the cs_etm evsel and look at what its timestamp setting was
2871          */
2872         evlist__for_each_entry(evlist, evsel)
2873                 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2874                         etm->timeless_decoding =
2875                                 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2876                         return 0;
2877                 }
2878
2879         pr_err("CS ETM: Couldn't find ETM evsel\n");
2880         return -EINVAL;
2881 }
2882
2883 /*
2884  * Read a single cpu parameter block from the auxtrace_info priv block.
2885  *
2886  * For version 1 there is a per cpu nr_params entry. If we are handling
2887  * a version 1 file, there may be fewer, the same, or more params
2888  * indicated by this value than the compile-time number we understand.
2889  *
2890  * For a version 0 info block, there are a fixed number, and we need to
2891  * fill out the nr_param value in the metadata we create.
2892  */
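/*
 * Illustration: a version 0 block laid out as
 *   [MAGIC][CPU][param 0]..[param N-1]
 * is read into a version 1 metadata block as
 *   [MAGIC][CPU][NR_TRC_PARAMS = N][param 0]..[param N-1]
 * i.e. each trace param lands one slot above its source position.
 */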
2893 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2894                                     int out_blk_size, int nr_params_v0)
2895 {
2896         u64 *metadata = NULL;
2897         int hdr_version;
2898         int nr_in_params, nr_out_params, nr_cmn_params;
2899         int i, k;
2900
2901         metadata = zalloc(sizeof(*metadata) * out_blk_size);
2902         if (!metadata)
2903                 return NULL;
2904
2905         /* read block current index & version */
2906         i = *buff_in_offset;
2907         hdr_version = buff_in[CS_HEADER_VERSION];
2908
2909         if (!hdr_version) {
2910                 /* read version 0 info block into a version 1 metadata block */
2911                 nr_in_params = nr_params_v0;
2912                 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2913                 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2914                 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2915                 /* remaining block params at offset +1 from source */
2916                 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2917                         metadata[k + 1] = buff_in[i + k];
2918                 /* version 0 has 2 common params */
2919                 nr_cmn_params = 2;
2920         } else {
2921                 /* read version 1 info block - input and output nr_params may differ */
2922                 /* version 1 has 3 common params */
2923                 nr_cmn_params = 3;
2924                 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2925
2926                 /* if input has more params than output - skip excess */
2927                 nr_out_params = nr_in_params + nr_cmn_params;
2928                 if (nr_out_params > out_blk_size)
2929                         nr_out_params = out_blk_size;
2930
2931                 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2932                         metadata[k] = buff_in[i + k];
2933
2934                 /* record the actual nr params we copied */
2935                 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2936         }
2937
2938         /* adjust in offset by number of in params used */
2939         i += nr_in_params + nr_cmn_params;
2940         *buff_in_offset = i;
2941         return metadata;
2942 }
2943
2944 /*
2945  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2946  * on the bounds of aux_event, if it matches with the buffer that's at
2947  * file_offset.
2948  *
2949  * Normally, whole auxtrace buffers would be added to the queue. But we
2950  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2951  * is reset across each buffer, so splitting the buffers up in advance has
2952  * the same effect.
2953  */
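/*
 * For example (illustrative sizes only): a single 64KiB auxtrace buffer
 * written by two 32KiB AUX records is queued as two 32KiB fragments, so
 * the decoder is reset at the 32KiB boundary exactly as if the kernel had
 * emitted two separate buffers.
 */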
2954 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2955                                       struct perf_record_aux *aux_event, struct perf_sample *sample)
2956 {
2957         int err;
2958         char buf[PERF_SAMPLE_MAX_SIZE];
2959         union perf_event *auxtrace_event_union;
2960         struct perf_record_auxtrace *auxtrace_event;
2961         union perf_event auxtrace_fragment;
2962         __u64 aux_offset, aux_size;
2963         __u32 idx;
2964         bool formatted;
2965
2966         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2967                                                    struct cs_etm_auxtrace,
2968                                                    auxtrace);
2969
2970         /*
2971          * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2972          * from looping through the auxtrace index.
2973          */
2974         err = perf_session__peek_event(session, file_offset, buf,
2975                                        PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2976         if (err)
2977                 return err;
2978         auxtrace_event = &auxtrace_event_union->auxtrace;
2979         if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2980                 return -EINVAL;
2981
2982         if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2983             auxtrace_event->header.size != sz) {
2984                 return -EINVAL;
2985         }
2986
2987         /*
2988          * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2989          * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2990          * CPU, since we always set one for the AUX_OUTPUT_HW_ID event.
2991          * So compare TIDs when the auxtrace CPU is -1, and CPUs otherwise.
2992          * Return 'not found' on a mismatch.
2993          */
2994         if (auxtrace_event->cpu == (__u32) -1) {
2995                 etm->per_thread_decoding = true;
2996                 if (auxtrace_event->tid != sample->tid)
2997                         return 1;
2998         } else if (auxtrace_event->cpu != sample->cpu) {
2999                 if (etm->per_thread_decoding) {
3000                         /*
3001                          * Found a per-cpu buffer after a per-thread one was
3002                          * already found
3003                          */
3004                         pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3005                         return -EINVAL;
3006                 }
3007                 return 1;
3008         }
3009
3010         if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3011                 /*
3012                  * Clamp size in snapshot mode. The buffer size is clamped in
3013                  * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3014                  * the buffer size.
3015                  */
3016                 aux_size = min(aux_event->aux_size, auxtrace_event->size);
3017
3018                 /*
3019                  * In this mode the head also points to the end of the buffer, so subtract the
3020                  * size to get the start: e.g. head 0x5000, size 0x1000 -> data at [0x4000, 0x5000).
3021                  */
3022                 aux_offset = aux_event->aux_offset - aux_size;
3023         } else {
3024                 aux_size = aux_event->aux_size;
3025                 aux_offset = aux_event->aux_offset;
3026         }
3027
3028         if (aux_offset >= auxtrace_event->offset &&
3029             aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3030                 /*
3031                  * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3032                  * based on the sizes of the aux event, and queue that fragment.
3033                  */
3034                 auxtrace_fragment.auxtrace = *auxtrace_event;
3035                 auxtrace_fragment.auxtrace.size = aux_size;
3036                 auxtrace_fragment.auxtrace.offset = aux_offset;
3037                 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3038
3039                 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3040                           " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3041                 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3042                                                  file_offset, NULL);
3043                 if (err)
3044                         return err;
3045
3046                 idx = auxtrace_event->idx;
3047                 formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3048                 return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3049                                            idx, formatted, sample->cpu);
3050         }
3051
3052         /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3053         return 1;
3054 }
3055
3056 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3057                                         u64 offset __maybe_unused, void *data __maybe_unused)
3058 {
3059         /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3060         if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3061                 (*(int *)data)++; /* increment found count */
3062                 return cs_etm__process_aux_output_hw_id(session, event);
3063         }
3064         return 0;
3065 }
3066
3067 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3068                                         u64 offset __maybe_unused, void *data __maybe_unused)
3069 {
3070         struct perf_sample sample;
3071         int ret;
3072         struct auxtrace_index_entry *ent;
3073         struct auxtrace_index *auxtrace_index;
3074         struct evsel *evsel;
3075         size_t i;
3076
3077         /* Don't care about any other events, we're only queuing buffers for AUX events */
3078         if (event->header.type != PERF_RECORD_AUX)
3079                 return 0;
3080
3081         if (event->header.size < sizeof(struct perf_record_aux))
3082                 return -EINVAL;
3083
3084         /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3085         if (!event->aux.aux_size)
3086                 return 0;
3087
3088         /*
3089          * Parse the sample; we need the sample_id_all data that comes after the event so that the
3090          * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3091          */
3092         evsel = evlist__event2evsel(session->evlist, event);
3093         if (!evsel)
3094                 return -EINVAL;
3095         ret = evsel__parse_sample(evsel, event, &sample);
3096         if (ret)
3097                 return ret;
3098
3099         /*
3100          * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3101          */
3102         list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3103                 for (i = 0; i < auxtrace_index->nr; i++) {
3104                         ent = &auxtrace_index->entries[i];
3105                         ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3106                                                          ent->sz, &event->aux, &sample);
3107                         /*
3108                          * Stop the search on an error (< 0) or on success (0).
3109                          * Continue the search on 1 ('not found').
3110                          */
3111                         if (ret != 1)
3112                                 return ret;
3113                 }
3114         }
3115
3116         /*
3117          * Couldn't find the buffer corresponding to this aux record, so something went wrong.
3118          * Print an error but don't return one, as it may still be possible to decode other aux records.
3119          */
3120         pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3121                " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3122         return 0;
3123 }
3124
3125 static int cs_etm__queue_aux_records(struct perf_session *session)
3126 {
3127         struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3128                                                                 struct auxtrace_index, list);
3129         if (index && index->nr > 0)
3130                 return perf_session__peek_events(session, session->header.data_offset,
3131                                                  session->header.data_size,
3132                                                  cs_etm__queue_aux_records_cb, NULL);
3133
3134         /*
3135          * buffers or no index at all). Return silently, as the buffers may still be
3136          * queued in cs_etm__process_auxtrace_event() if etm->data_queued is still
3137          * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3138          * false.
3139          *
3140          * In that scenario, buffers will not be split by AUX records.
3141          */
3142         return 0;
3143 }
3144
3145 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3146                                   (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
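/*
 * Note: despite its name, HAS_PARAM() evaluates to true when the recorded
 * metadata block contains too few trace params to include the requested
 * one, i.e. when that parameter is missing from the file.
 */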
3147
3148 /*
3149  * Loop through the ETMs and return false if we find at least one where
3150  * ts_source != 1, i.e. one that isn't reporting virtual timestamps.
3151  */
3152 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3153 {
3154         int j;
3155
3156         for (j = 0; j < num_cpu; j++) {
3157                 switch (metadata[j][CS_ETM_MAGIC]) {
3158                 case __perf_cs_etmv4_magic:
3159                         if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3160                                 return false;
3161                         break;
3162                 case __perf_cs_ete_magic:
3163                         if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3164                                 return false;
3165                         break;
3166                 default:
3167                         /* Unknown / unsupported magic number. */
3168                         return false;
3169                 }
3170         }
3171         return true;
3172 }
3173
3174 /* map trace ids to correct metadata block, from information in metadata */
3175 static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3176 {
3177         u64 cs_etm_magic;
3178         u8 trace_chan_id;
3179         int i, err;
3180
3181         for (i = 0; i < num_cpu; i++) {
3182                 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3183                 switch (cs_etm_magic) {
3184                 case __perf_cs_etmv3_magic:
3185                         metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3186                         trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3187                         break;
3188                 case __perf_cs_etmv4_magic:
3189                 case __perf_cs_ete_magic:
3190                         metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3191                         trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3192                         break;
3193                 default:
3194                         /* unknown magic number */
3195                         return -EINVAL;
3196                 }
3197                 err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3198                 if (err)
3199                         return err;
3200         }
3201         return 0;
3202 }
3203
3204 /*
3205  * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3206  * unused value to reduce the number of unneeded decoders created.
3207  */
3208 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3209 {
3210         u64 cs_etm_magic;
3211         int i;
3212
3213         for (i = 0; i < num_cpu; i++) {
3214                 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3215                 switch (cs_etm_magic) {
3216                 case __perf_cs_etmv3_magic:
3217                         if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3218                                 metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3219                         break;
3220                 case __perf_cs_etmv4_magic:
3221                 case __perf_cs_ete_magic:
3222                         if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3223                                 metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3224                         break;
3225                 default:
3226                         /* unknown magic number */
3227                         return -EINVAL;
3228                 }
3229         }
3230         return 0;
3231 }
3232
3233 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3234                                        struct perf_session *session)
3235 {
3236         struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3237         struct cs_etm_auxtrace *etm = NULL;
3238         struct perf_record_time_conv *tc = &session->time_conv;
3239         int event_header_size = sizeof(struct perf_event_header);
3240         int total_size = auxtrace_info->header.size;
3241         int priv_size = 0;
3242         int num_cpu;
3243         int err = 0;
3244         int aux_hw_id_found;
3245         int i, j;
3246         u64 *ptr = NULL;
3247         u64 **metadata = NULL;
3248
3249         /*
3250          * Create an RB tree for the traceID-metadata tuples.  Since the lookup
3251          * has to be made for each packet that gets decoded, optimizing access
3252          * with something better than a sequential array is worth doing.
3253          */
3254         traceid_list = intlist__new(NULL);
3255         if (!traceid_list)
3256                 return -ENOMEM;
3257
3258         /* First the global part */
3259         ptr = (u64 *) auxtrace_info->priv;
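        /*
         * CS_PMU_TYPE_CPUS packs the PMU type in the upper 32 bits and the
         * number of CPUs in the lower 32 bits.
         */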
3260         num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3261         metadata = zalloc(sizeof(*metadata) * num_cpu);
3262         if (!metadata) {
3263                 err = -ENOMEM;
3264                 goto err_free_traceid_list;
3265         }
3266
3267         /* Start parsing after the common part of the header */
3268         i = CS_HEADER_VERSION_MAX;
3269
3270         /*
3271          * The metadata is stored in the auxtrace_info section and encodes
3272          * the configuration of the ARM embedded trace macrocell which is
3273          * required by the trace decoder to properly decode the trace due
3274          * to its highly compressed nature.
3275          */
3276         for (j = 0; j < num_cpu; j++) {
3277                 if (ptr[i] == __perf_cs_etmv3_magic) {
3278                         metadata[j] =
3279                                 cs_etm__create_meta_blk(ptr, &i,
3280                                                         CS_ETM_PRIV_MAX,
3281                                                         CS_ETM_NR_TRC_PARAMS_V0);
3282                 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3283                         metadata[j] =
3284                                 cs_etm__create_meta_blk(ptr, &i,
3285                                                         CS_ETMV4_PRIV_MAX,
3286                                                         CS_ETMV4_NR_TRC_PARAMS_V0);
3287                 } else if (ptr[i] == __perf_cs_ete_magic) {
3288                         metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3289                 } else {
3290                         ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3291                                   ptr[i]);
3292                         err = -EINVAL;
3293                         goto err_free_metadata;
3294                 }
3295
3296                 if (!metadata[j]) {
3297                         err = -ENOMEM;
3298                         goto err_free_metadata;
3299                 }
3300         }
3301
3302         /*
3303          * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3304          * CS_ETMV4_PRIV_MAX marks how many double words are in the
3305          * global metadata, and each cpu's metadata respectively.
3306          * The following tests if the correct number of double words was
3307          * present in the auxtrace info section.
3308          */
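        /* @i counts the u64 (double word) entries consumed so far; each is 8 bytes. */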
3309         priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3310         if (i * 8 != priv_size) {
3311                 err = -EINVAL;
3312                 goto err_free_metadata;
3313         }
3314
3315         etm = zalloc(sizeof(*etm));
3316
3317         if (!etm) {
3318                 err = -ENOMEM;
3319                 goto err_free_metadata;
3320         }
3321
3322         /*
3323          * As all the ETMs run at the same exception level, the system should
3324          * have the same PID format across CPUs.  So cache the PID format
3325          * and reuse it for sequential decoding.
3326          */
3327         etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3328
3329         err = auxtrace_queues__init(&etm->queues);
3330         if (err)
3331                 goto err_free_etm;
3332
3333         if (session->itrace_synth_opts->set) {
3334                 etm->synth_opts = *session->itrace_synth_opts;
3335         } else {
3336                 itrace_synth_opts__set_default(&etm->synth_opts,
3337                                 session->itrace_synth_opts->default_no_sample);
3338                 etm->synth_opts.callchain = false;
3339         }
3340
3341         etm->session = session;
3342
3343         etm->num_cpu = num_cpu;
3344         etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3345         etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3346         etm->metadata = metadata;
3347         etm->auxtrace_type = auxtrace_info->type;
3348
3349         if (etm->synth_opts.use_timestamp)
3350                 /*
3351                  * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF,
3352                  * therefore the decoder cannot know whether the traced
3353                  * timestamp matches the kernel time.
3354                  *
3355                  * A user who knows the platform can specify the itrace
3356                  * option 'T' to tell the decoder to forcibly use the
3357                  * traced timestamp as the kernel time.
3358                  */
3359                 etm->has_virtual_ts = true;
3360         else
3361                 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3362                 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3363
3364         if (!etm->has_virtual_ts)
3365                 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3366                             "The time field of the samples will not be set accurately.\n"
3367                             "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n"
3368                             "you can specify the itrace option 'T' for timestamp decoding\n"
3369                             "if the Coresight timestamp on the platform is the same as the kernel time.\n\n");
3370
3371         etm->auxtrace.process_event = cs_etm__process_event;
3372         etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3373         etm->auxtrace.flush_events = cs_etm__flush_events;
3374         etm->auxtrace.free_events = cs_etm__free_events;
3375         etm->auxtrace.free = cs_etm__free;
3376         etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3377         session->auxtrace = &etm->auxtrace;
3378
3379         err = cs_etm__setup_timeless_decoding(etm);
3380         if (err)
3381                 return err;
3382
3383         etm->tc.time_shift = tc->time_shift;
3384         etm->tc.time_mult = tc->time_mult;
3385         etm->tc.time_zero = tc->time_zero;
3386         if (event_contains(*tc, time_cycles)) {
3387                 etm->tc.time_cycles = tc->time_cycles;
3388                 etm->tc.time_mask = tc->time_mask;
3389                 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3390                 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3391         }
3392         err = cs_etm__synth_events(etm, session);
3393         if (err)
3394                 goto err_free_queues;
3395
3396         /*
3397          * Map Trace ID values to CPU metadata.
3398          *
3399          * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3400          * file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3401          * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3402          *
3403          * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3404          * the same IDs as the old algorithm as far as possible, unless there are clashes,
3405          * in which case a different value will be used. This means an older perf may still
3406          * be able to record and read files generated on a newer system.
3407          *
3408          * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3409          * those packets. If they are there then the values will be mapped and plugged into
3410          * the metadata. We then set any remaining metadata values with the unused flag to
3411          * CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3412          *
3413          * If no AUX_HW_ID packets are present - which means the file was recorded on an old
3414          * kernel - then we map Trace ID values to CPUs directly from the metadata, clearing
3415          * any unused flags if present.
3416          */
3417
3418         /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3419         aux_hw_id_found = 0;
3420         err = perf_session__peek_events(session, session->header.data_offset,
3421                                         session->header.data_size,
3422                                         cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3423         if (err)
3424                 goto err_free_queues;
3425
3426         /* if HW ID found then clear any unused metadata ID values */
3427         if (aux_hw_id_found)
3428                 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3429         /* otherwise, this is a file with metadata values only, map from metadata */
3430         else
3431                 err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3432
3433         if (err)
3434                 goto err_free_queues;
3435
3436         err = cs_etm__queue_aux_records(session);
3437         if (err)
3438                 goto err_free_queues;
3439
3440         etm->data_queued = etm->queues.populated;
3441         return 0;
3442
3443 err_free_queues:
3444         auxtrace_queues__free(&etm->queues);
3445         session->auxtrace = NULL;
3446 err_free_etm:
3447         zfree(&etm);
3448 err_free_metadata:
3449         /* No need to check @metadata[j], free(NULL) is supported */
3450         for (j = 0; j < num_cpu; j++)
3451                 zfree(&metadata[j]);
3452         zfree(&metadata);
3453 err_free_traceid_list:
3454         intlist__delete(traceid_list);
3455         return err;
3456 }