1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright(C) 2015-2018 Linaro Limited.
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
23 #include "cs-etm-decoder/cs-etm-decoder.h"
32 #include "map_symbol.h"
37 #include "thread-stack.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
/*
 * Top-level CoreSight ETM auxtrace state. It embeds the generic auxtrace
 * handle, queues and heap, keeps a pointer to the perf session, and holds
 * the decoding-mode flags plus the sample type/period settings.
 */
43 struct cs_etm_auxtrace {
/* Embedded handle: container_of() on session->auxtrace recovers this struct */
44 struct auxtrace auxtrace;
45 struct auxtrace_queues queues;
46 struct auxtrace_heap heap;
47 struct itrace_synth_opts synth_opts;
48 struct perf_session *session;
49 struct perf_tsc_conversion tc;
52 * Timeless has no timestamps in the trace so overlapping mmap lookups
53 * are less accurate but produces smaller trace data. We use context IDs
54 * in the trace instead of matching timestamps with fork records so
55 * they're not really needed in the general case. Overlapping mmaps
56 * happen in cases like between a fork and an exec.
58 bool timeless_decoding;
61 * Per-thread ignores the trace channel ID and instead assumes that
62 * everything in a buffer comes from the same process regardless of
63 * which CPU it ran on. It also implies no context IDs so the TID is
64 * taken from the auxtrace buffer.
66 bool per_thread_decoding;
69 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
72 u64 latest_kernel_timestamp;
74 u64 branches_sample_type;
76 u64 instructions_sample_type;
77 u64 instructions_sample_period;
/* Compared with evsel->core.attr.type in cs_etm__evsel_is_auxtrace() */
80 unsigned int pmu_type;
/* Value handed out by cs_etm__get_pid_fmt() */
81 enum cs_etm_pid_fmt pid_fmt;
/*
 * State kept for one trace ID inside a cs_etm_queue: the current and previous
 * packet, the threads and exception level tracked alongside them, the
 * last-branch stacks and the packet queue.
 */
84 struct cs_etm_traceid_queue {
86 u64 period_instructions;
/* Insert position in last_branch_rb->entries, see cs_etm__update_last_branch_rb() */
87 size_t last_branch_pos;
/* Allocated with PERF_SAMPLE_MAX_SIZE bytes in cs_etm__init_traceid_queue() */
88 union perf_event *event_buf;
89 struct thread *thread;
90 struct thread *prev_packet_thread;
91 ocsd_ex_level prev_packet_el;
/* last_branch_rb is the circular buffer, last_branch receives the copy made by cs_etm__copy_last_branch_rb() */
93 struct branch_stack *last_branch;
94 struct branch_stack *last_branch_rb;
/* prev_packet and packet are exchanged by cs_etm__packet_swap() */
95 struct cs_etm_packet *prev_packet;
96 struct cs_etm_packet *packet;
97 struct cs_etm_packet_queue packet_queue;
/*
 * Decoding state for one auxtrace queue: the decoder instance, the current
 * auxtrace buffer and the per-trace-ID queues that
 * cs_etm__etmq_get_traceid_queue() creates on first use.
 */
100 struct cs_etm_queue {
101 struct cs_etm_auxtrace *etm;
102 struct cs_etm_decoder *decoder;
103 struct auxtrace_buffer *buffer;
104 unsigned int queue_nr;
/* Non-zero while a trace ID has a timestamp waiting; reset to 0 by cs_etm__etmq_get_timestamp() */
105 u8 pending_timestamp_chan_id;
107 const unsigned char *buf;
108 size_t buf_len, buf_used;
109 /* Conversion between traceID and index in traceid_queues array */
110 struct intlist *traceid_queues_list;
111 struct cs_etm_traceid_queue **traceid_queues;
114 /* RB tree for quick conversion between traceID and metadata pointers */
/* File-scope list; each node's priv is set to a CPU metadata pointer by cs_etm__map_trace_id() */
115 static struct intlist *traceid_list;
/* Forward declarations */
117 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
118 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
120 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
121 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
123 /* PTMs ETMIDR [11:8] set to b0011 */
/* Used both as mask and as expected value in cs_etm__get_v7_protocol_version() */
124 #define ETMIDR_PTM_VERSION 0x00000300
127 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
128 * work with. One option is to modify the auxtrace_heap_XYZ() API or simply
129 * encode the etm queue number as the upper 16 bit and the channel as
/*
 * Pack/unpack helpers for the combined queue number: the etm queue number
 * lives in the bits above bit 15 and the trace channel ID in the low 16 bits.
 * Every macro argument is parenthesised so that expression arguments
 * (for example a | b) are evaluated as a unit before the shift or mask.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
	(((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
137 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
139 etmidr &= ETMIDR_PTM_VERSION;
141 if (etmidr == ETMIDR_PTM_VERSION)
142 return CS_ETM_PROTO_PTM;
144 return CS_ETM_PROTO_ETMV3;
/*
 * Fetch the CS_ETM_MAGIC word of the metadata mapped to @trace_chan_id in
 * traceid_list and store it through @magic.
 */
147 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
149 struct int_node *inode;
152 inode = intlist__find(traceid_list, trace_chan_id);
/* priv is the metadata pointer stored by cs_etm__map_trace_id() */
156 metadata = inode->priv;
157 *magic = metadata[CS_ETM_MAGIC];
/*
 * Fetch the CS_ETM_CPU entry of the metadata mapped to @trace_chan_id in
 * traceid_list and store it, cast to int, through @cpu.
 */
161 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
163 struct int_node *inode;
166 inode = intlist__find(traceid_list, trace_chan_id);
/* priv is the metadata pointer stored by cs_etm__map_trace_id() */
170 metadata = inode->priv;
171 *cpu = (int)metadata[CS_ETM_CPU];
/*
 * Work out which context ID register, if any, carries the PID, based on one
 * CPU's metadata. ETMv3 metadata is tested against the ETMCR value; the
 * TRCCONFIGR tests check the VMID bits before the CTXTID bit, so
 * CS_ETM_PIDFMT_CTXTID2 wins when both are set. CS_ETM_PIDFMT_NONE is the
 * fallback.
 */
176 * The returned PID format is presented as an enum:
178 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180 * CS_ETM_PIDFMT_NONE: No context IDs
182 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183 * are enabled at the same time when the session runs on an EL2 kernel.
184 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
185 * recorded in the trace data, the tool will selectively use
186 * CONTEXTIDR_EL2 as PID.
188 * The result is cached in etm->pid_fmt so this function only needs to be called
189 * when processing the aux info.
191 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
195 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196 val = metadata[CS_ETM_ETMCR];
197 /* CONTEXTIDR is traced */
198 if (val & BIT(ETM_OPT_CTXTID))
199 return CS_ETM_PIDFMT_CTXTID;
/* NOTE(review): presumably the non-ETMv3 branch starts here - confirm */
201 val = metadata[CS_ETMV4_TRCCONFIGR];
202 /* CONTEXTIDR_EL2 is traced */
203 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204 return CS_ETM_PIDFMT_CTXTID2;
205 /* CONTEXTIDR_EL1 is traced */
206 else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207 return CS_ETM_PIDFMT_CTXTID;
210 return CS_ETM_PIDFMT_NONE;
213 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
215 return etmq->etm->pid_fmt;
/*
 * Record in traceid_list that @trace_chan_id belongs to @cpu_metadata. The
 * list is keyed by the trace ID and the metadata pointer is kept in the
 * node's priv field.
 */
218 static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
220 struct int_node *inode;
222 /* Get an RB node for this CPU */
223 inode = intlist__findnew(traceid_list, trace_chan_id);
225 /* Something went wrong, no need to continue */
230 * The node for that CPU should not be taken.
231 * Back out if that's the case.
236 /* All good, associate the traceID with the metadata pointer */
237 inode->priv = cpu_metadata;
/*
 * Read the trace ID held in @cpu_metadata into *@trace_chan_id. The metadata
 * magic selects the TRACEIDR slot (ETMv3 has its own, ETMv4 and ETE share
 * one); the value is masked with CORESIGHT_TRACE_ID_VAL_MASK before being
 * narrowed to u8.
 */
242 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
244 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
246 switch (cs_etm_magic) {
247 case __perf_cs_etmv3_magic:
248 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249 CORESIGHT_TRACE_ID_VAL_MASK);
251 case __perf_cs_etmv4_magic:
252 case __perf_cs_ete_magic:
253 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254 CORESIGHT_TRACE_ID_VAL_MASK);
/*
 * Store @trace_chan_id in the TRACEIDR slot of @cpu_metadata selected by the
 * metadata magic. The u8 value replaces the whole 64-bit slot, so any other
 * bits previously held there are dropped.
 */
263 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
266 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
268 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
270 switch (cs_etm_magic) {
271 case __perf_cs_etmv3_magic:
272 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
274 case __perf_cs_etmv4_magic:
275 case __perf_cs_ete_magic:
276 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
/*
 * Linear search of etm->metadata[0 .. num_cpu) for the entry whose
 * CS_ETM_CPU field equals @cpu. get_cpu_data() treats a result of -1 as
 * not found.
 */
286 * Get a metadata index for a specific cpu from an array.
289 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
293 for (i = 0; i < etm->num_cpu; i++) {
294 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
303 * Get a metadata for a specific cpu from an array.
306 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
308 int idx = get_cpu_data_idx(etm, cpu);
310 return (idx != -1) ? etm->metadata[idx] : NULL;
/*
 * Steps visible below: decode the version and trace ID fields of the HW ID
 * payload, locate the cs_etm_auxtrace and the CPU of the sample, then either
 * validate an existing trace ID mapping or create one and write the ID into
 * that CPU's metadata.
 */
314 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
316 * The payload associates the Trace ID and the CPU.
317 * The routine is tolerant of seeing multiple packets with the same association,
318 * but a CPU / Trace ID association changing during a session is an error.
320 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321 union perf_event *event)
323 struct cs_etm_auxtrace *etm;
324 struct perf_sample sample;
325 struct int_node *inode;
329 int cpu, version, err;
330 u8 trace_chan_id, curr_chan_id;
332 /* extract and parse the HW ID */
333 hw_id = event->aux_output_hw_id.hw_id;
334 version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
337 /* check that we can handle this version */
338 if (version > CS_AUX_HW_ID_CURR_VERSION)
341 /* get access to the etm metadata */
/* auxtrace is the first member of struct cs_etm_auxtrace, so a NULL session->auxtrace gives a NULL etm */
342 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
343 if (!etm || !etm->metadata)
346 /* parse the sample to get the CPU */
347 evsel = evlist__event2evsel(session->evlist, event);
350 err = evsel__parse_sample(evsel, event, &sample);
355 /* no CPU in the sample - possibly recorded with an old version of perf */
/* NOTE(review): unlike the other pr_err() calls in this function, this message has no trailing newline */
356 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
360 /* See if the ID is mapped to a CPU, and it matches the current CPU */
361 inode = intlist__find(traceid_list, trace_chan_id);
363 cpu_data = inode->priv;
364 if ((int)cpu_data[CS_ETM_CPU] != cpu) {
365 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
369 /* check that the mapped ID matches */
370 err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
373 if (curr_chan_id != trace_chan_id) {
374 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
378 /* mapped and matched - return OK */
/* No existing mapping: look up this CPU's metadata by CPU number */
382 cpu_data = get_cpu_data(etm, cpu);
383 if (cpu_data == NULL)
386 /* not one we've seen before - lets map it */
387 err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
392 * if we are picking up the association from the packet, need to plug
393 * the correct trace ID into the metadata for setting up decoders later.
395 err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
/*
 * Mark trace_chan_id as the channel of @etmq that has a timestamp pending.
 * cs_etm__etmq_get_timestamp() reads the marker and resets it to 0.
 */
399 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
403 * When a timestamp packet is encountered the backend code
404 * is stopped so that the front end has time to process packets
405 * that were accumulated in the traceID queue. Since there can
406 * be more than one channel per cs_etm_queue, we need to specify
407 * what traceID queue needs servicing.
409 etmq->pending_timestamp_chan_id = trace_chan_id;
/*
 * Hand back the timestamp of the trace ID queue flagged by
 * cs_etm__etmq_set_traceid_queue_timestamp(). The pending channel ID is
 * stored through the trace_chan_id pointer, and the pending marker is reset
 * to 0 before packet_queue->cs_timestamp is returned. A marker value of 0
 * is tested first and means no channel is pending.
 */
412 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
415 struct cs_etm_packet_queue *packet_queue;
417 if (!etmq->pending_timestamp_chan_id)
421 *trace_chan_id = etmq->pending_timestamp_chan_id;
423 packet_queue = cs_etm__etmq_get_packet_queue(etmq,
424 etmq->pending_timestamp_chan_id);
428 /* Acknowledge pending status */
429 etmq->pending_timestamp_chan_id = 0;
431 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
432 return packet_queue->cs_timestamp;
/*
 * Reset @queue: packet_count becomes 0 and each of the
 * CS_ETM_PACKET_MAX_BUFFER slots of packet_buffer is set back to the
 * placeholder values listed below (unknown ISA, invalid addresses, zeroed
 * counters and flags, UINT32_MAX / UINT8_MAX / INT_MIN sentinels).
 */
435 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
441 queue->packet_count = 0;
442 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
443 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
444 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
445 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
446 queue->packet_buffer[i].instr_count = 0;
447 queue->packet_buffer[i].last_instr_taken_branch = false;
448 queue->packet_buffer[i].last_instr_size = 0;
449 queue->packet_buffer[i].last_instr_type = 0;
450 queue->packet_buffer[i].last_instr_subtype = 0;
451 queue->packet_buffer[i].last_instr_cond = 0;
452 queue->packet_buffer[i].flags = 0;
453 queue->packet_buffer[i].exception_number = UINT32_MAX;
454 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
455 queue->packet_buffer[i].cpu = INT_MIN;
/*
 * Clear the packet queue of every trace ID queue owned by @etmq by walking
 * traceid_queues_list.
 */
459 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
462 struct int_node *inode;
463 struct cs_etm_traceid_queue *tidq;
464 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
466 intlist__for_each_entry(inode, traceid_queues_list) {
/* priv carries the index into etmq->traceid_queues, not a pointer */
467 idx = (int)(intptr_t)inode->priv;
468 tidq = etmq->traceid_queues[idx];
469 cs_etm__clear_packet_queue(&tidq->packet_queue);
/*
 * Prepare @tidq for use: clear its packet queue, set the trace channel ID
 * and unknown exception levels, look up the initial threads, and allocate
 * the two packets, the optional last-branch stacks (only when
 * synth_opts.last_branch is set) and the event buffer.
 */
473 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
474 struct cs_etm_traceid_queue *tidq,
478 struct auxtrace_queue *queue;
479 struct cs_etm_auxtrace *etm = etmq->etm;
481 cs_etm__clear_packet_queue(&tidq->packet_queue);
483 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
484 tidq->trace_chan_id = trace_chan_id;
485 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
486 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
488 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
490 tidq->packet = zalloc(sizeof(struct cs_etm_packet));
494 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
495 if (!tidq->prev_packet)
498 if (etm->synth_opts.last_branch) {
/* One branch_stack header followed by last_branch_sz entries */
499 size_t sz = sizeof(struct branch_stack);
501 sz += etm->synth_opts.last_branch_sz *
502 sizeof(struct branch_entry);
503 tidq->last_branch = zalloc(sz);
504 if (!tidq->last_branch)
506 tidq->last_branch_rb = zalloc(sz);
507 if (!tidq->last_branch_rb)
511 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
512 if (!tidq->event_buf)
/*
 * NOTE(review): the cleanup below frees the four allocations, but no
 * thread__put()/thread__zput() for tidq->thread or
 * tidq->prev_packet_thread is visible here - confirm the error path
 * releases those references.
 */
518 zfree(&tidq->last_branch_rb);
519 zfree(&tidq->last_branch);
520 zfree(&tidq->prev_packet);
521 zfree(&tidq->packet);
/*
 * Return the trace ID queue for @trace_chan_id, creating it on first use.
 * With per-thread decoding every ID is replaced by
 * CS_ETM_PER_THREAD_TRACEID. A new queue is zeroed, given the next index
 * (the current number of list entries), initialised, and stored in
 * etmq->traceid_queues[idx] after the array has been regrown.
 */
526 static struct cs_etm_traceid_queue
527 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
530 struct int_node *inode;
531 struct intlist *traceid_queues_list;
532 struct cs_etm_traceid_queue *tidq, **traceid_queues;
533 struct cs_etm_auxtrace *etm = etmq->etm;
535 if (etm->per_thread_decoding)
536 trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
538 traceid_queues_list = etmq->traceid_queues_list;
541 * Check if the traceid_queue exist for this traceID by looking
544 inode = intlist__find(traceid_queues_list, trace_chan_id);
546 idx = (int)(intptr_t)inode->priv;
547 return etmq->traceid_queues[idx];
550 /* We couldn't find a traceid_queue for this traceID, allocate one */
551 tidq = malloc(sizeof(*tidq));
555 memset(tidq, 0, sizeof(*tidq));
557 /* Get a valid index for the new traceid_queue */
558 idx = intlist__nr_entries(traceid_queues_list);
559 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
560 inode = intlist__findnew(traceid_queues_list, trace_chan_id);
564 /* Associate this traceID with this index */
565 inode->priv = (void *)(intptr_t)idx;
567 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
570 /* Grow the traceid_queues array by one unit */
/* The result goes to a local first; etmq->traceid_queues is only updated further down */
571 traceid_queues = etmq->traceid_queues;
572 traceid_queues = reallocarray(traceid_queues,
574 sizeof(*traceid_queues));
577 * On failure reallocarray() returns NULL and the original block of
578 * memory is left untouched.
583 traceid_queues[idx] = tidq;
584 etmq->traceid_queues = traceid_queues;
586 return etmq->traceid_queues[idx];
590 * Function intlist__remove() removes the inode from the list
591 * and delete the memory associated to it.
593 intlist__remove(traceid_queues_list, inode);
/*
 * Return the address of the packet queue embedded in the trace ID queue for
 * @trace_chan_id. The trace ID queue comes from
 * cs_etm__etmq_get_traceid_queue(), which creates it if needed.
 */
599 struct cs_etm_packet_queue
600 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
602 struct cs_etm_traceid_queue *tidq;
604 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
606 return &tidq->packet_queue;
/*
 * When branch, last-branch or instruction synthesis is enabled, exchange
 * tidq->packet and tidq->prev_packet and carry the current exception level
 * and thread over to prev_packet_el and prev_packet_thread.
 */
611 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
612 struct cs_etm_traceid_queue *tidq)
614 struct cs_etm_packet *tmp;
616 if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
617 etm->synth_opts.instructions) {
619 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
620 * the next incoming packet.
622 * Threads and exception levels are also tracked for both the
623 * previous and current packets. This is because the previous
624 * packet is used for the 'from' IP for branch samples, so the
625 * thread at that time must also be assigned to that sample.
626 * Across discontinuity packets the thread can change, so by
627 * tracking the thread for the previous packet the branch sample
628 * will have the correct info.
631 tidq->packet = tidq->prev_packet;
632 tidq->prev_packet = tmp;
633 tidq->prev_packet_el = tidq->el;
/* Drop the old reference before taking a new one on the current thread */
634 thread__put(tidq->prev_packet_thread);
635 tidq->prev_packet_thread = thread__get(tidq->thread);
/*
 * Print one packet string to stdout in PERF_COLOR_BLUE. It is installed as
 * the packet printer in cs_etm__init_decoder_params(). The two
 * color_fprintf() calls differ only in whether a newline is appended; the
 * first is used when @pkt_string already ends with one.
 */
639 static void cs_etm__packet_dump(const char *pkt_string)
641 const char *color = PERF_COLOR_BLUE;
642 int len = strlen(pkt_string);
/* The len test guards the pkt_string[len-1] access for an empty string */
644 if (len && (pkt_string[len-1] == '\n'))
645 color_fprintf(stdout, color, " %s", pkt_string);
647 color_fprintf(stdout, color, " %s\n", pkt_string);
652 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
653 struct cs_etm_auxtrace *etm, int t_idx,
654 int m_idx, u32 etmidr)
656 u64 **metadata = etm->metadata;
658 t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
659 t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
660 t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
/*
 * Fill trace parameter slot t_idx for an ETMv4 source: the protocol is set
 * to CS_ETM_PROTO_ETMV4i and the IDR0/1/2/8, CONFIGR and TRACEIDR values
 * are copied from metadata[m_idx].
 */
663 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
664 struct cs_etm_auxtrace *etm, int t_idx,
667 u64 **metadata = etm->metadata;
669 t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
670 t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
671 t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
672 t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
673 t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
674 t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
675 t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
/*
 * Fill trace parameter slot t_idx for an ETE source: the protocol is set to
 * CS_ETM_PROTO_ETE and the IDR0/1/2/8, CONFIGR, TRACEIDR and DEVARCH values
 * are copied from metadata[m_idx].
 */
678 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
679 struct cs_etm_auxtrace *etm, int t_idx,
682 u64 **metadata = etm->metadata;
684 t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
685 t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
686 t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
687 t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
688 t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
689 t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
690 t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
691 t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
/*
 * Fill one trace parameter slot per decoder. For each slot a metadata index
 * is chosen and the slot is populated by the ETMv3, ETMv4 or ETE helper
 * according to the CS_ETM_MAGIC value of that metadata.
 */
694 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
695 struct cs_etm_auxtrace *etm,
704 for (t_idx = 0; t_idx < decoders; t_idx++) {
/* Metadata lookup by the CPU the sample was taken on */
708 m_idx = get_cpu_data_idx(etm, sample_cpu);
710 pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
715 architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
717 switch (architecture) {
718 case __perf_cs_etmv3_magic:
719 etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
720 cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
722 case __perf_cs_etmv4_magic:
723 cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
725 case __perf_cs_ete_magic:
726 cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
/*
 * Populate @d_params for @etmq: cs_etm__packet_dump() as packet printer,
 * @mode as operation, @etmq as callback data and the formatted flag, with
 * fsyncs and hsyncs off and frame_aligned on. @mode is checked against
 * CS_ETM_OPERATION_MAX first.
 */
736 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
737 struct cs_etm_queue *etmq,
738 enum cs_etm_decoder_operation mode,
743 if (!(mode < CS_ETM_OPERATION_MAX))
746 d_params->packet_printer = cs_etm__packet_dump;
747 d_params->operation = mode;
748 d_params->data = etmq;
749 d_params->formatted = formatted;
750 d_params->fsyncs = false;
751 d_params->hsyncs = false;
752 d_params->frame_aligned = true;
/*
 * Dump the contents of @buffer: print a header with the decoder name and
 * the buffer size, feed the data to the decoder until buffer_used reaches
 * buffer->size, then reset the decoder.
 */
759 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
760 struct auxtrace_buffer *buffer)
763 const char *color = PERF_COLOR_BLUE;
764 size_t buffer_used = 0;
766 fprintf(stdout, "\n");
767 color_fprintf(stdout, color,
768 ". ... CoreSight %s Trace data: size %#zx bytes\n",
769 cs_etm_decoder__get_name(etmq->decoder), buffer->size);
/* Each call is handed the part of the buffer that has not been consumed yet */
774 ret = cs_etm_decoder__process_data_block(
775 etmq->decoder, buffer->offset,
776 &((u8 *)buffer->data)[buffer_used],
777 buffer->size - buffer_used, &consumed);
781 buffer_used += consumed;
782 } while (buffer_used < buffer->size);
784 cs_etm_decoder__reset(etmq->decoder);
/*
 * Flush handler: with timeless decoding all queues are processed through
 * cs_etm__process_timeless_queues() with tid -1, otherwise through
 * cs_etm__process_timestamped_queues(). tool->ordered_events is tested
 * before either path.
 */
787 static int cs_etm__flush_events(struct perf_session *session,
788 struct perf_tool *tool)
790 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
791 struct cs_etm_auxtrace,
796 if (!tool->ordered_events)
799 if (etm->timeless_decoding) {
801 * Pass tid = -1 to process all queues. But likely they will have
802 * already been processed on PERF_RECORD_EXIT anyway.
804 return cs_etm__process_timeless_queues(etm, -1);
807 return cs_etm__process_timestamped_queues(etm);
/*
 * Release every trace ID queue of @etmq: for each list node, drop the
 * thread references and free the buffers owned by the queue, then remove
 * the node. Finally delete the list itself and free the traceid_queues
 * array.
 */
810 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
814 struct int_node *inode, *tmp;
815 struct cs_etm_traceid_queue *tidq;
816 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
/* The _safe iterator is used because nodes are removed inside the loop */
818 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
819 priv = (uintptr_t)inode->priv;
822 /* Free this traceid_queue from the array */
823 tidq = etmq->traceid_queues[idx];
824 thread__zput(tidq->thread);
825 thread__zput(tidq->prev_packet_thread);
826 zfree(&tidq->event_buf);
827 zfree(&tidq->last_branch);
828 zfree(&tidq->last_branch_rb);
829 zfree(&tidq->prev_packet);
830 zfree(&tidq->packet);
834 * Function intlist__remove() removes the inode from the list
835 * and delete the memory associated to it.
837 intlist__remove(traceid_queues_list, inode);
840 /* Then the RB tree itself */
841 intlist__delete(traceid_queues_list);
842 etmq->traceid_queues_list = NULL;
844 /* finally free the traceid_queues array */
845 zfree(&etmq->traceid_queues);
/*
 * Release a cs_etm_queue passed as an opaque pointer: its decoder and its
 * trace ID queues.
 */
848 static void cs_etm__free_queue(void *priv)
850 struct cs_etm_queue *etmq = priv;
855 cs_etm_decoder__free(etmq->decoder);
856 cs_etm__free_traceid_queues(etmq);
/*
 * Free the cs_etm_queue attached to every auxtrace queue of the session,
 * clearing each priv pointer, then release the auxtrace queues themselves.
 */
860 static void cs_etm__free_events(struct perf_session *session)
863 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
864 struct cs_etm_auxtrace,
866 struct auxtrace_queues *queues = &aux->queues;
868 for (i = 0; i < queues->nr_queues; i++) {
869 cs_etm__free_queue(queues->queue_array[i].priv);
870 queues->queue_array[i].priv = NULL;
873 auxtrace_queues__free(queues);
/*
 * Tear down the CoreSight auxtrace state of @session: free the queues,
 * detach session->auxtrace, empty and delete traceid_list, then free the
 * per-CPU metadata arrays and the array of pointers to them.
 */
876 static void cs_etm__free(struct perf_session *session)
879 struct int_node *inode, *tmp;
/* aux is computed before session->auxtrace is cleared below */
880 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
881 struct cs_etm_auxtrace,
883 cs_etm__free_events(session);
884 session->auxtrace = NULL;
886 /* First remove all traceID/metadata nodes for the RB tree */
887 intlist__for_each_entry_safe(inode, tmp, traceid_list)
888 intlist__remove(traceid_list, inode);
889 /* Then the RB tree itself */
/* NOTE(review): no reset of the file-scope traceid_list pointer is visible after this delete - confirm */
890 intlist__delete(traceid_list);
892 for (i = 0; i < aux->num_cpu; i++)
893 zfree(&aux->metadata[i]);
895 zfree(&aux->metadata);
/*
 * Return true when the evsel's attr type equals the pmu_type stored in the
 * session's cs_etm_auxtrace.
 */
899 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
902 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
903 struct cs_etm_auxtrace,
906 return evsel->core.attr.type == aux->pmu_type;
/*
 * Pick the machine (host or default guest) to attribute trace to. With the
 * CS_ETM_PIDFMT_CTXTID format the host is always returned; otherwise the
 * choice depends on the exception level, with the guest looked up by
 * DEFAULT_GUEST_KERNEL_ID.
 */
909 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
912 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
915 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
916 * running at EL1 assume everything is the host.
918 if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
919 return &etmq->etm->session->machines.host;
922 * Not perfect, but otherwise assume anything in EL1 is the default
923 * guest, and everything else is the host. Distinguishing between guest
924 * and host userspaces isn't currently supported either. Neither is
925 * multiple guest support. All this does is reduce the likeliness of
926 * decode errors where we look into the host kernel maps when it should
927 * have been the guest maps.
931 return machines__find_guest(&etmq->etm->session->machines,
932 DEFAULT_GUEST_KERNEL_ID);
936 case ocsd_EL_unknown:
938 return &etmq->etm->session->machines.host;
/*
 * Translate an address and exception level into a PERF_RECORD_MISC_*
 * cpumode. Addresses at or above the machine's kernel start are kernel
 * mode, the rest user mode; the host or guest variant is picked with
 * machine__is_host().
 */
942 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
945 struct machine *machine = cs_etm__get_machine(etmq, el);
947 if (address >= machine__kernel_start(machine)) {
948 if (machine__is_host(machine))
949 return PERF_RECORD_MISC_KERNEL;
951 return PERF_RECORD_MISC_GUEST_KERNEL;
953 if (machine__is_host(machine))
954 return PERF_RECORD_MISC_USER;
957 * Can't really happen at the moment because
958 * cs_etm__get_machine() will always return
959 * machines.host for any non EL1 trace.
961 return PERF_RECORD_MISC_GUEST_USER;
/*
 * Memory access callback, registered with the decoder in
 * cs_etm__alloc_queue(). It resolves @address to a map of the trace ID
 * queue's thread and reads @size bytes from the backing DSO into @buffer.
 * A one-off warning, plus one error message per DSO, is printed when the
 * data cannot be found.
 */
966 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
967 u64 address, size_t size, u8 *buffer,
968 const ocsd_mem_space_acc_t mem_space)
973 struct addr_location al;
975 struct cs_etm_traceid_queue *tidq;
/* Paired with addr_location__exit() at the end of the function */
981 addr_location__init(&al);
982 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
987 * We've already tracked EL along side the PID in cs_etm__set_thread()
988 * so double check that it matches what OpenCSD thinks as well. It
989 * doesn't distinguish between EL0 and EL1 for this mem access callback
990 * so we had to do the extra tracking. Skip validation if it's any of
993 if (!(mem_space == OCSD_MEM_SPACE_ANY ||
994 mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
995 if (mem_space & OCSD_MEM_SPACE_EL1N) {
996 /* Includes both non secure EL1 and EL0 */
997 assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
998 } else if (mem_space & OCSD_MEM_SPACE_EL2)
999 assert(tidq->el == ocsd_EL2);
1000 else if (mem_space & OCSD_MEM_SPACE_EL3)
1001 assert(tidq->el == ocsd_EL3);
1004 cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1006 if (!thread__find_map(tidq->thread, cpumode, address, &al))
1009 dso = map__dso(al.map);
1013 if (dso->data.status == DSO_DATA_STATUS_ERROR &&
1014 dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
/* Convert the traced address into an offset usable with the DSO data */
1017 offset = map__map_ip(al.map, address);
1021 len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1022 offset, buffer, size);
1025 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1026 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
/* auxtrace_warned limits the message below to once per DSO */
1027 if (!dso->auxtrace_warned) {
1028 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1030 dso->long_name ? dso->long_name : "Unknown");
1031 dso->auxtrace_warned = true;
1037 addr_location__exit(&al);
/*
 * Allocate a cs_etm_queue with its trace ID list, build the trace and
 * decoder parameters, create the decoder and register
 * cs_etm__mem_access() as its memory access callback. One decoder per CPU
 * is requested for formatted data, a single one otherwise.
 */
1041 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1042 bool formatted, int sample_cpu)
1044 struct cs_etm_decoder_params d_params;
1045 struct cs_etm_trace_params *t_params = NULL;
1046 struct cs_etm_queue *etmq;
1048 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1051 int decoders = formatted ? etm->num_cpu : 1;
1053 etmq = zalloc(sizeof(*etmq));
1057 etmq->traceid_queues_list = intlist__new(NULL);
1058 if (!etmq->traceid_queues_list)
1061 /* Use metadata to fill in trace parameters for trace decoder */
1062 t_params = zalloc(sizeof(*t_params) * decoders);
1067 if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
1070 /* Set decoder parameters to decode trace packets */
/* dump_trace selects the print operation instead of the decode operation */
1071 if (cs_etm__init_decoder_params(&d_params, etmq,
1072 dump_trace ? CS_ETM_OPERATION_PRINT :
1073 CS_ETM_OPERATION_DECODE,
1077 etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1084 * Register a function to handle all memory accesses required by
1085 * the trace decoder library.
1087 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1089 cs_etm__mem_access))
1090 goto out_free_decoder;
/* Error unwinding: resources are released in reverse order of creation */
1096 cs_etm_decoder__free(etmq->decoder);
1098 intlist__delete(etmq->traceid_queues_list);
/*
 * Attach a newly allocated cs_etm_queue to @queue and record @queue_nr in
 * it. The state of the queue is tested first: whether its buffer list is
 * empty and whether it already has a cs_etm_queue in priv.
 */
1104 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1105 struct auxtrace_queue *queue,
1106 unsigned int queue_nr,
1110 struct cs_etm_queue *etmq = queue->priv;
1112 if (list_empty(&queue->head) || etmq)
1115 etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
1122 etmq->queue_nr = queue_nr;
/*
 * Decode data blocks of @etmq until a CoreSight timestamp is found,
 * discarding packets decoded without one, then add the queue to etm->heap
 * keyed by that timestamp. The heap entry number combines @queue_nr and
 * the trace channel ID through TO_CS_QUEUE_NR().
 */
1128 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1129 struct cs_etm_queue *etmq,
1130 unsigned int queue_nr)
1133 unsigned int cs_queue_nr;
1138 * We are under a CPU-wide trace scenario. As such we need to know
1139 * when the code that generated the traces started to execute so that
1140 * it can be correlated with execution on other CPUs. So we get a
1141 * handle on the beginning of traces and decode until we find a
1142 * timestamp. The timestamp is then added to the auxtrace min heap
1143 * in order to know what nibble (of all the etmqs) to decode first.
1147 * Fetch an aux_buffer from this etmq. Bail if no more
1148 * blocks or an error has been encountered.
1150 ret = cs_etm__get_data_block(etmq);
1155 * Run decoder on the trace block. The decoder will stop when
1156 * encountering a CS timestamp, a full packet queue or the end of
1157 * trace for that block.
1159 ret = cs_etm__decode_data_block(etmq);
1164 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1165 * the timestamp calculation for us.
1167 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1169 /* We found a timestamp, no need to continue. */
1174 * We didn't find a timestamp so empty all the traceid packet
1175 * queues before looking for another timestamp packet, either
1176 * in the current data block or a new one. Packets that were
1177 * just decoded are useless since no timestamp has been
1178 * associated with them. As such simply discard them.
1180 cs_etm__clear_all_packet_queues(etmq);
1184 * We have a timestamp. Add it to the min heap to reflect when
1185 * instructions conveyed by the range packets of this traceID queue
1186 * started to execute. Once the same has been done for all the traceID
1187 * queues of each etmq, redenring and decoding can start in
1188 * chronological order.
1190 * Note that packets decoded above are still in the traceID's packet
1191 * queue and will be processed in cs_etm__process_timestamped_queues().
1193 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1194 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
/*
 * Copy the circular last-branch buffer (tidq->last_branch_rb) into
 * tidq->last_branch. The copy starts at tidq->last_branch_pos and is done
 * in up to two memcpy() steps because the source wraps around.
 */
1200 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1201 struct cs_etm_traceid_queue *tidq)
1203 struct branch_stack *bs_src = tidq->last_branch_rb;
1204 struct branch_stack *bs_dst = tidq->last_branch;
1208 * Set the number of records before early exit: ->nr is used to
1209 * determine how many branches to copy from ->entries.
1211 bs_dst->nr = bs_src->nr;
1214 * Early exit when there is nothing to copy.
1220 * As bs_src->entries is a circular buffer, we need to copy from it in
1221 * two steps. First, copy the branches from the most recently inserted
1222 * branch ->last_branch_pos until the end of bs_src->entries buffer.
/* nr is the number of entries from last_branch_pos up to the end of the buffer */
1224 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1225 memcpy(&bs_dst->entries[0],
1226 &bs_src->entries[tidq->last_branch_pos],
1227 sizeof(struct branch_entry) * nr);
1230 * If we wrapped around at least once, the branches from the beginning
1231 * of the bs_src->entries buffer and until the ->last_branch_pos element
1232 * are older valid branches: copy them over. The total number of
1233 * branches copied over will be equal to the number of branches asked by
1234 * the user in last_branch_sz.
1236 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1237 memcpy(&bs_dst->entries[nr],
1238 &bs_src->entries[0],
1239 sizeof(struct branch_entry) * tidq->last_branch_pos);
/*
 * Empty the circular last-branch buffer of @tidq: the insert position and
 * the entry count both go back to 0.
 */
1244 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1246 tidq->last_branch_pos = 0;
1247 tidq->last_branch_rb->nr = 0;
/*
 * Return the size in bytes (2 or 4) of the T32 instruction at @addr, decided
 * from the top five bits of instrBytes[1] after reading the instruction
 * bytes through cs_etm__mem_access().
 */
1250 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1251 u8 trace_chan_id, u64 addr)
/* NOTE(review): the result of this read is not checked in the visible lines; instrBytes is used regardless - confirm */
1255 cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1258 * T32 instruction size is indicated by bits[15:11] of the first
1259 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1260 * denote a 32-bit instruction.
1262 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
/*
 * Address of the first instruction covered by @packet, i.e. its start_addr.
 * CS_ETM_DISCONTINUITY packets are special-cased before that.
 */
1265 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1267 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1268 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1271 return packet->start_addr;
/*
 * Address of the last instruction covered by @packet: end_addr minus the
 * size of the last instruction. CS_ETM_DISCONTINUITY packets are
 * special-cased before that.
 */
1275 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1277 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1278 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1281 return packet->end_addr - packet->last_instr_size;
/*
 * Address of the instruction located offset instructions into @packet. For
 * the T32 ISA the address is advanced by the size that
 * cs_etm__t32_instr_size() reports; for other ISAs a fixed 4-byte
 * instruction size is assumed.
 */
1284 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1286 const struct cs_etm_packet *packet,
1289 if (packet->isa == CS_ETM_ISA_T32) {
1290 u64 addr = packet->start_addr;
1293 addr += cs_etm__t32_instr_size(etmq,
1294 trace_chan_id, addr);
1300 /* Assume a 4 byte instruction size (A32/A64) */
1301 return packet->start_addr + offset * 4;
1304 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1305 struct cs_etm_traceid_queue *tidq)
1307 struct branch_stack *bs = tidq->last_branch_rb;
1308 struct branch_entry *be;
1311 * The branches are recorded in a circular buffer in reverse
1312 * chronological order: we start recording from the last element of the
1313 * buffer down. After writing the first element of the stack, move the
1314 * insert position back to the end of the buffer.
1316 if (!tidq->last_branch_pos)
1317 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1319 tidq->last_branch_pos -= 1;
1321 be = &bs->entries[tidq->last_branch_pos];
1322 be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1323 be->to = cs_etm__first_executed_instr(tidq->packet);
1324 /* No support for mispredict */
1325 be->flags.mispred = 0;
1326 be->flags.predicted = 1;
1329 * Increment bs->nr until reaching the number of last branches asked by
1330 * the user on the command line.
1332 if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1336 static int cs_etm__inject_event(union perf_event *event,
1337 struct perf_sample *sample, u64 type)
1339 event->header.size = perf_event__sample_event_size(sample, type, 0);
1340 return perf_event__synthesize_sample(event, type, 0, sample);
1345 cs_etm__get_trace(struct cs_etm_queue *etmq)
1347 struct auxtrace_buffer *aux_buffer = etmq->buffer;
1348 struct auxtrace_buffer *old_buffer = aux_buffer;
1349 struct auxtrace_queue *queue;
1351 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1353 aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1355 /* If no more data, drop the previous auxtrace_buffer and return */
1358 auxtrace_buffer__drop_data(old_buffer);
1363 etmq->buffer = aux_buffer;
1365 /* If the aux_buffer doesn't have data associated, try to load it */
1366 if (!aux_buffer->data) {
1367 /* get the file desc associated with the perf data file */
1368 int fd = perf_data__fd(etmq->etm->session->data);
1370 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1371 if (!aux_buffer->data)
1375 /* If valid, drop the previous buffer */
1377 auxtrace_buffer__drop_data(old_buffer);
1380 etmq->buf_len = aux_buffer->size;
1381 etmq->buf = aux_buffer->data;
1383 return etmq->buf_len;
1386 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1387 struct cs_etm_traceid_queue *tidq, pid_t tid,
1390 struct machine *machine = cs_etm__get_machine(etmq, el);
1393 thread__zput(tidq->thread);
1394 tidq->thread = machine__find_thread(machine, -1, tid);
1397 /* Couldn't find a known thread */
1399 tidq->thread = machine__idle_thread(machine);
1404 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1405 u8 trace_chan_id, ocsd_ex_level el)
1407 struct cs_etm_traceid_queue *tidq;
1409 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1413 cs_etm__set_thread(etmq, tidq, tid, el);
1417 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1419 return !!etmq->etm->timeless_decoding;
1422 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1424 const struct cs_etm_packet *packet,
1425 struct perf_sample *sample)
1428 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1429 * packet, so directly bail out with 'insn_len' = 0.
1431 if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1432 sample->insn_len = 0;
1437 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1438 * cs_etm__t32_instr_size().
1440 if (packet->isa == CS_ETM_ISA_T32)
1441 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1443 /* Otherwise, A64 and A32 instruction size are always 32-bit. */
1445 sample->insn_len = 4;
1447 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1448 (void *)sample->insn, 0);
1451 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1453 struct cs_etm_auxtrace *etm = etmq->etm;
1455 if (etm->has_virtual_ts)
1456 return tsc_to_perf_time(cs_timestamp, &etm->tc);
1458 return cs_timestamp;
1461 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1462 struct cs_etm_traceid_queue *tidq)
1464 struct cs_etm_auxtrace *etm = etmq->etm;
1465 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1467 if (!etm->timeless_decoding && etm->has_virtual_ts)
1468 return packet_queue->cs_timestamp;
1470 return etm->latest_kernel_timestamp;
1473 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1474 struct cs_etm_traceid_queue *tidq,
1475 u64 addr, u64 period)
1478 struct cs_etm_auxtrace *etm = etmq->etm;
1479 union perf_event *event = tidq->event_buf;
1480 struct perf_sample sample = {.ip = 0,};
1482 event->sample.header.type = PERF_RECORD_SAMPLE;
1483 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1484 event->sample.header.size = sizeof(struct perf_event_header);
1486 /* Set time field based on etm auxtrace config. */
1487 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1490 sample.pid = thread__pid(tidq->thread);
1491 sample.tid = thread__tid(tidq->thread);
1492 sample.id = etmq->etm->instructions_id;
1493 sample.stream_id = etmq->etm->instructions_id;
1494 sample.period = period;
1495 sample.cpu = tidq->packet->cpu;
1496 sample.flags = tidq->prev_packet->flags;
1497 sample.cpumode = event->sample.header.misc;
1499 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1501 if (etm->synth_opts.last_branch)
1502 sample.branch_stack = tidq->last_branch;
1504 if (etm->synth_opts.inject) {
1505 ret = cs_etm__inject_event(event, &sample,
1506 etm->instructions_sample_type);
1511 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1515 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1522 * The cs etm packet encodes an instruction range between a branch target
1523 * and the next taken branch. Generate sample accordingly.
1525 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1526 struct cs_etm_traceid_queue *tidq)
1529 struct cs_etm_auxtrace *etm = etmq->etm;
1530 struct perf_sample sample = {.ip = 0,};
1531 union perf_event *event = tidq->event_buf;
1532 struct dummy_branch_stack {
1535 struct branch_entry entries;
1539 ip = cs_etm__last_executed_instr(tidq->prev_packet);
1541 event->sample.header.type = PERF_RECORD_SAMPLE;
1542 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1543 tidq->prev_packet_el);
1544 event->sample.header.size = sizeof(struct perf_event_header);
1546 /* Set time field based on etm auxtrace config. */
1547 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1550 sample.pid = thread__pid(tidq->prev_packet_thread);
1551 sample.tid = thread__tid(tidq->prev_packet_thread);
1552 sample.addr = cs_etm__first_executed_instr(tidq->packet);
1553 sample.id = etmq->etm->branches_id;
1554 sample.stream_id = etmq->etm->branches_id;
1556 sample.cpu = tidq->packet->cpu;
1557 sample.flags = tidq->prev_packet->flags;
1558 sample.cpumode = event->sample.header.misc;
1560 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1564 * perf report cannot handle events without a branch stack
1566 if (etm->synth_opts.last_branch) {
1567 dummy_bs = (struct dummy_branch_stack){
1575 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1578 if (etm->synth_opts.inject) {
1579 ret = cs_etm__inject_event(event, &sample,
1580 etm->branches_sample_type);
1585 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1589 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1595 struct cs_etm_synth {
1596 struct perf_tool dummy_tool;
1597 struct perf_session *session;
1600 static int cs_etm__event_synth(struct perf_tool *tool,
1601 union perf_event *event,
1602 struct perf_sample *sample __maybe_unused,
1603 struct machine *machine __maybe_unused)
1605 struct cs_etm_synth *cs_etm_synth =
1606 container_of(tool, struct cs_etm_synth, dummy_tool);
1608 return perf_session__deliver_synth_event(cs_etm_synth->session,
1612 static int cs_etm__synth_event(struct perf_session *session,
1613 struct perf_event_attr *attr, u64 id)
1615 struct cs_etm_synth cs_etm_synth;
1617 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1618 cs_etm_synth.session = session;
1620 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1621 &id, cs_etm__event_synth);
1624 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1625 struct perf_session *session)
1627 struct evlist *evlist = session->evlist;
1628 struct evsel *evsel;
1629 struct perf_event_attr attr;
1634 evlist__for_each_entry(evlist, evsel) {
1635 if (evsel->core.attr.type == etm->pmu_type) {
1642 pr_debug("No selected events with CoreSight Trace data\n");
1646 memset(&attr, 0, sizeof(struct perf_event_attr));
1647 attr.size = sizeof(struct perf_event_attr);
1648 attr.type = PERF_TYPE_HARDWARE;
1649 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1650 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1652 if (etm->timeless_decoding)
1653 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1655 attr.sample_type |= PERF_SAMPLE_TIME;
1657 attr.exclude_user = evsel->core.attr.exclude_user;
1658 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1659 attr.exclude_hv = evsel->core.attr.exclude_hv;
1660 attr.exclude_host = evsel->core.attr.exclude_host;
1661 attr.exclude_guest = evsel->core.attr.exclude_guest;
1662 attr.sample_id_all = evsel->core.attr.sample_id_all;
1663 attr.read_format = evsel->core.attr.read_format;
1665 /* create new id val to be a fixed offset from evsel id */
1666 id = evsel->core.id[0] + 1000000000;
1671 if (etm->synth_opts.branches) {
1672 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1673 attr.sample_period = 1;
1674 attr.sample_type |= PERF_SAMPLE_ADDR;
1675 err = cs_etm__synth_event(session, &attr, id);
1678 etm->branches_sample_type = attr.sample_type;
1679 etm->branches_id = id;
1681 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1684 if (etm->synth_opts.last_branch) {
1685 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1687 * We don't use the hardware index, but the sample generation
1688 * code uses the new format branch_stack with this field,
1689 * so the event attributes must indicate that it's present.
1691 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1694 if (etm->synth_opts.instructions) {
1695 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1696 attr.sample_period = etm->synth_opts.period;
1697 etm->instructions_sample_period = attr.sample_period;
1698 err = cs_etm__synth_event(session, &attr, id);
1701 etm->instructions_sample_type = attr.sample_type;
1702 etm->instructions_id = id;
1709 static int cs_etm__sample(struct cs_etm_queue *etmq,
1710 struct cs_etm_traceid_queue *tidq)
1712 struct cs_etm_auxtrace *etm = etmq->etm;
1714 u8 trace_chan_id = tidq->trace_chan_id;
1717 /* Get instructions remainder from previous packet */
1718 instrs_prev = tidq->period_instructions;
1720 tidq->period_instructions += tidq->packet->instr_count;
1723 * Record a branch when the last instruction in
1724 * PREV_PACKET is a branch.
1726 if (etm->synth_opts.last_branch &&
1727 tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1728 tidq->prev_packet->last_instr_taken_branch)
1729 cs_etm__update_last_branch_rb(etmq, tidq);
1731 if (etm->synth_opts.instructions &&
1732 tidq->period_instructions >= etm->instructions_sample_period) {
1734 * Emit instruction sample periodically
1735 * TODO: allow period to be defined in cycles and clock time
1739 * Below diagram demonstrates the instruction samples
1742 * Instrs Instrs Instrs Instrs
1743 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
1746 * --------------------------------------------------
1750 * instructions(Pi) instructions(Pi')
1753 * \---------------- -----------------/
1755 * tidq->packet->instr_count
1757 * Instrs Sample(n...) are the synthesised samples occurring
1758 * every etm->instructions_sample_period instructions - as
1759 * defined on the perf command line. Sample(n) is being the
1760 * last sample before the current etm packet, n+1 to n+3
1761 * samples are generated from the current etm packet.
1763 * tidq->packet->instr_count represents the number of
1764 * instructions in the current etm packet.
1766 * Period instructions (Pi) contains the number of
1767 * instructions executed after the sample point(n) from the
1768 * previous etm packet. This will always be less than
1769 * etm->instructions_sample_period.
1771 * When generate new samples, it combines with two parts
1772 * instructions, one is the tail of the old packet and another
1773 * is the head of the new coming packet, to generate
1774 * sample(n+1); sample(n+2) and sample(n+3) consume the
1775 * instructions with sample period. After sample(n+3), the rest
1776 * instructions will be used by later packet and it is assigned
1777 * to tidq->period_instructions for next round calculation.
1781 * Get the initial offset into the current packet instructions;
1782 * entry conditions ensure that instrs_prev is less than
1783 * etm->instructions_sample_period.
1785 u64 offset = etm->instructions_sample_period - instrs_prev;
1788 /* Prepare last branches for instruction sample */
1789 if (etm->synth_opts.last_branch)
1790 cs_etm__copy_last_branch_rb(etmq, tidq);
1792 while (tidq->period_instructions >=
1793 etm->instructions_sample_period) {
1795 * Calculate the address of the sampled instruction (-1
1796 * as sample is reported as though instruction has just
1797 * been executed, but PC has not advanced to next
1800 addr = cs_etm__instr_addr(etmq, trace_chan_id,
1801 tidq->packet, offset - 1);
1802 ret = cs_etm__synth_instruction_sample(
1804 etm->instructions_sample_period);
1808 offset += etm->instructions_sample_period;
1809 tidq->period_instructions -=
1810 etm->instructions_sample_period;
1814 if (etm->synth_opts.branches) {
1815 bool generate_sample = false;
1817 /* Generate sample for tracing on packet */
1818 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1819 generate_sample = true;
1821 /* Generate sample for branch taken packet */
1822 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1823 tidq->prev_packet->last_instr_taken_branch)
1824 generate_sample = true;
1826 if (generate_sample) {
1827 ret = cs_etm__synth_branch_sample(etmq, tidq);
1833 cs_etm__packet_swap(etm, tidq);
1838 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1841 * When the exception packet is inserted, whether the last instruction
1842 * in previous range packet is taken branch or not, we need to force
1843 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures
1844 * to generate branch sample for the instruction range before the
1845 * exception is trapped to kernel or before the exception returning.
1847 * The exception packet includes the dummy address values, so don't
1848 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1849 * for generating instruction and branch samples.
1851 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1852 tidq->prev_packet->last_instr_taken_branch = true;
1857 static int cs_etm__flush(struct cs_etm_queue *etmq,
1858 struct cs_etm_traceid_queue *tidq)
1861 struct cs_etm_auxtrace *etm = etmq->etm;
1863 /* Handle start tracing packet */
1864 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1867 if (etmq->etm->synth_opts.last_branch &&
1868 etmq->etm->synth_opts.instructions &&
1869 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1872 /* Prepare last branches for instruction sample */
1873 cs_etm__copy_last_branch_rb(etmq, tidq);
1876 * Generate a last branch event for the branches left in the
1877 * circular buffer at the end of the trace.
1879 * Use the address of the end of the last reported execution
1882 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1884 err = cs_etm__synth_instruction_sample(
1886 tidq->period_instructions);
1890 tidq->period_instructions = 0;
1894 if (etm->synth_opts.branches &&
1895 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1896 err = cs_etm__synth_branch_sample(etmq, tidq);
1902 cs_etm__packet_swap(etm, tidq);
1904 /* Reset last branches after flush the trace */
1905 if (etm->synth_opts.last_branch)
1906 cs_etm__reset_last_branch_rb(tidq);
1911 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1912 struct cs_etm_traceid_queue *tidq)
1917 * It has no new packet coming and 'etmq->packet' contains the stale
1918 * packet which was set at the previous time with packets swapping;
1919 * so skip to generate branch sample to avoid stale packet.
1921 * For this case only flush branch stack and generate a last branch
1922 * event for the branches left in the circular buffer at the end of
1925 if (etmq->etm->synth_opts.last_branch &&
1926 etmq->etm->synth_opts.instructions &&
1927 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1930 /* Prepare last branches for instruction sample */
1931 cs_etm__copy_last_branch_rb(etmq, tidq);
1934 * Use the address of the end of the last reported execution
1937 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1939 err = cs_etm__synth_instruction_sample(
1941 tidq->period_instructions);
1945 tidq->period_instructions = 0;
1951 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1953 * Returns: < 0 if error
1954 * = 0 if no more auxtrace_buffer to read
1955 * > 0 if the current buffer isn't empty yet
1957 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1961 if (!etmq->buf_len) {
1962 ret = cs_etm__get_trace(etmq);
1966 * We cannot assume consecutive blocks in the data file
1967 * are contiguous, reset the decoder to force re-sync.
1969 ret = cs_etm_decoder__reset(etmq->decoder);
1974 return etmq->buf_len;
1977 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1978 struct cs_etm_packet *packet,
1981 /* Initialise to keep compiler happy */
1986 switch (packet->isa) {
1987 case CS_ETM_ISA_T32:
1989 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1992 * +-----------------+--------+
1993 * | 1 1 0 1 1 1 1 1 | imm8 |
1994 * +-----------------+--------+
1996 * According to the specification, it only defines SVC for T32
1997 * with 16 bits instruction and has no definition for 32bits;
1998 * so below only read 2 bytes as instruction size for T32.
2000 addr = end_addr - 2;
2001 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2003 if ((instr16 & 0xFF00) == 0xDF00)
2007 case CS_ETM_ISA_A32:
2009 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2011 * b'31 b'28 b'27 b'24
2012 * +---------+---------+-------------------------+
2013 * | !1111 | 1 1 1 1 | imm24 |
2014 * +---------+---------+-------------------------+
2016 addr = end_addr - 4;
2017 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2019 if ((instr32 & 0x0F000000) == 0x0F000000 &&
2020 (instr32 & 0xF0000000) != 0xF0000000)
2024 case CS_ETM_ISA_A64:
2026 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2029 * +-----------------------+---------+-----------+
2030 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
2031 * +-----------------------+---------+-----------+
2033 addr = end_addr - 4;
2034 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2036 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2040 case CS_ETM_ISA_UNKNOWN:
2048 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2049 struct cs_etm_traceid_queue *tidq, u64 magic)
2051 u8 trace_chan_id = tidq->trace_chan_id;
2052 struct cs_etm_packet *packet = tidq->packet;
2053 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2055 if (magic == __perf_cs_etmv3_magic)
2056 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2060 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2061 * HVC cases; need to check if it's SVC instruction based on
2064 if (magic == __perf_cs_etmv4_magic) {
2065 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2066 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2067 prev_packet->end_addr))
2074 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2077 struct cs_etm_packet *packet = tidq->packet;
2079 if (magic == __perf_cs_etmv3_magic)
2080 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2081 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2082 packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2083 packet->exception_number == CS_ETMV3_EXC_IRQ ||
2084 packet->exception_number == CS_ETMV3_EXC_FIQ)
2087 if (magic == __perf_cs_etmv4_magic)
2088 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2089 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2090 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2091 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2092 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2093 packet->exception_number == CS_ETMV4_EXC_IRQ ||
2094 packet->exception_number == CS_ETMV4_EXC_FIQ)
2100 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2101 struct cs_etm_traceid_queue *tidq,
2104 u8 trace_chan_id = tidq->trace_chan_id;
2105 struct cs_etm_packet *packet = tidq->packet;
2106 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2108 if (magic == __perf_cs_etmv3_magic)
2109 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2110 packet->exception_number == CS_ETMV3_EXC_HYP ||
2111 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2112 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2113 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2114 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2115 packet->exception_number == CS_ETMV3_EXC_GENERIC)
2118 if (magic == __perf_cs_etmv4_magic) {
2119 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2120 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2121 packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2122 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2126 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2127 * (SMC, HVC) are taken as sync exceptions.
2129 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2130 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2131 prev_packet->end_addr))
2135 * ETMv4 has 5 bits for exception number; if the numbers
2136 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2137 * they are implementation defined exceptions.
2139 * For this case, simply take it as sync exception.
2141 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2142 packet->exception_number <= CS_ETMV4_EXC_END)
2149 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2150 struct cs_etm_traceid_queue *tidq)
2152 struct cs_etm_packet *packet = tidq->packet;
2153 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2154 u8 trace_chan_id = tidq->trace_chan_id;
2158 switch (packet->sample_type) {
2161 * Immediate branch instruction without neither link nor
2162 * return flag, it's normal branch instruction within
2165 if (packet->last_instr_type == OCSD_INSTR_BR &&
2166 packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2167 packet->flags = PERF_IP_FLAG_BRANCH;
2169 if (packet->last_instr_cond)
2170 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2174 * Immediate branch instruction with link (e.g. BL), this is
2175 * branch instruction for function call.
2177 if (packet->last_instr_type == OCSD_INSTR_BR &&
2178 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2179 packet->flags = PERF_IP_FLAG_BRANCH |
2183 * Indirect branch instruction with link (e.g. BLR), this is
2184 * branch instruction for function call.
2186 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2187 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2188 packet->flags = PERF_IP_FLAG_BRANCH |
2192 * Indirect branch instruction with subtype of
2193 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2194 * function return for A32/T32.
2196 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2197 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2198 packet->flags = PERF_IP_FLAG_BRANCH |
2199 PERF_IP_FLAG_RETURN;
2202 * Indirect branch instruction without link (e.g. BR), usually
2203 * this is used for function return, especially for functions
2204 * within dynamic link lib.
2206 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2207 packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2208 packet->flags = PERF_IP_FLAG_BRANCH |
2209 PERF_IP_FLAG_RETURN;
2211 /* Return instruction for function return. */
2212 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2213 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2214 packet->flags = PERF_IP_FLAG_BRANCH |
2215 PERF_IP_FLAG_RETURN;
2218 * Decoder might insert a discontinuity in the middle of
2219 * instruction packets, fixup prev_packet with flag
2220 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2222 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2223 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2224 PERF_IP_FLAG_TRACE_BEGIN;
2227 * If the previous packet is an exception return packet
2228 * and the return address just follows SVC instruction,
2229 * it needs to calibrate the previous packet sample flags
2230 * as PERF_IP_FLAG_SYSCALLRET.
2232 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2233 PERF_IP_FLAG_RETURN |
2234 PERF_IP_FLAG_INTERRUPT) &&
2235 cs_etm__is_svc_instr(etmq, trace_chan_id,
2236 packet, packet->start_addr))
2237 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2238 PERF_IP_FLAG_RETURN |
2239 PERF_IP_FLAG_SYSCALLRET;
2241 case CS_ETM_DISCONTINUITY:
2243 * The trace is discontinuous, if the previous packet is
2244 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2245 * for previous packet.
2247 if (prev_packet->sample_type == CS_ETM_RANGE)
2248 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2249 PERF_IP_FLAG_TRACE_END;
2251 case CS_ETM_EXCEPTION:
2252 ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2256 /* The exception is for system call. */
2257 if (cs_etm__is_syscall(etmq, tidq, magic))
2258 packet->flags = PERF_IP_FLAG_BRANCH |
2260 PERF_IP_FLAG_SYSCALLRET;
2262 * The exceptions are triggered by external signals from bus,
2263 * interrupt controller, debug module, PE reset or halt.
2265 else if (cs_etm__is_async_exception(tidq, magic))
2266 packet->flags = PERF_IP_FLAG_BRANCH |
2268 PERF_IP_FLAG_ASYNC |
2269 PERF_IP_FLAG_INTERRUPT;
2271 * Otherwise, exception is caused by trap, instruction &
2272 * data fault, or alignment errors.
2274 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2275 packet->flags = PERF_IP_FLAG_BRANCH |
2277 PERF_IP_FLAG_INTERRUPT;
2280 * When the exception packet is inserted, since exception
2281 * packet is not used standalone for generating samples
2282 * and it's affiliation to the previous instruction range
2283 * packet; so set previous range packet flags to tell perf
2284 * it is an exception taken branch.
2286 if (prev_packet->sample_type == CS_ETM_RANGE)
2287 prev_packet->flags = packet->flags;
2289 case CS_ETM_EXCEPTION_RET:
2291 * When the exception return packet is inserted, since
2292 * exception return packet is not used standalone for
2293 * generating samples and it's affiliation to the previous
2294 * instruction range packet; so set previous range packet
2295 * flags to tell perf it is an exception return branch.
2297 * The exception return can be for either system call or
2298 * other exception types; unfortunately the packet doesn't
2299 * contain exception type related info so we cannot decide
2300 * the exception type purely based on exception return packet.
2301 * If we record the exception number from exception packet and
2302 * reuse it for exception return packet, this is not reliable
2303 * due the trace can be discontinuity or the interrupt can
2304 * be nested, thus the recorded exception number cannot be
2305 * used for exception return packet for these two cases.
2307 * For exception return packet, we only need to distinguish the
2308 * packet is for system call or for other types. Thus the
2309 * decision can be deferred when receive the next packet which
2310 * contains the return address, based on the return address we
2311 * can read out the previous instruction and check if it's a
2312 * system call instruction and then calibrate the sample flag
2315 if (prev_packet->sample_type == CS_ETM_RANGE)
2316 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2317 PERF_IP_FLAG_RETURN |
2318 PERF_IP_FLAG_INTERRUPT;
2328 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2331 size_t processed = 0;
2334 * Packets are decoded and added to the decoder's packet queue
2335 * until the decoder packet processing callback has requested that
2336 * processing stops or there is nothing left in the buffer. Normal
2337 * operations that stop processing are a timestamp packet or a full
2338 * decoder buffer queue.
2340 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2342 &etmq->buf[etmq->buf_used],
2348 etmq->offset += processed;
2349 etmq->buf_used += processed;
2350 etmq->buf_len -= processed;
2356 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2357 struct cs_etm_traceid_queue *tidq)
2360 struct cs_etm_packet_queue *packet_queue;
2362 packet_queue = &tidq->packet_queue;
2364 /* Process each packet in this chunk */
2366 ret = cs_etm_decoder__get_packet(packet_queue,
2370 * Stop processing this chunk on
2371 * end of data or error
2376 * Since packet addresses are swapped in packet
2377 * handling within below switch() statements,
2378 * thus setting sample flags must be called
2379 * prior to switch() statement to use address
2380 * information before packets swapping.
2382 ret = cs_etm__set_sample_flags(etmq, tidq);
2386 switch (tidq->packet->sample_type) {
2389 * If the packet contains an instruction
2390 * range, generate instruction sequence
2393 cs_etm__sample(etmq, tidq);
2395 case CS_ETM_EXCEPTION:
2396 case CS_ETM_EXCEPTION_RET:
2398 * If the exception packet is coming,
2399 * make sure the previous instruction
2400 * range packet to be handled properly.
2402 cs_etm__exception(tidq);
2404 case CS_ETM_DISCONTINUITY:
2406 * Discontinuity in trace, flush
2407 * previous branch stack
2409 cs_etm__flush(etmq, tidq);
2413 * Should not receive empty packet,
2416 pr_err("CS ETM Trace: empty packet\n");
2426 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2429 struct int_node *inode;
2430 struct cs_etm_traceid_queue *tidq;
2431 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2433 intlist__for_each_entry(inode, traceid_queues_list) {
2434 idx = (int)(intptr_t)inode->priv;
2435 tidq = etmq->traceid_queues[idx];
2437 /* Ignore return value */
2438 cs_etm__process_traceid_queue(etmq, tidq);
2441 * Generate an instruction sample with the remaining
2442 * branchstack entries.
2444 cs_etm__flush(etmq, tidq);
2448 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2451 struct cs_etm_traceid_queue *tidq;
2453 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2457 /* Go through each buffer in the queue and decode them one by one */
2459 err = cs_etm__get_data_block(etmq);
2463 /* Run trace decoder until buffer consumed or end of trace */
2465 err = cs_etm__decode_data_block(etmq);
2470 * Process each packet in this chunk, nothing to do if
2471 * an error occurs other than hoping the next one will
2474 err = cs_etm__process_traceid_queue(etmq, tidq);
2476 } while (etmq->buf_len);
2479 /* Flush any remaining branch stack entries */
2480 err = cs_etm__end_block(etmq, tidq);
/*
 * Timeless decode of one queue in per-cpu mode. The block-by-block loop
 * mirrors the per-thread variant, but after each decode step every traceID
 * queue on etmq->traceid_queues_list is processed, and once the block is
 * consumed each of those queues is ended with cs_etm__end_block().
 */
2486 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2489 struct cs_etm_traceid_queue *tidq;
2490 struct int_node *inode;
2492 /* Go through each buffer in the queue and decode them one by one */
2494 err = cs_etm__get_data_block(etmq);
2498 /* Run trace decoder until buffer consumed or end of trace */
2500 err = cs_etm__decode_data_block(etmq);
2505 * cs_etm__run_per_thread_timeless_decoder() runs on a
2506 * single traceID queue because each TID has a separate
2507 * buffer. But here in per-cpu mode we need to iterate
2508 * over each channel instead.
2510 intlist__for_each_entry(inode,
2511 etmq->traceid_queues_list) {
2512 idx = (int)(intptr_t)inode->priv;
2513 tidq = etmq->traceid_queues[idx];
/* The result of processing an individual queue is not captured here */
2514 cs_etm__process_traceid_queue(etmq, tidq);
2516 } while (etmq->buf_len);
2518 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2519 idx = (int)(intptr_t)inode->priv;
2520 tidq = etmq->traceid_queues[idx];
2521 /* Flush any remaining branch stack entries */
2522 err = cs_etm__end_block(etmq, tidq);
/*
 * Run the timeless decoder over every auxtrace queue of @etm. In per-thread
 * mode a queue is decoded only when tid is -1 or equals the TID of the
 * thread attached to its CS_ETM_PER_THREAD_TRACEID traceID queue; the other
 * visible path hands the queue to the per-cpu decoder.
 * NOTE(review): tid == -1 appears to mean "all threads" - confirm with callers.
 */
2531 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2535 struct auxtrace_queues *queues = &etm->queues;
2537 for (i = 0; i < queues->nr_queues; i++) {
2538 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2539 struct cs_etm_queue *etmq = queue->priv;
2540 struct cs_etm_traceid_queue *tidq;
2545 if (etm->per_thread_decoding) {
2546 tidq = cs_etm__etmq_get_traceid_queue(
2547 etmq, CS_ETM_PER_THREAD_TRACEID);
2552 if (tid == -1 || thread__tid(tidq->thread) == tid)
2553 cs_etm__run_per_thread_timeless_decoder(etmq);
2555 cs_etm__run_per_cpu_timeless_decoder(etmq);
/*
 * Decode all queues in timestamp order, using etm->heap as a min heap keyed
 * by CoreSight timestamp. The queue_nr stored in a heap entry packs an
 * auxtrace queue number together with a trace channel ID; it is split with
 * TO_QUEUE_NR() / TO_TRACE_CHAN_ID() and rebuilt with TO_CS_QUEUE_NR().
 */
2561 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2564 unsigned int cs_queue_nr, queue_nr, i;
2567 struct auxtrace_queue *queue;
2568 struct cs_etm_queue *etmq;
2569 struct cs_etm_traceid_queue *tidq;
2572 * Pre-populate the heap with one entry from each queue so that we can
2573 * start processing in time order across all queues.
2575 for (i = 0; i < etm->queues.nr_queues; i++) {
2576 etmq = etm->queues.queue_array[i].priv;
2580 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2586 if (!etm->heap.heap_cnt)
2589 /* Take the entry at the top of the min heap */
2590 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
/* Split the packed value into its queue number and trace channel ID */
2591 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2592 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2593 queue = &etm->queues.queue_array[queue_nr];
2597 * Remove the top entry from the heap since we are about
2600 auxtrace_heap__pop(&etm->heap);
2602 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2605 * No traceID queue has been allocated for this traceID,
2606 * which means something somewhere went very wrong. No
2607 * other choice than simply exit.
2614 * Packets associated with this timestamp are already in
2615 * the etmq's traceID queue, so process them.
2617 ret = cs_etm__process_traceid_queue(etmq, tidq);
2622 * Packets for this timestamp have been processed, time to
2623 * move on to the next timestamp, fetching a new auxtrace_buffer
2627 ret = cs_etm__get_data_block(etmq);
2632 * No more auxtrace_buffers to process in this etmq, simply
2633 * move on to another entry in the auxtrace_heap.
2638 ret = cs_etm__decode_data_block(etmq);
2642 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2644 if (!cs_timestamp) {
2646 * Function cs_etm__decode_data_block() returns when
2647 * there is no more traces to decode in the current
2648 * auxtrace_buffer OR when a timestamp has been
2649 * encountered on any of the traceID queues. Since we
2650 * did not get a timestamp, there is no more traces to
2651 * process in this auxtrace_buffer. As such empty and
2652 * flush all traceID queues.
2654 cs_etm__clear_all_traceid_queues(etmq);
2656 /* Fetch another auxtrace_buffer for this etmq */
2661 * Add to the min heap the timestamp for packets that have
2662 * just been decoded. They will be processed and synthesized
2663 * during the next call to cs_etm__process_traceid_queue() for
2664 * this queue/traceID.
2666 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2667 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
/*
 * Handle PERF_RECORD_ITRACE_START. etm->timeless_decoding is checked first,
 * then the host machine thread for the event's pid/tid is looked up or
 * created with machine__findnew_thread().
 */
2674 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2675 union perf_event *event)
2679 if (etm->timeless_decoding)
2683 * Add the tid/pid to the log so that we can get a match when we get a
2684 * contextID from the decoder. Only track for the host: only kernel
2685 * trace is supported for guests which wouldn't need pids so this should
2688 th = machine__findnew_thread(&etm->session->machines.host,
2689 event->itrace_start.pid,
2690 event->itrace_start.tid);
/*
 * Handle PERF_RECORD_SWITCH_CPU_WIDE. The PERF_RECORD_MISC_SWITCH_OUT bit of
 * the event header is sampled into 'out', etm->timeless_decoding is checked,
 * and the host machine thread for the event's next_prev_pid/next_prev_tid is
 * looked up or created with machine__findnew_thread().
 */
2699 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2700 union perf_event *event)
2703 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2706 * Context switch in per-thread mode are irrelevant since perf
2707 * will start/stop tracing as the process is scheduled.
2709 if (etm->timeless_decoding)
2713 * SWITCH_IN events carry the next process to be switched out while
2714 * SWITCH_OUT events carry the process to be switched in. As such
2715 * we don't care about IN events.
2721 * Add the tid/pid to the log so that we can get a match when we get a
2722 * contextID from the decoder. Only track for the host: only kernel
2723 * trace is supported for guests which wouldn't need pids so this should
2726 th = machine__findnew_thread(&etm->session->machines.host,
2727 event->context_switch.next_prev_pid,
2728 event->context_switch.next_prev_tid);
/*
 * Per-event hook. Checks tool->ordered_events (logging an error when it is
 * not set), then dispatches on event->header.type:
 *  - PERF_RECORD_EXIT: when both per-thread and timeless decoding are active,
 *    decode immediately through cs_etm__process_timeless_queues();
 *  - PERF_RECORD_ITRACE_START and PERF_RECORD_SWITCH_CPU_WIDE: forwarded to
 *    their dedicated handlers;
 *  - PERF_RECORD_AUX: store sample->time in etm->latest_kernel_timestamp
 *    unless it is 0 or (u64)-1.
 */
2737 static int cs_etm__process_event(struct perf_session *session,
2738 union perf_event *event,
2739 struct perf_sample *sample,
2740 struct perf_tool *tool)
2742 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2743 struct cs_etm_auxtrace,
2749 if (!tool->ordered_events) {
2750 pr_err("CoreSight ETM Trace requires ordered events\n");
2754 switch (event->header.type) {
2755 case PERF_RECORD_EXIT:
2757 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2758 * start the decode because we know there will be no more trace from
2759 * this thread. All this does is emit samples earlier than waiting for
2760 * the flush in other modes, but with timestamps it makes sense to wait
2761 * for flush so that events from different threads are interleaved
2764 if (etm->per_thread_decoding && etm->timeless_decoding)
2765 return cs_etm__process_timeless_queues(etm,
2769 case PERF_RECORD_ITRACE_START:
2770 return cs_etm__process_itrace_start(etm, event);
2772 case PERF_RECORD_SWITCH_CPU_WIDE:
2773 return cs_etm__process_switch_cpu_wide(etm, event);
2775 case PERF_RECORD_AUX:
2777 * Record the latest kernel timestamp available in the header
2778 * for samples so that synthesised samples occur from this point
2781 if (sample->time && (sample->time != (u64)-1))
2782 etm->latest_kernel_timestamp = sample->time;
/*
 * Dump, via cs_etm__dump_event(), every buffer held on any auxtrace queue of
 * @etm whose ->reference equals that of the given auxtrace @event.
 */
2792 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2793 struct perf_record_auxtrace *event)
2795 struct auxtrace_buffer *buf;
2798 * Find all buffers with same reference in the queues and dump them.
2799 * This is because the queues can contain multiple entries of the same
2800 * buffer that were split on aux records.
2802 for (i = 0; i < etm->queues.nr_queues; ++i)
2803 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2804 if (buf->reference == event->reference)
2805 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
/*
 * Handler for auxtrace events. When the data has not already been queued
 * (!etm->data_queued) the event is added to the auxtrace queues here and the
 * queue selected by event->auxtrace.idx is set up; otherwise, when dump_trace
 * is set, the already queued buffers matching the event are dumped.
 */
2808 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2809 union perf_event *event,
2810 struct perf_tool *tool __maybe_unused)
2812 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2813 struct cs_etm_auxtrace,
2815 if (!etm->data_queued) {
2816 struct auxtrace_buffer *buffer;
2818 int fd = perf_data__fd(session->data);
2819 bool is_pipe = perf_data__is_pipe(session->data);
2821 int idx = event->auxtrace.idx;
/* The current position of fd is taken as the offset of the trace data */
2826 data_offset = lseek(fd, 0, SEEK_CUR);
2827 if (data_offset == -1)
2831 err = auxtrace_queues__add_event(&etm->queues, session,
2832 event, data_offset, &buffer);
2837 * Knowing if the trace is formatted or not requires a lookup of
2838 * the aux record so only works in non-piped mode where data is
2839 * queued in cs_etm__queue_aux_records(). Always assume
2840 * formatted in piped mode (true).
2842 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2848 if (auxtrace_buffer__get_data(buffer, fd)) {
2849 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2850 auxtrace_buffer__put_data(buffer);
2852 } else if (dump_trace)
2853 dump_queued_data(etm, &event->auxtrace);
/*
 * Decide whether decoding is timeless and record it in
 * etm->timeless_decoding: forced to true by the itrace timeless_decoding
 * option, otherwise set to true when the cs_etm evsel's attr.config lacks
 * the ETM_OPT_TS bit. An error is logged when no ETM evsel is found.
 */
2858 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2860 struct evsel *evsel;
2861 struct evlist *evlist = etm->session->evlist;
2863 /* Override timeless mode with user input from --itrace=Z */
2864 if (etm->synth_opts.timeless_decoding) {
2865 etm->timeless_decoding = true;
2870 * Find the cs_etm evsel and look at what its timestamp setting was
2872 evlist__for_each_entry(evlist, evsel)
2873 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2874 etm->timeless_decoding =
2875 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2879 pr_err("CS ETM: Couldn't find ETM evsel\n");
/*
 * Parameters of cs_etm__create_meta_blk():
 *   buff_in         u64 array holding the header words followed by the per
 *                   cpu blocks; the header version is read from
 *                   buff_in[CS_HEADER_VERSION].
 *   buff_in_offset  in/out: index in buff_in where this cpu's block starts;
 *                   on completion it is advanced by the number of input
 *                   words consumed (trace params plus common params).
 *   out_blk_size    number of u64 entries allocated (zeroed) for the result.
 *   nr_params_v0    parameter count to assume for a version 0 block.
 * The returned block is allocated with zalloc().
 */
2884 * Read a single cpu parameter block from the auxtrace_info priv block.
2886 * For version 1 there is a per cpu nr_params entry. If we are handling
2887 * version 1 file, then there may be less, the same, or more params
2888 * indicated by this value than the compile time number we understand.
2890 * For a version 0 info block, there are a fixed number, and we need to
2891 * fill out the nr_param value in the metadata we create.
2893 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2894 int out_blk_size, int nr_params_v0)
2896 u64 *metadata = NULL;
2898 int nr_in_params, nr_out_params, nr_cmn_params;
2901 metadata = zalloc(sizeof(*metadata) * out_blk_size);
2905 /* read block current index & version */
2906 i = *buff_in_offset;
2907 hdr_version = buff_in[CS_HEADER_VERSION];
2910 /* read version 0 info block into a version 1 metadata block */
2911 nr_in_params = nr_params_v0;
2912 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2913 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2914 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2915 /* remaining block params at offset +1 from source */
2916 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2917 metadata[k + 1] = buff_in[i + k];
2918 /* version 0 has 2 common params */
2921 /* read version 1 info block - input and output nr_params may differ */
2922 /* version 1 has 3 common params */
2924 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2926 /* if input has more params than output - skip excess */
2927 nr_out_params = nr_in_params + nr_cmn_params;
2928 if (nr_out_params > out_blk_size)
2929 nr_out_params = out_blk_size;
2931 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2932 metadata[k] = buff_in[i + k];
2934 /* record the actual nr params we copied */
2935 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2938 /* adjust in offset by number of in params used */
2939 i += nr_in_params + nr_cmn_params;
2940 *buff_in_offset = i;
/*
 * Parameters of cs_etm__queue_aux_fragment():
 *   file_offset, sz   location and expected size of a PERF_RECORD_AUXTRACE
 *                     event in the perf data file; the event is peeked at
 *                     and its type and header size are validated.
 *   aux_event         the AUX record whose aux_offset/aux_size bound the
 *                     fragment to queue.
 *   sample            parsed sample of that AUX record; its tid or cpu is
 *                     matched against the auxtrace event.
 * Once a fragment has been queued the result of cs_etm__setup_queue() is
 * returned; 1 stands for 'not found' when the AUX record does not fall
 * inside this buffer.
 */
2945 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2946 * on the bounds of aux_event, if it matches with the buffer that's at
2949 * Normally, whole auxtrace buffers would be added to the queue. But we
2950 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2951 * is reset across each buffer, so splitting the buffers up in advance has
2954 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2955 struct perf_record_aux *aux_event, struct perf_sample *sample)
2958 char buf[PERF_SAMPLE_MAX_SIZE];
2959 union perf_event *auxtrace_event_union;
2960 struct perf_record_auxtrace *auxtrace_event;
2961 union perf_event auxtrace_fragment;
2962 __u64 aux_offset, aux_size;
2966 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2967 struct cs_etm_auxtrace,
2971 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2972 * from looping through the auxtrace index.
2974 err = perf_session__peek_event(session, file_offset, buf,
2975 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2978 auxtrace_event = &auxtrace_event_union->auxtrace;
2979 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2982 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2983 auxtrace_event->header.size != sz) {
2988 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2989 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2990 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2991 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2992 * Return 'not found' if mismatch.
2994 if (auxtrace_event->cpu == (__u32) -1) {
2995 etm->per_thread_decoding = true;
2996 if (auxtrace_event->tid != sample->tid)
2998 } else if (auxtrace_event->cpu != sample->cpu) {
2999 if (etm->per_thread_decoding) {
3001 * Found a per-cpu buffer after a per-thread one was
3004 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3010 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3012 * Clamp size in snapshot mode. The buffer size is clamped in
3013 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3016 aux_size = min(aux_event->aux_size, auxtrace_event->size);
3019 * In this mode, the head also points to the end of the buffer so aux_offset
3020 * needs to have the size subtracted so it points to the beginning as in normal mode
3022 aux_offset = aux_event->aux_offset - aux_size;
3024 aux_size = aux_event->aux_size;
3025 aux_offset = aux_event->aux_offset;
3028 if (aux_offset >= auxtrace_event->offset &&
3029 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3031 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3032 * based on the sizes of the aux event, and queue that fragment.
3034 auxtrace_fragment.auxtrace = *auxtrace_event;
3035 auxtrace_fragment.auxtrace.size = aux_size;
3036 auxtrace_fragment.auxtrace.offset = aux_offset;
/*
 * Advance file_offset by the size of the auxtrace event record plus the
 * fragment's offset relative to the start of this buffer.
 */
3037 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3039 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3040 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3041 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3046 idx = auxtrace_event->idx;
3047 formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3048 return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3049 idx, formatted, sample->cpu);
3052 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
/*
 * Event-walk callback: for each PERF_RECORD_AUX_OUTPUT_HW_ID event, increment
 * the int counter that @data points to and pass the event on to
 * cs_etm__process_aux_output_hw_id().
 * NOTE(review): @data is tagged __maybe_unused although it is dereferenced
 * below, so it must point to a valid int.
 */
3056 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3057 u64 offset __maybe_unused, void *data __maybe_unused)
3059 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3060 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3061 (*(int *)data)++; /* increment found count */
3062 return cs_etm__process_aux_output_hw_id(session, event);
/*
 * Event-walk callback that queues the auxtrace buffer fragment described by
 * one PERF_RECORD_AUX event. The event type, the record size and a zero
 * aux_size are checked first; the sample is then parsed and every entry of
 * every auxtrace index is offered to cs_etm__queue_aux_fragment(). An error
 * is logged when no buffer matched.
 */
3067 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3068 u64 offset __maybe_unused, void *data __maybe_unused)
3070 struct perf_sample sample;
3072 struct auxtrace_index_entry *ent;
3073 struct auxtrace_index *auxtrace_index;
3074 struct evsel *evsel;
3077 /* Don't care about any other events, we're only queuing buffers for AUX events */
3078 if (event->header.type != PERF_RECORD_AUX)
3081 if (event->header.size < sizeof(struct perf_record_aux))
3084 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3085 if (!event->aux.aux_size)
3089 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3090 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3092 evsel = evlist__event2evsel(session->evlist, event);
3095 ret = evsel__parse_sample(evsel, event, &sample);
3100 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3102 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3103 for (i = 0; i < auxtrace_index->nr; i++) {
3104 ent = &auxtrace_index->entries[i];
3105 ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3106 ent->sz, &event->aux, &sample);
3108 * Stop search on error or successful values. Continue search on
3117 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3118 * don't exit with an error because it will still be possible to decode other aux records.
3120 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3121 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
/*
 * When the first entry of session->auxtrace_index exists and holds at least
 * one index entry, walk the events of the session's data section (from
 * header.data_offset, over header.data_size bytes) with
 * cs_etm__queue_aux_records_cb() and return that walk's result.
 */
3125 static int cs_etm__queue_aux_records(struct perf_session *session)
3127 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3128 struct auxtrace_index, list);
3129 if (index && index->nr > 0)
3130 return perf_session__peek_events(session, session->header.data_offset,
3131 session->header.data_size,
3132 cs_etm__queue_aux_records_cb, NULL);
3135 * We would get here if there are no entries in the index (either no auxtrace
3136 * buffers or no index at all). Fail silently as there is the possibility of
3137 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3140 * In that scenario, buffers will not be split by AUX records.
/*
 * NOTE(review): despite its name, this evaluates to true when the
 * CS_ETM_NR_TRC_PARAMS count recorded for cpu j is less than or equal to the
 * parameter's index past the common block - apparently when the parameter
 * is NOT present. It expands against a variable named 'metadata' that must
 * be in scope at the point of use.
 */
3145 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3146 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
/*
 * cs_etm__has_virtual_ts() inspects each of the num_cpu metadata blocks by
 * magic number: for ETMv4 and ETE blocks the TS_SOURCE parameter is tested
 * with HAS_PARAM() and its value compared against 1. Any other magic number
 * is treated as unknown / unsupported.
 */
3149 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3152 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3156 for (j = 0; j < num_cpu; j++) {
3157 switch (metadata[j][CS_ETM_MAGIC]) {
3158 case __perf_cs_etmv4_magic:
3159 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3162 case __perf_cs_ete_magic:
3163 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3167 /* Unknown / unsupported magic number. */
3174 /* map trace ids to correct metadata block, from information in metadata */
/*
 * For each cpu the trace ID register word (ETMTRACEIDR for ETMv3,
 * TRCTRACEIDR for ETMv4/ETE) is masked in place with
 * CORESIGHT_TRACE_ID_VAL_MASK, and the resulting ID is associated with that
 * cpu's metadata block through cs_etm__map_trace_id().
 */
3175 static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3181 for (i = 0; i < num_cpu; i++) {
3182 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3183 switch (cs_etm_magic) {
3184 case __perf_cs_etmv3_magic:
3185 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3186 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3188 case __perf_cs_etmv4_magic:
3189 case __perf_cs_ete_magic:
3190 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3191 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3194 /* unknown magic number */
3197 err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
/*
 * cs_etm__clear_unused_trace_ids_metadata() walks the num_cpu metadata
 * blocks; a trace ID register word (ETMTRACEIDR for ETMv3, TRCTRACEIDR for
 * ETMv4/ETE) carrying CORESIGHT_TRACE_ID_UNUSED_FLAG is overwritten in place
 * with CORESIGHT_TRACE_ID_UNUSED_VAL.
 */
3205 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3206 * unused value to reduce the number of unneeded decoders created.
3208 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3213 for (i = 0; i < num_cpu; i++) {
3214 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3215 switch (cs_etm_magic) {
3216 case __perf_cs_etmv3_magic:
3217 if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3218 metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3220 case __perf_cs_etmv4_magic:
3221 case __perf_cs_ete_magic:
3222 if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3223 metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3226 /* unknown magic number */
3233 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3234 struct perf_session *session)
3236 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3237 struct cs_etm_auxtrace *etm = NULL;
3238 struct perf_record_time_conv *tc = &session->time_conv;
3239 int event_header_size = sizeof(struct perf_event_header);
3240 int total_size = auxtrace_info->header.size;
3244 int aux_hw_id_found;
3247 u64 **metadata = NULL;
3250 * Create an RB tree for traceID-metadata tuple. Since the conversion
3251 * has to be made for each packet that gets decoded, optimizing access
3252 * in anything other than a sequential array is worth doing.
3254 traceid_list = intlist__new(NULL);
3258 /* First the global part */
3259 ptr = (u64 *) auxtrace_info->priv;
3260 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3261 metadata = zalloc(sizeof(*metadata) * num_cpu);
3264 goto err_free_traceid_list;
3267 /* Start parsing after the common part of the header */
3268 i = CS_HEADER_VERSION_MAX;
3271 * The metadata is stored in the auxtrace_info section and encodes
3272 * the configuration of the ARM embedded trace macrocell which is
3273 * required by the trace decoder to properly decode the trace due
3274 * to its highly compressed nature.
3276 for (j = 0; j < num_cpu; j++) {
3277 if (ptr[i] == __perf_cs_etmv3_magic) {
3279 cs_etm__create_meta_blk(ptr, &i,
3281 CS_ETM_NR_TRC_PARAMS_V0);
3282 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3284 cs_etm__create_meta_blk(ptr, &i,
3286 CS_ETMV4_NR_TRC_PARAMS_V0);
3287 } else if (ptr[i] == __perf_cs_ete_magic) {
3288 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3290 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3293 goto err_free_metadata;
3298 goto err_free_metadata;
3303 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3304 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3305 * global metadata, and each cpu's metadata respectively.
3306 * The following tests if the correct number of double words was
3307 * present in the auxtrace info section.
3309 priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3310 if (i * 8 != priv_size) {
3312 goto err_free_metadata;
3315 etm = zalloc(sizeof(*etm));
3319 goto err_free_metadata;
3323 * As all the ETMs run at the same exception level, the system should
3324 * have the same PID format crossing CPUs. So cache the PID format
3325 * and reuse it for sequential decoding.
3327 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3329 err = auxtrace_queues__init(&etm->queues);
3333 if (session->itrace_synth_opts->set) {
3334 etm->synth_opts = *session->itrace_synth_opts;
3336 itrace_synth_opts__set_default(&etm->synth_opts,
3337 session->itrace_synth_opts->default_no_sample);
3338 etm->synth_opts.callchain = false;
3341 etm->session = session;
3343 etm->num_cpu = num_cpu;
3344 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3345 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3346 etm->metadata = metadata;
3347 etm->auxtrace_type = auxtrace_info->type;
3349 if (etm->synth_opts.use_timestamp)
3351 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF feature,
3352 * so the decoder cannot know whether the trace timestamp is the
3353 * same as the kernel time.
3355 * A user who knows the platform can specify the itrace option
3356 * 'T' to force the decoder to use the traced timestamp as the
3357 * kernel time.
3359 etm->has_virtual_ts = true;
3361 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3362 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3364 if (!etm->has_virtual_ts)
3365 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3366 "The time field of the samples will not be set accurately.\n"
3367 "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3368 "you can specify the itrace option 'T' for timestamp decoding\n"
3369 "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3371 etm->auxtrace.process_event = cs_etm__process_event;
3372 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3373 etm->auxtrace.flush_events = cs_etm__flush_events;
3374 etm->auxtrace.free_events = cs_etm__free_events;
3375 etm->auxtrace.free = cs_etm__free;
3376 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3377 session->auxtrace = &etm->auxtrace;
3379 err = cs_etm__setup_timeless_decoding(etm);
3383 etm->tc.time_shift = tc->time_shift;
3384 etm->tc.time_mult = tc->time_mult;
3385 etm->tc.time_zero = tc->time_zero;
3386 if (event_contains(*tc, time_cycles)) {
3387 etm->tc.time_cycles = tc->time_cycles;
3388 etm->tc.time_mask = tc->time_mask;
3389 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3390 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3392 err = cs_etm__synth_events(etm, session);
3394 goto err_free_queues;
3397 * Map Trace ID values to CPU metadata.
3399 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3400 * file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3401 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3403 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3404 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3405 * in which case a different value will be used. This means an older perf may still
3406 * be able to record and read files generated on a newer system.
3408 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3409 * those packets. If they are there then the values will be mapped and plugged into
3410 * the metadata. We then set any remaining metadata values with the unused flag to a
3411 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3413 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3414 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3418 /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3419 aux_hw_id_found = 0;
3420 err = perf_session__peek_events(session, session->header.data_offset,
3421 session->header.data_size,
3422 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3424 goto err_free_queues;
3426 /* if HW ID found then clear any unused metadata ID values */
3427 if (aux_hw_id_found)
3428 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3429 /* otherwise, this is a file with metadata values only, map from metadata */
3431 err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3434 goto err_free_queues;
3436 err = cs_etm__queue_aux_records(session);
3438 goto err_free_queues;
3440 etm->data_queued = etm->queues.populated;
3444 auxtrace_queues__free(&etm->queues);
3445 session->auxtrace = NULL;
3449 /* No need to check @metadata[j], free(NULL) is supported */
3450 for (j = 0; j < num_cpu; j++)
3451 zfree(&metadata[j]);
3453 err_free_traceid_list:
3454 intlist__delete(traceid_list);