perf cs-etm: Pass unformatted flag to decoder
[linux-2.6-microblaze.git] / tools / perf / util / cs-etm.c
index 32ad92d..f4b2bff 100644 (file)
@@ -62,7 +62,6 @@ struct cs_etm_auxtrace {
        u64 instructions_sample_period;
        u64 instructions_id;
        u64 **metadata;
-       u64 kernel_start;
        unsigned int pmu_type;
 };
 
@@ -97,7 +96,6 @@ struct cs_etm_queue {
 /* RB tree for quick conversion between traceID and metadata pointers */
 static struct intlist *traceid_list;
 
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
                                           pid_t tid);
@@ -463,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
 }
 
 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
-                                    struct cs_etm_auxtrace *etm)
+                                    struct cs_etm_auxtrace *etm,
+                                    int decoders)
 {
        int i;
        u32 etmidr;
        u64 architecture;
 
-       for (i = 0; i < etm->num_cpu; i++) {
+       for (i = 0; i < decoders; i++) {
                architecture = etm->metadata[i][CS_ETM_MAGIC];
 
                switch (architecture) {
@@ -490,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
 
 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
                                       struct cs_etm_queue *etmq,
-                                      enum cs_etm_decoder_operation mode)
+                                      enum cs_etm_decoder_operation mode,
+                                      bool formatted)
 {
        int ret = -EINVAL;
 
@@ -500,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
        d_params->packet_printer = cs_etm__packet_dump;
        d_params->operation = mode;
        d_params->data = etmq;
-       d_params->formatted = true;
+       d_params->formatted = formatted;
        d_params->fsyncs = false;
        d_params->hsyncs = false;
        d_params->frame_aligned = true;
@@ -510,14 +510,11 @@ out:
        return ret;
 }
 
-static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
+static void cs_etm__dump_event(struct cs_etm_queue *etmq,
                               struct auxtrace_buffer *buffer)
 {
        int ret;
        const char *color = PERF_COLOR_BLUE;
-       struct cs_etm_decoder_params d_params;
-       struct cs_etm_trace_params *t_params;
-       struct cs_etm_decoder *decoder;
        size_t buffer_used = 0;
 
        fprintf(stdout, "\n");
@@ -525,29 +522,11 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
                     ". ... CoreSight ETM Trace data: size %zu bytes\n",
                     buffer->size);
 
-       /* Use metadata to fill in trace parameters for trace decoder */
-       t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
-
-       if (!t_params)
-               return;
-
-       if (cs_etm__init_trace_params(t_params, etm))
-               goto out_free;
-
-       /* Set decoder parameters to simply print the trace packets */
-       if (cs_etm__init_decoder_params(&d_params, NULL,
-                                       CS_ETM_OPERATION_PRINT))
-               goto out_free;
-
-       decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
-
-       if (!decoder)
-               goto out_free;
        do {
                size_t consumed;
 
                ret = cs_etm_decoder__process_data_block(
-                               decoder, buffer->offset,
+                               etmq->decoder, buffer->offset,
                                &((u8 *)buffer->data)[buffer_used],
                                buffer->size - buffer_used, &consumed);
                if (ret)
@@ -556,16 +535,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
                buffer_used += consumed;
        } while (buffer_used < buffer->size);
 
-       cs_etm_decoder__free(decoder);
-
-out_free:
-       zfree(&t_params);
+       cs_etm_decoder__reset(etmq->decoder);
 }
 
 static int cs_etm__flush_events(struct perf_session *session,
                                struct perf_tool *tool)
 {
-       int ret;
        struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
                                                   auxtrace);
@@ -575,11 +550,6 @@ static int cs_etm__flush_events(struct perf_session *session,
        if (!tool->ordered_events)
                return -EINVAL;
 
-       ret = cs_etm__update_queues(etm);
-
-       if (ret < 0)
-               return ret;
-
        if (etm->timeless_decoding)
                return cs_etm__process_timeless_queues(etm, -1);
 
@@ -691,7 +661,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
 
        machine = etmq->etm->machine;
 
-       if (address >= etmq->etm->kernel_start) {
+       if (address >= machine__kernel_start(machine)) {
                if (machine__is_host(machine))
                        return PERF_RECORD_MISC_KERNEL;
                else
@@ -752,11 +722,17 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
        return len;
 }
 
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+                                               bool formatted)
 {
        struct cs_etm_decoder_params d_params;
        struct cs_etm_trace_params  *t_params = NULL;
        struct cs_etm_queue *etmq;
+       /*
+        * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+        * needed.
+        */
+       int decoders = formatted ? etm->num_cpu : 1;
 
        etmq = zalloc(sizeof(*etmq));
        if (!etmq)
@@ -767,20 +743,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
                goto out_free;
 
        /* Use metadata to fill in trace parameters for trace decoder */
-       t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+       t_params = zalloc(sizeof(*t_params) * decoders);
 
        if (!t_params)
                goto out_free;
 
-       if (cs_etm__init_trace_params(t_params, etm))
+       if (cs_etm__init_trace_params(t_params, etm, decoders))
                goto out_free;
 
        /* Set decoder parameters to decode trace packets */
        if (cs_etm__init_decoder_params(&d_params, etmq,
-                                       CS_ETM_OPERATION_DECODE))
+                                       dump_trace ? CS_ETM_OPERATION_PRINT :
+                                                    CS_ETM_OPERATION_DECODE,
+                                       formatted))
                goto out_free;
 
-       etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+       etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+                                           t_params);
 
        if (!etmq->decoder)
                goto out_free;
@@ -808,31 +787,35 @@ out_free:
 
 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
                               struct auxtrace_queue *queue,
-                              unsigned int queue_nr)
+                              unsigned int queue_nr,
+                              bool formatted)
 {
-       int ret = 0;
-       unsigned int cs_queue_nr;
-       u8 trace_chan_id;
-       u64 cs_timestamp;
        struct cs_etm_queue *etmq = queue->priv;
 
        if (list_empty(&queue->head) || etmq)
-               goto out;
+               return 0;
 
-       etmq = cs_etm__alloc_queue(etm);
+       etmq = cs_etm__alloc_queue(etm, formatted);
 
-       if (!etmq) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!etmq)
+               return -ENOMEM;
 
        queue->priv = etmq;
        etmq->etm = etm;
        etmq->queue_nr = queue_nr;
        etmq->offset = 0;
 
-       if (etm->timeless_decoding)
-               goto out;
+       return 0;
+}
+
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
+                                           struct cs_etm_queue *etmq,
+                                           unsigned int queue_nr)
+{
+       int ret = 0;
+       unsigned int cs_queue_nr;
+       u8 trace_chan_id;
+       u64 cs_timestamp;
 
        /*
         * We are under a CPU-wide trace scenario.  As such we need to know
@@ -896,33 +879,6 @@ out:
        return ret;
 }
 
-static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
-{
-       unsigned int i;
-       int ret;
-
-       if (!etm->kernel_start)
-               etm->kernel_start = machine__kernel_start(etm->machine);
-
-       for (i = 0; i < etm->queues.nr_queues; i++) {
-               ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
-{
-       if (etm->queues.new_data) {
-               etm->queues.new_data = false;
-               return cs_etm__setup_queues(etm);
-       }
-
-       return 0;
-}
-
 static inline
 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
                                 struct cs_etm_traceid_queue *tidq)
@@ -2222,13 +2178,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 {
        int ret = 0;
-       unsigned int cs_queue_nr, queue_nr;
+       unsigned int cs_queue_nr, queue_nr, i;
        u8 trace_chan_id;
        u64 cs_timestamp;
        struct auxtrace_queue *queue;
        struct cs_etm_queue *etmq;
        struct cs_etm_traceid_queue *tidq;
 
+       /*
+        * Pre-populate the heap with one entry from each queue so that we can
+        * start processing in time order across all queues.
+        */
+       for (i = 0; i < etm->queues.nr_queues; i++) {
+               etmq = etm->queues.queue_array[i].priv;
+               if (!etmq)
+                       continue;
+
+               ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
+               if (ret)
+                       return ret;
+       }
+
        while (1) {
                if (!etm->heap.heap_cnt)
                        goto out;
@@ -2382,7 +2352,6 @@ static int cs_etm__process_event(struct perf_session *session,
                                 struct perf_sample *sample,
                                 struct perf_tool *tool)
 {
-       int err = 0;
        u64 sample_kernel_timestamp;
        struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
@@ -2401,12 +2370,6 @@ static int cs_etm__process_event(struct perf_session *session,
        else
                sample_kernel_timestamp = 0;
 
-       if (sample_kernel_timestamp || etm->timeless_decoding) {
-               err = cs_etm__update_queues(etm);
-               if (err)
-                       return err;
-       }
-
        /*
         * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
         * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
@@ -2434,6 +2397,22 @@ static int cs_etm__process_event(struct perf_session *session,
        return 0;
 }
 
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+                            struct perf_record_auxtrace *event)
+{
+       struct auxtrace_buffer *buf;
+       unsigned int i;
+       /*
+        * Dump every queued buffer whose reference matches this AUXTRACE event.
+        * More than one entry can match because a single buffer may have been
+        * queued as several fragments that were split on aux records.
+        */
+       for (i = 0; i < etm->queues.nr_queues; ++i)
+               list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+                       if (buf->reference == event->reference)
+                               cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
+}
+
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
                                          union perf_event *event,
                                          struct perf_tool *tool __maybe_unused)
@@ -2447,6 +2426,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
                int fd = perf_data__fd(session->data);
                bool is_pipe = perf_data__is_pipe(session->data);
                int err;
+               int idx = event->auxtrace.idx;
 
                if (is_pipe)
                        data_offset = 0;
@@ -2461,12 +2441,24 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
                if (err)
                        return err;
 
+               /*
+                * Knowing if the trace is formatted or not requires a lookup of
+                * the aux record so only works in non-piped mode where data is
+                * queued in cs_etm__queue_aux_records(). Always assume
+                * formatted in piped mode (true).
+                */
+               err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+                                         idx, true);
+               if (err)
+                       return err;
+
                if (dump_trace)
                        if (auxtrace_buffer__get_data(buffer, fd)) {
-                               cs_etm__dump_event(etm, buffer);
+                               cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
                                auxtrace_buffer__put_data(buffer);
                        }
-       }
+       } else if (dump_trace)
+               dump_queued_data(etm, &event->auxtrace);
 
        return 0;
 }
@@ -2683,6 +2675,181 @@ static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
        return metadata;
 }
 
+/**
+ * Puts a fragment of an auxtrace buffer into the auxtrace queues based
+ * on the bounds of aux_event, if it matches with the buffer that's at
+ * file_offset. Returns 0 if a fragment was queued, 1 if aux_event doesn't
+ * belong to that buffer ('not found'), or an error code otherwise.
+ *
+ * Normally, whole auxtrace buffers would be added to the queue. But we want to
+ * reset the decoder for every PERF_RECORD_AUX event, and the decoder is reset
+ * across each buffer, so splitting the buffers up in advance has the same effect.
+ */
+static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
+                                     struct perf_record_aux *aux_event, struct perf_sample *sample)
+{
+       int err;
+       char buf[PERF_SAMPLE_MAX_SIZE];
+       union perf_event *auxtrace_event_union;
+       struct perf_record_auxtrace *auxtrace_event;
+       union perf_event auxtrace_fragment;
+       __u64 aux_offset, aux_size;
+       __u32 idx;
+       bool formatted;
+
+       struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+                                                  struct cs_etm_auxtrace,
+                                                  auxtrace);
+
+       /*
+        * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
+        * from looping through the auxtrace index.
+        */
+       err = perf_session__peek_event(session, file_offset, buf,
+                                      PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
+       if (err)
+               return err;
+       auxtrace_event = &auxtrace_event_union->auxtrace;
+       if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
+               return -EINVAL;
+
+       if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
+               auxtrace_event->header.size != sz) {
+               return -EINVAL;
+       }
+
+       /*
+        * In per-thread mode, CPU is set to -1, but TID will be set instead. See
+        * auxtrace_mmap_params__set_idx(). So match on TID if CPU is -1, otherwise on CPU.
+        */
+       if (auxtrace_event->cpu == (__u32) -1 ? auxtrace_event->tid != sample->tid :
+                                               auxtrace_event->cpu != sample->cpu)
+               return 1;
+
+       if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
+               /*
+                * Clamp size in snapshot mode. The buffer size is clamped in
+                * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
+                * the buffer size.
+                */
+               aux_size = min(aux_event->aux_size, auxtrace_event->size);
+
+               /*
+                * In this mode, the head also points to the end of the buffer so aux_offset
+                * needs to have the size subtracted so it points to the beginning as in normal mode
+                */
+               aux_offset = aux_event->aux_offset - aux_size;
+       } else {
+               aux_size = aux_event->aux_size;
+               aux_offset = aux_event->aux_offset;
+       }
+
+       if (aux_offset >= auxtrace_event->offset &&
+           aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+               /*
+                * If this AUX event was inside this buffer somewhere, create a new auxtrace event
+                * based on the sizes of the aux event, and queue that fragment.
+                */
+               auxtrace_fragment.auxtrace = *auxtrace_event;
+               auxtrace_fragment.auxtrace.size = aux_size;
+               auxtrace_fragment.auxtrace.offset = aux_offset;
+               file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
+
+               pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
+                         " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
+               err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
+                                                file_offset, NULL);
+               if (err)
+                       return err;
+
+               idx = auxtrace_event->idx;
+               formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+               return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+                                          idx, formatted);
+       }
+
+       /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
+       return 1;
+}
+
+static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
+                                       u64 offset __maybe_unused, void *data __maybe_unused)
+{
+       struct perf_sample sample;
+       int ret;
+       struct auxtrace_index_entry *ent;
+       struct auxtrace_index *auxtrace_index;
+       struct evsel *evsel;
+       size_t i;
+
+       /* Don't care about any other events, we're only queuing buffers for AUX events */
+       if (event->header.type != PERF_RECORD_AUX)
+               return 0;
+
+       if (event->header.size < sizeof(struct perf_record_aux))
+               return -EINVAL;
+
+       /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
+       if (!event->aux.aux_size)
+               return 0;
+
+       /*
+        * Parse the sample, we need the sample_id_all data that comes after the event so that the
+        * CPU or TID can be matched to an AUXTRACE buffer's CPU or TID.
+        */
+       evsel = evlist__event2evsel(session->evlist, event);
+       if (!evsel)
+               return -EINVAL;
+       ret = evsel__parse_sample(evsel, event, &sample);
+       if (ret)
+               return ret;
+
+       /*
+        * Loop through the auxtrace index to find the buffer that matches up with this aux event.
+        */
+       list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+               for (i = 0; i < auxtrace_index->nr; i++) {
+                       ent = &auxtrace_index->entries[i];
+                       ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
+                                                        ent->sz, &event->aux, &sample);
+                       /*
+                        * Stop search on error or successful values. Continue search on
+                        * 1 ('not found')
+                        */
+                       if (ret != 1)
+                               return ret;
+               }
+       }
+
+       /*
+        * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
+        * don't exit with an error because it will still be possible to decode other aux records.
+        */
+       pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
+              " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
+       return 0;
+}
+
+static int cs_etm__queue_aux_records(struct perf_session *session)
+{
+       struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
+                                                               struct auxtrace_index, list);
+       if (index && index->nr > 0)
+               return perf_session__peek_events(session, session->header.data_offset,
+                                                session->header.data_size,
+                                                cs_etm__queue_aux_records_cb, NULL);
+
+       /*
+        * We get here if the first index has no entries (either no auxtrace buffers
+        * or no index at all). Return success without queueing anything, as there is
+        * the possibility of queueing them in cs_etm__process_auxtrace_event() if
+        * etm->data_queued is still false.
+        *
+        * In that scenario, buffers will not be split by AUX records.
+        */
+       return 0;
+}
+
 int cs_etm__process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session)
 {
@@ -2876,18 +3043,24 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
        if (dump_trace) {
                cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
-               return 0;
        }
 
        err = cs_etm__synth_events(etm, session);
        if (err)
                goto err_delete_thread;
 
-       err = auxtrace_queues__process_index(&etm->queues, session);
+       err = cs_etm__queue_aux_records(session);
        if (err)
                goto err_delete_thread;
 
        etm->data_queued = etm->queues.populated;
+       /*
+        * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
+        * cs_etm__queue_aux_fragment() for details relating to limitations.
+        */
+       if (!etm->data_queued)
+               pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
+                          "Continuing with best effort decoding in piped mode.\n\n");
 
        return 0;