perf cs-etm: Pass unformatted flag to decoder
[linux-2.6-microblaze.git] / tools / perf / util / cs-etm.c
index 22f8326..f4b2bff 100644 (file)
@@ -62,7 +62,6 @@ struct cs_etm_auxtrace {
        u64 instructions_sample_period;
        u64 instructions_id;
        u64 **metadata;
-       u64 kernel_start;
        unsigned int pmu_type;
 };
 
@@ -97,7 +96,6 @@ struct cs_etm_queue {
 /* RB tree for quick conversion between traceID and metadata pointers */
 static struct intlist *traceid_list;
 
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
                                           pid_t tid);
@@ -463,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
 }
 
 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
-                                    struct cs_etm_auxtrace *etm)
+                                    struct cs_etm_auxtrace *etm,
+                                    int decoders)
 {
        int i;
        u32 etmidr;
        u64 architecture;
 
-       for (i = 0; i < etm->num_cpu; i++) {
+       for (i = 0; i < decoders; i++) {
                architecture = etm->metadata[i][CS_ETM_MAGIC];
 
                switch (architecture) {
@@ -490,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
 
 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
                                       struct cs_etm_queue *etmq,
-                                      enum cs_etm_decoder_operation mode)
+                                      enum cs_etm_decoder_operation mode,
+                                      bool formatted)
 {
        int ret = -EINVAL;
 
@@ -500,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
        d_params->packet_printer = cs_etm__packet_dump;
        d_params->operation = mode;
        d_params->data = etmq;
-       d_params->formatted = true;
+       d_params->formatted = formatted;
        d_params->fsyncs = false;
        d_params->hsyncs = false;
        d_params->frame_aligned = true;
@@ -510,14 +510,11 @@ out:
        return ret;
 }
 
-static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
+static void cs_etm__dump_event(struct cs_etm_queue *etmq,
                               struct auxtrace_buffer *buffer)
 {
        int ret;
        const char *color = PERF_COLOR_BLUE;
-       struct cs_etm_decoder_params d_params;
-       struct cs_etm_trace_params *t_params;
-       struct cs_etm_decoder *decoder;
        size_t buffer_used = 0;
 
        fprintf(stdout, "\n");
@@ -525,29 +522,11 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
                     ". ... CoreSight ETM Trace data: size %zu bytes\n",
                     buffer->size);
 
-       /* Use metadata to fill in trace parameters for trace decoder */
-       t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
-
-       if (!t_params)
-               return;
-
-       if (cs_etm__init_trace_params(t_params, etm))
-               goto out_free;
-
-       /* Set decoder parameters to simply print the trace packets */
-       if (cs_etm__init_decoder_params(&d_params, NULL,
-                                       CS_ETM_OPERATION_PRINT))
-               goto out_free;
-
-       decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
-
-       if (!decoder)
-               goto out_free;
        do {
                size_t consumed;
 
                ret = cs_etm_decoder__process_data_block(
-                               decoder, buffer->offset,
+                               etmq->decoder, buffer->offset,
                                &((u8 *)buffer->data)[buffer_used],
                                buffer->size - buffer_used, &consumed);
                if (ret)
@@ -556,16 +535,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
                buffer_used += consumed;
        } while (buffer_used < buffer->size);
 
-       cs_etm_decoder__free(decoder);
-
-out_free:
-       zfree(&t_params);
+       cs_etm_decoder__reset(etmq->decoder);
 }
 
 static int cs_etm__flush_events(struct perf_session *session,
                                struct perf_tool *tool)
 {
-       int ret;
        struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
                                                   auxtrace);
@@ -575,11 +550,6 @@ static int cs_etm__flush_events(struct perf_session *session,
        if (!tool->ordered_events)
                return -EINVAL;
 
-       ret = cs_etm__update_queues(etm);
-
-       if (ret < 0)
-               return ret;
-
        if (etm->timeless_decoding)
                return cs_etm__process_timeless_queues(etm, -1);
 
@@ -691,7 +661,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
 
        machine = etmq->etm->machine;
 
-       if (address >= etmq->etm->kernel_start) {
+       if (address >= machine__kernel_start(machine)) {
                if (machine__is_host(machine))
                        return PERF_RECORD_MISC_KERNEL;
                else
@@ -752,11 +722,17 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
        return len;
 }
 
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+                                               bool formatted)
 {
        struct cs_etm_decoder_params d_params;
        struct cs_etm_trace_params  *t_params = NULL;
        struct cs_etm_queue *etmq;
+       /*
+        * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+        * needed.
+        */
+       int decoders = formatted ? etm->num_cpu : 1;
 
        etmq = zalloc(sizeof(*etmq));
        if (!etmq)
@@ -767,20 +743,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
                goto out_free;
 
        /* Use metadata to fill in trace parameters for trace decoder */
-       t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+       t_params = zalloc(sizeof(*t_params) * decoders);
 
        if (!t_params)
                goto out_free;
 
-       if (cs_etm__init_trace_params(t_params, etm))
+       if (cs_etm__init_trace_params(t_params, etm, decoders))
                goto out_free;
 
        /* Set decoder parameters to decode trace packets */
        if (cs_etm__init_decoder_params(&d_params, etmq,
-                                       CS_ETM_OPERATION_DECODE))
+                                       dump_trace ? CS_ETM_OPERATION_PRINT :
+                                                    CS_ETM_OPERATION_DECODE,
+                                       formatted))
                goto out_free;
 
-       etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+       etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+                                           t_params);
 
        if (!etmq->decoder)
                goto out_free;
@@ -808,31 +787,35 @@ out_free:
 
 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
                               struct auxtrace_queue *queue,
-                              unsigned int queue_nr)
+                              unsigned int queue_nr,
+                              bool formatted)
 {
-       int ret = 0;
-       unsigned int cs_queue_nr;
-       u8 trace_chan_id;
-       u64 cs_timestamp;
        struct cs_etm_queue *etmq = queue->priv;
 
        if (list_empty(&queue->head) || etmq)
-               goto out;
+               return 0;
 
-       etmq = cs_etm__alloc_queue(etm);
+       etmq = cs_etm__alloc_queue(etm, formatted);
 
-       if (!etmq) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!etmq)
+               return -ENOMEM;
 
        queue->priv = etmq;
        etmq->etm = etm;
        etmq->queue_nr = queue_nr;
        etmq->offset = 0;
 
-       if (etm->timeless_decoding)
-               goto out;
+       return 0;
+}
+
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
+                                           struct cs_etm_queue *etmq,
+                                           unsigned int queue_nr)
+{
+       int ret = 0;
+       unsigned int cs_queue_nr;
+       u8 trace_chan_id;
+       u64 cs_timestamp;
 
        /*
         * We are under a CPU-wide trace scenario.  As such we need to know
@@ -896,33 +879,6 @@ out:
        return ret;
 }
 
-static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
-{
-       unsigned int i;
-       int ret;
-
-       if (!etm->kernel_start)
-               etm->kernel_start = machine__kernel_start(etm->machine);
-
-       for (i = 0; i < etm->queues.nr_queues; i++) {
-               ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
-{
-       if (etm->queues.new_data) {
-               etm->queues.new_data = false;
-               return cs_etm__setup_queues(etm);
-       }
-
-       return 0;
-}
-
 static inline
 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
                                 struct cs_etm_traceid_queue *tidq)
@@ -2222,13 +2178,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 {
        int ret = 0;
-       unsigned int cs_queue_nr, queue_nr;
+       unsigned int cs_queue_nr, queue_nr, i;
        u8 trace_chan_id;
        u64 cs_timestamp;
        struct auxtrace_queue *queue;
        struct cs_etm_queue *etmq;
        struct cs_etm_traceid_queue *tidq;
 
+       /*
+        * Pre-populate the heap with one entry from each queue so that we can
+        * start processing in time order across all queues.
+        */
+       for (i = 0; i < etm->queues.nr_queues; i++) {
+               etmq = etm->queues.queue_array[i].priv;
+               if (!etmq)
+                       continue;
+
+               ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
+               if (ret)
+                       return ret;
+       }
+
        while (1) {
                if (!etm->heap.heap_cnt)
                        goto out;
@@ -2382,7 +2352,6 @@ static int cs_etm__process_event(struct perf_session *session,
                                 struct perf_sample *sample,
                                 struct perf_tool *tool)
 {
-       int err = 0;
        u64 sample_kernel_timestamp;
        struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
@@ -2401,12 +2370,6 @@ static int cs_etm__process_event(struct perf_session *session,
        else
                sample_kernel_timestamp = 0;
 
-       if (sample_kernel_timestamp || etm->timeless_decoding) {
-               err = cs_etm__update_queues(etm);
-               if (err)
-                       return err;
-       }
-
        /*
         * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
         * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
@@ -2434,6 +2397,22 @@ static int cs_etm__process_event(struct perf_session *session,
        return 0;
 }
 
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+                            struct perf_record_auxtrace *event)
+{
+       struct auxtrace_buffer *buf;
+       unsigned int i;
+       /*
+        * Dump every queued buffer, in any queue, whose reference matches the
+        * one in this event. More than one entry can match because the queues
+        * can contain several pieces of the same buffer split on aux records.
+        */
+       for (i = 0; i < etm->queues.nr_queues; ++i)
+               list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+                       if (buf->reference == event->reference)
+                               cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
+}
+
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
                                          union perf_event *event,
                                          struct perf_tool *tool __maybe_unused)
@@ -2447,6 +2426,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
                int fd = perf_data__fd(session->data);
                bool is_pipe = perf_data__is_pipe(session->data);
                int err;
+               int idx = event->auxtrace.idx;
 
                if (is_pipe)
                        data_offset = 0;
@@ -2461,12 +2441,24 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
                if (err)
                        return err;
 
+               /*
+                * Whether the trace is formatted is only known after looking up
+                * the aux record, which is only possible in non-piped mode where
+                * data is queued in cs_etm__queue_aux_records(). In piped mode,
+                * always assume formatted (true).
+                */
+               err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+                                         idx, true);
+               if (err)
+                       return err;
+
                if (dump_trace)
                        if (auxtrace_buffer__get_data(buffer, fd)) {
-                               cs_etm__dump_event(etm, buffer);
+                               cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
                                auxtrace_buffer__put_data(buffer);
                        }
-       }
+       } else if (dump_trace)
+               dump_queued_data(etm, &event->auxtrace);
 
        return 0;
 }
@@ -2702,6 +2694,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
        struct perf_record_auxtrace *auxtrace_event;
        union perf_event auxtrace_fragment;
        __u64 aux_offset, aux_size;
+       __u32 idx;
+       bool formatted;
 
        struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
@@ -2763,8 +2757,15 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
 
                pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
                          " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
-               return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
-                                                 file_offset, NULL);
+               err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
+                                                file_offset, NULL);
+               if (err)
+                       return err;
+
+               idx = auxtrace_event->idx;
+               formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+               return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+                                          idx, formatted);
        }
 
        /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -3042,7 +3043,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
        if (dump_trace) {
                cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
-               return 0;
        }
 
        err = cs_etm__synth_events(etm, session);
@@ -3054,6 +3054,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
                goto err_delete_thread;
 
        etm->data_queued = etm->queues.populated;
+       /*
+        * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
+        * cs_etm__queue_aux_fragment() for details relating to limitations.
+        */
+       if (!etm->data_queued)
+               pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
+                          "Continuing with best effort decoding in piped mode.\n\n");
 
        return 0;