Merge tag 'kbuild-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy...
[linux-2.6-microblaze.git] / tools / perf / util / arm-spe.c
index 2539d4b..58b7069 100644 (file)
@@ -26,6 +26,7 @@
 #include "symbol.h"
 #include "thread.h"
 #include "thread-stack.h"
+#include "tsc.h"
 #include "tool.h"
 #include "util/synthetic-events.h"
 
@@ -45,6 +46,8 @@ struct arm_spe {
        struct machine                  *machine;
        u32                             pmu_type;
 
+       struct perf_tsc_conversion      tc;
+
        u8                              timeless_decoding;
        u8                              data_queued;
 
@@ -231,7 +234,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
        struct arm_spe_record *record = &speq->decoder->record;
 
        if (!spe->timeless_decoding)
-               sample->time = speq->timestamp;
+               sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
 
        sample->ip = record->from_ip;
        sample->cpumode = arm_spe_cpumode(spe, sample->ip);
@@ -431,12 +434,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
 {
        struct arm_spe *spe = speq->spe;
+       struct arm_spe_record *record;
        int ret;
 
        if (!spe->kernel_start)
                spe->kernel_start = machine__kernel_start(spe->machine);
 
        while (1) {
+               /*
+                * The usual logic is firstly to decode the packets, and then
+                * based the record to synthesize sample; but here the flow is
+                * reversed: it calls arm_spe_sample() for synthesizing samples
+                * prior to arm_spe_decode().
+                *
+                * Two reasons for this code logic:
+                * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
+                * has decoded trace data and generated a record, but the record
+                * is left to generate sample until run to here, so it's correct
+                * to synthesize sample for the left record.
+                * 2. After decoding trace data, it needs to compare the record
+                * timestamp with the coming perf event, if the record timestamp
+                * is later than the perf event, it needs bail out and pushs the
+                * record into auxtrace heap, thus the record can be deferred to
+                * synthesize sample until run to here at the next time; so this
+                * can correlate samples between Arm SPE trace data and other
+                * perf events with correct time ordering.
+                */
+               ret = arm_spe_sample(speq);
+               if (ret)
+                       return ret;
+
                ret = arm_spe_decode(speq->decoder);
                if (!ret) {
                        pr_debug("No data or all data has been processed.\n");
@@ -450,10 +477,17 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
                if (ret < 0)
                        continue;
 
-               ret = arm_spe_sample(speq);
-               if (ret)
-                       return ret;
+               record = &speq->decoder->record;
 
+               /* Update timestamp for the last record */
+               if (record->timestamp > speq->timestamp)
+                       speq->timestamp = record->timestamp;
+
+               /*
+                * If the timestamp of the queue is later than timestamp of the
+                * coming perf event, bail out so can allow the perf event to
+                * be processed ahead.
+                */
                if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
                        *timestamp = speq->timestamp;
                        return 0;
@@ -666,7 +700,7 @@ static int arm_spe_process_event(struct perf_session *session,
        }
 
        if (sample->time && (sample->time != (u64) -1))
-               timestamp = sample->time;
+               timestamp = perf_time_to_tsc(sample->time, &spe->tc);
        else
                timestamp = 0;
 
@@ -683,11 +717,7 @@ static int arm_spe_process_event(struct perf_session *session,
                                        sample->time);
                }
        } else if (timestamp) {
-               if (event->header.type == PERF_RECORD_EXIT) {
-                       err = arm_spe_process_queues(spe, timestamp);
-                       if (err)
-                               return err;
-               }
+               err = arm_spe_process_queues(spe, timestamp);
        }
 
        return err;
@@ -1006,6 +1036,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 {
        struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
        size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+       struct perf_record_time_conv *tc = &session->time_conv;
        struct arm_spe *spe;
        int err;
 
@@ -1027,6 +1058,28 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
        spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
 
        spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
+
+       /*
+        * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
+        * and the parameters for hardware clock are stored in the session
+        * context.  Passes these parameters to the struct perf_tsc_conversion
+        * in "spe->tc", which is used for later conversion between clock
+        * counter and timestamp.
+        *
+        * For backward compatibility, copies the fields starting from
+        * "time_cycles" only if they are contained in the event.
+        */
+       spe->tc.time_shift = tc->time_shift;
+       spe->tc.time_mult = tc->time_mult;
+       spe->tc.time_zero = tc->time_zero;
+
+       if (event_contains(*tc, time_cycles)) {
+               spe->tc.time_cycles = tc->time_cycles;
+               spe->tc.time_mask = tc->time_mask;
+               spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
+               spe->tc.cap_user_time_short = tc->cap_user_time_short;
+       }
+
        spe->auxtrace.process_event = arm_spe_process_event;
        spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
        spe->auxtrace.flush_events = arm_spe_flush;