#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
+#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"
struct machine *machine;
u32 pmu_type;
+ struct perf_tsc_conversion tc;
+
u8 timeless_decoding;
u8 data_queued;
struct arm_spe_record *record = &speq->decoder->record;
if (!spe->timeless_decoding)
- sample->time = speq->timestamp;
+ sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
sample->ip = record->from_ip;
sample->cpumode = arm_spe_cpumode(spe, sample->ip);
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record;
int ret;
if (!spe->kernel_start)
spe->kernel_start = machine__kernel_start(spe->machine);
while (1) {
+ /*
+ * The usual logic is firstly to decode the packets, and then
+ * based the record to synthesize sample; but here the flow is
+ * reversed: it calls arm_spe_sample() for synthesizing samples
+ * prior to arm_spe_decode().
+ *
+ * Two reasons for this code logic:
+ * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
+ * has decoded trace data and generated a record, but the record
+ * is left to generate sample until run to here, so it's correct
+ * to synthesize sample for the left record.
+ * 2. After decoding trace data, it needs to compare the record
+ * timestamp with the coming perf event, if the record timestamp
+ * is later than the perf event, it needs bail out and pushs the
+ * record into auxtrace heap, thus the record can be deferred to
+ * synthesize sample until run to here at the next time; so this
+ * can correlate samples between Arm SPE trace data and other
+ * perf events with correct time ordering.
+ */
+ ret = arm_spe_sample(speq);
+ if (ret)
+ return ret;
+
ret = arm_spe_decode(speq->decoder);
if (!ret) {
pr_debug("No data or all data has been processed.\n");
if (ret < 0)
continue;
- ret = arm_spe_sample(speq);
- if (ret)
- return ret;
+ record = &speq->decoder->record;
+ /* Update timestamp for the last record */
+ if (record->timestamp > speq->timestamp)
+ speq->timestamp = record->timestamp;
+
+ /*
+ * If the timestamp of the queue is later than timestamp of the
+ * coming perf event, bail out so can allow the perf event to
+ * be processed ahead.
+ */
if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
*timestamp = speq->timestamp;
return 0;
}
if (sample->time && (sample->time != (u64) -1))
- timestamp = sample->time;
+ timestamp = perf_time_to_tsc(sample->time, &spe->tc);
else
timestamp = 0;
sample->time);
}
} else if (timestamp) {
- if (event->header.type == PERF_RECORD_EXIT) {
- err = arm_spe_process_queues(spe, timestamp);
- if (err)
- return err;
- }
+ err = arm_spe_process_queues(spe, timestamp);
}
return err;
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+ struct perf_record_time_conv *tc = &session->time_conv;
struct arm_spe *spe;
int err;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
+
+ /*
+ * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
+ * and the parameters for hardware clock are stored in the session
+ * context. Passes these parameters to the struct perf_tsc_conversion
+ * in "spe->tc", which is used for later conversion between clock
+ * counter and timestamp.
+ *
+ * For backward compatibility, copies the fields starting from
+ * "time_cycles" only if they are contained in the event.
+ */
+ spe->tc.time_shift = tc->time_shift;
+ spe->tc.time_mult = tc->time_mult;
+ spe->tc.time_zero = tc->time_zero;
+
+ if (event_contains(*tc, time_cycles)) {
+ spe->tc.time_cycles = tc->time_cycles;
+ spe->tc.time_mask = tc->time_mask;
+ spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
+ spe->tc.cap_user_time_short = tc->cap_user_time_short;
+ }
+
spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush;