perf intel-pt: Add PSB events
[linux-2.6-microblaze.git] / tools / perf / util / intel-pt.c
index 60214de..ddb8e6c 100644 (file)
@@ -108,6 +108,7 @@ struct intel_pt {
        u64 exstop_id;
        u64 pwrx_id;
        u64 cbr_id;
+       u64 psb_id;
 
        bool sample_pebs;
        struct evsel *pebs_evsel;
@@ -893,6 +894,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt)
        return false;
 }
 
+/*
+ * Return the config value that intel_pt_get_config() reports for the first
+ * evsel in the session's evlist for which it returns true, or 0 if it returns
+ * false for every evsel.
+ */
+static u64 intel_pt_ctl(struct intel_pt *pt)
+{
+       struct evsel *evsel;
+       u64 config;
+
+       evlist__for_each_entry(pt->session->evlist, evsel) {
+               /* config is presumably filled in on success - TODO confirm */
+               if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+                       return config;
+       }
+       return 0;
+}
+
 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
 {
        u64 quot, rem;
@@ -1026,6 +1039,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
        params.data = ptq;
        params.return_compression = intel_pt_return_compression(pt);
        params.branch_enable = intel_pt_branch_enable(pt);
+       params.ctl = intel_pt_ctl(pt);
        params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
        params.mtc_period = intel_pt_mtc_period(pt);
        params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@@ -1381,7 +1395,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
                sample.branch_stack = (struct branch_stack *)&dummy_bs;
        }
 
-       sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+       if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
+               sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
        if (sample.cyc_cnt) {
                sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
                ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
@@ -1431,7 +1446,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
        else
                sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
 
-       sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+       if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
+               sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
        if (sample.cyc_cnt) {
                sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
                ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
@@ -1533,6 +1549,32 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
                                            pt->pwr_events_sample_type);
 }
 
+/*
+ * Synthesize and deliver a "psb" sample for the queue @ptq.
+ *
+ * The sample is tagged with pt->psb_id and carries a struct
+ * perf_synth_intel_psb as raw data, whose offset field is copied from
+ * ptq->state->psb_offset.
+ *
+ * Returns 0 without delivering anything when intel_pt_skip_event() is true,
+ * otherwise the return value of intel_pt_deliver_synth_event().
+ */
+static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
+{
+       struct intel_pt *pt = ptq->pt;
+       union perf_event *event = ptq->event_buf;
+       struct perf_sample sample = { .ip = 0, };
+       struct perf_synth_intel_psb raw;
+
+       if (intel_pt_skip_event(pt))
+               return 0;
+
+       intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+       /* Use the id assigned to the synthesized psb event for id and stream_id */
+       sample.id = ptq->pt->psb_id;
+       sample.stream_id = ptq->pt->psb_id;
+       sample.flags = 0;
+
+       /* Raw payload: reserved is zeroed, offset comes from the queue's state */
+       raw.reserved = 0;
+       raw.offset = ptq->state->psb_offset;
+
+       sample.raw_size = perf_synth__raw_size(raw);
+       sample.raw_data = perf_synth__raw_data(&raw);
+
+       /* Delivered with pt->pwr_events_sample_type as the sample type */
+       return intel_pt_deliver_synth_event(pt, event, &sample,
+                                           pt->pwr_events_sample_type);
+}
+
 static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
 {
        struct intel_pt *pt = ptq->pt;
@@ -1853,13 +1895,30 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
        if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
                sample.addr = items->mem_access_address;
 
-       if (sample_type & PERF_SAMPLE_WEIGHT) {
+       if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
                /*
                 * Refer kernel's setup_pebs_adaptive_sample_data() and
                 * intel_hsw_weight().
                 */
-               if (items->has_mem_access_latency)
-                       sample.weight = items->mem_access_latency;
+               if (items->has_mem_access_latency) {
+                       u64 weight = items->mem_access_latency >> 32;
+
+                       /*
+                        * Starting from SPR, the mem access latency field
+                        * contains both cache latency [47:32] and instruction
+                        * latency [15:0]. The cache latency is the same as the
+                        * mem access latency on previous platforms.
+                        *
+                        * In practice, no memory access could last longer than
+                        * 4G cycles. Use latency >> 32 to distinguish the
+                        * different formats of the mem access latency field.
+                        */
+                       if (weight > 0) {
+                               sample.weight = weight & 0xffff;
+                               sample.ins_lat = items->mem_access_latency & 0xffff;
+                       } else
+                               sample.weight = items->mem_access_latency;
+               }
                if (!sample.weight && items->has_tsx_aux_info) {
                        /* Cycles last block */
                        sample.weight = (u32)items->tsx_aux_info;
@@ -1966,14 +2025,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 
        ptq->have_sample = false;
 
-       if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
-               /*
-                * Cycle count and instruction count only go together to create
-                * a valid IPC ratio when the cycle count changes.
-                */
-               ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
-               ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
-       }
+       ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+       ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
 
        /*
         * Do PEBS first to allow for the possibility that the PEBS timestamp
@@ -1986,6 +2039,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
        }
 
        if (pt->sample_pwr_events) {
+               if (state->type & INTEL_PT_PSB_EVT) {
+                       err = intel_pt_synth_psb_sample(ptq);
+                       if (err)
+                               return err;
+               }
                if (ptq->state->cbr != ptq->cbr_seen) {
                        err = intel_pt_synth_cbr_sample(ptq);
                        if (err)
@@ -3083,6 +3141,14 @@ static int intel_pt_synth_events(struct intel_pt *pt,
                pt->cbr_id = id;
                intel_pt_set_event_name(evlist, id, "cbr");
                id += 1;
+
+               attr.config = PERF_SYNTH_INTEL_PSB;
+               err = intel_pt_synth_event(session, "psb", &attr, id);
+               if (err)
+                       return err;
+               pt->psb_id = id;
+               intel_pt_set_event_name(evlist, id, "psb");
+               id += 1;
        }
 
        if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {