// SPDX-License-Identifier: GPL-2.0
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <errno.h>
#include <string.h>
#include <linux/ring_buffer.h>
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
		     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
	map->fd = -1;
	map->overwrite = overwrite;
	map->unmap_cb  = unmap_cb;
	refcount_set(&map->refcnt, 0);
	if (prev)
		prev->next = map;
}
size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
	/* Data pages plus the leading perf_event_mmap_page control page. */
	return map->mask + 1 + page_size;
}
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
		    int fd, int cpu)
{
	map->prev = 0;
	map->mask = mp->mask;
	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
			 MAP_SHARED, fd, 0);
	if (map->base == MAP_FAILED) {
		map->base = NULL;
		return -1;
	}

	map->fd  = fd;
	map->cpu = cpu;
	return 0;
}
void perf_mmap__munmap(struct perf_mmap *map)
{
	if (map && map->base != NULL) {
		munmap(map->base, perf_mmap__mmap_len(map));
		map->base = NULL;
		map->fd = -1;
		refcount_set(&map->refcnt, 0);
	}
	if (map && map->unmap_cb)
		map->unmap_cb(map);
}
void perf_mmap__get(struct perf_mmap *map)
{
	refcount_inc(&map->refcnt);
}
void perf_mmap__put(struct perf_mmap *map)
{
	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

	if (refcount_dec_and_test(&map->refcnt))
		perf_mmap__munmap(map);
}
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	ring_buffer_write_tail(md->base, tail);
}
u64 perf_mmap__read_head(struct perf_mmap *map)
{
	return ring_buffer_read_head(map->base);
}
static bool perf_mmap__empty(struct perf_mmap *map)
{
	struct perf_event_mmap_page *pc = map->base;

	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
}
void perf_mmap__consume(struct perf_mmap *map)
{
	if (!map->overwrite) {
		u64 old = map->prev;

		perf_mmap__write_tail(map, old);
	}

	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
		perf_mmap__put(map);
}
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
	pheader = (struct perf_event_header *)(buf + (*start & mask));

	while (true) {
		if (evt_head - *start >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			if (evt_head - *start > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}
/*
 * Report the start and end of the available data in ringbuffer
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;

	md->start = md->overwrite ? head : old;
	md->end = md->overwrite ? old : head;

	if ((md->end - md->start) < md->flush)
		return -EAGAIN;

	size = md->end - md->start;
	if (size > (unsigned long)(md->mask) + 1) {
		if (!md->overwrite) {
			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

			md->prev = head;
			perf_mmap__consume(md);
			return -EAGAIN;
		}

		/*
		 * Backward ring buffer is full. We still have a chance to read
		 * most of the data from it.
		 */
		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
			return -EINVAL;
	}

	return 0;
}
int perf_mmap__read_init(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return -ENOENT;

	return __perf_mmap__read_init(map);
}
/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set tail to map->prev.
 * Need to correct map->prev to head, which is the end of the next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return;

	map->prev = perf_mmap__read_head(map);
}
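
/*
 * Illustrative sketch (not part of libperf): taking one snapshot of an
 * overwrite (backward) ring buffer. The writer is typically paused first,
 * e.g. via the PERF_EVENT_IOC_PAUSE_OUTPUT ioctl (needs <sys/ioctl.h>),
 * so the snapshot stays stable; perf_mmap__read_done() then resets
 * map->prev for the next snapshot. The 'fd' argument is assumed to be the
 * perf event fd backing this mmap.
 */
#if 0
static void example_read_overwrite_snapshot(struct perf_mmap *map, int fd)
{
	union perf_event *event;

	ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);	/* freeze the writer */

	if (perf_mmap__read_init(map) == 0) {
		while ((event = perf_mmap__read_event(map)) != NULL)
			perf_mmap__consume(map);
		perf_mmap__read_done(map);	/* mandatory in overwrite mode */
	}

	ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);	/* resume the writer */
}
#endif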
/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
					 u64 *startp, u64 end)
{
	unsigned char *data = map->base + page_size;
	union perf_event *event = NULL;
	int diff = end - *startp;

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[*startp & map->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size)
			return NULL;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
			unsigned int offset = *startp;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = map->event_copy;

			do {
				cpy = min(map->mask + 1 - (offset & map->mask), len);
				memcpy(dst, &data[offset & map->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *)map->event_copy;
		}

		*startp += size;
	}

	return event;
}
/*
 * Read event from ring buffer one by one.
 * Return one event for each call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *	//process the event
 *	perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
	union perf_event *event;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return NULL;

	/* non-overwrite doesn't pause the ringbuffer */
	if (!map->overwrite)
		map->end = perf_mmap__read_head(map);

	event = perf_mmap__read(map, &map->start, map->end);

	if (!map->overwrite)
		map->prev = map->start;

	return event;
}
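
/*
 * Illustrative sketch (not part of libperf): the canonical loop from the
 * comment above, draining whatever is currently readable from one mmap.
 * Error handling is reduced to the bare minimum.
 */
#if 0
static void example_drain_events(struct perf_mmap *map)
{
	union perf_event *event;

	if (perf_mmap__read_init(map) < 0)
		return;				/* nothing to read, or unmapped */

	while ((event = perf_mmap__read_event(map)) != NULL) {
		/* ... process 'event' (event->header.type, etc.) ... */
		perf_mmap__consume(map);	/* mark the event as consumed */
	}

	perf_mmap__read_done(map);
}
#endif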
#if defined(__i386__) || defined(__x86_64__)
static u64 read_perf_counter(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((u64)high) << 32;
}

static u64 read_timestamp(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((u64)high) << 32;
}
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
#endif
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
	struct perf_event_mmap_page *pc = map->base;
	u32 seq, idx, time_mult = 0, time_shift = 0;
	u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;

	if (!pc || !pc->cap_user_rdpmc)
		return -1;

	do {
		seq = READ_ONCE(pc->lock);
		barrier();

		count->ena = READ_ONCE(pc->time_enabled);
		count->run = READ_ONCE(pc->time_running);

		if (pc->cap_user_time && count->ena != count->run) {
			cyc = read_timestamp();
			time_mult = READ_ONCE(pc->time_mult);
			time_shift = READ_ONCE(pc->time_shift);
			time_offset = READ_ONCE(pc->time_offset);

			if (pc->cap_user_time_short) {
				time_cycles = READ_ONCE(pc->time_cycles);
				time_mask = READ_ONCE(pc->time_mask);
			}
		}

		idx = READ_ONCE(pc->index);
		cnt = READ_ONCE(pc->offset);
		if (pc->cap_user_rdpmc && idx) {
			s64 evcnt = read_perf_counter(idx - 1);
			u16 width = READ_ONCE(pc->pmc_width);

			evcnt <<= 64 - width;
			evcnt >>= 64 - width;
			cnt += evcnt;
		} else
			return -1;

		barrier();
	} while (READ_ONCE(pc->lock) != seq);

	if (count->ena != count->run) {
		u64 delta;

		/* Adjust for cap_usr_time_short, a nop if not */
		cyc = time_cycles + ((cyc - time_cycles) & time_mask);

		delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);

		count->ena += delta;
		if (idx)
			count->run += delta;

		cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
	}

	count->val = cnt;

	return 0;
}
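
/*
 * Illustrative sketch (not part of this file): how a self-monitoring
 * process can reach perf_mmap__read_self() through the public libperf
 * API. perf_evsel__mmap() maps the event's perf_event_mmap_page, after
 * which perf_evsel__read() can use the rdpmc-based path above instead of
 * a read() syscall. The evsel is assumed to be already opened on the
 * current thread; the exact setup is an assumption for the example.
 */
#if 0
static int example_read_own_counter(struct perf_evsel *evsel)
{
	struct perf_counts_values counts = { .val = 0 };

	if (perf_evsel__mmap(evsel, 0))		/* control page only */
		return -1;

	if (perf_evsel__read(evsel, 0, 0, &counts))
		return -1;

	/* counts.val has been scaled by ena/run if the event was multiplexed */
	return 0;
}
#endif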