Merge branch 'for-next/sme' into for-next/core
[linux-2.6-microblaze.git] / drivers / perf / riscv_pmu_sbi.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * RISC-V performance counter support.
4  *
5  * Copyright (C) 2021 Western Digital Corporation or its affiliates.
6  *
7  * This code is based on ARM perf event code which is in turn based on
8  * sparc64 and x86 code.
9  */
10
11 #define pr_fmt(fmt) "riscv-pmu-sbi: " fmt
12
13 #include <linux/mod_devicetable.h>
14 #include <linux/perf/riscv_pmu.h>
15 #include <linux/platform_device.h>
16 #include <linux/irq.h>
17 #include <linux/irqdomain.h>
18 #include <linux/of_irq.h>
19 #include <linux/of.h>
20 #include <linux/cpu_pm.h>
21
22 #include <asm/sbi.h>
23 #include <asm/hwcap.h>
24
25 PMU_FORMAT_ATTR(event, "config:0-47");
26 PMU_FORMAT_ATTR(firmware, "config:63");
27
28 static struct attribute *riscv_arch_formats_attr[] = {
29         &format_attr_event.attr,
30         &format_attr_firmware.attr,
31         NULL,
32 };
33
34 static struct attribute_group riscv_pmu_format_group = {
35         .name = "format",
36         .attrs = riscv_arch_formats_attr,
37 };
38
39 static const struct attribute_group *riscv_pmu_attr_groups[] = {
40         &riscv_pmu_format_group,
41         NULL,
42 };
43
/*
 * One counter-info word, filled from the value returned by the
 * SBI_EXT_PMU_COUNTER_GET_INFO call. It can be accessed either as the raw
 * word (value) or through the bitfields below.
 */
union sbi_pmu_ctr_info {
	/* Raw word; zero means no counter info was recorded for the slot */
	unsigned long value;
	struct {
		/* CSR number used to read the counter */
		unsigned long csr:12;
		/* Counter width field as reported in the info word */
		unsigned long width:6;
		/* Padding so that 'type' lands in the top bit of the word */
#if __riscv_xlen != 32
		unsigned long reserved:45;
#else
		unsigned long reserved:13;
#endif
		/* Compared against SBI_PMU_CTR_TYPE_HW / SBI_PMU_CTR_TYPE_FW */
		unsigned long type:1;
	};
};
57
/*
 * RISC-V doesn't have heterogeneous harts yet. This needs to become
 * per-CPU data if harts ever expose different sets of PMU counters.
 */
/* Counter-info table, allocated and filled by pmu_sbi_get_ctrinfo() */
static union sbi_pmu_ctr_info *pmu_ctr_list;
/* IRQ number obtained from irq_create_mapping() for RV_IRQ_PMU */
static unsigned int riscv_pmu_irq;
64
/*
 * 32-bit event descriptor. The same word can be viewed as a general
 * hardware event, as a hardware cache event, or as the raw event index.
 */
struct sbi_pmu_event_data {
	union {
		/* General hardware event: code in bits 0-15, type in bits 16-19 */
		struct hw_gen_event {
			uint32_t event_code:16;
			uint32_t event_type:4;
			uint32_t reserved:12;
		} hw_gen_event;
		/* Cache event: result, operation and cache ids, then the type */
		struct hw_cache_event {
			uint32_t result_id:1;
			uint32_t op_id:2;
			uint32_t cache_id:13;
			uint32_t event_type:4;
			uint32_t reserved:12;
		} hw_cache_event;
		/* The whole descriptor as a single 32-bit event index */
		uint32_t event_idx;
	};
};
84
85 static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
86         [PERF_COUNT_HW_CPU_CYCLES]              = {.hw_gen_event = {
87                                                         SBI_PMU_HW_CPU_CYCLES,
88                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
89         [PERF_COUNT_HW_INSTRUCTIONS]            = {.hw_gen_event = {
90                                                         SBI_PMU_HW_INSTRUCTIONS,
91                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
92         [PERF_COUNT_HW_CACHE_REFERENCES]        = {.hw_gen_event = {
93                                                         SBI_PMU_HW_CACHE_REFERENCES,
94                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
95         [PERF_COUNT_HW_CACHE_MISSES]            = {.hw_gen_event = {
96                                                         SBI_PMU_HW_CACHE_MISSES,
97                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
98         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = {.hw_gen_event = {
99                                                         SBI_PMU_HW_BRANCH_INSTRUCTIONS,
100                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
101         [PERF_COUNT_HW_BRANCH_MISSES]           = {.hw_gen_event = {
102                                                         SBI_PMU_HW_BRANCH_MISSES,
103                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
104         [PERF_COUNT_HW_BUS_CYCLES]              = {.hw_gen_event = {
105                                                         SBI_PMU_HW_BUS_CYCLES,
106                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
107         [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = {
108                                                         SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
109                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
110         [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = {.hw_gen_event = {
111                                                         SBI_PMU_HW_STALLED_CYCLES_BACKEND,
112                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
113         [PERF_COUNT_HW_REF_CPU_CYCLES]          = {.hw_gen_event = {
114                                                         SBI_PMU_HW_REF_CPU_CYCLES,
115                                                         SBI_PMU_EVENT_TYPE_HW, 0}},
116 };
117
118 #define C(x) PERF_COUNT_HW_CACHE_##x
119 static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
120 [PERF_COUNT_HW_CACHE_OP_MAX]
121 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
122         [C(L1D)] = {
123                 [C(OP_READ)] = {
124                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
125                                         C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
126                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
127                                         C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
128                 },
129                 [C(OP_WRITE)] = {
130                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
131                                         C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
132                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
133                                         C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
134                 },
135                 [C(OP_PREFETCH)] = {
136                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
137                                         C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
138                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
139                                         C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
140                 },
141         },
142         [C(L1I)] = {
143                 [C(OP_READ)] = {
144                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
145                                         C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
146                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ),
147                                         C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
148                 },
149                 [C(OP_WRITE)] = {
150                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
151                                         C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
152                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
153                                         C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
154                 },
155                 [C(OP_PREFETCH)] = {
156                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
157                                         C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
158                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
159                                         C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
160                 },
161         },
162         [C(LL)] = {
163                 [C(OP_READ)] = {
164                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
165                                         C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
166                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
167                                         C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
168                 },
169                 [C(OP_WRITE)] = {
170                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
171                                         C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
172                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
173                                         C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
174                 },
175                 [C(OP_PREFETCH)] = {
176                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
177                                         C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
178                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
179                                         C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
180                 },
181         },
182         [C(DTLB)] = {
183                 [C(OP_READ)] = {
184                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
185                                         C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
186                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
187                                         C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
188                 },
189                 [C(OP_WRITE)] = {
190                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
191                                         C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
192                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
193                                         C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
194                 },
195                 [C(OP_PREFETCH)] = {
196                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
197                                         C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
198                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
199                                         C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
200                 },
201         },
202         [C(ITLB)] = {
203                 [C(OP_READ)] = {
204                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
205                                         C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
206                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
207                                         C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
208                 },
209                 [C(OP_WRITE)] = {
210                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
211                                         C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
212                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
213                                         C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
214                 },
215                 [C(OP_PREFETCH)] = {
216                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
217                                         C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
218                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
219                                         C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
220                 },
221         },
222         [C(BPU)] = {
223                 [C(OP_READ)] = {
224                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
225                                         C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
226                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
227                                         C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
228                 },
229                 [C(OP_WRITE)] = {
230                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
231                                         C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
232                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
233                                         C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
234                 },
235                 [C(OP_PREFETCH)] = {
236                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
237                                         C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
238                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
239                                         C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
240                 },
241         },
242         [C(NODE)] = {
243                 [C(OP_READ)] = {
244                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
245                                         C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
246                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
247                                         C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
248                 },
249                 [C(OP_WRITE)] = {
250                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
251                                         C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
252                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
253                                         C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
254                 },
255                 [C(OP_PREFETCH)] = {
256                         [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
257                                         C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
258                         [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
259                                         C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
260                 },
261         },
262 };
263
264 static int pmu_sbi_ctr_get_width(int idx)
265 {
266         return pmu_ctr_list[idx].width;
267 }
268
269 static bool pmu_sbi_ctr_is_fw(int cidx)
270 {
271         union sbi_pmu_ctr_info *info;
272
273         info = &pmu_ctr_list[cidx];
274         if (!info)
275                 return false;
276
277         return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
278 }
279
280 static int pmu_sbi_ctr_get_idx(struct perf_event *event)
281 {
282         struct hw_perf_event *hwc = &event->hw;
283         struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
284         struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
285         struct sbiret ret;
286         int idx;
287         uint64_t cbase = 0;
288         uint64_t cmask = GENMASK_ULL(rvpmu->num_counters - 1, 0);
289         unsigned long cflags = 0;
290
291         if (event->attr.exclude_kernel)
292                 cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
293         if (event->attr.exclude_user)
294                 cflags |= SBI_PMU_CFG_FLAG_SET_UINH;
295
296         /* retrieve the available counter index */
297         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
298                         cflags, hwc->event_base, hwc->config, 0);
299         if (ret.error) {
300                 pr_debug("Not able to find a counter for event %lx config %llx\n",
301                         hwc->event_base, hwc->config);
302                 return sbi_err_map_linux_errno(ret.error);
303         }
304
305         idx = ret.value;
306         if (idx >= rvpmu->num_counters || !pmu_ctr_list[idx].value)
307                 return -ENOENT;
308
309         /* Additional sanity check for the counter id */
310         if (pmu_sbi_ctr_is_fw(idx)) {
311                 if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
312                         return idx;
313         } else {
314                 if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
315                         return idx;
316         }
317
318         return -ENOENT;
319 }
320
321 static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
322 {
323
324         struct hw_perf_event *hwc = &event->hw;
325         struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
326         struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
327         int idx = hwc->idx;
328
329         if (pmu_sbi_ctr_is_fw(idx))
330                 clear_bit(idx, cpuc->used_fw_ctrs);
331         else
332                 clear_bit(idx, cpuc->used_hw_ctrs);
333 }
334
335 static int pmu_event_find_cache(u64 config)
336 {
337         unsigned int cache_type, cache_op, cache_result, ret;
338
339         cache_type = (config >>  0) & 0xff;
340         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
341                 return -EINVAL;
342
343         cache_op = (config >>  8) & 0xff;
344         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
345                 return -EINVAL;
346
347         cache_result = (config >> 16) & 0xff;
348         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
349                 return -EINVAL;
350
351         ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;
352
353         return ret;
354 }
355
356 static bool pmu_sbi_is_fw_event(struct perf_event *event)
357 {
358         u32 type = event->attr.type;
359         u64 config = event->attr.config;
360
361         if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
362                 return true;
363         else
364                 return false;
365 }
366
367 static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
368 {
369         u32 type = event->attr.type;
370         u64 config = event->attr.config;
371         int bSoftware;
372         u64 raw_config_val;
373         int ret;
374
375         switch (type) {
376         case PERF_TYPE_HARDWARE:
377                 if (config >= PERF_COUNT_HW_MAX)
378                         return -EINVAL;
379                 ret = pmu_hw_event_map[event->attr.config].event_idx;
380                 break;
381         case PERF_TYPE_HW_CACHE:
382                 ret = pmu_event_find_cache(config);
383                 break;
384         case PERF_TYPE_RAW:
385                 /*
386                  * As per SBI specification, the upper 16 bits must be unused for
387                  * a raw event. Use the MSB (63b) to distinguish between hardware
388                  * raw event and firmware events.
389                  */
390                 bSoftware = config >> 63;
391                 raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
392                 if (bSoftware) {
393                         if (raw_config_val < SBI_PMU_FW_MAX)
394                                 ret = (raw_config_val & 0xFFFF) |
395                                       (SBI_PMU_EVENT_TYPE_FW << 16);
396                         else
397                                 return -EINVAL;
398                 } else {
399                         ret = RISCV_PMU_RAW_EVENT_IDX;
400                         *econfig = raw_config_val;
401                 }
402                 break;
403         default:
404                 ret = -EINVAL;
405                 break;
406         }
407
408         return ret;
409 }
410
411 static u64 pmu_sbi_ctr_read(struct perf_event *event)
412 {
413         struct hw_perf_event *hwc = &event->hw;
414         int idx = hwc->idx;
415         struct sbiret ret;
416         union sbi_pmu_ctr_info info;
417         u64 val = 0;
418
419         if (pmu_sbi_is_fw_event(event)) {
420                 ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
421                                 hwc->idx, 0, 0, 0, 0, 0);
422                 if (!ret.error)
423                         val = ret.value;
424         } else {
425                 info = pmu_ctr_list[idx];
426                 val = riscv_pmu_ctr_read_csr(info.csr);
427                 if (IS_ENABLED(CONFIG_32BIT))
428                         val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
429         }
430
431         return val;
432 }
433
434 static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
435 {
436         struct sbiret ret;
437         struct hw_perf_event *hwc = &event->hw;
438         unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
439
440         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
441                         1, flag, ival, ival >> 32, 0);
442         if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
443                 pr_err("Starting counter idx %d failed with error %d\n",
444                         hwc->idx, sbi_err_map_linux_errno(ret.error));
445 }
446
447 static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
448 {
449         struct sbiret ret;
450         struct hw_perf_event *hwc = &event->hw;
451
452         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
453         if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
454                 flag != SBI_PMU_STOP_FLAG_RESET)
455                 pr_err("Stopping counter idx %d failed with error %d\n",
456                         hwc->idx, sbi_err_map_linux_errno(ret.error));
457 }
458
459 static int pmu_sbi_find_num_ctrs(void)
460 {
461         struct sbiret ret;
462
463         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
464         if (!ret.error)
465                 return ret.value;
466         else
467                 return sbi_err_map_linux_errno(ret.error);
468 }
469
470 static int pmu_sbi_get_ctrinfo(int nctr)
471 {
472         struct sbiret ret;
473         int i, num_hw_ctr = 0, num_fw_ctr = 0;
474         union sbi_pmu_ctr_info cinfo;
475
476         pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
477         if (!pmu_ctr_list)
478                 return -ENOMEM;
479
480         for (i = 0; i <= nctr; i++) {
481                 ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
482                 if (ret.error)
483                         /* The logical counter ids are not expected to be contiguous */
484                         continue;
485                 cinfo.value = ret.value;
486                 if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
487                         num_fw_ctr++;
488                 else
489                         num_hw_ctr++;
490                 pmu_ctr_list[i].value = cinfo.value;
491         }
492
493         pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);
494
495         return 0;
496 }
497
498 static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
499 {
500         /*
501          * No need to check the error because we are disabling all the counters
502          * which may include counters that are not enabled yet.
503          */
504         sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
505                   0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
506 }
507
508 static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
509 {
510         struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
511
512         /* No need to check the error here as we can't do anything about the error */
513         sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
514                   cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
515 }
516
517 /*
518  * This function starts all the used counters in two step approach.
519  * Any counter that did not overflow can be start in a single step
520  * while the overflowed counters need to be started with updated initialization
521  * value.
522  */
523 static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
524                                                unsigned long ctr_ovf_mask)
525 {
526         int idx = 0;
527         struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
528         struct perf_event *event;
529         unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
530         unsigned long ctr_start_mask = 0;
531         uint64_t max_period;
532         struct hw_perf_event *hwc;
533         u64 init_val = 0;
534
535         ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
536
537         /* Start all the counters that did not overflow in a single shot */
538         sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
539                   0, 0, 0, 0);
540
541         /* Reinitialize and start all the counter that overflowed */
542         while (ctr_ovf_mask) {
543                 if (ctr_ovf_mask & 0x01) {
544                         event = cpu_hw_evt->events[idx];
545                         hwc = &event->hw;
546                         max_period = riscv_pmu_ctr_get_width_mask(event);
547                         init_val = local64_read(&hwc->prev_count) & max_period;
548                         sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
549                                   flag, init_val, 0, 0);
550                 }
551                 ctr_ovf_mask = ctr_ovf_mask >> 1;
552                 idx++;
553         }
554 }
555
556 static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
557 {
558         struct perf_sample_data data;
559         struct pt_regs *regs;
560         struct hw_perf_event *hw_evt;
561         union sbi_pmu_ctr_info *info;
562         int lidx, hidx, fidx;
563         struct riscv_pmu *pmu;
564         struct perf_event *event;
565         unsigned long overflow;
566         unsigned long overflowed_ctrs = 0;
567         struct cpu_hw_events *cpu_hw_evt = dev;
568
569         if (WARN_ON_ONCE(!cpu_hw_evt))
570                 return IRQ_NONE;
571
572         /* Firmware counter don't support overflow yet */
573         fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
574         event = cpu_hw_evt->events[fidx];
575         if (!event) {
576                 csr_clear(CSR_SIP, SIP_LCOFIP);
577                 return IRQ_NONE;
578         }
579
580         pmu = to_riscv_pmu(event->pmu);
581         pmu_sbi_stop_hw_ctrs(pmu);
582
583         /* Overflow status register should only be read after counter are stopped */
584         overflow = csr_read(CSR_SSCOUNTOVF);
585
586         /*
587          * Overflow interrupt pending bit should only be cleared after stopping
588          * all the counters to avoid any race condition.
589          */
590         csr_clear(CSR_SIP, SIP_LCOFIP);
591
592         /* No overflow bit is set */
593         if (!overflow)
594                 return IRQ_NONE;
595
596         regs = get_irq_regs();
597
598         for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
599                 struct perf_event *event = cpu_hw_evt->events[lidx];
600
601                 /* Skip if invalid event or user did not request a sampling */
602                 if (!event || !is_sampling_event(event))
603                         continue;
604
605                 info = &pmu_ctr_list[lidx];
606                 /* Do a sanity check */
607                 if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
608                         continue;
609
610                 /* compute hardware counter index */
611                 hidx = info->csr - CSR_CYCLE;
612                 /* check if the corresponding bit is set in sscountovf */
613                 if (!(overflow & (1 << hidx)))
614                         continue;
615
616                 /*
617                  * Keep a track of overflowed counters so that they can be started
618                  * with updated initial value.
619                  */
620                 overflowed_ctrs |= 1 << lidx;
621                 hw_evt = &event->hw;
622                 riscv_pmu_event_update(event);
623                 perf_sample_data_init(&data, 0, hw_evt->last_period);
624                 if (riscv_pmu_event_set_period(event)) {
625                         /*
626                          * Unlike other ISAs, RISC-V don't have to disable interrupts
627                          * to avoid throttling here. As per the specification, the
628                          * interrupt remains disabled until the OF bit is set.
629                          * Interrupts are enabled again only during the start.
630                          * TODO: We will need to stop the guest counters once
631                          * virtualization support is added.
632                          */
633                         perf_event_overflow(event, &data, regs);
634                 }
635         }
636         pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
637
638         return IRQ_HANDLED;
639 }
640
641 static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
642 {
643         struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
644         struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
645
646         /* Enable the access for TIME csr only from the user mode now */
647         csr_write(CSR_SCOUNTEREN, 0x2);
648
649         /* Stop all the counters so that they can be enabled from perf */
650         pmu_sbi_stop_all(pmu);
651
652         if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
653                 cpu_hw_evt->irq = riscv_pmu_irq;
654                 csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
655                 csr_set(CSR_IE, BIT(RV_IRQ_PMU));
656                 enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
657         }
658
659         return 0;
660 }
661
662 static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
663 {
664         if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
665                 disable_percpu_irq(riscv_pmu_irq);
666                 csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
667         }
668
669         /* Disable all counters access for user mode now */
670         csr_write(CSR_SCOUNTEREN, 0x0);
671
672         return 0;
673 }
674
675 static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
676 {
677         int ret;
678         struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
679         struct device_node *cpu, *child;
680         struct irq_domain *domain = NULL;
681
682         if (!riscv_isa_extension_available(NULL, SSCOFPMF))
683                 return -EOPNOTSUPP;
684
685         for_each_of_cpu_node(cpu) {
686                 child = of_get_compatible_child(cpu, "riscv,cpu-intc");
687                 if (!child) {
688                         pr_err("Failed to find INTC node\n");
689                         of_node_put(cpu);
690                         return -ENODEV;
691                 }
692                 domain = irq_find_host(child);
693                 of_node_put(child);
694                 if (domain) {
695                         of_node_put(cpu);
696                         break;
697                 }
698         }
699         if (!domain) {
700                 pr_err("Failed to find INTC IRQ root domain\n");
701                 return -ENODEV;
702         }
703
704         riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
705         if (!riscv_pmu_irq) {
706                 pr_err("Failed to map PMU interrupt for node\n");
707                 return -ENODEV;
708         }
709
710         ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
711         if (ret) {
712                 pr_err("registering percpu irq failed [%d]\n", ret);
713                 return ret;
714         }
715
716         return 0;
717 }
718
719 #ifdef CONFIG_CPU_PM
720 static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
721                                 void *v)
722 {
723         struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
724         struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
725         int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
726         struct perf_event *event;
727         int idx;
728
729         if (!enabled)
730                 return NOTIFY_OK;
731
732         for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
733                 event = cpuc->events[idx];
734                 if (!event)
735                         continue;
736
737                 switch (cmd) {
738                 case CPU_PM_ENTER:
739                         /*
740                          * Stop and update the counter
741                          */
742                         riscv_pmu_stop(event, PERF_EF_UPDATE);
743                         break;
744                 case CPU_PM_EXIT:
745                 case CPU_PM_ENTER_FAILED:
746                         /*
747                          * Restore and enable the counter.
748                          *
749                          * Requires RCU read locking to be functional,
750                          * wrap the call within RCU_NONIDLE to make the
751                          * RCU subsystem aware this cpu is not idle from
752                          * an RCU perspective for the riscv_pmu_start() call
753                          * duration.
754                          */
755                         RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD));
756                         break;
757                 default:
758                         break;
759                 }
760         }
761
762         return NOTIFY_OK;
763 }
764
765 static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
766 {
767         pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
768         return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
769 }
770
771 static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
772 {
773         cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
774 }
775 #else
/* Without CONFIG_CPU_PM there is no notifier to register: report success. */
static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu)
{
	return 0;
}

/* Without CONFIG_CPU_PM there is no notifier to remove. */
static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
{
}
778 #endif
779
780 static void riscv_pmu_destroy(struct riscv_pmu *pmu)
781 {
782         riscv_pm_pmu_unregister(pmu);
783         cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
784 }
785
786 static int pmu_sbi_device_probe(struct platform_device *pdev)
787 {
788         struct riscv_pmu *pmu = NULL;
789         int num_counters;
790         int ret = -ENODEV;
791
792         pr_info("SBI PMU extension is available\n");
793         pmu = riscv_pmu_alloc();
794         if (!pmu)
795                 return -ENOMEM;
796
797         num_counters = pmu_sbi_find_num_ctrs();
798         if (num_counters < 0) {
799                 pr_err("SBI PMU extension doesn't provide any counters\n");
800                 goto out_free;
801         }
802
803         /* cache all the information about counters now */
804         if (pmu_sbi_get_ctrinfo(num_counters))
805                 goto out_free;
806
807         ret = pmu_sbi_setup_irqs(pmu, pdev);
808         if (ret < 0) {
809                 pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
810                 pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
811                 pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
812         }
813         pmu->pmu.attr_groups = riscv_pmu_attr_groups;
814         pmu->num_counters = num_counters;
815         pmu->ctr_start = pmu_sbi_ctr_start;
816         pmu->ctr_stop = pmu_sbi_ctr_stop;
817         pmu->event_map = pmu_sbi_event_map;
818         pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
819         pmu->ctr_get_width = pmu_sbi_ctr_get_width;
820         pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
821         pmu->ctr_read = pmu_sbi_ctr_read;
822
823         ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
824         if (ret)
825                 return ret;
826
827         ret = riscv_pm_pmu_register(pmu);
828         if (ret)
829                 goto out_unregister;
830
831         ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
832         if (ret)
833                 goto out_unregister;
834
835         return 0;
836
837 out_unregister:
838         riscv_pmu_destroy(pmu);
839
840 out_free:
841         kfree(pmu);
842         return ret;
843 }
844
845 static struct platform_driver pmu_sbi_driver = {
846         .probe          = pmu_sbi_device_probe,
847         .driver         = {
848                 .name   = RISCV_PMU_PDEV_NAME,
849         },
850 };
851
852 static int __init pmu_sbi_devinit(void)
853 {
854         int ret;
855         struct platform_device *pdev;
856
857         if (sbi_spec_version < sbi_mk_version(0, 3) ||
858             sbi_probe_extension(SBI_EXT_PMU) <= 0) {
859                 return 0;
860         }
861
862         ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
863                                       "perf/riscv/pmu:starting",
864                                       pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
865         if (ret) {
866                 pr_err("CPU hotplug notifier could not be registered: %d\n",
867                        ret);
868                 return ret;
869         }
870
871         ret = platform_driver_register(&pmu_sbi_driver);
872         if (ret)
873                 return ret;
874
875         pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
876         if (IS_ERR(pdev)) {
877                 platform_driver_unregister(&pmu_sbi_driver);
878                 return PTR_ERR(pdev);
879         }
880
881         /* Notify legacy implementation that SBI pmu is available*/
882         riscv_pmu_legacy_skip_init();
883
884         return ret;
885 }
886 device_initcall(pmu_sbi_devinit)