[linux-2.6-microblaze.git] arch/arm64/kvm/pmu-emul.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Linaro Ltd.
4  * Author: Shannon Zhao <shannon.zhao@linaro.org>
5  */
6
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/perf_event.h>
11 #include <linux/perf/arm_pmu.h>
12 #include <linux/uaccess.h>
13 #include <asm/kvm_emulate.h>
14 #include <kvm/arm_pmu.h>
15 #include <kvm/arm_vgic.h>
16
17 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
19 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
20
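/*
 * Flag passed in perf_event_attr.config1 to tell the host PMU driver that the
 * event backs a chained (64-bit) guest counter pair.
 */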
21 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
22
23 /**
24  * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
25  * @vcpu: The vcpu pointer
26  * @select_idx: The counter index
27  */
28 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
29 {
30         return (select_idx == ARMV8_PMU_CYCLE_IDX &&
31                 __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
32 }
33
34 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
35 {
36         struct kvm_pmu *pmu;
37         struct kvm_vcpu_arch *vcpu_arch;
38
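        /*
         * Step back to pmc[0] of the array, then use container_of() to climb
         * from the embedded kvm_pmu up to the enclosing kvm_vcpu.
         */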
39         pmc -= pmc->idx;
40         pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
41         vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
42         return container_of(vcpu_arch, struct kvm_vcpu, arch);
43 }
44
45 /**
46  * kvm_pmu_pmc_is_chained - determine if the pmc is chained
47  * @pmc: The PMU counter pointer
48  */
49 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
50 {
51         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
52
53         return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
54 }
55
56 /**
57  * kvm_pmu_idx_is_high_counter - determine if select_idx is the high (odd) counter of a pair
58  * @select_idx: The counter index
59  */
60 static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
61 {
62         return select_idx & 0x1;
63 }
64
65 /**
66  * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
67  * @pmc: The PMU counter pointer
68  *
69  * When a pair of PMCs are chained together we use the low counter (canonical)
70  * to hold the underlying perf event.
71  */
72 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
73 {
74         if (kvm_pmu_pmc_is_chained(pmc) &&
75             kvm_pmu_idx_is_high_counter(pmc->idx))
76                 return pmc - 1;
77
78         return pmc;
79 }
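
/**
 * kvm_pmu_get_alternate_pmc - obtain the other counter of a pair
 * @pmc: The PMU counter pointer
 *
 * Return the other half of the even/odd counter pair that @pmc belongs to.
 */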
80 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
81 {
82         if (kvm_pmu_idx_is_high_counter(pmc->idx))
83                 return pmc - 1;
84         else
85                 return pmc + 1;
86 }
87
88 /**
89  * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
90  * @vcpu: The vcpu pointer
91  * @select_idx: The counter index
92  */
93 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
94 {
95         u64 eventsel, reg;
96
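        /* The CHAIN event is programmed in the high (odd) counter of a pair. */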
97         select_idx |= 0x1;
98
99         if (select_idx == ARMV8_PMU_CYCLE_IDX)
100                 return false;
101
102         reg = PMEVTYPER0_EL0 + select_idx;
103         eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
104
105         return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
106 }
107
108 /**
109  * kvm_pmu_get_pair_counter_value - get the (possibly chained) PMU counter value
110  * @vcpu: The vcpu pointer
111  * @pmc: The PMU counter pointer
112  */
113 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
114                                           struct kvm_pmc *pmc)
115 {
116         u64 counter, counter_high, reg, enabled, running;
117
118         if (kvm_pmu_pmc_is_chained(pmc)) {
119                 pmc = kvm_pmu_get_canonical_pmc(pmc);
120                 reg = PMEVCNTR0_EL0 + pmc->idx;
121
122                 counter = __vcpu_sys_reg(vcpu, reg);
123                 counter_high = __vcpu_sys_reg(vcpu, reg + 1);
124
125                 counter = lower_32_bits(counter) | (counter_high << 32);
126         } else {
127                 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
128                       ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
129                 counter = __vcpu_sys_reg(vcpu, reg);
130         }
131
132         /*
133          * The real counter value is equal to the value of the counter register
134          * plus the value counted by the perf event.
135          */
136         if (pmc->perf_event)
137                 counter += perf_event_read_value(pmc->perf_event, &enabled,
138                                                  &running);
139
140         return counter;
141 }
142
143 /**
144  * kvm_pmu_get_counter_value - get PMU counter value
145  * @vcpu: The vcpu pointer
146  * @select_idx: The counter index
147  */
148 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
149 {
150         u64 counter;
151         struct kvm_pmu *pmu = &vcpu->arch.pmu;
152         struct kvm_pmc *pmc = &pmu->pmc[select_idx];
153
154         counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
155
156         if (kvm_pmu_pmc_is_chained(pmc) &&
157             kvm_pmu_idx_is_high_counter(select_idx))
158                 counter = upper_32_bits(counter);
159         else if (select_idx != ARMV8_PMU_CYCLE_IDX)
160                 counter = lower_32_bits(counter);
161
162         return counter;
163 }
164
165 /**
166  * kvm_pmu_set_counter_value - set PMU counter value
167  * @vcpu: The vcpu pointer
168  * @select_idx: The counter index
169  * @val: The counter value
170  */
171 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
172 {
173         u64 reg;
174
175         reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
176               ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
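        /*
         * Fold the difference between the requested value and the current
         * guest-visible value (saved register + perf event count) into the
         * saved register, so that subsequent reads return @val.
         */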
177         __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
178
179         /* Recreate the perf event to reflect the updated sample_period */
180         kvm_pmu_create_perf_event(vcpu, select_idx);
181 }
182
183 /**
184  * kvm_pmu_release_perf_event - remove the perf event
185  * @pmc: The PMU counter pointer
186  */
187 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
188 {
189         pmc = kvm_pmu_get_canonical_pmc(pmc);
190         if (pmc->perf_event) {
191                 perf_event_disable(pmc->perf_event);
192                 perf_event_release_kernel(pmc->perf_event);
193                 pmc->perf_event = NULL;
194         }
195 }
196
197 /**
198  * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
199  * @pmc: The PMU counter pointer
200  *
201  * If this counter has been configured to monitor some event, release it here.
202  */
203 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
204 {
205         u64 counter, reg, val;
206
207         pmc = kvm_pmu_get_canonical_pmc(pmc);
208         if (!pmc->perf_event)
209                 return;
210
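        /*
         * Save the up-to-date counter value (including the perf event's
         * in-flight count) back into the guest register(s) before the event
         * is released below.
         */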
211         counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
212
213         if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
214                 reg = PMCCNTR_EL0;
215                 val = counter;
216         } else {
217                 reg = PMEVCNTR0_EL0 + pmc->idx;
218                 val = lower_32_bits(counter);
219         }
220
221         __vcpu_sys_reg(vcpu, reg) = val;
222
223         if (kvm_pmu_pmc_is_chained(pmc))
224                 __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
225
226         kvm_pmu_release_perf_event(pmc);
227 }
228
229 /**
230  * kvm_pmu_vcpu_init - assign pmu counter indices for the vcpu
231  * @vcpu: The vcpu pointer
232  *
233  */
234 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
235 {
236         int i;
237         struct kvm_pmu *pmu = &vcpu->arch.pmu;
238
239         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
240                 pmu->pmc[i].idx = i;
241 }
242
243 /**
244  * kvm_pmu_vcpu_reset - reset pmu state for the vcpu
245  * @vcpu: The vcpu pointer
246  *
247  */
248 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
249 {
250         unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
251         struct kvm_pmu *pmu = &vcpu->arch.pmu;
252         int i;
253
254         for_each_set_bit(i, &mask, 32)
255                 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
256
257         bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
258 }
259
260 /**
261  * kvm_pmu_vcpu_destroy - free the PMU perf events of the vcpu
262  * @vcpu: The vcpu pointer
263  *
264  */
265 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
266 {
267         int i;
268         struct kvm_pmu *pmu = &vcpu->arch.pmu;
269
270         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
271                 kvm_pmu_release_perf_event(&pmu->pmc[i]);
272         irq_work_sync(&vcpu->arch.pmu.overflow_work);
273 }
274
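/*
 * PMCR_EL0.N holds the number of implemented event counters; the cycle counter
 * (ARMV8_PMU_CYCLE_IDX) is always available in addition to those.
 */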
275 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
276 {
277         u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
278
279         val &= ARMV8_PMU_PMCR_N_MASK;
280         if (val == 0)
281                 return BIT(ARMV8_PMU_CYCLE_IDX);
282         else
283                 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
284 }
285
286 /**
287  * kvm_pmu_enable_counter_mask - enable selected PMU counters
288  * @vcpu: The vcpu pointer
289  * @val: the value the guest writes to the PMCNTENSET register
290  *
291  * Call perf_event_enable to start counting the perf event
292  */
293 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
294 {
295         int i;
296         struct kvm_pmu *pmu = &vcpu->arch.pmu;
297         struct kvm_pmc *pmc;
298
299         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
300                 return;
301
302         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
303                 if (!(val & BIT(i)))
304                         continue;
305
306                 pmc = &pmu->pmc[i];
307
308                 /* A change in the enable state may affect the chain state */
309                 kvm_pmu_update_pmc_chained(vcpu, i);
310                 kvm_pmu_create_perf_event(vcpu, i);
311
312                 /* At this point, pmc must be the canonical counter */
313                 if (pmc->perf_event) {
314                         perf_event_enable(pmc->perf_event);
315                         if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
316                                 kvm_debug("fail to enable perf event\n");
317                 }
318         }
319 }
320
321 /**
322  * kvm_pmu_disable_counter_mask - disable selected PMU counters
323  * @vcpu: The vcpu pointer
324  * @val: the value the guest writes to the PMCNTENCLR register
325  *
326  * Call perf_event_disable to stop counting the perf event
327  */
328 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
329 {
330         int i;
331         struct kvm_pmu *pmu = &vcpu->arch.pmu;
332         struct kvm_pmc *pmc;
333
334         if (!val)
335                 return;
336
337         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
338                 if (!(val & BIT(i)))
339                         continue;
340
341                 pmc = &pmu->pmc[i];
342
343                 /* A change in the enable state may affect the chain state */
344                 kvm_pmu_update_pmc_chained(vcpu, i);
345                 kvm_pmu_create_perf_event(vcpu, i);
346
347                 /* At this point, pmc must be the canonical counter */
348                 if (pmc->perf_event)
349                         perf_event_disable(pmc->perf_event);
350         }
351 }
352
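/*
 * Compute the set of counters that can currently raise an overflow interrupt:
 * those that have overflowed (PMOVSSET_EL0), are enabled (PMCNTENSET_EL0) and
 * have their interrupt enabled (PMINTENSET_EL1), provided the PMU itself is
 * enabled (PMCR_EL0.E).
 */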
353 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
354 {
355         u64 reg = 0;
356
357         if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
358                 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
359                 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
360                 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
361                 reg &= kvm_pmu_valid_counter_mask(vcpu);
362         }
363
364         return reg;
365 }
366
367 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
368 {
369         struct kvm_pmu *pmu = &vcpu->arch.pmu;
370         bool overflow;
371
372         if (!kvm_arm_pmu_v3_ready(vcpu))
373                 return;
374
375         overflow = !!kvm_pmu_overflow_status(vcpu);
376         if (pmu->irq_level == overflow)
377                 return;
378
379         pmu->irq_level = overflow;
380
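        /*
         * With an in-kernel irqchip, the new level is injected into the vgic
         * directly; otherwise user space picks it up via kvm_pmu_update_run().
         */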
381         if (likely(irqchip_in_kernel(vcpu->kvm))) {
382                 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
383                                               pmu->irq_num, overflow, pmu);
384                 WARN_ON(ret);
385         }
386 }
387
388 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
389 {
390         struct kvm_pmu *pmu = &vcpu->arch.pmu;
391         struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
392         bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
393
394         if (likely(irqchip_in_kernel(vcpu->kvm)))
395                 return false;
396
397         return pmu->irq_level != run_level;
398 }
399
400 /*
401  * Reflect the PMU overflow interrupt output level into the kvm_run structure
402  */
403 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
404 {
405         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
406
407         /* Populate the PMU bitmap for user space */
408         regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
409         if (vcpu->arch.pmu.irq_level)
410                 regs->device_irq_level |= KVM_ARM_DEV_PMU;
411 }
412
413 /**
414  * kvm_pmu_flush_hwstate - flush pmu state to cpu
415  * @vcpu: The vcpu pointer
416  *
417  * Check if the PMU has overflowed while we were running in the host, and inject
418  * an interrupt if that was the case.
419  */
420 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
421 {
422         kvm_pmu_update_state(vcpu);
423 }
424
425 /**
426  * kvm_pmu_sync_hwstate - sync pmu state from cpu
427  * @vcpu: The vcpu pointer
428  *
429  * Check if the PMU has overflowed while we were running in the guest, and
430  * inject an interrupt if that was the case.
431  */
432 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
433 {
434         kvm_pmu_update_state(vcpu);
435 }
436
437 /*
438  * When the perf interrupt is an NMI, we cannot safely notify the vcpu
439  * corresponding to the event.
440  * This is why we need a callback to do it once, outside of the NMI context.
441  */
442 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
443 {
444         struct kvm_vcpu *vcpu;
445         struct kvm_pmu *pmu;
446
447         pmu = container_of(work, struct kvm_pmu, overflow_work);
448         vcpu = kvm_pmc_to_vcpu(pmu->pmc);
449
450         kvm_vcpu_kick(vcpu);
451 }
452
453 /*
454  * When the perf event overflows, set the overflow status and inform the vcpu.
455  */
456 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
457                                   struct perf_sample_data *data,
458                                   struct pt_regs *regs)
459 {
460         struct kvm_pmc *pmc = perf_event->overflow_handler_context;
461         struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
462         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
463         int idx = pmc->idx;
464         u64 period;
465
466         cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
467
468         /*
469          * Reset the sample period to the architectural limit,
470          * i.e. the point where the counter overflows.
471          */
472         period = -(local64_read(&perf_event->count));
473
474         if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
475                 period &= GENMASK(31, 0);
476
477         local64_set(&perf_event->hw.period_left, 0);
478         perf_event->attr.sample_period = period;
479         perf_event->hw.sample_period = period;
480
481         __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
482
483         if (kvm_pmu_overflow_status(vcpu)) {
484                 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
485
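                /*
                 * Kicking the vcpu is not NMI-safe, so defer it to irq_work
                 * when the overflow was raised in NMI context.
                 */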
486                 if (!in_nmi())
487                         kvm_vcpu_kick(vcpu);
488                 else
489                         irq_work_queue(&vcpu->arch.pmu.overflow_work);
490         }
491
492         cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
493 }
494
495 /**
496  * kvm_pmu_software_increment - do software increment
497  * @vcpu: The vcpu pointer
498  * @val: the value the guest writes to the PMSWINC register
499  */
500 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
501 {
502         struct kvm_pmu *pmu = &vcpu->arch.pmu;
503         int i;
504
505         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
506                 return;
507
508         /* Weed out disabled counters */
509         val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
510
511         for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
512                 u64 type, reg;
513
514                 if (!(val & BIT(i)))
515                         continue;
516
517                 /* PMSWINC only applies to ... SW_INC! */
518                 type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
519                 type &= ARMV8_PMU_EVTYPE_EVENT;
520                 if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
521                         continue;
522
523                 /* increment this SW_INC event counter */
524                 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
525                 reg = lower_32_bits(reg);
526                 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
527
528                 if (reg) /* no overflow on the low part */
529                         continue;
530
531                 if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
532                         /* increment the high counter */
533                         reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
534                         reg = lower_32_bits(reg);
535                         __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
536                         if (!reg) /* mark overflow on the high counter */
537                                 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
538                 } else {
539                         /* mark overflow on low counter */
540                         __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
541                 }
542         }
543 }
544
545 /**
546  * kvm_pmu_handle_pmcr - handle PMCR register
547  * @vcpu: The vcpu pointer
548  * @val: the value the guest writes to the PMCR register
549  */
550 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
551 {
552         unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
553         int i;
554
555         if (val & ARMV8_PMU_PMCR_E) {
556                 kvm_pmu_enable_counter_mask(vcpu,
557                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
558         } else {
559                 kvm_pmu_disable_counter_mask(vcpu, mask);
560         }
561
562         if (val & ARMV8_PMU_PMCR_C)
563                 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
564
565         if (val & ARMV8_PMU_PMCR_P) {
566                 for_each_set_bit(i, &mask, 32)
567                         kvm_pmu_set_counter_value(vcpu, i, 0);
568         }
569 }
570
571 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
572 {
573         return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
574                (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
575 }
576
577 /**
578  * kvm_pmu_create_perf_event - create a perf event for a counter
579  * @vcpu: The vcpu pointer
580  * @select_idx: The counter index
581  */
582 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
583 {
584         struct kvm_pmu *pmu = &vcpu->arch.pmu;
585         struct kvm_pmc *pmc;
586         struct perf_event *event;
587         struct perf_event_attr attr;
588         u64 eventsel, counter, reg, data;
589
590         /*
591          * For chained counters the event type and filtering attributes are
592          * obtained from the low/even counter. We also use this counter to
593          * determine if the event is enabled/disabled.
594          */
595         pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
596
597         reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
598               ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
599         data = __vcpu_sys_reg(vcpu, reg);
600
601         kvm_pmu_stop_counter(vcpu, pmc);
602         eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
603
604         /* Software increment event doesn't need to be backed by a perf event */
605         if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
606             pmc->idx != ARMV8_PMU_CYCLE_IDX)
607                 return;
608
609         memset(&attr, 0, sizeof(struct perf_event_attr));
610         attr.type = PERF_TYPE_RAW;
611         attr.size = sizeof(attr);
612         attr.pinned = 1;
613         attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
614         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
615         attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
616         attr.exclude_hv = 1; /* Don't count EL2 events */
617         attr.exclude_host = 1; /* Don't count host events */
618         attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
619                 ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
620
621         counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
622
623         if (kvm_pmu_pmc_is_chained(pmc)) {
624                  /*
625                  * The initial sample period (overflow count) of an event. For
626                  * chained counters we only support overflow interrupts on the
627                  * high counter.
628                  */
629                 attr.sample_period = (-counter) & GENMASK(63, 0);
630                 attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
631
632                 event = perf_event_create_kernel_counter(&attr, -1, current,
633                                                          kvm_pmu_perf_overflow,
634                                                          pmc + 1);
635         } else {
636                 /* The initial sample period (overflow count) of an event. */
637                 if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
638                         attr.sample_period = (-counter) & GENMASK(63, 0);
639                 else
640                         attr.sample_period = (-counter) & GENMASK(31, 0);
641
642                 event = perf_event_create_kernel_counter(&attr, -1, current,
643                                                  kvm_pmu_perf_overflow, pmc);
644         }
645
646         if (IS_ERR(event)) {
647                 pr_err_once("kvm: pmu event creation failed %ld\n",
648                             PTR_ERR(event));
649                 return;
650         }
651
652         pmc->perf_event = event;
653 }
654
655 /**
656  * kvm_pmu_update_pmc_chained - update chained bitmap
657  * @vcpu: The vcpu pointer
658  * @select_idx: The counter index
659  *
660  * Update the chained bitmap based on the event type written in the
661  * typer register and the enable state of the odd register.
662  */
663 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
664 {
665         struct kvm_pmu *pmu = &vcpu->arch.pmu;
666         struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
667         bool new_state, old_state;
668
669         old_state = kvm_pmu_pmc_is_chained(pmc);
670         new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
671                     kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
672
673         if (old_state == new_state)
674                 return;
675
676         canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
677         kvm_pmu_stop_counter(vcpu, canonical_pmc);
678         if (new_state) {
679                 /*
680                  * During promotion from !chained to chained we must ensure
681                  * the adjacent counter is stopped and its event destroyed
682                  */
683                 kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
684                 set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
685                 return;
686         }
687         clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
688 }
689
690 /**
691  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
692  * @vcpu: The vcpu pointer
693  * @data: The data guest writes to PMXEVTYPER_EL0
694  * @select_idx: The counter index
695  *
696  * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
697  * event with the given hardware event number. Here we call the perf_event API to
698  * emulate this action and create a kernel perf event for it.
699  */
700 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
701                                     u64 select_idx)
702 {
703         u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;
704
705         reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
706               ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
707
708         __vcpu_sys_reg(vcpu, reg) = event_type;
709
710         kvm_pmu_update_pmc_chained(vcpu, select_idx);
711         kvm_pmu_create_perf_event(vcpu, select_idx);
712 }
713
714 bool kvm_arm_support_pmu_v3(void)
715 {
716         /*
717          * Check if HW_PERF_EVENTS is supported by checking the number of
718          * hardware performance counters. This ensures the presence of
719          * a physical PMU and that CONFIG_PERF_EVENTS is selected.
720          */
721         return (perf_num_counters() > 0);
722 }
723
724 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
725 {
726         if (!vcpu->arch.pmu.created)
727                 return 0;
728
729         /*
730          * A valid interrupt configuration for the PMU is either to have a
731          * properly configured interrupt number and to use an in-kernel
732          * irqchip, or to not have an in-kernel GIC and not set an IRQ.
733          */
734         if (irqchip_in_kernel(vcpu->kvm)) {
735                 int irq = vcpu->arch.pmu.irq_num;
736                 if (!kvm_arm_pmu_irq_initialized(vcpu))
737                         return -EINVAL;
738
739                 /*
740                  * If we are using an in-kernel vgic, at this point we know
741                  * the vgic will be initialized, so we can check the PMU irq
742                  * number against the dimensions of the vgic and make sure
743                  * it's valid.
744                  */
745                 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
746                         return -EINVAL;
747         } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
748                    return -EINVAL;
749         }
750
751         kvm_pmu_vcpu_reset(vcpu);
752         vcpu->arch.pmu.ready = true;
753
754         return 0;
755 }
756
757 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
758 {
759         if (!kvm_arm_support_pmu_v3())
760                 return -ENODEV;
761
762         if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
763                 return -ENXIO;
764
765         if (vcpu->arch.pmu.created)
766                 return -EBUSY;
767
768         if (irqchip_in_kernel(vcpu->kvm)) {
769                 int ret;
770
771                 /*
772                  * If using the PMU with an in-kernel virtual GIC
773                  * implementation, we require the GIC to be already
774                  * initialized when initializing the PMU.
775                  */
776                 if (!vgic_initialized(vcpu->kvm))
777                         return -ENODEV;
778
779                 if (!kvm_arm_pmu_irq_initialized(vcpu))
780                         return -ENXIO;
781
782                 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
783                                          &vcpu->arch.pmu);
784                 if (ret)
785                         return ret;
786         }
787
788         init_irq_work(&vcpu->arch.pmu.overflow_work,
789                       kvm_pmu_perf_overflow_notify_vcpu);
790
791         vcpu->arch.pmu.created = true;
792         return 0;
793 }
794
795 /*
796  * For one VM the interrupt type must be the same for each vcpu.
797  * As a PPI, the interrupt number is the same for all vcpus,
798  * while as an SPI it must be a separate number per vcpu.
799  */
800 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
801 {
802         int i;
803         struct kvm_vcpu *vcpu;
804
805         kvm_for_each_vcpu(i, vcpu, kvm) {
806                 if (!kvm_arm_pmu_irq_initialized(vcpu))
807                         continue;
808
809                 if (irq_is_ppi(irq)) {
810                         if (vcpu->arch.pmu.irq_num != irq)
811                                 return false;
812                 } else {
813                         if (vcpu->arch.pmu.irq_num == irq)
814                                 return false;
815                 }
816         }
817
818         return true;
819 }
820
821 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
822 {
823         switch (attr->attr) {
824         case KVM_ARM_VCPU_PMU_V3_IRQ: {
825                 int __user *uaddr = (int __user *)(long)attr->addr;
826                 int irq;
827
828                 if (!irqchip_in_kernel(vcpu->kvm))
829                         return -EINVAL;
830
831                 if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
832                         return -ENODEV;
833
834                 if (get_user(irq, uaddr))
835                         return -EFAULT;
836
837                 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
838                 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
839                         return -EINVAL;
840
841                 if (!pmu_irq_is_valid(vcpu->kvm, irq))
842                         return -EINVAL;
843
844                 if (kvm_arm_pmu_irq_initialized(vcpu))
845                         return -EBUSY;
846
847                 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
848                 vcpu->arch.pmu.irq_num = irq;
849                 return 0;
850         }
851         case KVM_ARM_VCPU_PMU_V3_INIT:
852                 return kvm_arm_pmu_v3_init(vcpu);
853         }
854
855         return -ENXIO;
856 }
857
858 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
859 {
860         switch (attr->attr) {
861         case KVM_ARM_VCPU_PMU_V3_IRQ: {
862                 int __user *uaddr = (int __user *)(long)attr->addr;
863                 int irq;
864
865                 if (!irqchip_in_kernel(vcpu->kvm))
866                         return -EINVAL;
867
868                 if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
869                         return -ENODEV;
870
871                 if (!kvm_arm_pmu_irq_initialized(vcpu))
872                         return -ENXIO;
873
874                 irq = vcpu->arch.pmu.irq_num;
875                 return put_user(irq, uaddr);
876         }
877         }
878
879         return -ENXIO;
880 }
881
882 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
883 {
884         switch (attr->attr) {
885         case KVM_ARM_VCPU_PMU_V3_IRQ:
886         case KVM_ARM_VCPU_PMU_V3_INIT:
887                 if (kvm_arm_support_pmu_v3() &&
888                     test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
889                         return 0;
890         }
891
892         return -ENXIO;
893 }