RISC-V: Add sscofpmf extension support
authorAtish Patra <atish.patra@wdc.com>
Sat, 19 Feb 2022 00:46:58 +0000 (16:46 -0800)
committerPalmer Dabbelt <palmer@rivosinc.com>
Mon, 21 Mar 2022 22:01:09 +0000 (15:01 -0700)
The sscofpmf extension adds counter overflow interrupts and mode-based
filtering for the programmable counters. Enable the perf driver to handle
the overflow interrupt. The overflow interrupt is a hart-local interrupt.
Thus, per-CPU overflow interrupts are set up as a child under the root
INTC irq domain.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Atish Patra <atishp@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/hwcap.h
arch/riscv/kernel/cpu.c
arch/riscv/kernel/cpufeature.c
drivers/perf/riscv_pmu_sbi.c
include/linux/perf/riscv_pmu.h

index ce493df..8b2e480 100644 (file)
@@ -65,6 +65,7 @@
 #define IRQ_S_EXT              9
 #define IRQ_VS_EXT             10
 #define IRQ_M_EXT              11
+#define IRQ_PMU_OVF            13
 
 /* Exception causes */
 #define EXC_INST_MISALIGNED    0
 #define CSR_HPMCOUNTER30H      0xc9e
 #define CSR_HPMCOUNTER31H      0xc9f
 
+#define CSR_SSCOUNTOVF         0xda0
+
 #define CSR_SSTATUS            0x100
 #define CSR_SIE                        0x104
 #define CSR_STVEC              0x105
 # define RV_IRQ_SOFT           IRQ_S_SOFT
 # define RV_IRQ_TIMER  IRQ_S_TIMER
 # define RV_IRQ_EXT            IRQ_S_EXT
-#endif /* CONFIG_RISCV_M_MODE */
+# define RV_IRQ_PMU    IRQ_PMU_OVF
+# define SIP_LCOFIP     (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
 
 /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
 #define IE_SIE         (_AC(0x1, UL) << RV_IRQ_SOFT)
index 691fc9c..0734e42 100644 (file)
@@ -51,6 +51,7 @@ extern unsigned long elf_hwcap;
  * available logical extension id.
  */
 enum riscv_isa_ext_id {
+       RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE, /* Sscofpmf; assigned the value RISCV_ISA_EXT_BASE */
        RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
 };
 
index fc115e3..c11cb57 100644 (file)
@@ -87,6 +87,7 @@ int riscv_of_parent_hartid(struct device_node *node)
  *    extensions by an underscore.
  */
 static struct riscv_isa_ext_data isa_ext_arr[] = {
+       __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), /* NOTE(review): name unquoted here, "" below - presumably the macro stringifies; confirm */
        __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
 };
 
index a43c08a..1b2d42d 100644 (file)
@@ -190,6 +190,8 @@ void __init riscv_fill_hwcap(void)
                        if (!ext_long) {
                                this_hwcap |= isa2hwcap[(unsigned char)(*ext)];
                                set_bit(*ext - 'a', this_isa);
+                       } else {
+                               SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
                        }
 #undef SET_ISA_EXT_MAP
                }
index 815d5c5..a1317a4 100644 (file)
 #include <linux/mod_devicetable.h>
 #include <linux/perf/riscv_pmu.h>
 #include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
 
 #include <asm/sbi.h>
+#include <asm/hwcap.h>
 
 union sbi_pmu_ctr_info {
        unsigned long value;
@@ -35,6 +40,7 @@ union sbi_pmu_ctr_info {
  * per_cpu in case of harts with different pmu counters
  */
 static union sbi_pmu_ctr_info *pmu_ctr_list;
+static unsigned int riscv_pmu_irq;
 
 struct sbi_pmu_event_data {
        union {
@@ -469,33 +475,229 @@ static int pmu_sbi_get_ctrinfo(int nctr)
        return 0;
 }
 
+static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
+{
+       /*
+        * Issue a single SBI PMU COUNTER_STOP call whose counter mask is
+        * GENMASK_ULL(pmu->num_counters - 1, 0), i.e. one bit for each of the
+        * pmu->num_counters counters.
+        *
+        * No need to check the error because we are disabling all the counters
+        * which may include counters that are not enabled yet.
+        *
+        * NOTE(review): the mask is built from pmu->num_counters - 1, so this
+        * presumably relies on num_counters being at least 1 - confirm.
+        */
+       sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+                 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
+}
+
+static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
+{
+       struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+       /*
+        * Stop only the counters recorded in the first word of the current
+        * CPU's used_hw_ctrs bitmap, with one SBI PMU COUNTER_STOP call.
+        *
+        * No need to check the error here as we can't do anything about the
+        * error.
+        */
+       sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
+                 cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
+}
+
+/**
+ * This function starts all the used counters in two step approach.
+ * Any counter that did not overflow can be start in a single step
+ * while the overflowed counters need to be started with updated initialization
+ * value.
+ */
+static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+                                              unsigned long ctr_ovf_mask)
+{
+       int idx = 0;
+       struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+       struct perf_event *event;
+       unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+       unsigned long ctr_start_mask = 0;
+       uint64_t max_period;
+       struct hw_perf_event *hwc;
+       u64 init_val = 0;
+
+       ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
+
+       /* Start all the counters that did not overflow in a single shot */
+       sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
+                 0, 0, 0, 0);
+
+       /* Reinitialize and start all the counter that overflowed */
+       while (ctr_ovf_mask) {
+               if (ctr_ovf_mask & 0x01) {
+                       event = cpu_hw_evt->events[idx];
+                       hwc = &event->hw;
+                       max_period = riscv_pmu_ctr_get_width_mask(event);
+                       init_val = local64_read(&hwc->prev_count) & max_period;
+                       sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
+                                 flag, init_val, 0, 0);
+               }
+               ctr_ovf_mask = ctr_ovf_mask >> 1;
+               idx++;
+       }
+}
+
+/*
+ * Per-CPU handler for the counter overflow interrupt.
+ *
+ * @irq: Linux irq number (unused).
+ * @dev: the current CPU's struct cpu_hw_events, as registered with
+ *       request_percpu_irq().
+ *
+ * Stops the hardware counters in use, reads the overflow status CSR, clears
+ * the pending bit, reports an overflow to perf for every sampling event whose
+ * counter overflowed, and finally restarts the counters.
+ *
+ * Returns IRQ_HANDLED when at least one overflow bit was set, IRQ_NONE
+ * otherwise (no per-CPU data, no counter in use, or no overflow recorded).
+ */
+static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
+{
+       struct perf_sample_data data;
+       struct pt_regs *regs;
+       struct hw_perf_event *hw_evt;
+       union sbi_pmu_ctr_info *info;
+       int lidx, hidx, fidx;
+       struct riscv_pmu *pmu;
+       struct perf_event *event;
+       unsigned long overflow;
+       unsigned long overflowed_ctrs = 0;
+       struct cpu_hw_events *cpu_hw_evt = dev;
+
+       if (WARN_ON_ONCE(!cpu_hw_evt))
+               return IRQ_NONE;
+
+       /*
+        * Firmware counter don't support overflow yet.
+        *
+        * find_first_bit() returns RISCV_MAX_COUNTERS when no counter is in
+        * use; that value must not be used to index events[].
+        */
+       fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
+       if (fidx == RISCV_MAX_COUNTERS) {
+               csr_clear(CSR_SIP, SIP_LCOFIP);
+               return IRQ_NONE;
+       }
+
+       event = cpu_hw_evt->events[fidx];
+       if (!event) {
+               csr_clear(CSR_SIP, SIP_LCOFIP);
+               return IRQ_NONE;
+       }
+
+       pmu = to_riscv_pmu(event->pmu);
+       pmu_sbi_stop_hw_ctrs(pmu);
+
+       /* Overflow status register should only be read after counter are stopped */
+       overflow = csr_read(CSR_SSCOUNTOVF);
+
+       /*
+        * Overflow interrupt pending bit should only be cleared after stopping
+        * all the counters to avoid any race condition.
+        */
+       csr_clear(CSR_SIP, SIP_LCOFIP);
+
+       /*
+        * No overflow bit is set.
+        * NOTE(review): the counters stopped above are not restarted on this
+        * path - confirm that is intended.
+        */
+       if (!overflow)
+               return IRQ_NONE;
+
+       regs = get_irq_regs();
+
+       for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
+               event = cpu_hw_evt->events[lidx];
+
+               /* Skip if invalid event or user did not request a sampling */
+               if (!event || !is_sampling_event(event))
+                       continue;
+
+               info = &pmu_ctr_list[lidx];
+               /* Do a sanity check */
+               if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
+                       continue;
+
+               /* compute hardware counter index */
+               hidx = info->csr - CSR_CYCLE;
+               /*
+                * check if the corresponding bit is set in sscountovf; use an
+                * unsigned long wide mask so that high indexes do not shift
+                * into (or past) the sign bit of an int.
+                */
+               if (!(overflow & BIT(hidx)))
+                       continue;
+
+               /*
+                * Keep a track of overflowed counters so that they can be started
+                * with updated initial value.
+                */
+               overflowed_ctrs |= BIT(lidx);
+               hw_evt = &event->hw;
+               riscv_pmu_event_update(event);
+               perf_sample_data_init(&data, 0, hw_evt->last_period);
+               if (riscv_pmu_event_set_period(event)) {
+                       /*
+                        * Unlike other ISAs, RISC-V don't have to disable interrupts
+                        * to avoid throttling here. As per the specification, the
+                        * interrupt remains disabled while the OF bit is set.
+                        * Interrupts are enabled again only during the start.
+                        * TODO: We will need to stop the guest counters once
+                        * virtualization support is added.
+                        */
+                       perf_event_overflow(event, &data, regs);
+               }
+       }
+       pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+
+       return IRQ_HANDLED;
+}
+
 static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
 {
        struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
+       struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
 
        /* Enable the access for TIME csr only from the user mode now */
        csr_write(CSR_SCOUNTEREN, 0x2);
 
        /* Stop all the counters so that they can be enabled from perf */
-       sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
-                 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
+       pmu_sbi_stop_all(pmu);
+
+       if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+               cpu_hw_evt->irq = riscv_pmu_irq;
+               csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
+               csr_set(CSR_IE, BIT(RV_IRQ_PMU));
+               enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
+       }
 
        return 0;
 }
 
 static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
 {
+       if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+               disable_percpu_irq(riscv_pmu_irq);
+               csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
+       }
+
        /* Disable all counters access for user mode now */
        csr_write(CSR_SCOUNTEREN, 0x0);
 
        return 0;
 }
 
+/*
+ * Map and request the per-CPU counter overflow interrupt.
+ *
+ * @pmu:  PMU whose per-CPU hw_events pointer is registered as the irq cookie.
+ * @pdev: platform device being probed (currently unused).
+ *
+ * Looks up the irq domain of the first CPU node that has a "riscv,cpu-intc"
+ * child with a registered domain, maps RV_IRQ_PMU in it and installs
+ * pmu_sbi_ovf_handler() as a per-CPU handler. On success the Linux irq number
+ * is left in riscv_pmu_irq.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the Sscofpmf extension is not
+ * available, -ENODEV when the INTC node/domain or the mapping cannot be
+ * obtained, or the error returned by request_percpu_irq().
+ */
+static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
+{
+       int ret;
+       struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
+       struct device_node *cpu, *child;
+       struct irq_domain *domain = NULL;
+
+       if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+               return -EOPNOTSUPP;
+
+       /*
+        * for_each_of_cpu_node() holds a reference on @cpu for the current
+        * iteration; it has to be dropped by hand whenever the loop is left
+        * early.
+        */
+       for_each_of_cpu_node(cpu) {
+               child = of_get_compatible_child(cpu, "riscv,cpu-intc");
+               if (!child) {
+                       pr_err("Failed to find INTC node\n");
+                       of_node_put(cpu);
+                       return -ENODEV;
+               }
+               domain = irq_find_host(child);
+               of_node_put(child);
+               if (domain) {
+                       of_node_put(cpu);
+                       break;
+               }
+       }
+       if (!domain) {
+               pr_err("Failed to find INTC IRQ root domain\n");
+               return -ENODEV;
+       }
+
+       riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
+       if (!riscv_pmu_irq) {
+               pr_err("Failed to map PMU interrupt for node\n");
+               return -ENODEV;
+       }
+
+       ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
+       if (ret) {
+               pr_err("registering percpu irq failed [%d]\n", ret);
+               /* Do not leave a mapped but handler-less irq behind */
+               irq_dispose_mapping(riscv_pmu_irq);
+               riscv_pmu_irq = 0;
+               return ret;
+       }
+
+       return 0;
+}
+
 static int pmu_sbi_device_probe(struct platform_device *pdev)
 {
        struct riscv_pmu *pmu = NULL;
        int num_counters;
-       int ret;
+       int ret = -ENODEV;
 
        pr_info("SBI PMU extension is available\n");
        pmu = riscv_pmu_alloc();
@@ -505,13 +707,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
        num_counters = pmu_sbi_find_num_ctrs();
        if (num_counters < 0) {
                pr_err("SBI PMU extension doesn't provide any counters\n");
-               return -ENODEV;
+               goto out_free;
        }
 
        /* cache all the information about counters now */
        if (pmu_sbi_get_ctrinfo(num_counters))
-               return -ENODEV;
+               goto out_free;
 
+       ret = pmu_sbi_setup_irqs(pmu, pdev);
+       if (ret < 0) {
+               pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
+               pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+               pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+       }
        pmu->num_counters = num_counters;
        pmu->ctr_start = pmu_sbi_ctr_start;
        pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -532,6 +740,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
        }
 
        return 0;
+
+out_free:
+       kfree(pmu);
+       return ret;
 }
 
 static struct platform_driver pmu_sbi_driver = {
index 0f22694..46f9b6f 100644 (file)
@@ -29,6 +29,8 @@
 struct cpu_hw_events {
        /* currently enabled events */
        int                     n_events;
+       /* Counter overflow interrupt */
+       int             irq;
        /* currently enabled events */
        struct perf_event       *events[RISCV_MAX_COUNTERS];
        /* currently enabled hardware counters */