Merge branches 'for-next/sve-remove-pseudo-regs', 'for-next/backtrace-ipi', 'for...
author     Catalin Marinas <catalin.marinas@arm.com>
           Thu, 26 Oct 2023 16:09:52 +0000 (17:09 +0100)
committer  Catalin Marinas <catalin.marinas@arm.com>
           Thu, 26 Oct 2023 16:09:52 +0000 (17:09 +0100)
* arm64/for-next/perf:
  perf: hisi: Fix use-after-free when register pmu fails
  drivers/perf: hisi_pcie: Initialize event->cpu only on success
  drivers/perf: hisi_pcie: Check the type first in pmu::event_init()
  perf/arm-cmn: Enable per-DTC counter allocation
  perf/arm-cmn: Rework DTC counters (again)
  perf/arm-cmn: Fix DTC domain detection
  drivers: perf: arm_pmuv3: Drop some unused arguments from armv8_pmu_init()
  drivers: perf: arm_pmuv3: Read PMMIR_EL1 unconditionally
  drivers/perf: hisi: use cpuhp_state_remove_instance_nocalls() for hisi_hns3_pmu uninit process
  drivers/perf: xgene: Use device_get_match_data()
  perf/amlogic: add missing MODULE_DEVICE_TABLE
  docs/perf: Add ampere_cspmu to toctree to fix a build warning
  perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU
  perf: arm_cspmu: Support implementation specific validation
  perf: arm_cspmu: Support implementation specific filters
  perf: arm_cspmu: Split 64-bit write to 32-bit writes
  perf: arm_cspmu: Separate Arm and vendor module

* for-next/sve-remove-pseudo-regs:
  : arm64/fpsimd: Remove the vector length pseudo registers
  arm64/sve: Remove SMCR pseudo register from cpufeature code
  arm64/sve: Remove ZCR pseudo register from cpufeature code

* for-next/backtrace-ipi:
  : Add an IPI for backtraces/kgdb, backed by pseudo-NMI where supported (see the usage note below)
  arm64: smp: Don't directly call arch_smp_send_reschedule() for wakeup
  arm64: smp: avoid NMI IPIs with broken MediaTek FW
  arm64: smp: Mark IPI globals as __ro_after_init
  arm64: kgdb: Implement kgdb_roundup_cpus() to enable pseudo-NMI roundup
  arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI
  arm64: smp: Add arch support for backtrace using pseudo-NMI
  arm64: smp: Remove dedicated wakeup IPI
  arm64: idle: Tag the arm64 idle functions as __cpuidle
  irqchip/gic-v3: Enable support for SGIs to act as NMIs
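
  A usage note (standard kernel interface, not introduced by this series):
  with the pseudo-NMI backtrace support above in place, an all-CPU
  backtrace can be requested from userspace through magic SysRq, for
  example:

    # echo l > /proc/sysrq-trigger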

* for-next/kselftest:
  : Various arm64 kselftest updates
  kselftest/arm64: Validate SVCR in streaming SVE stress test

* for-next/misc:
  : Miscellaneous patches (a swiotlb sizing note follows the list)
  arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer
  arm64: module: Fix PLT counting when CONFIG_RANDOMIZE_BASE=n
  arm64, irqchip/gic-v3, ACPI: Move MADT GICC enabled check into a helper
  clocksource/drivers/arm_arch_timer: limit XGene-1 workaround
  arm64: Remove system_uses_lse_atomics()
  arm64: Mark the 'addr' argument to set_ptes() and __set_pte_at() as unused
  arm64/mm: Hoist synchronization out of set_ptes() loop
  arm64: swiotlb: Reduce the default size if no ZONE_DMA bouncing needed
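
  Sizing note for the swiotlb patch above (illustrative arithmetic, not
  part of the patch itself): when ZONE_DMA needs no bouncing, the
  kmalloc() bounce buffer is sized at roughly 1MB per 1GB of RAM, capped
  at the default swiotlb size, so a hypothetical 16GB machine would
  reserve about 16MB rather than the usual 64MB default.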

* for-next/cpufeat-display-cores:
  : arm64 cpufeature: display the set of cores with a feature (example message below)
  arm64: cpufeature: Change DBM to display enabled cores
  arm64: cpufeature: Display the set of cores with a feature
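
  With this change, the detection messages for such capabilities list the
  cores they apply to, e.g. (CPU numbers purely illustrative):

    detected: Hardware dirty bit management on CPU0-7
    detected: Activity Monitors Unit (AMU) on CPU0-3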

37 files changed:
Documentation/admin-guide/perf/ampere_cspmu.rst [new file with mode: 0644]
Documentation/admin-guide/perf/index.rst
arch/arm64/Kconfig
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/lse.h
arch/arm64/include/asm/mte.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/module-plts.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/smp.c
arch/arm64/kvm/guest.c
arch/arm64/mm/init.c
drivers/acpi/processor_core.c
drivers/clocksource/arm_arch_timer.c
drivers/irqchip/irq-gic-v3.c
drivers/perf/amlogic/meson_g12_ddr_pmu.c
drivers/perf/arm-cmn.c
drivers/perf/arm_cspmu/Kconfig
drivers/perf/arm_cspmu/Makefile
drivers/perf/arm_cspmu/ampere_cspmu.c [new file with mode: 0644]
drivers/perf/arm_cspmu/arm_cspmu.c
drivers/perf/arm_cspmu/arm_cspmu.h
drivers/perf/arm_cspmu/nvidia_cspmu.c
drivers/perf/arm_cspmu/nvidia_cspmu.h [deleted file]
drivers/perf/arm_pmuv3.c
drivers/perf/hisilicon/hisi_pcie_pmu.c
drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
drivers/perf/hisilicon/hns3_pmu.c
drivers/perf/xgene_pmu.c
include/linux/acpi.h
tools/testing/selftests/arm64/fp/sve-test.S

diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
new file mode 100644
index 0000000..94f93f5
--- /dev/null
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Ampere SoC Performance Monitoring Unit (PMU)
+============================================
+
+Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
+Therefore, the driver is implemented as a submodule of arm_cspmu driver. At the
+first phase it's used for counting MCU events on AmpereOne.
+
+
+MCU PMU events
+--------------
+
+The PMU driver supports setting filters for "rank", "bank", and "threshold".
+Note, that the filters are per PMU instance rather than per event.
+
+
+Example for perf tool use::
+
+  / # perf list ampere
+
+    ampere_mcu_pmu_0/act_sent/                         [Kernel PMU event]
+    <...>
+    ampere_mcu_pmu_1/rd_sent/                          [Kernel PMU event]
+    <...>
+
+  / # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
+        sleep 1
index f60be04..a2e6f2c 100644
@@ -22,3 +22,4 @@ Performance monitor support
    nvidia-pmu
    meson-ddr-pmu
    cxl
+   ampere_cspmu
index b10515c..cacc671 100644
@@ -1355,6 +1355,8 @@ choice
 config CPU_BIG_ENDIAN
        bool "Build big-endian kernel"
        depends on !LD_IS_LLD || LLD_VERSION >= 130000
+       # https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c
+       depends on AS_IS_GNU || AS_VERSION >= 150000
        help
          Say Y if you plan on running a kernel with a big-endian userspace.
 
index e749838..f303409 100644
@@ -63,12 +63,6 @@ struct cpuinfo_arm64 {
        u64             reg_id_aa64smfr0;
 
        struct cpuinfo_32bit    aarch32;
-
-       /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
-       u64             reg_zcr;
-
-       /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
-       u64             reg_smcr;
 };
 
 DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
index 5bba393..19b4d00 100644
@@ -23,6 +23,7 @@
 #include <linux/bug.h>
 #include <linux/jump_label.h>
 #include <linux/kernel.h>
+#include <linux/cpumask.h>
 
 /*
  * CPU feature register tracking
@@ -380,6 +381,7 @@ struct arm64_cpu_capabilities {
         * method is robust against being called multiple times.
         */
        const struct arm64_cpu_capabilities *match_list;
+       const struct cpumask *cpus;
 };
 
 static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
index 5f6f848..818ea1c 100644
@@ -85,7 +85,8 @@
 #define ARM_CPU_PART_NEOVERSE_N2       0xD49
 #define ARM_CPU_PART_CORTEX_A78C       0xD4B
 
-#define APM_CPU_PART_POTENZA           0x000
+#define APM_CPU_PART_XGENE             0x000
+#define APM_CPU_VAR_POTENZA            0x00
 
 #define CAVIUM_CPU_PART_THUNDERX       0x0A1
 #define CAVIUM_CPU_PART_THUNDERX_81XX  0x0A2
index 8df46f1..9e5d3a0 100644
@@ -128,7 +128,6 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 
-extern u64 read_zcr_features(void);
 extern u64 read_smcr_features(void);
 
 /*
index cbbcdc3..3129a58 100644
 #include <asm/atomic_lse.h>
 #include <asm/cpucaps.h>
 
-static __always_inline bool system_uses_lse_atomics(void)
-{
-       return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS);
-}
-
 #define __lse_ll_sc_body(op, ...)                                      \
 ({                                                                     \
-       system_uses_lse_atomics() ?                                     \
+       alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ?             \
                __lse_##op(__VA_ARGS__) :                               \
                __ll_sc_##op(__VA_ARGS__);                              \
 })
@@ -34,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void)
 
 #else  /* CONFIG_ARM64_LSE_ATOMICS */
 
-static inline bool system_uses_lse_atomics(void) { return false; }
-
 #define __lse_ll_sc_body(op, ...)              __ll_sc_##op(__VA_ARGS__)
 
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)       llsc
index 4cedbaa..91fbd5c 100644
@@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
 }
 
 void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t pte);
+void mte_sync_tags(pte_t pte, unsigned int nr_pages);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
@@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
 static inline void mte_zero_clear_page_tags(void *addr)
 {
 }
-static inline void mte_sync_tags(pte_t pte)
+static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
 {
 }
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
index 7f7d9b1..b19a8ae 100644
@@ -325,8 +325,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
                     __func__, pte_val(old_pte), pte_val(pte));
 }
 
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
-                               pte_t *ptep, pte_t pte)
+static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
 {
        if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
                __sync_icache_dcache(pte);
@@ -339,24 +338,22 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
         */
        if (system_supports_mte() && pte_access_permitted(pte, false) &&
            !pte_special(pte) && pte_tagged(pte))
-               mte_sync_tags(pte);
-
-       __check_safe_pte_update(mm, ptep, pte);
-
-       set_pte(ptep, pte);
+               mte_sync_tags(pte, nr_pages);
 }
 
-static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
-                             pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void set_ptes(struct mm_struct *mm,
+                           unsigned long __always_unused addr,
+                           pte_t *ptep, pte_t pte, unsigned int nr)
 {
        page_table_check_ptes_set(mm, ptep, pte, nr);
+       __sync_cache_and_tags(pte, nr);
 
        for (;;) {
-               __set_pte_at(mm, addr, ptep, pte);
+               __check_safe_pte_update(mm, ptep, pte);
+               set_pte(ptep, pte);
                if (--nr == 0)
                        break;
                ptep++;
-               addr += PAGE_SIZE;
                pte_val(pte) += PAGE_SIZE;
        }
 }
@@ -531,18 +528,29 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 #define pud_pfn(pud)           ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
 #define pfn_pud(pfn,prot)      __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
+static inline void __set_pte_at(struct mm_struct *mm,
+                               unsigned long __always_unused addr,
+                               pte_t *ptep, pte_t pte, unsigned int nr)
+{
+       __sync_cache_and_tags(pte, nr);
+       __check_safe_pte_update(mm, ptep, pte);
+       set_pte(ptep, pte);
+}
+
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                              pmd_t *pmdp, pmd_t pmd)
 {
        page_table_check_pmd_set(mm, pmdp, pmd);
-       return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+       return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
+                                               PMD_SIZE >> PAGE_SHIFT);
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
                              pud_t *pudp, pud_t pud)
 {
        page_table_check_pud_set(mm, pudp, pud);
-       return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+       return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
+                                               PUD_SIZE >> PAGE_SHIFT);
 }
 
 #define __p4d_to_phys(p4d)     __pte_to_phys(p4d_pte(p4d))
index 444a73c..2ccb9df 100644
@@ -611,18 +611,6 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
        ARM64_FTR_END,
 };
 
-static const struct arm64_ftr_bits ftr_zcr[] = {
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
-               ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0),       /* LEN */
-       ARM64_FTR_END,
-};
-
-static const struct arm64_ftr_bits ftr_smcr[] = {
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
-               SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0),     /* LEN */
-       ARM64_FTR_END,
-};
-
 /*
  * Common ftr bits for a 32bit register with all hidden, strict
  * attributes, with 4bit feature fields and a default safe value of
@@ -735,10 +723,6 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
        ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
 
-       /* Op1 = 0, CRn = 1, CRm = 2 */
-       ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
-       ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
-
        /* Op1 = 1, CRn = 0, CRm = 0 */
        ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
 
@@ -1040,21 +1024,20 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 
        if (IS_ENABLED(CONFIG_ARM64_SVE) &&
            id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
-               info->reg_zcr = read_zcr_features();
-               init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
+               sve_kernel_enable(NULL);
                vec_init_vq_map(ARM64_VEC_SVE);
        }
 
        if (IS_ENABLED(CONFIG_ARM64_SME) &&
            id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
-               info->reg_smcr = read_smcr_features();
+               sme_kernel_enable(NULL);
+
                /*
                 * We mask out SMPS since even if the hardware
                 * supports priorities the kernel does not at present
                 * and we block access to them.
                 */
                info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
-               init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
                vec_init_vq_map(ARM64_VEC_SME);
        }
 
@@ -1289,28 +1272,25 @@ void update_cpu_features(int cpu,
        taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
                                      info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
 
+       /* Probe vector lengths */
        if (IS_ENABLED(CONFIG_ARM64_SVE) &&
            id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
-               info->reg_zcr = read_zcr_features();
-               taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
-                                       info->reg_zcr, boot->reg_zcr);
-
-               /* Probe vector lengths */
-               if (!system_capabilities_finalized())
+               if (!system_capabilities_finalized()) {
+                       sve_kernel_enable(NULL);
                        vec_update_vq_map(ARM64_VEC_SVE);
+               }
        }
 
        if (IS_ENABLED(CONFIG_ARM64_SME) &&
            id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
-               info->reg_smcr = read_smcr_features();
+               sme_kernel_enable(NULL);
+
                /*
                 * We mask out SMPS since even if the hardware
                 * supports priorities the kernel does not at present
                 * and we block access to them.
                 */
                info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
-               taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
-                                       info->reg_smcr, boot->reg_smcr);
 
                /* Probe vector lengths */
                if (!system_capabilities_finalized())
@@ -1848,6 +1828,8 @@ static int __init parse_kpti(char *str)
 early_param("kpti", parse_kpti);
 
 #ifdef CONFIG_ARM64_HW_AFDBM
+static struct cpumask dbm_cpus __read_mostly;
+
 static inline void __cpu_enable_hw_dbm(void)
 {
        u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
@@ -1883,35 +1865,22 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
 
 static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
 {
-       if (cpu_can_use_dbm(cap))
+       if (cpu_can_use_dbm(cap)) {
                __cpu_enable_hw_dbm();
+               cpumask_set_cpu(smp_processor_id(), &dbm_cpus);
+       }
 }
 
 static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
                       int __unused)
 {
-       static bool detected = false;
        /*
         * DBM is a non-conflicting feature. i.e, the kernel can safely
         * run a mix of CPUs with and without the feature. So, we
         * unconditionally enable the capability to allow any late CPU
         * to use the feature. We only enable the control bits on the
-        * CPU, if it actually supports.
-        *
-        * We have to make sure we print the "feature" detection only
-        * when at least one CPU actually uses it. So check if this CPU
-        * can actually use it and print the message exactly once.
-        *
-        * This is safe as all CPUs (including secondary CPUs - due to the
-        * LOCAL_CPU scope - and the hotplugged CPUs - via verification)
-        * goes through the "matches" check exactly once. Also if a CPU
-        * matches the criteria, it is guaranteed that the CPU will turn
-        * the DBM on, as the capability is unconditionally enabled.
+        * CPU, if it is supported.
         */
-       if (!detected && cpu_can_use_dbm(cap)) {
-               detected = true;
-               pr_info("detected: Hardware dirty bit management\n");
-       }
 
        return true;
 }
@@ -1944,8 +1913,6 @@ int get_cpu_with_amu_feat(void)
 static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
 {
        if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
-               pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
-                       smp_processor_id());
                cpumask_set_cpu(smp_processor_id(), &amu_cpus);
 
                /* 0 reference values signal broken/disabled counters */
@@ -2405,16 +2372,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 #endif /* CONFIG_ARM64_RAS_EXTN */
 #ifdef CONFIG_ARM64_AMU_EXTN
        {
-               /*
-                * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
-                * Therefore, don't provide .desc as we don't want the detection
-                * message to be shown until at least one CPU is detected to
-                * support the feature.
-                */
+               .desc = "Activity Monitors Unit (AMU)",
                .capability = ARM64_HAS_AMU_EXTN,
                .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
                .matches = has_amu,
                .cpu_enable = cpu_amu_enable,
+               .cpus = &amu_cpus,
                ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP)
        },
 #endif /* CONFIG_ARM64_AMU_EXTN */
@@ -2454,18 +2417,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
        },
 #ifdef CONFIG_ARM64_HW_AFDBM
        {
-               /*
-                * Since we turn this on always, we don't want the user to
-                * think that the feature is available when it may not be.
-                * So hide the description.
-                *
-                * .desc = "Hardware pagetable Dirty Bit Management",
-                *
-                */
+               .desc = "Hardware dirty bit management",
                .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
                .capability = ARM64_HW_DBM,
                .matches = has_hw_dbm,
                .cpu_enable = cpu_enable_hw_dbm,
+               .cpus = &dbm_cpus,
                ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
        },
 #endif
@@ -2981,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask)
                    !caps->matches(caps, cpucap_default_scope(caps)))
                        continue;
 
-               if (caps->desc)
+               if (caps->desc && !caps->cpus)
                        pr_info("detected: %s\n", caps->desc);
 
                __set_bit(caps->capability, system_cpucaps);
@@ -3153,36 +3110,20 @@ static void verify_local_elf_hwcaps(void)
 
 static void verify_sve_features(void)
 {
-       u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
-       u64 zcr = read_zcr_features();
-
-       unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
-       unsigned int len = zcr & ZCR_ELx_LEN_MASK;
-
-       if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) {
+       if (vec_verify_vq_map(ARM64_VEC_SVE)) {
                pr_crit("CPU%d: SVE: vector length support mismatch\n",
                        smp_processor_id());
                cpu_die_early();
        }
-
-       /* Add checks on other ZCR bits here if necessary */
 }
 
 static void verify_sme_features(void)
 {
-       u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
-       u64 smcr = read_smcr_features();
-
-       unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
-       unsigned int len = smcr & SMCR_ELx_LEN_MASK;
-
-       if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
+       if (vec_verify_vq_map(ARM64_VEC_SME)) {
                pr_crit("CPU%d: SME: vector length support mismatch\n",
                        smp_processor_id());
                cpu_die_early();
        }
-
-       /* Add checks on other SMCR bits here if necessary */
 }
 
 static void verify_hyp_capabilities(void)
@@ -3330,6 +3271,7 @@ unsigned long cpu_get_elf_hwcap2(void)
 
 static void __init setup_system_capabilities(void)
 {
+       int i;
        /*
         * We have finalised the system-wide safe feature
         * registers, finalise the capabilities that depend
@@ -3338,6 +3280,15 @@ static void __init setup_system_capabilities(void)
         */
        update_cpu_capabilities(SCOPE_SYSTEM);
        enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+
+       for (i = 0; i < ARM64_NCAPS; i++) {
+               const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
+
+               if (caps && caps->cpus && caps->desc &&
+                       cpumask_any(caps->cpus) < nr_cpu_ids)
+                       pr_info("detected: %s on CPU%*pbl\n",
+                               caps->desc, cpumask_pr_args(caps->cpus));
+       }
 }
 
 void __init setup_cpu_features(void)
index 91e44ac..04c8010 100644
@@ -1170,32 +1170,12 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
        isb();
 }
 
-/*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
-u64 read_zcr_features(void)
-{
-       /*
-        * Set the maximum possible VL, and write zeroes to all other
-        * bits to see if they stick.
-        */
-       sve_kernel_enable(NULL);
-       write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
-       /* Return LEN value that would be written to get the maximum VL */
-       return sve_vq_from_vl(sve_get_vl()) - 1;
-}
-
 void __init sve_setup(void)
 {
        struct vl_info *info = &vl_info[ARM64_VEC_SVE];
-       u64 zcr;
        DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
        unsigned long b;
+       int max_bit;
 
        if (!system_supports_sve())
                return;
@@ -1208,17 +1188,8 @@ void __init sve_setup(void)
        if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
                set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
 
-       zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
-       info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
-
-       /*
-        * Sanity-check that the max VL we determined through CPU features
-        * corresponds properly to sve_vq_map.  If not, do our best:
-        */
-       if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
-                                                                info->max_vl)))
-               info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
-                                                           info->max_vl);
+       max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+       info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
 
        /*
         * For the default VL, pick the maximum supported value <= 64.
@@ -1333,32 +1304,10 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
                       SYS_SMCR_EL1);
 }
 
-/*
- * Read the pseudo-SMCR used by cpufeatures to identify the supported
- * vector length.
- *
- * Use only if SME is present.
- * This function clobbers the SME vector length.
- */
-u64 read_smcr_features(void)
-{
-       sme_kernel_enable(NULL);
-
-       /*
-        * Set the maximum possible VL.
-        */
-       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
-                      SYS_SMCR_EL1);
-
-       /* Return LEN value that would be written to get the maximum VL */
-       return sve_vq_from_vl(sme_get_vl()) - 1;
-}
-
 void __init sme_setup(void)
 {
        struct vl_info *info = &vl_info[ARM64_VEC_SME];
-       u64 smcr;
-       int min_bit;
+       int min_bit, max_bit;
 
        if (!system_supports_sme())
                return;
@@ -1367,24 +1316,16 @@ void __init sme_setup(void)
         * SME doesn't require any particular vector length be
         * supported but it does require at least one.  We should have
         * disabled the feature entirely while bringing up CPUs but
-        * let's double check here.
+        * let's double check here.  The bitmap is SVE_VQ_MAP sized for
+        * sharing with SVE.
         */
        WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
 
        min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
        info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
 
-       smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
-       info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
-
-       /*
-        * Sanity-check that the max VL we determined through CPU features
-        * corresponds properly to sme_vq_map.  If not, do our best:
-        */
-       if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
-                                                                info->max_vl)))
-               info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
-                                                           info->max_vl);
+       max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+       info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
 
        WARN_ON(info->min_vl > info->max_vl);
 
index bd69a4e..79200f2 100644
@@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
                switch (ELF64_R_TYPE(rela[i].r_info)) {
                case R_AARCH64_JUMP26:
                case R_AARCH64_CALL26:
-                       if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
-                               break;
-
                        /*
                         * We only have to consider branch targets that resolve
                         * to symbols that are defined in a different section.
@@ -269,9 +266,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
 {
        int i = 0, j = numrels - 1;
 
-       if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
-               return 0;
-
        while (i < j) {
                if (branch_rela_needs_plt(syms, &rela[i], dstidx))
                        i++;
index 4edecaa..2fb5e7a 100644
@@ -35,10 +35,10 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
 EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
 #endif
 
-void mte_sync_tags(pte_t pte)
+void mte_sync_tags(pte_t pte, unsigned int nr_pages)
 {
        struct page *page = pte_page(pte);
-       long i, nr_pages = compound_nr(page);
+       unsigned int i;
 
        /* if PG_mte_tagged is set, tags have already been initialised */
        for (i = 0; i < nr_pages; i++, page++) {
index 16ead57..af876a4 100644
@@ -528,7 +528,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 {
        u64 hwid = processor->arm_mpidr;
 
-       if (!(processor->flags & ACPI_MADT_ENABLED)) {
+       if (!acpi_gicc_is_usable(processor)) {
                pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
                return;
        }
index 95f6945..a1710e5 100644
@@ -874,7 +874,7 @@ u32 __attribute_const__ kvm_target_cpu(void)
                break;
        case ARM_CPU_IMP_APM:
                switch (part_number) {
-               case APM_CPU_PART_POTENZA:
+               case APM_CPU_PART_XGENE:
                        return KVM_ARM_TARGET_XGENE_POTENZA;
                }
                break;
index 8a0f860..8deec68 100644
@@ -16,6 +16,7 @@
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/gfp.h>
+#include <linux/math.h>
 #include <linux/memblock.h>
 #include <linux/sort.h>
 #include <linux/of.h>
@@ -493,8 +494,16 @@ void __init mem_init(void)
 {
        bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
 
-       if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
+       if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) {
+               /*
+                * If no bouncing needed for ZONE_DMA, reduce the swiotlb
+                * buffer for kmalloc() bouncing to 1MB per 1GB of RAM.
+                */
+               unsigned long size =
+                       DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
+               swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
                swiotlb = true;
+       }
 
        swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
 
index 7dd6dba..b203cfe 100644
@@ -90,7 +90,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
        struct acpi_madt_generic_interrupt *gicc =
            container_of(entry, struct acpi_madt_generic_interrupt, header);
 
-       if (!(gicc->flags & ACPI_MADT_ENABLED))
+       if (!acpi_gicc_is_usable(gicc))
                return -ENODEV;
 
        /* device_declaration means Device object in DSDT, in the
index 7dd2c61..071b04f 100644
@@ -836,8 +836,9 @@ static u64 __arch_timer_check_delta(void)
                 * Note that TVAL is signed, thus has only 31 of its
                 * 32 bits to express magnitude.
                 */
-               MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
-                                                APM_CPU_PART_POTENZA)),
+               MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
+                                             APM_CPU_PART_XGENE),
+                              APM_CPU_VAR_POTENZA, 0x0, 0xf),
                {},
        };
 
index 737da1b..580f155 100644
@@ -2390,8 +2390,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
        u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
        void __iomem *redist_base;
 
-       /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */
-       if (!(gicc->flags & ACPI_MADT_ENABLED))
+       if (!acpi_gicc_is_usable(gicc))
                return 0;
 
        redist_base = ioremap(gicc->gicr_base_address, size);
@@ -2441,7 +2440,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
         * If GICC is enabled and has valid gicr base address, then it means
         * GICR base is presented via GICC
         */
-       if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
+       if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) {
                acpi_data.enabled_rdists++;
                return 0;
        }
@@ -2450,7 +2449,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
         * It's perfectly valid firmware can pass disabled GICC entry, driver
         * should not treat as errors, skip the entry instead of probe fail.
         */
-       if (!(gicc->flags & ACPI_MADT_ENABLED))
+       if (!acpi_gicc_is_usable(gicc))
                return 0;
 
        return -ENODEV;
@@ -2509,8 +2508,7 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea
        int maint_irq_mode;
        static int first_madt = true;
 
-       /* Skip unusable CPUs */
-       if (!(gicc->flags & ACPI_MADT_ENABLED))
+       if (!acpi_gicc_is_usable(gicc))
                return 0;
 
        maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
index 8b64388..15d52ab 100644
@@ -377,6 +377,7 @@ static const struct of_device_id meson_ddr_pmu_dt_match[] = {
        },
        {}
 };
+MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match);
 
 static struct platform_driver g12_ddr_pmu_driver = {
        .probe = g12_ddr_pmu_probe,
index 913dc04..9479e91 100644
 
 #define CMN_DTM_PMEVCNTSR              0x240
 
-#define CMN_DTM_UNIT_INFO              0x0910
+#define CMN650_DTM_UNIT_INFO           0x0910
+#define CMN_DTM_UNIT_INFO              0x0960
+#define CMN_DTM_UNIT_INFO_DTC_DOMAIN   GENMASK_ULL(1, 0)
 
 #define CMN_DTM_NUM_COUNTERS           4
 /* Want more local counters? Why not replicate the whole DTM! Ugh... */
@@ -279,16 +281,13 @@ struct arm_cmn_node {
        u16 id, logid;
        enum cmn_node_type type;
 
-       int dtm;
-       union {
-               /* DN/HN-F/CXHA */
-               struct {
-                       u8 val : 4;
-                       u8 count : 4;
-               } occupid[SEL_MAX];
-               /* XP */
-               u8 dtc;
-       };
+       u8 dtm;
+       s8 dtc;
+       /* DN/HN-F/CXHA */
+       struct {
+               u8 val : 4;
+               u8 count : 4;
+       } occupid[SEL_MAX];
        union {
                u8 event[4];
                __le32 event_sel;
@@ -538,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
 
                seq_puts(s, "\n     |");
                for (x = 0; x < cmn->mesh_x; x++) {
-                       u8 dtc = cmn->xps[xp_base + x].dtc;
+                       s8 dtc = cmn->xps[xp_base + x].dtc;
 
-                       if (dtc & (dtc - 1))
+                       if (dtc < 0)
                                seq_puts(s, " DTC ?? |");
                        else
-                               seq_printf(s, " DTC %ld  |", __ffs(dtc));
+                               seq_printf(s, " DTC %d  |", dtc);
                }
                seq_puts(s, "\n     |");
                for (x = 0; x < cmn->mesh_x; x++)
@@ -587,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
 struct arm_cmn_hw_event {
        struct arm_cmn_node *dn;
        u64 dtm_idx[4];
-       unsigned int dtc_idx;
-       u8 dtcs_used;
+       s8 dtc_idx[CMN_MAX_DTCS];
        u8 num_dns;
        u8 dtm_offset;
        bool wide_sel;
@@ -598,6 +596,10 @@ struct arm_cmn_hw_event {
 #define for_each_hw_dn(hw, dn, i) \
        for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
 
+/* @i is the DTC number, @idx is the counter index on that DTC */
+#define for_each_hw_dtc_idx(hw, i, idx) \
+       for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)
+
 static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
 {
        BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@@ -1427,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event)
 {
        struct arm_cmn *cmn = to_cmn(event->pmu);
        struct arm_cmn_hw_event *hw = to_cmn_hw(event);
-       unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
        u64 count;
 
-       for (i = 0; hw->dtcs_used & (1U << i); i++) {
-               writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
-               cmn->dtc[i].counters[hw->dtc_idx] = event;
+       for_each_hw_dtc_idx(hw, i, idx) {
+               writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+               cmn->dtc[i].counters[idx] = event;
        }
 
        count = arm_cmn_read_dtm(cmn, hw, false);
@@ -1445,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event)
        struct arm_cmn_hw_event *hw = to_cmn_hw(event);
        u64 delta, new, prev;
        unsigned long flags;
-       unsigned int i;
 
-       if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
-               i = __ffs(hw->dtcs_used);
-               delta = arm_cmn_read_cc(cmn->dtc + i);
+       if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
+               delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
                local64_add(delta, &event->count);
                return;
        }
@@ -1459,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event)
        delta = new - prev;
 
        local_irq_save(flags);
-       for (i = 0; hw->dtcs_used & (1U << i); i++) {
-               new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+       for_each_hw_dtc_idx(hw, i, idx) {
+               new = arm_cmn_read_counter(cmn->dtc + i, idx);
                delta += new << 16;
        }
        local_irq_restore(flags);
@@ -1516,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
        int i;
 
        if (type == CMN_TYPE_DTC) {
-               i = __ffs(hw->dtcs_used);
+               i = hw->dtc_idx[0];
                writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
                cmn->dtc[i].cc_active = true;
        } else if (type == CMN_TYPE_WP) {
@@ -1547,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
        int i;
 
        if (type == CMN_TYPE_DTC) {
-               i = __ffs(hw->dtcs_used);
+               i = hw->dtc_idx[0];
                cmn->dtc[i].cc_active = false;
        } else if (type == CMN_TYPE_WP) {
                int wp_idx = arm_cmn_wp_idx(event);
@@ -1571,7 +1570,7 @@ struct arm_cmn_val {
        u8 dtm_count[CMN_MAX_DTMS];
        u8 occupid[CMN_MAX_DTMS][SEL_MAX];
        u8 wp[CMN_MAX_DTMS][4];
-       int dtc_count;
+       int dtc_count[CMN_MAX_DTCS];
        bool cycles;
 };
 
@@ -1592,7 +1591,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
                return;
        }
 
-       val->dtc_count++;
+       for_each_hw_dtc_idx(hw, dtc, idx)
+               val->dtc_count[dtc]++;
 
        for_each_hw_dn(hw, dn, i) {
                int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1639,8 +1639,9 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
                goto done;
        }
 
-       if (val->dtc_count == CMN_DT_NUM_COUNTERS)
-               goto done;
+       for (i = 0; i < CMN_MAX_DTCS; i++)
+               if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
+                       goto done;
 
        for_each_hw_dn(hw, dn, i) {
                int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1733,12 +1734,19 @@ static int arm_cmn_event_init(struct perf_event *event)
        hw->dn = arm_cmn_node(cmn, type);
        if (!hw->dn)
                return -EINVAL;
+
+       memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx));
        for (dn = hw->dn; dn->type == type; dn++) {
                if (bynodeid && dn->id != nodeid) {
                        hw->dn++;
                        continue;
                }
                hw->num_dns++;
+               if (dn->dtc < 0)
+                       memset(hw->dtc_idx, 0, cmn->num_dtcs);
+               else
+                       hw->dtc_idx[dn->dtc] = 0;
+
                if (bynodeid)
                        break;
        }
@@ -1750,12 +1758,6 @@ static int arm_cmn_event_init(struct perf_event *event)
                        nodeid, nid.x, nid.y, nid.port, nid.dev, type);
                return -EINVAL;
        }
-       /*
-        * Keep assuming non-cycles events count in all DTC domains; turns out
-        * it's hard to make a worthwhile optimisation around this, short of
-        * going all-in with domain-local counter allocation as well.
-        */
-       hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
 
        return arm_cmn_validate_group(cmn, event);
 }
@@ -1781,46 +1783,48 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
        }
        memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
 
-       for (i = 0; hw->dtcs_used & (1U << i); i++)
-               cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+       for_each_hw_dtc_idx(hw, j, idx)
+               cmn->dtc[j].counters[idx] = NULL;
 }
 
 static int arm_cmn_event_add(struct perf_event *event, int flags)
 {
        struct arm_cmn *cmn = to_cmn(event->pmu);
        struct arm_cmn_hw_event *hw = to_cmn_hw(event);
-       struct arm_cmn_dtc *dtc = &cmn->dtc[0];
        struct arm_cmn_node *dn;
        enum cmn_node_type type = CMN_EVENT_TYPE(event);
-       unsigned int i, dtc_idx, input_sel;
+       unsigned int input_sel, i = 0;
 
        if (type == CMN_TYPE_DTC) {
-               i = 0;
                while (cmn->dtc[i].cycles)
                        if (++i == cmn->num_dtcs)
                                return -ENOSPC;
 
                cmn->dtc[i].cycles = event;
-               hw->dtc_idx = CMN_DT_NUM_COUNTERS;
-               hw->dtcs_used = 1U << i;
+               hw->dtc_idx[0] = i;
 
                if (flags & PERF_EF_START)
                        arm_cmn_event_start(event, 0);
                return 0;
        }
 
-       /* Grab a free global counter first... */
-       dtc_idx = 0;
-       while (dtc->counters[dtc_idx])
-               if (++dtc_idx == CMN_DT_NUM_COUNTERS)
-                       return -ENOSPC;
-
-       hw->dtc_idx = dtc_idx;
+       /* Grab the global counters first... */
+       for_each_hw_dtc_idx(hw, j, idx) {
+               if (cmn->part == PART_CMN600 && j > 0) {
+                       idx = hw->dtc_idx[0];
+               } else {
+                       idx = 0;
+                       while (cmn->dtc[j].counters[idx])
+                               if (++idx == CMN_DT_NUM_COUNTERS)
+                                       goto free_dtms;
+               }
+               hw->dtc_idx[j] = idx;
+       }
 
-       /* ...then the local counters to feed it. */
+       /* ...then the local counters to feed them */
        for_each_hw_dn(hw, dn, i) {
                struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
-               unsigned int dtm_idx, shift;
+               unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
                u64 reg;
 
                dtm_idx = 0;
@@ -1839,11 +1843,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
 
                        tmp = dtm->wp_event[wp_idx ^ 1];
                        if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
-                                       CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+                                       CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
                                goto free_dtms;
 
                        input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
-                       dtm->wp_event[wp_idx] = dtc_idx;
+                       dtm->wp_event[wp_idx] = hw->dtc_idx[d];
                        writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
                } else {
                        struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
@@ -1863,7 +1867,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
                dtm->input_sel[dtm_idx] = input_sel;
                shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
                dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
-               dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+               dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift;
                dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
                reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
                writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
@@ -1891,7 +1895,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
        arm_cmn_event_stop(event, PERF_EF_UPDATE);
 
        if (type == CMN_TYPE_DTC)
-               cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+               cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
        else
                arm_cmn_event_clear(cmn, event, hw->num_dns);
 }
@@ -2072,7 +2076,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 {
        struct arm_cmn_node *dn, *xp;
        int dtc_idx = 0;
-       u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
 
        cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
        if (!cmn->dtc)
@@ -2082,23 +2085,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 
        cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
 
+       if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) {
+               /* We do at least know that a DTC's XP must be in that DTC's domain */
+               dn = arm_cmn_node(cmn, CMN_TYPE_DTC);
+               for (int i = 0; i < cmn->num_dtcs; i++)
+                       arm_cmn_node_to_xp(cmn, dn + i)->dtc = i;
+       }
+
        for (dn = cmn->dns; dn->type; dn++) {
-               if (dn->type == CMN_TYPE_XP) {
-                       dn->dtc &= dtcs_present;
+               if (dn->type == CMN_TYPE_XP)
                        continue;
-               }
 
                xp = arm_cmn_node_to_xp(cmn, dn);
+               dn->dtc = xp->dtc;
                dn->dtm = xp->dtm;
                if (cmn->multi_dtm)
                        dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
 
                if (dn->type == CMN_TYPE_DTC) {
-                       int err;
-                       /* We do at least know that a DTC's XP must be in that DTC's domain */
-                       if (xp->dtc == 0xf)
-                               xp->dtc = 1 << dtc_idx;
-                       err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+                       int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+
                        if (err)
                                return err;
                }
@@ -2117,6 +2123,16 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
        return 0;
 }
 
+static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region)
+{
+       int offset = CMN_DTM_UNIT_INFO;
+
+       if (cmn->part == PART_CMN650 || cmn->part == PART_CI700)
+               offset = CMN650_DTM_UNIT_INFO;
+
+       return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
+}
+
 static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
 {
        int level;
@@ -2246,9 +2262,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                        cmn->mesh_x = xp->logid;
 
                if (cmn->part == PART_CMN600)
-                       xp->dtc = 0xf;
+                       xp->dtc = -1;
                else
-                       xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
+                       xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
 
                xp->dtm = dtm - cmn->dtms;
                arm_cmn_init_dtm(dtm++, xp, 0);
index 25d25de..6f4e28f 100644
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
        tristate "ARM Coresight Architecture PMU"
@@ -10,3 +10,20 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
          based on ARM CoreSight PMU architecture. Note that this PMU
          architecture does not have relationship with the ARM CoreSight
          Self-Hosted Tracing.
+
+config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+       tristate "NVIDIA Coresight Architecture PMU"
+       depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+       help
+         Provides NVIDIA specific attributes for performance monitoring unit
+         (PMU) devices based on ARM CoreSight PMU architecture.
+
+config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+       tristate "Ampere Coresight Architecture PMU"
+       depends on  ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+       help
+         Provides Ampere specific attributes for performance monitoring unit
+         (PMU) devices based on ARM CoreSight PMU architecture.
+
+         In the first phase, the driver enables support on MCU PMU used in
+         AmpereOne SoC family.
index fedb17d..220a734 100644
@@ -1,6 +1,10 @@
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
-arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
+
+arm_cspmu_module-y := arm_cspmu.o
+
+obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
+obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
new file mode 100644
index 0000000..f146a45
--- /dev/null
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include "arm_cspmu.h"
+
+#define PMAUXR0                0xD80
+#define PMAUXR1                0xD84
+#define PMAUXR2                0xD88
+#define PMAUXR3                0xD8C
+
+#define to_ampere_cspmu_ctx(cspmu)     ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
+
+struct ampere_cspmu_ctx {
+       const char *name;
+       struct attribute **event_attr;
+       struct attribute **format_attr;
+};
+
+static DEFINE_IDA(mcu_pmu_ida);
+
+#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end)        \
+       static inline u32 get_##_name(const struct perf_event *event)     \
+       {                                                                 \
+               return FIELD_GET(GENMASK_ULL(_end, _start),               \
+                                event->attr._config);                    \
+       }                                                                 \
+
+SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
+
+static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
+       ARM_CSPMU_EVENT_ATTR(cycle_count,               0x00),
+       ARM_CSPMU_EVENT_ATTR(act_sent,                  0x01),
+       ARM_CSPMU_EVENT_ATTR(pre_sent,                  0x02),
+       ARM_CSPMU_EVENT_ATTR(rd_sent,                   0x03),
+       ARM_CSPMU_EVENT_ATTR(rda_sent,                  0x04),
+       ARM_CSPMU_EVENT_ATTR(wr_sent,                   0x05),
+       ARM_CSPMU_EVENT_ATTR(wra_sent,                  0x06),
+       ARM_CSPMU_EVENT_ATTR(pd_entry_vld,              0x07),
+       ARM_CSPMU_EVENT_ATTR(sref_entry_vld,            0x08),
+       ARM_CSPMU_EVENT_ATTR(prea_sent,                 0x09),
+       ARM_CSPMU_EVENT_ATTR(pre_sb_sent,               0x0a),
+       ARM_CSPMU_EVENT_ATTR(ref_sent,                  0x0b),
+       ARM_CSPMU_EVENT_ATTR(rfm_sent,                  0x0c),
+       ARM_CSPMU_EVENT_ATTR(ref_sb_sent,               0x0d),
+       ARM_CSPMU_EVENT_ATTR(rfm_sb_sent,               0x0e),
+       ARM_CSPMU_EVENT_ATTR(rd_rda_sent,               0x0f),
+       ARM_CSPMU_EVENT_ATTR(wr_wra_sent,               0x10),
+       ARM_CSPMU_EVENT_ATTR(raw_hazard,                0x11),
+       ARM_CSPMU_EVENT_ATTR(war_hazard,                0x12),
+       ARM_CSPMU_EVENT_ATTR(waw_hazard,                0x13),
+       ARM_CSPMU_EVENT_ATTR(rar_hazard,                0x14),
+       ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard,        0x15),
+       ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld,      0x16),
+       ARM_CSPMU_EVENT_ATTR(lprd_req_vld,              0x17),
+       ARM_CSPMU_EVENT_ATTR(hprd_req_vld,              0x18),
+       ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld,         0x19),
+       ARM_CSPMU_EVENT_ATTR(prefetch_tgt,              0x1a),
+       ARM_CSPMU_EVENT_ATTR(wr_req_vld,                0x1b),
+       ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld,        0x1c),
+       ARM_CSPMU_EVENT_ATTR(rd_retry,                  0x1d),
+       ARM_CSPMU_EVENT_ATTR(wr_retry,                  0x1e),
+       ARM_CSPMU_EVENT_ATTR(retry_gnt,                 0x1f),
+       ARM_CSPMU_EVENT_ATTR(rank_change,               0x20),
+       ARM_CSPMU_EVENT_ATTR(dir_change,                0x21),
+       ARM_CSPMU_EVENT_ATTR(rank_dir_change,           0x22),
+       ARM_CSPMU_EVENT_ATTR(rank_active,               0x23),
+       ARM_CSPMU_EVENT_ATTR(rank_idle,                 0x24),
+       ARM_CSPMU_EVENT_ATTR(rank_pd,                   0x25),
+       ARM_CSPMU_EVENT_ATTR(rank_sref,                 0x26),
+       ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh,      0x27),
+       ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh,       0x28),
+       ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh,       0x29),
+       ARM_CSPMU_EVENT_ATTR(phy_updt_complt,           0x2a),
+       ARM_CSPMU_EVENT_ATTR(tz_fail,                   0x2b),
+       ARM_CSPMU_EVENT_ATTR(dram_errc,                 0x2c),
+       ARM_CSPMU_EVENT_ATTR(dram_errd,                 0x2d),
+       ARM_CSPMU_EVENT_ATTR(read_data_return,          0x32),
+       ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta,         0x33),
+       ARM_CSPMU_EVENT_ATTR(zq_start,                  0x34),
+       ARM_CSPMU_EVENT_ATTR(zq_latch,                  0x35),
+       ARM_CSPMU_EVENT_ATTR(wr_fifo_full,              0x36),
+       ARM_CSPMU_EVENT_ATTR(info_fifo_full,            0x37),
+       ARM_CSPMU_EVENT_ATTR(cmd_fifo_full,             0x38),
+       ARM_CSPMU_EVENT_ATTR(dfi_nop,                   0x39),
+       ARM_CSPMU_EVENT_ATTR(dfi_cmd,                   0x3a),
+       ARM_CSPMU_EVENT_ATTR(rd_run_len,                0x3b),
+       ARM_CSPMU_EVENT_ATTR(wr_run_len,                0x3c),
+
+       ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+       NULL,
+};
+
+static struct attribute *ampereone_mcu_format_attrs[] = {
+       ARM_CSPMU_FORMAT_EVENT_ATTR,
+       ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),
+       ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),
+       ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"),
+       NULL,
+};
+
+static struct attribute **
+ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+       const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+       return ctx->event_attr;
+}
+
+static struct attribute **
+ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+       const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+       return ctx->format_attr;
+}
+
+static const char *
+ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+       const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+       return ctx->name;
+}
+
+static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+{
+       /*
+        * PMEVFILTR or PMCCFILTR aren't used in Ampere SoC PMU but are marked
+        * as RES0. Make sure, PMCCFILTR is written zero.
+        */
+       return 0;
+}
+
+static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+                                      struct hw_perf_event *hwc,
+                                      u32 filter)
+{
+       struct perf_event *event;
+       unsigned int idx;
+       u32 threshold, rank, bank;
+
+       /*
+        * At this point, all the events have the same filter settings.
+        * Therefore, take the first event and use its configuration.
+        */
+       idx = find_first_bit(cspmu->hw_events.used_ctrs,
+                            cspmu->cycle_counter_logical_idx);
+
+       event = cspmu->hw_events.events[idx];
+
+       threshold       = get_threshold(event);
+       rank            = get_rank(event);
+       bank            = get_bank(event);
+
+       writel(threshold, cspmu->base0 + PMAUXR0);
+       writel(rank, cspmu->base0 + PMAUXR1);
+       writel(bank, cspmu->base0 + PMAUXR2);
+}
+
+static int ampere_cspmu_validate_configs(struct perf_event *event,
+                                        struct perf_event *event2)
+{
+       if (get_threshold(event) != get_threshold(event2) ||
+           get_rank(event) != get_rank(event2) ||
+           get_bank(event) != get_bank(event2))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
+                                      struct perf_event *new)
+{
+       struct perf_event *curr, *leader = new->group_leader;
+       unsigned int idx;
+       int ret;
+
+       ret = ampere_cspmu_validate_configs(new, leader);
+       if (ret)
+               return ret;
+
+       /* We compare the global filter settings to the existing events */
+       idx = find_first_bit(cspmu->hw_events.used_ctrs,
+                            cspmu->cycle_counter_logical_idx);
+
+       /* This is the first event, thus any configuration is fine */
+       if (idx == cspmu->cycle_counter_logical_idx)
+               return 0;
+
+       curr = cspmu->hw_events.events[idx];
+
+       return ampere_cspmu_validate_configs(curr, new);
+}
+
+static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
+                                     const char *name_pattern)
+{
+       struct device *dev = cspmu->dev;
+       int id;
+
+       id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
+       if (id < 0)
+               return ERR_PTR(id);
+
+       return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
+}
+
+static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+       struct device *dev = cspmu->dev;
+       struct ampere_cspmu_ctx *ctx;
+       struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+       ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->event_attr = ampereone_mcu_pmu_event_attrs;
+       ctx->format_attr = ampereone_mcu_format_attrs;
+       ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
+       if (IS_ERR_OR_NULL(ctx->name))
+               return ctx->name ? PTR_ERR(ctx->name) : -ENOMEM;
+
+       cspmu->impl.ctx = ctx;
+
+       impl_ops->event_filter          = ampere_cspmu_event_filter;
+       impl_ops->set_ev_filter         = ampere_cspmu_set_ev_filter;
+       impl_ops->validate_event        = ampere_cspmu_validate_event;
+       impl_ops->get_name              = ampere_cspmu_get_name;
+       impl_ops->get_event_attrs       = ampere_cspmu_get_event_attrs;
+       impl_ops->get_format_attrs      = ampere_cspmu_get_format_attrs;
+
+       return 0;
+}
+
+/* Match all Ampere Coresight PMU devices */
+static const struct arm_cspmu_impl_match ampere_cspmu_param = {
+       .pmiidr_val     = ARM_CSPMU_IMPL_ID_AMPERE,
+       .module         = THIS_MODULE,
+       .impl_init_ops  = ampere_cspmu_init_ops
+};
+
+static int __init ampere_cspmu_init(void)
+{
+       int ret;
+
+       ret = arm_cspmu_impl_register(&ampere_cspmu_param);
+       if (ret)
+               pr_err("ampere_cspmu backend registration error: %d\n", ret);
+
+       return ret;
+}
+
+static void __exit ampere_cspmu_exit(void)
+{
+       arm_cspmu_impl_unregister(&ampere_cspmu_param);
+}
+
+module_init(ampere_cspmu_init);
+module_exit(ampere_cspmu_exit);
+
+MODULE_LICENSE("GPL");
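
The format attributes above place the implementation-specific filter in config1 (threshold in bits 0-7, rank in 8-23, bank in 24-55), while the event code goes in the usual config field. Below is a minimal user-space sketch, assuming the PMU probed as ampere_mcu_pmu_0 and counting queue_fill_gt_thresh (0x27) with a threshold of 16; the instance name is an assumption, the PMU type must always be read from sysfs, and whether an all-zero rank/bank mask means "no filtering" is left to the hardware documentation:

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int type, fd;

	/* The instance suffix is assigned at probe time; read the type from sysfs. */
	FILE *f = fopen("/sys/bus/event_source/devices/ampere_mcu_pmu_0/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x27;	/* queue_fill_gt_thresh */
	attr.config1 = 16;	/* threshold in config1:0-7; rank/bank left at zero */

	/* Uncore PMU: system-wide, per-CPU counting only (pid == -1). */
	fd = syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("queue_fill_gt_thresh: %llu\n", (unsigned long long)count);
	return 0;
}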
index e2b7827..0e3fe00 100644 (file)
@@ -16,7 +16,7 @@
  * The user should refer to the vendor technical documentation to get details
  * about the supported events.
  *
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  */
 
 #include <linux/interrupt.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 
 #include "arm_cspmu.h"
-#include "nvidia_cspmu.h"
 
 #define PMUNAME "arm_cspmu"
 #define DRVNAME "arm-cs-arch-pmu"
  */
 #define HILOHI_MAX_POLL        1000
 
-/* JEDEC-assigned JEP106 identification code */
-#define ARM_CSPMU_IMPL_ID_NVIDIA               0x36B
-
 static unsigned long arm_cspmu_cpuhp_state;
 
+static DEFINE_MUTEX(arm_cspmu_lock);
+
+static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+                                   struct hw_perf_event *hwc, u32 filter);
+
 static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
 {
        return *(struct acpi_apmt_node **)dev_get_platdata(dev);
@@ -373,27 +375,45 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
        .attrs = arm_cspmu_cpumask_attrs,
 };
 
-struct impl_match {
-       u32 pmiidr;
-       u32 mask;
-       int (*impl_init_ops)(struct arm_cspmu *cspmu);
-};
-
-static const struct impl_match impl_match[] = {
+static struct arm_cspmu_impl_match impl_match[] = {
        {
-         .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
-         .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
-         .impl_init_ops = nv_cspmu_init_ops
+               .module_name    = "nvidia_cspmu",
+               .pmiidr_val     = ARM_CSPMU_IMPL_ID_NVIDIA,
+               .pmiidr_mask    = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+               .module         = NULL,
+               .impl_init_ops  = NULL,
        },
-       {}
+       {
+               .module_name    = "ampere_cspmu",
+               .pmiidr_val     = ARM_CSPMU_IMPL_ID_AMPERE,
+               .pmiidr_mask    = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+               .module         = NULL,
+               .impl_init_ops  = NULL,
+       },
+
+       {0}
 };
 
+static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr)
+{
+       struct arm_cspmu_impl_match *match = impl_match;
+
+       for (; match->pmiidr_val; match++) {
+               u32 mask = match->pmiidr_mask;
+
+               if ((match->pmiidr_val & mask) == (pmiidr & mask))
+                       return match;
+       }
+
+       return NULL;
+}
+
 static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
 {
-       int ret;
+       int ret = 0;
        struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
        struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
-       const struct impl_match *match = impl_match;
+       struct arm_cspmu_impl_match *match;
 
        /*
         * Get PMU implementer and product id from APMT node.
@@ -405,17 +425,36 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
                                       readl(cspmu->base0 + PMIIDR);
 
        /* Find implementer specific attribute ops. */
-       for (; match->pmiidr; match++) {
-               const u32 mask = match->mask;
+       match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr);
+
+       /* Load implementer module and initialize the callbacks. */
+       if (match) {
+               mutex_lock(&arm_cspmu_lock);
+
+               if (match->impl_init_ops) {
+                       /* Prevent unload until PMU registration is done. */
+                       if (try_module_get(match->module)) {
+                               cspmu->impl.module = match->module;
+                               cspmu->impl.match = match;
+                               ret = match->impl_init_ops(cspmu);
+                               if (ret)
+                                       module_put(match->module);
+                       } else {
+                               WARN(1, "arm_cspmu failed to get module: %s\n",
+                                       match->module_name);
+                               ret = -EINVAL;
+                       }
+               } else {
+                       request_module_nowait(match->module_name);
+                       ret = -EPROBE_DEFER;
+               }
 
-               if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
-                       ret = match->impl_init_ops(cspmu);
-                       if (ret)
-                               return ret;
+               mutex_unlock(&arm_cspmu_lock);
 
-                       break;
-               }
-       }
+               if (ret)
+                       return ret;
+       } else
+               cspmu->impl.module = THIS_MODULE;
 
        /* Use default callbacks if implementer doesn't provide one. */
        CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
@@ -426,6 +465,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
        CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
        CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
        CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
+       CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter);
 
        return 0;
 }
@@ -478,11 +518,6 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
        struct attribute_group **attr_groups = NULL;
        struct device *dev = cspmu->dev;
        const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
-       int ret;
-
-       ret = arm_cspmu_init_impl_ops(cspmu);
-       if (ret)
-               return NULL;
 
        cspmu->identifier = impl_ops->get_identifier(cspmu);
        cspmu->name = impl_ops->get_name(cspmu);
@@ -549,7 +584,7 @@ static void arm_cspmu_disable(struct pmu *pmu)
 static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
                                struct perf_event *event)
 {
-       int idx;
+       int idx, ret;
        struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
 
        if (supports_cycle_counter(cspmu)) {
@@ -583,6 +618,12 @@ static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
        if (idx >= cspmu->num_logical_ctrs)
                return -EAGAIN;
 
+       if (cspmu->impl.ops.validate_event) {
+               ret = cspmu->impl.ops.validate_event(cspmu, event);
+               if (ret)
+                       return ret;
+       }
+
        set_bit(idx, hw_events->used_ctrs);
 
        return idx;
@@ -696,7 +737,10 @@ static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
        if (use_64b_counter_reg(cspmu)) {
                offset = counter_offset(sizeof(u64), event->hw.idx);
 
-               writeq(val, cspmu->base1 + offset);
+               if (cspmu->has_atomic_dword)
+                       writeq(val, cspmu->base1 + offset);
+               else
+                       lo_hi_writeq(val, cspmu->base1 + offset);
        } else {
                offset = counter_offset(sizeof(u32), event->hw.idx);
 
@@ -789,9 +833,9 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
        writel(hwc->config, cspmu->base0 + offset);
 }
 
-static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
-                                          struct hw_perf_event *hwc,
-                                          u32 filter)
+static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+                                       struct hw_perf_event *hwc,
+                                       u32 filter)
 {
        u32 offset = PMEVFILTR + (4 * hwc->idx);
 
@@ -823,7 +867,7 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
                arm_cspmu_set_cc_filter(cspmu, filter);
        } else {
                arm_cspmu_set_event(cspmu, hwc);
-               arm_cspmu_set_ev_filter(cspmu, hwc, filter);
+               cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter);
        }
 
        hwc->state = 0;
@@ -1149,7 +1193,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
 
        cspmu->pmu = (struct pmu){
                .task_ctx_nr    = perf_invalid_context,
-               .module         = THIS_MODULE,
+               .module         = cspmu->impl.module,
                .pmu_enable     = arm_cspmu_enable,
                .pmu_disable    = arm_cspmu_disable,
                .event_init     = arm_cspmu_event_init,
@@ -1196,11 +1240,17 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       ret = arm_cspmu_register_pmu(cspmu);
+       ret = arm_cspmu_init_impl_ops(cspmu);
        if (ret)
                return ret;
 
-       return 0;
+       ret = arm_cspmu_register_pmu(cspmu);
+
+       /* Matches arm_cspmu_init_impl_ops() above. */
+       if (cspmu->impl.module != THIS_MODULE)
+               module_put(cspmu->impl.module);
+
+       return ret;
 }
 
 static int arm_cspmu_device_remove(struct platform_device *pdev)
@@ -1300,6 +1350,75 @@ static void __exit arm_cspmu_exit(void)
        cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
 }
 
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match)
+{
+       struct arm_cspmu_impl_match *match;
+       int ret = 0;
+
+       match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
+
+       if (match) {
+               mutex_lock(&arm_cspmu_lock);
+
+               if (!match->impl_init_ops) {
+                       match->module = impl_match->module;
+                       match->impl_init_ops = impl_match->impl_init_ops;
+               } else {
+                       /* Broken match table may contain non-unique entries */
+                       WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n",
+                               match->module_name,
+                               match->pmiidr_val,
+                               match->pmiidr_mask);
+
+                       ret = -EINVAL;
+               }
+
+               mutex_unlock(&arm_cspmu_lock);
+
+               if (!ret)
+                       ret = driver_attach(&arm_cspmu_driver.driver);
+       } else {
+               pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n",
+                       impl_match->pmiidr_val);
+
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
+
+static int arm_cspmu_match_device(struct device *dev, const void *match)
+{
+       struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev));
+
+       return (cspmu && cspmu->impl.match == match) ? 1 : 0;
+}
+
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match)
+{
+       struct device *dev;
+       struct arm_cspmu_impl_match *match;
+
+       match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
+
+       if (WARN_ON(!match))
+               return;
+
+       /* Unbind the driver from all matching backend devices. */
+       while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL,
+                       match, arm_cspmu_match_device)))
+               device_release_driver(dev);
+
+       mutex_lock(&arm_cspmu_lock);
+
+       match->module = NULL;
+       match->impl_init_ops = NULL;
+
+       mutex_unlock(&arm_cspmu_lock);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
+
 module_init(arm_cspmu_init);
 module_exit(arm_cspmu_exit);
 
index 83df53d..2fe7235 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
  * ARM CoreSight Architecture PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  */
 
 #define ARM_CSPMU_PMIIDR_IMPLEMENTER   GENMASK(11, 0)
 #define ARM_CSPMU_PMIIDR_PRODUCTID     GENMASK(31, 20)
 
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA       0x36B
+#define ARM_CSPMU_IMPL_ID_AMPERE       0xA16
+
 struct arm_cspmu;
 
 /* This tracks the events assigned to each counter in the PMU. */
@@ -101,14 +105,34 @@ struct arm_cspmu_impl_ops {
        u32 (*event_type)(const struct perf_event *event);
        /* Decode filter value from configs */
        u32 (*event_filter)(const struct perf_event *event);
+       /* Set event filter */
+       void (*set_ev_filter)(struct arm_cspmu *cspmu,
+                             struct hw_perf_event *hwc, u32 filter);
+       /* Implementation specific event validation */
+       int (*validate_event)(struct arm_cspmu *cspmu,
+                             struct perf_event *event);
        /* Hide/show unsupported events */
        umode_t (*event_attr_is_visible)(struct kobject *kobj,
                                         struct attribute *attr, int unused);
 };
 
+/* Vendor/implementer registration parameter. */
+struct arm_cspmu_impl_match {
+       /* Backend module. */
+       struct module *module;
+       const char *module_name;
+       /* PMIIDR value/mask. */
+       u32 pmiidr_val;
+       u32 pmiidr_mask;
+       /* Callback to vendor backend to init arm_cspmu_impl::ops. */
+       int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
 /* Vendor/implementer descriptor. */
 struct arm_cspmu_impl {
        u32 pmiidr;
+       struct module *module;
+       struct arm_cspmu_impl_match *match;
        struct arm_cspmu_impl_ops ops;
        void *ctx;
 };
@@ -147,4 +171,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
                                    struct device_attribute *attr,
                                    char *buf);
 
+/* Register vendor backend. */
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match);
+
+/* Unregister vendor backend. */
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match);
+
 #endif /* __ARM_CSPMU_H__ */
index 72ef80c..0382b70 100644 (file)
@@ -1,14 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  */
 
 /* Support for NVIDIA specific attributes. */
 
+#include <linux/module.h>
 #include <linux/topology.h>
 
-#include "nvidia_cspmu.h"
+#include "arm_cspmu.h"
 
 #define NV_PCIE_PORT_COUNT           10ULL
 #define NV_PCIE_FILTER_ID_MASK       GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
@@ -351,7 +352,7 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
        return name;
 }
 
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
 {
        u32 prodid;
        struct nv_cspmu_ctx *ctx;
@@ -395,6 +396,31 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+
+/* Match all NVIDIA Coresight PMU devices */
+static const struct arm_cspmu_impl_match nv_cspmu_param = {
+       .pmiidr_val     = ARM_CSPMU_IMPL_ID_NVIDIA,
+       .module         = THIS_MODULE,
+       .impl_init_ops  = nv_cspmu_init_ops
+};
+
+static int __init nvidia_cspmu_init(void)
+{
+       int ret;
+
+       ret = arm_cspmu_impl_register(&nv_cspmu_param);
+       if (ret)
+               pr_err("nvidia_cspmu backend registration error: %d\n", ret);
+
+       return ret;
+}
+
+static void __exit nvidia_cspmu_exit(void)
+{
+       arm_cspmu_impl_unregister(&nv_cspmu_param);
+}
+
+module_init(nvidia_cspmu_init);
+module_exit(nvidia_cspmu_exit);
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
deleted file mode 100644 (file)
index 71e18f0..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- *
- */
-
-/* Support for NVIDIA specific attributes. */
-
-#ifndef __NVIDIA_CSPMU_H__
-#define __NVIDIA_CSPMU_H__
-
-#include "arm_cspmu.h"
-
-/* Allocate NVIDIA descriptor. */
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
-
-#endif /* __NVIDIA_CSPMU_H__ */
index 8fcaa26..4f6923a 100644 (file)
@@ -1126,7 +1126,7 @@ static void __armv8pmu_probe_pmu(void *info)
                             pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 
        /* store PMMIR register for sysfs */
-       if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31)))
+       if (is_pmuv3p4(pmuver))
                cpu_pmu->reg_pmmir = read_pmmir();
        else
                cpu_pmu->reg_pmmir = 0;
@@ -1187,10 +1187,7 @@ static void armv8_pmu_register_sysctl_table(void)
 }
 
 static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
-                         int (*map_event)(struct perf_event *event),
-                         const struct attribute_group *events,
-                         const struct attribute_group *format,
-                         const struct attribute_group *caps)
+                         int (*map_event)(struct perf_event *event))
 {
        int ret = armv8pmu_probe_pmu(cpu_pmu);
        if (ret)
@@ -1212,27 +1209,17 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 
        cpu_pmu->name                   = name;
        cpu_pmu->map_event              = map_event;
-       cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ?
-                       events : &armv8_pmuv3_events_attr_group;
-       cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
-                       format : &armv8_pmuv3_format_attr_group;
-       cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ?
-                       caps : &armv8_pmuv3_caps_attr_group;
-
+       cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group;
+       cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group;
+       cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group;
        armv8_pmu_register_sysctl_table();
        return 0;
 }
 
-static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name,
-                                  int (*map_event)(struct perf_event *event))
-{
-       return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL);
-}
-
 #define PMUV3_INIT_SIMPLE(name)                                                \
 static int name##_pmu_init(struct arm_pmu *cpu_pmu)                    \
 {                                                                      \
-       return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\
+       return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event);   \
 }
 
 PMUV3_INIT_SIMPLE(armv8_pmuv3)
@@ -1263,44 +1250,37 @@ PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
 
 static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35",
-                                      armv8_a53_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event);
 }
 
 static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53",
-                                      armv8_a53_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event);
 }
 
 static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57",
-                                      armv8_a57_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event);
 }
 
 static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72",
-                                      armv8_a57_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event);
 }
 
 static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73",
-                                      armv8_a73_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event);
 }
 
 static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder",
-                                      armv8_thunder_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event);
 }
 
 static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan",
-                                      armv8_vulcan_map_event);
+       return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event);
 }
 
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
index 5a00adb..b90ba8a 100644 (file)
@@ -353,16 +353,15 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
        struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
 
-       event->cpu = pcie_pmu->on_cpu;
+       /* Check the event type first; if it doesn't match, this isn't our event */
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
 
        if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event)))
                hwc->event_base = HISI_PCIE_EXT_CNT;
        else
                hwc->event_base = HISI_PCIE_CNT;
 
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
        /* Sampling is not supported. */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EOPNOTSUPP;
@@ -373,6 +372,8 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
        if (!hisi_pcie_pmu_validate_event_group(event))
                return -EINVAL;
 
+       event->cpu = pcie_pmu->on_cpu;
+
        return 0;
 }
 
index d941e74..797cf20 100644 (file)
@@ -505,8 +505,8 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
        ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
        if (ret) {
                dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
-               cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
-                                           &pa_pmu->node);
+               cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+                                                   &pa_pmu->node);
                return ret;
        }
 
index 6fe534a..e706ca5 100644 (file)
@@ -450,8 +450,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
        ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
        if (ret) {
                dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
-               cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
-                                           &sllc_pmu->node);
+               cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+                                                   &sllc_pmu->node);
                return ret;
        }
 
index e0457d8..16869bf 100644 (file)
@@ -1556,8 +1556,8 @@ static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
        ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
        if (ret) {
                pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
-               cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
-                                           &hns3_pmu->node);
+               cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+                                                   &hns3_pmu->node);
        }
 
        return ret;
@@ -1568,8 +1568,8 @@ static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
        struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
 
        perf_pmu_unregister(&hns3_pmu->pmu);
-       cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
-                                   &hns3_pmu->node);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+                                           &hns3_pmu->node);
 }
 
 static int hns3_pmu_init_dev(struct pci_dev *pdev)
index 9972bfc..7ce3442 100644 (file)
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
-#include <linux/of_fdt.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
@@ -1731,6 +1729,12 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
        .id   = PCP_PMU_V2,
 };
 
+#ifdef CONFIG_ACPI
+static const struct xgene_pmu_data xgene_pmu_v3_data = {
+       .id   = PCP_PMU_V3,
+};
+#endif
+
 static const struct xgene_pmu_ops xgene_pmu_ops = {
        .mask_int = xgene_pmu_mask_int,
        .unmask_int = xgene_pmu_unmask_int,
@@ -1773,9 +1777,9 @@ static const struct of_device_id xgene_pmu_of_match[] = {
 MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id xgene_pmu_acpi_match[] = {
-       {"APMC0D5B", PCP_PMU_V1},
-       {"APMC0D5C", PCP_PMU_V2},
-       {"APMC0D83", PCP_PMU_V3},
+       {"APMC0D5B", (kernel_ulong_t)&xgene_pmu_data},
+       {"APMC0D5C", (kernel_ulong_t)&xgene_pmu_v2_data},
+       {"APMC0D83", (kernel_ulong_t)&xgene_pmu_v3_data},
        {},
 };
 MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
@@ -1831,7 +1835,6 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 static int xgene_pmu_probe(struct platform_device *pdev)
 {
        const struct xgene_pmu_data *dev_data;
-       const struct of_device_id *of_id;
        struct xgene_pmu *xgene_pmu;
        int irq, rc;
        int version;
@@ -1850,24 +1853,10 @@ static int xgene_pmu_probe(struct platform_device *pdev)
        xgene_pmu->dev = &pdev->dev;
        platform_set_drvdata(pdev, xgene_pmu);
 
-       version = -EINVAL;
-       of_id = of_match_device(xgene_pmu_of_match, &pdev->dev);
-       if (of_id) {
-               dev_data = (const struct xgene_pmu_data *) of_id->data;
-               version = dev_data->id;
-       }
-
-#ifdef CONFIG_ACPI
-       if (ACPI_COMPANION(&pdev->dev)) {
-               const struct acpi_device_id *acpi_id;
-
-               acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev);
-               if (acpi_id)
-                       version = (int) acpi_id->driver_data;
-       }
-#endif
-       if (version < 0)
+       dev_data = device_get_match_data(&pdev->dev);
+       if (!dev_data)
                return -ENODEV;
+       version = dev_data->id;
 
        if (version == PCP_PMU_V3)
                xgene_pmu->ops = &xgene_pmu_v3_ops;
index a73246c..fae2aa0 100644 (file)
@@ -256,6 +256,11 @@ acpi_table_parse_cedt(enum acpi_cedt_type id,
 int acpi_parse_mcfg (struct acpi_table_header *header);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 
+static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
+{
+       return gicc->flags & ACPI_MADT_ENABLED;
+}
+
 /* the following numa functions are architecture-dependent */
 void acpi_numa_slit_init (struct acpi_table_slit *slit);
 
index 4328895..547d077 100644 (file)
@@ -473,6 +473,13 @@ function _start
 //     mov     x8, #__NR_sched_yield   // Encourage preemption
 //     svc     #0
 
+#ifdef SSVE
+       mrs     x0, S3_3_C4_C2_2        // SVCR should have ZA=0, SM=1
+       and     x1, x0, #3
+       cmp     x1, #1
+       b.ne    svcr_barf
+#endif
+
        mov     x21, #0
 0:     mov     x0, x21
        bl      check_zreg
@@ -553,3 +560,15 @@ function vl_barf
        mov     x1, #1
        svc     #0
 endfunction
+
+function svcr_barf
+       mov     x10, x0
+
+       puts    "Bad SVCR: "
+       mov     x0, x10
+       bl      putdecn
+
+       mov     x8, #__NR_exit
+       mov     x1, #1
+       svc     #0
+endfunction