arm64/sme: Basic enumeration support
author Mark Brown <broonie@kernel.org>
Tue, 19 Apr 2022 11:22:16 +0000 (12:22 +0100)
committer Catalin Marinas <catalin.marinas@arm.com>
Fri, 22 Apr 2022 17:50:49 +0000 (18:50 +0100)
Introduce basic cpufeature support for discovering the presence of the
Scalable Matrix Extension.
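
Userspace can discover the new capability via the AT_HWCAP2 auxiliary
vector entry added below. A minimal sketch of such a probe, assuming
glibc's getauxval() and falling back to the HWCAP2_SME value defined in
this patch when building against older headers:

  #include <stdio.h>
  #include <sys/auxv.h>

  #ifndef HWCAP2_SME
  #define HWCAP2_SME (1 << 23)   /* value added by this patch */
  #endif

  int main(void)
  {
          unsigned long hwcap2 = getauxval(AT_HWCAP2);

          printf("SME %ssupported\n", (hwcap2 & HWCAP2_SME) ? "" : "not ");
          return 0;
  }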

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-9-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Documentation/arm64/elf_hwcaps.rst
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/fpsimd.c
arch/arm64/tools/cpucaps

diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index a8f3096..f8d818e 100644
@@ -264,6 +264,39 @@ HWCAP2_MTE3
     Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
     by Documentation/arm64/memory-tagging-extension.rst.
 
+HWCAP2_SME
+
+    Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
+    by Documentation/arm64/sme.rst.
+
+HWCAP2_SME_I16I64
+
+    Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
+
+HWCAP2_SME_F64F64
+
+    Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
+
+HWCAP2_SME_I8I32
+
+    Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
+
+HWCAP2_SME_F16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
+
+HWCAP2_SME_B16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
+
+HWCAP2_SME_F32F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
+
+HWCAP2_SME_FA64
+
+    Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index a58e366..d08062b 100644
@@ -58,6 +58,7 @@ struct cpuinfo_arm64 {
        u64             reg_id_aa64pfr0;
        u64             reg_id_aa64pfr1;
        u64             reg_id_aa64zfr0;
+       u64             reg_id_aa64smfr0;
 
        struct cpuinfo_32bit    aarch32;
 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index c62e7e5..8ac12e4 100644
@@ -759,6 +759,18 @@ static __always_inline bool system_supports_sve(void)
                cpus_have_const_cap(ARM64_SVE);
 }
 
+static __always_inline bool system_supports_sme(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_SME) &&
+               cpus_have_const_cap(ARM64_SME);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_SME) &&
+               cpus_have_const_cap(ARM64_SME_FA64);
+}
+
 static __always_inline bool system_supports_cnp(void)
 {
        return IS_ENABLED(CONFIG_ARM64_CNP) &&
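
Both new helpers follow the pattern of system_supports_sve() above:
IS_ENABLED() compiles the check out entirely on !CONFIG_ARM64_SME
kernels, and cpus_have_const_cap() resolves through a static key once
capabilities are finalised. An illustrative caller (hypothetical, not
part of this series) would gate SME-specific work like so:

  static void example_flush_sme_state(void)
  {
          /* Compiles out entirely when CONFIG_ARM64_SME=n */
          if (!system_supports_sme())
                  return;

          /* SME-specific state handling would go here */
  }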
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 6e2dc9d..2e8ef00 100644
@@ -74,6 +74,8 @@ extern void sve_set_vq(unsigned long vq_minus_1);
 
 struct arm64_cpu_capabilities;
 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 
 extern u64 read_zcr_features(void);
 
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 8db5ec0..9f0ce00 100644
 #define KERNEL_HWCAP_AFP               __khwcap2_feature(AFP)
 #define KERNEL_HWCAP_RPRES             __khwcap2_feature(RPRES)
 #define KERNEL_HWCAP_MTE3              __khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME               __khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64                __khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64                __khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32         __khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32                __khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32                __khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32                __khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64          __khwcap2_feature(SME_FA64)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 99cb5d3..b0256ce 100644
 #define HWCAP2_AFP             (1 << 20)
 #define HWCAP2_RPRES           (1 << 21)
 #define HWCAP2_MTE3            (1 << 22)
+#define HWCAP2_SME             (1 << 23)
+#define HWCAP2_SME_I16I64      (1 << 24)
+#define HWCAP2_SME_F64F64      (1 << 25)
+#define HWCAP2_SME_I8I32       (1 << 26)
+#define HWCAP2_SME_F16F32      (1 << 27)
+#define HWCAP2_SME_B16F32      (1 << 28)
+#define HWCAP2_SME_F32F32      (1 << 29)
+#define HWCAP2_SME_FA64                (1 << 30)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d72c4b4..0f2d7dd 100644
@@ -261,6 +261,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
@@ -293,6 +295,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
        ARM64_FTR_END,
 };
 
+static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
+       ARM64_FTR_END,
+};
+
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
@@ -645,6 +665,7 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
                               &id_aa64pfr1_override),
        ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
+       ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
 
        /* Op1 = 0, CRn = 0, CRm = 5 */
        ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -960,6 +981,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
        init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
        init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
        init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
+       init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
 
        if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
                init_32bit_cpu_features(&info->aarch32);
@@ -2442,6 +2464,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .min_field_value = 1,
        },
+#ifdef CONFIG_ARM64_SME
+       {
+               .desc = "Scalable Matrix Extension",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_SME,
+               .sys_reg = SYS_ID_AA64PFR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64PFR1_SME_SHIFT,
+               .field_width = 4,
+               .min_field_value = ID_AA64PFR1_SME,
+               .matches = has_cpuid_feature,
+               .cpu_enable = sme_kernel_enable,
+       },
+       /* FA64 should be sorted after the base SME capability */
+       {
+               .desc = "FA64",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_SME_FA64,
+               .sys_reg = SYS_ID_AA64SMFR0_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64SMFR0_FA64_SHIFT,
+               .field_width = 1,
+               .min_field_value = ID_AA64SMFR0_FA64,
+               .matches = has_cpuid_feature,
+               .cpu_enable = fa64_kernel_enable,
+       },
+#endif /* CONFIG_ARM64_SME */
        {},
 };
 
@@ -2575,6 +2624,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
        HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
        HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
        HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+#ifdef CONFIG_ARM64_SME
+       HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+#endif /* CONFIG_ARM64_SME */
        {},
 };
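
Note the sanity-check policy: the ID_AA64PFR1_EL1.SME field is
registered FTR_LOWER_SAFE (the lowest value across CPUs is safe), while
the ID_AA64SMFR0_EL1 fields are FTR_EXACT, so any mismatch between CPUs
resolves to the safe value 0 rather than a blended minimum.
Conceptually, has_cpuid_feature() then reduces to an unsigned field
extract and compare; a stand-alone sketch of that matching step (the
helper name is illustrative, not the kernel's internal API):

  #include <stdbool.h>
  #include <stdint.h>

  /* Extract an unsigned ID register field and test it against a
   * minimum value, mirroring how the .field_pos/.field_width/
   * .min_field_value entries above are consumed. */
  static bool field_at_least(uint64_t reg, unsigned int shift,
                             unsigned int width, uint64_t min)
  {
          uint64_t field = (reg >> shift) & ((1ULL << width) - 1);

          return field >= min;
  }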
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 330b92e..a73fe28 100644
@@ -98,6 +98,14 @@ static const char *const hwcap_str[] = {
        [KERNEL_HWCAP_AFP]              = "afp",
        [KERNEL_HWCAP_RPRES]            = "rpres",
        [KERNEL_HWCAP_MTE3]             = "mte3",
+       [KERNEL_HWCAP_SME]              = "sme",
+       [KERNEL_HWCAP_SME_I16I64]       = "smei16i64",
+       [KERNEL_HWCAP_SME_F64F64]       = "smef64f64",
+       [KERNEL_HWCAP_SME_I8I32]        = "smei8i32",
+       [KERNEL_HWCAP_SME_F16F32]       = "smef16f32",
+       [KERNEL_HWCAP_SME_B16F32]       = "smeb16f32",
+       [KERNEL_HWCAP_SME_F32F32]       = "smef32f32",
+       [KERNEL_HWCAP_SME_FA64]         = "smefa64",
 };
 
 #ifdef CONFIG_COMPAT
@@ -401,6 +409,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
        info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
        info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
        info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
+       info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
 
        if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
                info->reg_gmid = read_cpuid(GMID_EL1);
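
The new hwcap_str[] entries surface in the "Features" line of
/proc/cpuinfo. A rough stand-alone sketch of checking for one of them
there (illustrative only; note the naive substring match would also hit
sub-feature names such as "smefa64" when looking for "sme"):

  #include <stdio.h>
  #include <string.h>

  static int cpuinfo_has_feature(const char *name)
  {
          char line[1024];
          FILE *f = fopen("/proc/cpuinfo", "r");
          int found = 0;

          while (f && fgets(line, sizeof(line), f))
                  if (!strncmp(line, "Features", 8) && strstr(line, name)) {
                          found = 1;
                          break;
                  }

          if (f)
                  fclose(f);
          return found;
  }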
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 47af76e..e4fba0b 100644
@@ -993,6 +993,32 @@ void fpsimd_release_task(struct task_struct *dead_task)
 
 #endif /* CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_SME
+
+void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+       /* Set priority for all PEs to architecturally defined minimum */
+       write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
+                      SYS_SMPRI_EL1);
+
+       /* Allow SME in kernel */
+       write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
+       isb();
+}
+
+/*
+ * This must be called after sme_kernel_enable(); we rely on the
+ * feature table being sorted to ensure this.
+ */
+void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+       /* Allow use of FA64 */
+       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
+                      SYS_SMCR_EL1);
+}
+
+#endif /* CONFIG_ARM64_SME */
+
 /*
  * Trapped SVE access
  *
@@ -1538,6 +1564,9 @@
        if (!cpu_have_named_feature(ASIMD))
                pr_notice("Advanced SIMD is not implemented\n");
 
+       if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
+               pr_notice("SME is implemented but not SVE\n");
+
        return sve_sysctl_init();
 }
 core_initcall(fpsimd_init);
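
Both cpu_enable hooks above use the same read-modify-write idiom on a
system register: read, clear and/or set bits, write back (with an ISB
where the new behaviour must be visible to subsequent instructions).
Stripped of the sysreg accessors, the pattern is just:

  #include <stdint.h>

  /* Illustrative only: clear some bits, set others, as
   * sme_kernel_enable() does for SMPRI_EL1 and CPACR_EL1, and
   * fa64_kernel_enable() does for SMCR_EL1. */
  static uint64_t rmw(uint64_t reg, uint64_t clear, uint64_t set)
  {
          return (reg & ~clear) | set;
  }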
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 3ed418f..e52b289 100644
@@ -43,6 +43,8 @@ KVM_PROTECTED_MODE
 MISMATCHED_CACHE_TYPE
 MTE
 MTE_ASYMM
+SME
+SME_FA64
 SPECTRE_V2
 SPECTRE_V3A
 SPECTRE_V4