Merge branch 'for-next/feat_sve_b16b16' into for-next/core
author Catalin Marinas <catalin.marinas@arm.com>
Thu, 26 Oct 2023 16:10:01 +0000 (17:10 +0100)
committer Catalin Marinas <catalin.marinas@arm.com>
Thu, 26 Oct 2023 16:10:01 +0000 (17:10 +0100)
* for-next/feat_sve_b16b16:
  : Add support for FEAT_SVE_B16B16 (BFloat16)
  kselftest/arm64: Verify HWCAP2_SVE_B16B16
  arm64/sve: Report FEAT_SVE_B16B16 to userspace

Documentation/arch/arm64/cpu-feature-registers.rst
Documentation/arch/arm64/elf_hwcaps.rst
arch/arm64/include/asm/hwcap.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/tools/sysreg
tools/testing/selftests/arm64/abi/hwcap.c

index de6d8a4..44f9bd7 100644 (file)
@@ -268,6 +268,8 @@ infrastructure:
      +------------------------------+---------+---------+
      | SHA3                         | [35-32] |    y    |
      +------------------------------+---------+---------+
+     | B16B16                       | [27-24] |    y    |
+     +------------------------------+---------+---------+
      | BF16                         | [23-20] |    y    |
      +------------------------------+---------+---------+
      | BitPerm                      | [19-16] |    y    |
index 76ff9d7..2ad0a36 100644 (file)
@@ -308,6 +308,9 @@ HWCAP2_MOPS
 HWCAP2_HBC
     Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
 
+HWCAP2_SVE_B16B16
+    Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
index 5212674..210a41f 100644 (file)
 #define KERNEL_HWCAP_SME_F16F16                __khwcap2_feature(SME_F16F16)
 #define KERNEL_HWCAP_MOPS              __khwcap2_feature(MOPS)
 #define KERNEL_HWCAP_HBC               __khwcap2_feature(HBC)
+#define KERNEL_HWCAP_SVE_B16B16                __khwcap2_feature(SVE_B16B16)
 
 /*
  * This yields a mask that user programs can use to figure out what
index 53026f4..6faf549 100644 (file)
 #define HWCAP2_SME_F16F16      (1UL << 42)
 #define HWCAP2_MOPS            (1UL << 43)
 #define HWCAP2_HBC             (1UL << 44)
+#define HWCAP2_SVE_B16B16      (1UL << 45)
 
 #endif /* _UAPI__ASM_HWCAP_H */
index 2ccb9df..aacc821 100644 (file)
@@ -278,6 +278,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
                       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
                       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_B16B16_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
                       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
@@ -2778,6 +2780,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
        HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
        HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
        HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+       HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
        HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
        HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
        HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
index 98fda85..ea2a319 100644 (file)
@@ -127,6 +127,7 @@ static const char *const hwcap_str[] = {
        [KERNEL_HWCAP_SME_F16F16]       = "smef16f16",
        [KERNEL_HWCAP_MOPS]             = "mops",
        [KERNEL_HWCAP_HBC]              = "hbc",
+       [KERNEL_HWCAP_SVE_B16B16]       = "sveb16b16",
 };
 
 #ifdef CONFIG_COMPAT
index 76ce150..bb69ab3 100644 (file)
@@ -1026,7 +1026,11 @@ UnsignedEnum     35:32   SHA3
        0b0000  NI
        0b0001  IMP
 EndEnum
-Res0   31:24
+Res0   31:28
+UnsignedEnum   27:24   B16B16
+       0b0000  NI
+       0b0001  IMP
+EndEnum
 UnsignedEnum   23:20   BF16
        0b0000  NI
        0b0001  IMP
index e3d2628..d8a1442 100644 (file)
@@ -226,6 +226,12 @@ static void sveaes_sigill(void)
        asm volatile(".inst 0x4522e400" : : : "z0");
 }
 
+static void sveb16b16_sigill(void)
+{
+       /* BFADD Z0.H, Z0.H, Z0.H -- SVE vector form, not the SME ZA-array form */
+       asm volatile(".inst 0x65000000" : : : "z0");
+}
+
 static void svepmull_sigill(void)
 {
        /* PMULLB Z0.Q, Z0.D, Z0.D */
@@ -493,6 +499,13 @@ static const struct hwcap_data {
                .cpuinfo = "sveaes",
                .sigill_fn = sveaes_sigill,
        },
+       {
+               .name = "SVE2 B16B16",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SVE_B16B16,
+               .cpuinfo = "sveb16b16",
+               .sigill_fn = sveb16b16_sigill,
+       },
        {
                .name = "SVE2 PMULL",
                .at_hwcap = AT_HWCAP2,